# Healthcare AI Agent
- #### A Q&A device for querying healthcare-related information

This project aims to create a device that can answer healthcare-related questions. The information is based on a database containing healthcare guidelines, medical literature, and other relevant sources.

### Knowledge Base

### Necessary Libraries and dependencies

#### Libraries and tools

In [3]:
import io
import os
import re
import time
import tempfile
from dotenv import load_dotenv
import requests
import numpy as np
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
import openai
import streamlit as st
from streamlit_chat import message
from langdetect import detect  # Language detection library
import pinecone
from PyPDF2 import PdfReader
from typing import List, Dict
import tabulate
import pinecone

# Langchain imports
from langchain.memory import ConversationBufferMemory
from langchain.prompts import ChatPromptTemplate
from langchain.chat_models import ChatOpenAI
from langchain.agents import initialize_agent, AgentType
from langchain.tools import Tool

from langchain.agents import initialize_agent, AgentType
from langchain_community.chat_models import ChatOpenAI
from langchain_experimental.agents import create_pandas_dataframe_agent
from langchain_core.tools.base import BaseTool
from pydantic import Field
from typing import Any

from langchain.agents import initialize_agent, AgentType
from langchain_community.chat_models import ChatOpenAI
from langchain_experimental.agents import create_pandas_dataframe_agent

import os
from langchain.embeddings.openai import OpenAIEmbeddings


from pinecone import Pinecone,ServerlessSpec

import time
from pinecone import Pinecone

import os
import re
import time
import numpy as np
from PyPDF2 import PdfReader
from langchain.embeddings.openai import OpenAIEmbeddings
import pinecone

from langchain.chat_models import ChatOpenAI
from langchain.chains.conversation.memory import ConversationBufferMemory
from langchain.chains import RetrievalQA

from langchain.vectorstores import Pinecone

from langchain.prompts.chat import MessagesPlaceholder, ChatPromptTemplate
from langchain.schema import SystemMessage, HumanMessage

from langchain.tools import Tool


#### API keys and client initiation

In [4]:
# Load environment variables
load_dotenv()
openai.api_key = os.getenv("OPENAI_API_KEY")
pinecone.api_key = os.getenv("PINECONE_API_KEY")

## Load the DataFrame

In [5]:
import pandas as pd
csv_file='dataset/healthcare_dataset.csv'
#creating the document with Pandas.
df = pd.read_csv(csv_file)

In [6]:
df.head()

Unnamed: 0,Name,Age,Gender,Blood Type,Medical Condition,Date of Admission,Doctor,Hospital,Insurance Provider,Billing Amount,Room Number,Admission Type,Discharge Date,Medication,Test Results
0,Bobby JacksOn,30,Male,B-,Cancer,2024-01-31,Matthew Smith,Sons and Miller,Blue Cross,18856.281306,328,Urgent,2024-02-02,Paracetamol,Normal
1,LesLie TErRy,62,Male,A+,Obesity,2019-08-20,Samantha Davies,Kim Inc,Medicare,33643.327287,265,Emergency,2019-08-26,Ibuprofen,Inconclusive
2,DaNnY sMitH,76,Female,A-,Obesity,2022-09-22,Tiffany Mitchell,Cook PLC,Aetna,27955.096079,205,Emergency,2022-10-07,Aspirin,Normal
3,andrEw waTtS,28,Female,O+,Diabetes,2020-11-18,Kevin Wells,"Hernandez Rogers and Vang,",Medicare,37909.78241,450,Elective,2020-12-18,Ibuprofen,Abnormal
4,adrIENNE bEll,43,Female,AB+,Cancer,2022-09-19,Kathleen Hanna,White-White,Aetna,14238.317814,458,Urgent,2022-10-09,Penicillin,Abnormal


In [7]:
df.isnull().sum()

Name                  0
Age                   0
Gender                0
Blood Type            0
Medical Condition     0
Date of Admission     0
Doctor                0
Hospital              0
Insurance Provider    0
Billing Amount        0
Room Number           0
Admission Type        0
Discharge Date        0
Medication            0
Test Results          0
dtype: int64

## Tools

In [8]:
class HealthcareDataFrameTool(BaseTool):
    name: str = Field(default="Healthcare Data Analysis Tool")
    description: str = Field(default="Tool for analyzing healthcare dataset using pandas operations")
    df: pd.DataFrame = Field(description="Healthcare dataset as a pandas DataFrame")
    
    def _run(self, query: str) -> Any:
        """Execute pandas operations on the healthcare dataset"""
        try:
            # Add common analysis methods
            if "average" in query.lower() or "mean" in query.lower():
                if "billing" in query.lower():
                    return f"Average billing amount: ${self.df['Billing Amount'].mean():.2f}"
                elif "age" in query.lower():
                    return f"Average age: {self.df['Age'].mean():.1f} years"
            
            elif "count" in query.lower():
                if "patient" in query.lower():
                    return f"Total number of patients: {len(self.df)}"
                elif "condition" in query.lower():
                    return self.df['Medical Condition'].value_counts().to_string()
            
            elif "search" in query.lower():
                search_term = query.lower().split("search")[-1].strip()
                results = self.df[self.df['Name'].str.lower().str.contains(search_term, na=False)]
                return f"Found {len(results)} matching patients:\n{results[['Name', 'Age', 'Medical Condition']].to_string()}"
            
            return "Could not process the query. Please try rephrasing your question."
            
        except Exception as e:
            return f"Error analyzing data: {str(e)}"

In [9]:

# Initialize the LLM
llm = ChatOpenAI(
    model="gpt-4o",
    temperature=0.2,  # Lower temperature for more focused responses
    openai_api_key=os.getenv("OPENAI_API_KEY")
)

# Create conversation memory
from langchain.memory import ConversationBufferMemory
conversational_memory = ConversationBufferMemory(
    memory_key="chat_history",
    return_messages=True
)

# Create a healthcare data tool instance
healthcare_tool = HealthcareDataFrameTool(df=df)

# Initialize agent with tools
agent_test = create_pandas_dataframe_agent(
    llm=llm,
    df = df,
    tools=[healthcare_tool],
    verbose=True,
    agent_type=AgentType.OPENAI_FUNCTIONS,
    handle_parsing_errors=True,
    max_iterations=3,
    allow_dangerous_code=True,  # Opt-in to allow dangerous code execution
)

# Example usage function
def query_healthcare_data(question: str) -> str:
    try:
        response = agent_test.run(question)
        return response
    except Exception as e:
        return f"Error processing question: {str(e)}"

# Test the improved agent
print(query_healthcare_data("What is the average billing amount?"))
print(query_healthcare_data("How many patients are there with cancer?"))

  llm = ChatOpenAI(
  conversational_memory = ConversationBufferMemory(


0          Bobby JacksOn   30    Male         B-            Cancer   
1           LesLie TErRy   62    Male         A+           Obesity   
2            DaNnY sMitH   76  Female         A-           Obesity   
3           andrEw waTtS   28  Female         O+          Diabetes   
4          adrIENNE bEll   43  Female        AB+            Cancer   
...                  ...  ...     ...        ...               ...   
55495  eLIZABeTH jaCkSOn   42  Female         O+            Asthma   
55496         KYle pEREz   61  Female        AB-           Obesity   
55497       HEATher WaNG   38  Female         B+      Hypertension   
55498     JENniFER JOneS   43    Male         O-         Arthritis   
55499       jAMES GARCiA   53  Female         O+         Arthritis   

      Date of Admission            Doctor                      Hospital  \
0            2024-01-31     Matthew Smith               Sons and Miller   
1            2019-08-20   Samantha Davies                       Kim Inc   
2   



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `python_repl_ast` with `{'query': "df['Billing Amount'].mean()"}`


[0m[36;1m[1;3m25539.316097211795[0m[32;1m[1;3mThe average billing amount is approximately 25,539.32.[0m

[1m> Finished chain.[0m
The average billing amount is approximately 25,539.32.


[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `python_repl_ast` with `{'query': "cancer_patients_count = df[df['Medical Condition'] == 'Cancer'].shape[0]\ncancer_patients_count"}`


[0m[36;1m[1;3m9227[0m[32;1m[1;3mThere are 9,227 patients with cancer in the dataset.[0m

[1m> Finished chain.[0m
There are 9,227 patients with cancer in the dataset.


In [10]:
print("Available columns:", df.columns.tolist())
print("\nSample data:")
print(df.head())

Available columns: ['Name', 'Age', 'Gender', 'Blood Type', 'Medical Condition', 'Date of Admission', 'Doctor', 'Hospital', 'Insurance Provider', 'Billing Amount', 'Room Number', 'Admission Type', 'Discharge Date', 'Medication', 'Test Results']

Sample data:
            Name  Age  Gender Blood Type Medical Condition Date of Admission  \
0  Bobby JacksOn   30    Male         B-            Cancer        2024-01-31   
1   LesLie TErRy   62    Male         A+           Obesity        2019-08-20   
2    DaNnY sMitH   76  Female         A-           Obesity        2022-09-22   
3   andrEw waTtS   28  Female         O+          Diabetes        2020-11-18   
4  adrIENNE bEll   43  Female        AB+            Cancer        2022-09-19   

             Doctor                    Hospital Insurance Provider  \
0     Matthew Smith             Sons and Miller         Blue Cross   
1   Samantha Davies                     Kim Inc           Medicare   
2  Tiffany Mitchell                    Cook PLC    

In [11]:
try:
    response = agent_test.invoke({
        "input": "how many patients name john?"
    })
    print(response["output"])
except Exception as e:
    print(f"Error executing query: {str(e)}")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `python_repl_ast` with `{'query': "john_count = df['Name'].str.lower().str.contains('john').sum()\njohn_count"}`


[0m[36;1m[1;3m1823[0m[32;1m[1;3mThere are 1,823 patients with the name "John" in the dataframe.[0m

[1m> Finished chain.[0m
There are 1,823 patients with the name "John" in the dataframe.


In [12]:
response = agent_test.invoke({"input": "Can you tell a prediction based on the condition what is the average  billing amount?"})
print(response["output"])



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `python_repl_ast` with `{'query': "df.groupby('Medical Condition')['Billing Amount'].mean()"}`
responded: To predict the average billing amount based on the medical condition, we can group the data by the "Medical Condition" column and then calculate the average of the "Billing Amount" for each group. Let's do that.

[0m[36;1m[1;3mMedical Condition
Arthritis       25497.327056
Asthma          25635.249359
Cancer          25161.792707
Diabetes        25638.405577
Hypertension    25497.095761
Obesity         25805.971259
Name: Billing Amount, dtype: float64[0m[32;1m[1;3mThe average billing amount based on the medical condition is as follows:

- Arthritis: $25,497.33
- Asthma: $25,635.25
- Cancer: $25,161.79
- Diabetes: $25,638.41
- Hypertension: $25,497.10
- Obesity: $25,805.97[0m

[1m> Finished chain.[0m
The average billing amount based on the medical condition is as follows:

- Arthritis: $25,497.33
- Asth

## Initialize the Embedding Model and Vector DB 

In [13]:
model_name = 'text-embedding-ada-002'

embed = OpenAIEmbeddings(
    model=model_name,
    openai_api_key=os.getenv("OPENAI_API_KEY")
)

  embed = OpenAIEmbeddings(


In [15]:
from pinecone import Pinecone,ServerlessSpec

pinecone = Pinecone(api_key=os.getenv("PINECONE_API_KEY"))

spec=ServerlessSpec(
    cloud="aws",region='us-east-1'
)

In [16]:
# Create a Pinecone client
pc = Pinecone(api_key=os.getenv("PINECONE_API_KEY"))

# Create a Pinecone index
index_name = "healthcare-qa-pdfs"
existing_indexes = pc.list_indexes()

if index_name not in existing_indexes:
    pc.create_index(
        name=index_name,
        dimension=1536,
        metric="cosine",
        spec=spec
    )
    # Wait for the index to be created
    while not pc.describe_index(index_name).status['ready']:
        time.sleep(1)
        
# connect to index
index = pc.Index(index_name)
time.sleep(1)
# view index stats
index.describe_index_stats()

PineconeApiException: (409)
Reason: Conflict
HTTP response headers: HTTPHeaderDict({'content-type': 'text/plain; charset=utf-8', 'access-control-allow-origin': '*', 'vary': 'origin,access-control-request-method,access-control-request-headers', 'access-control-expose-headers': '*', 'x-pinecone-api-version': '2024-07', 'X-Cloud-Trace-Context': 'd45cacb8e8a69b9f0cddce824527b164', 'Date': 'Sat, 15 Feb 2025 13:46:46 GMT', 'Server': 'Google Frontend', 'Content-Length': '85', 'Via': '1.1 google', 'Alt-Svc': 'h3=":443"; ma=2592000,h3-29=":443"; ma=2592000'})
HTTP response body: {"error":{"code":"ALREADY_EXISTS","message":"Resource  already exists"},"status":409}


In [33]:
pc.delete_index(index_name)

## Testing with chunking 

In [17]:
def preprocess_text(text):
    # Replace consecutive spaces, newlines, and tabs
    text = re.sub(r'\s+', ' ', text)
    return text

def chunk_text(text, chunk_size=512, overlap=50):
    words = text.split()
    chunks = []
    for i in range(0, len(words), chunk_size - overlap):
        chunk = ' '.join(words[i:i + chunk_size])
        chunks.append(chunk)
    return chunks

def process_pdf(file_path):
    reader = PdfReader(file_path)
    texts = []
    for page in reader.pages:
        text = page.extract_text()
        if text:
            preprocessed_text = preprocess_text(text)
            chunks = chunk_text(preprocessed_text)
            texts.extend(chunks)
    return texts

def create_embeddings(texts):
    embeddings = embed.embed_documents(texts)
    return embeddings


In [36]:
# Process PDFs and create embeddings
directory_path = "healthcare_pdfs"  # Path to the directory containing PDF files
all_texts = []
all_embeddings = []
all_ids = []

for file_name in os.listdir(directory_path):
    if file_name.endswith('.pdf'):
        file_path = os.path.join(directory_path, file_name)
        print(f"Processing file: {file_path}")
        texts = process_pdf(file_path)
        if texts:
            print(f"Extracted {len(texts)} texts from {file_path}")
            embeddings = create_embeddings(texts)
            print(f"Created {len(embeddings)} embeddings for {file_path}")
            all_texts.extend(texts)
            all_embeddings.extend(embeddings)
            all_ids.extend([f"{file_path}_{i}" for i in range(len(embeddings))])
        else:
            print(f"No text extracted from {file_path}")


Processing file: healthcare_pdfs/9789241513906-eng.pdf
Extracted 134 texts from healthcare_pdfs/9789241513906-eng.pdf
Created 134 embeddings for healthcare_pdfs/9789241513906-eng.pdf
Processing file: healthcare_pdfs/whoheatlhStat.pdf
Extracted 134 texts from healthcare_pdfs/whoheatlhStat.pdf
Created 134 embeddings for healthcare_pdfs/whoheatlhStat.pdf


In [37]:
#check chunked text
all_texts[:2]

print(f"Total texts: {len(all_texts)}")

#check the scources of the text
all_ids[:2]


Total texts: 268


['healthcare_pdfs/9789241513906-eng.pdf_0',
 'healthcare_pdfs/9789241513906-eng.pdf_1']

In [43]:
#upsert the embeddings
vectors = [{'id':id, 'values': np.array(embedding), 'metadata':{'text':text}} for id, embedding, text in zip(all_ids, all_embeddings, all_texts)]   
index.upsert(vectors=vectors)

{'upserted_count': 268}

In [18]:
# Describe index stats
index_stats = index.describe_index_stats()
print(index_stats)

NameError: name 'index' is not defined

In [45]:
query_embedding = create_embeddings(['what is the recommend treatment for diabetes?'])

In [46]:
index.query(vector=query_embedding, top_k=5, include_metadata=True)

{'matches': [{'id': 'healthcare_pdfs/9789241513906-eng.pdf_122',
              'metadata': {'text': 'such as the attractiveness of the '
                                   'environment or amicability of staff; '
                                   'however, it has been shown that patients '
                                   'are able to differentiate superficial '
                                   'comforts from more meaningful engagement. '
                                   '22. Patient self-management tools are '
                                   'technologies and techniques used by '
                                   'patients and families to manage their '
                                   'health issues outside formal medical '
                                   'institutions. They are increasingly '
                                   'studied as quality improvement tools in '
                                   'the context of growing empowerment of '
                        

In [47]:
results = index.query(vector=query_embedding, top_k=5)

for match in results['matches']:
    vector_id = match['id']
    # Find the index of the vector_id in all_ids
    try:
        index_in_all_ids = all_ids.index(vector_id)
        # Retrieve the corresponding text
        text = all_texts[index_in_all_ids]
        print(f"ID: {vector_id}, Score: {match['score']}")
        print(f"Text: {text}\n")  # Print the text for inspection

    except ValueError:
        print(f"ID {vector_id} not found in all_ids")

ID: healthcare_pdfs/9789241513906-eng.pdf_122, Score: 0.792224824
Text: such as the attractiveness of the environment or amicability of staff; however, it has been shown that patients are able to differentiate superficial comforts from more meaningful engagement. 22. Patient self-management tools are technologies and techniques used by patients and families to manage their health issues outside formal medical institutions. They are increasingly studied as quality improvement tools in the context of growing empowerment of patients worldwide. Given the increasing prevalence of chronic disease globally, diabetes self-management serves as a good example. Diabetic patients involved with self-management education programmes demonstrated significant reductions in glycosylated haemoglobin levels; in Uganda, patient outcomes included decreases in HbA1c percentage and diastolic blood pressure, and in Honduras, reports of self-care demonstrated improvements in over 50% of patients in blood sugar 

## Create a  VectorStore 

In [48]:
index = pc.Index(index_name)

In [49]:
index.describe_index_stats()

{'dimension': 1536,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 268}},
 'total_vector_count': 268}

In [51]:
# Initialize Pinecone Vector Store
from langchain.vectorstores import Pinecone

text_field = "text"

vectorstore = Pinecone(index, embed.embed_query, text_field)





In [52]:
# 5. Perform Similarity Search
query = "what is the recommend treatment for diabetes?"

results = vectorstore.similarity_search(query, k=5)

results  # Print raw results


[Document(metadata={}, page_content='such as the attractiveness of the environment or amicability of staff; however, it has been shown that patients are able to differentiate superficial comforts from more meaningful engagement. 22. Patient self-management tools are technologies and techniques used by patients and families to manage their health issues outside formal medical institutions. They are increasingly studied as quality improvement tools in the context of growing empowerment of patients worldwide. Given the increasing prevalence of chronic disease globally, diabetes self-management serves as a good example. Diabetic patients involved with self-management education programmes demonstrated significant reductions in glycosylated haemoglobin levels; in Uganda, patient outcomes included decreases in HbA1c percentage and diastolic blood pressure, and in Honduras, reports of self-care demonstrated improvements in over 50% of patients in blood sugar levels, diet and medication adheren

## Create the chain

In [53]:
#chat completion llm
llm = ChatOpenAI(
    model="gpt-4o",
    temperature=0.0,
    openai_api_key=os.getenv("OPENAI_API_KEY")
)

# Create a memory for the conversation
conversational_memory = ConversationBufferMemory(
    memory_key="chat_history",
    return_messages=True
)

# Create a retrieval QA chain

qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=vectorstore.as_retriever(),
)

In [54]:

# Define the base prompt
SYSTEM_PROMPT = """You are an advanced AI Healthcare Assistant working in a hospital setting. Your role is to:

1. Provide accurate medical information based on verified sources
2. Help interpret medical terminology in simple terms
3. Assist with understanding healthcare procedures and protocols
4. Direct users to appropriate medical resources
5. Maintain patient privacy and medical ethics

Important guidelines:
- Always clarify you're an AI assistant, not a doctor
- Recommend consulting healthcare professionals for specific medical advice
- Base responses on scientific evidence and reliable medical sources
- Keep responses clear, professional, and empathetic
- Never make definitive medical diagnoses

Knowledge base: You have access to WHO guidelines, CDC resources, Mayo Clinic data, and general healthcare information.
"""

# Create the prompt template
prompt = ChatPromptTemplate.from_messages([
    ("system", SYSTEM_PROMPT),
    MessagesPlaceholder(variable_name="chat_history"),
    ("human", "{input}"),
    MessagesPlaceholder(variable_name="agent_scratchpad")
])


In [None]:
qa.run(query)

  qa.run(query)


'The context provided does not specify a recommended treatment for diabetes. However, it does mention that diabetes self-management education programs can lead to significant health improvements, such as reductions in glycosylated hemoglobin levels, better blood sugar control, and improved diet and medication adherence. It also highlights the potential cost-effectiveness of mHealth interventions for self-management. For specific treatment recommendations, it is best to consult a healthcare professional or refer to clinical guidelines from reputable health organizations.'

## Tools

In [55]:
# Create multiple tools
tools = [
    Tool(
        name="Healthcare Data Analysis",
        description="Tool for analyzing healthcare dataset using pandas operations",
        func=query_healthcare_data
    ),
    Tool(
        name="Medical Literature Search",
        description="Tool for searching medical literature and guidelines",
        func=qa.run  
    )
]


### Agent Configuration and Memory Setup:

Define Agent with LangChain

#To do List
Play arround with the agent with diferent queries

implement a promtp and integrate witth the agent




In [56]:
from langchain.agents import initialize_agent

agent = initialize_agent(
    agent='chat-conversational-react-description',
    tools=tools,
    prompt=prompt,
    llm=llm,
    verbose=True,
    max_iterations=3,
    early_stopping_method='generate',
    memory=conversational_memory
)

  agent = initialize_agent(


In [57]:
agent(query)

  agent(query)




[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m```json
{
    "action": "Medical Literature Search",
    "action_input": "recommended treatment for diabetes"
}
```[0m
Observation: [33;1m[1;3mThe recommended treatment for diabetes typically includes a combination of lifestyle changes, self-management education, and medication. Key components often involve:

1. **Lifestyle Changes**: 
   - Diet: A balanced diet that controls carbohydrate intake and includes plenty of fruits, vegetables, and whole grains.
   - Exercise: Regular physical activity to help manage weight and improve insulin sensitivity.
   - Weight Management: Achieving and maintaining a healthy weight.

2. **Self-Management Education**: 
   - Diabetes self-management education programs can help patients understand how to manage their condition effectively, including monitoring blood sugar levels and recognizing symptoms of high or low blood sugar.

3. **Medication**: 
   - Depending on the type of diabetes an

{'input': 'what is the recommend treatment for diabetes?',
 'chat_history': [HumanMessage(content='what is the recommend treatment for diabetes?', additional_kwargs={}, response_metadata={}),
  AIMessage(content="The recommended treatment for diabetes typically includes a combination of lifestyle changes, self-management education, and medication. Key components often involve:\n\n1. **Lifestyle Changes**: \n   - Diet: A balanced diet that controls carbohydrate intake and includes plenty of fruits, vegetables, and whole grains.\n   - Exercise: Regular physical activity to help manage weight and improve insulin sensitivity.\n   - Weight Management: Achieving and maintaining a healthy weight.\n\n2. **Self-Management Education**: \n   - Diabetes self-management education programs can help patients understand how to manage their condition effectively, including monitoring blood sugar levels and recognizing symptoms of high or low blood sugar.\n\n3. **Medication**: \n   - Depending on the ty

In [74]:
agent("can you tell how many admissions by month?")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m```json
{
    "action": "Healthcare Data Analysis",
    "action_input": "Please provide the dataset containing admission dates so that I can analyze the number of admissions by month."
}
```[0m

[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `python_repl_ast` with `{'query': "df[['Date of Admission']].copy()"}`


[0m[36;1m[1;3m      Date of Admission
0            2024-01-31
1            2019-08-20
2            2022-09-22
3            2020-11-18
4            2022-09-19
...                 ...
55495        2020-08-16
55496        2020-01-23
55497        2020-07-13
55498        2019-05-25
55499        2024-04-02

[55500 rows x 1 columns][0m[32;1m[1;3mThe dataset containing admission dates has 55,500 entries. Here is a preview of the data:

```
      Date of Admission
0            2024-01-31
1            2019-08-20
2            2022-09-22
3            2020-11-18
4            2022-09-19
...
55495     

{'input': 'can you tell how many admissions by month?',
 'chat_history': [HumanMessage(content='what is the recommend treatment for diabetes?', additional_kwargs={}, response_metadata={}),
  AIMessage(content='The recommended treatment for diabetes typically includes a combination of lifestyle changes, self-management education, and medication. Key components include:\n\n1. **Lifestyle Changes**: A balanced diet low in sugar and refined carbohydrates, high in fiber, fruits, and vegetables, along with regular physical activity to help control blood sugar levels and maintain a healthy weight.\n\n2. **Self-Management Education**: Programs that help patients understand how to manage their condition effectively, including monitoring blood sugar levels and recognizing symptoms of high or low blood sugar.\n\n3. **Medication**: Depending on the type of diabetes and individual patient needs, medications may include insulin or oral hypoglycemic agents to help control blood sugar levels.\n\n4. **

In [72]:
question1 = "what is the recommend treatment for diabetes?"
question2 = "what is the average billing amount?"
question3 = "How many patients are there with cancer?"
question4 = "Can you tell how many admissions by month?"
question5 = "Can you tell a prediction based on the condition what is the average  billing amount?"
questions = [question1, question2, question3, question4, question5]

for question in questions:
    response = agent(question)
    print(f"Question: {question}\nResponse: {response}\n")





[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m```json
{
    "action": "Medical Literature Search",
    "action_input": "recommended treatment for diabetes"
}
```[0m
Observation: [33;1m[1;3mThe recommended treatment for diabetes typically includes a combination of lifestyle changes, self-management education, and medication. Key components often include:

1. **Lifestyle Changes**: 
   - **Diet**: A balanced diet that is low in sugar and refined carbohydrates, and high in fiber, fruits, and vegetables.
   - **Exercise**: Regular physical activity to help control blood sugar levels and maintain a healthy weight.

2. **Self-Management Education**: 
   - Education programs that teach patients how to monitor their blood sugar levels, manage their diet, and adhere to their medication regimen.

3. **Medication**: 
   - Depending on the type of diabetes and individual needs, medications may include insulin or oral hypoglycemic agents to help control blood sugar levels.

4. **R

### Multimodal Interaction
Text/Voice input and output

In [58]:
import os
from openai import OpenAI
from dotenv import load_dotenv
from IPython.display import Audio

# Load environment variables
load_dotenv()

def text_to_speech(text: str, voice: str = "alloy") -> bytes:
    """
    Convert text to speech using OpenAI's TTS API
    
    Args:
        text (str): The text to convert to speech
        voice (str): The voice to use (alloy, echo, fable, onyx, nova, shimmer)
    
    Returns:
        bytes: Audio content in MP3 format
    """
    client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
    
    response = client.audio.speech.create(
        model="tts-1",
        voice=voice,
        input=text
    )
    
    return response.content

# Convert text to speech
text = "The recommended treatment for diabetes includes a combination of lifestyle changes, medication, and regular monitoring of blood sugar levels."
audio = text_to_speech(text)

# Save the audio file
audio_file = "output.mp3"
with open(audio_file, "wb") as f:
    f.write(audio)

# Play the audio file (if in Jupyter notebook)
Audio(audio_file, autoplay=True)

#export to python file and then import to streamlit 