Document Prep

In [15]:
import pandas as pd
from langchain.docstore.document import Document

In [16]:
df = pd.read_csv("Symptom2Disease.csv")

docs = [Document(page_content=row["text"], metadata={"disease": row["label"]})
        for _, row in df.iterrows()]

Embeddings Creation and Storing in Vector DB

In [17]:
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS

# Force PyTorch backend
hf_embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/multi-qa-MiniLM-L6-cos-v1",
    model_kwargs={"device": "cpu"} 
)

# Create FAISS vectorstore
vectorstore = FAISS.from_documents(docs, hf_embeddings)

HuggingFace Transformer Model

In [18]:
from transformers import pipeline

qa_model = pipeline("question-answering", model="deepset/roberta-base-squad2")

Device set to use mps:0


In [19]:
query = "I have itchy, scaly patches on my elbows and scalp. What disease could this be?"

# retriever
retrieved_docs = vectorstore.similarity_search(query, k=3)

# Combine retrieved docs into context
context = " ".join([d.page_content for d in retrieved_docs])

result = qa_model(question=query, context=context)

print("Answer:", result["answer"])
print("\nRetrieved Docs:", [d.metadata for d in retrieved_docs])

Answer: skin rash

Retrieved Docs: [{'disease': 'Psoriasis'}, {'disease': 'Chicken pox'}, {'disease': 'Fungal infection'}]


In [20]:
query = "What could this be the symptoms of diabetes? Give me a list"

# retriever
retrieved_docs = vectorstore.similarity_search(query, k=3)

# Combine retrieved docs into context
context = " ".join([d.page_content for d in retrieved_docs])

result = qa_model(question=query, context=context)

print("Answer:", result["answer"])
print("\nRetrieved Docs:", [d.metadata for d in retrieved_docs])

Answer: excessive hunger, a stiff neck, depression, irritability, and visual disturbance

Retrieved Docs: [{'disease': 'diabetes'}, {'disease': 'Jaundice'}, {'disease': 'Migraine'}]


Open AI LLM

In [22]:
import os
os.environ["OPENAI_API_KEY"] = openai_api_2

In [23]:
from openai import OpenAI

client = OpenAI(api_key=os.environ["OPENAI_API_KEY"])

In [28]:
from langchain_openai import ChatOpenAI
from langchain.chains import RetrievalQA


retriever = vectorstore.as_retriever(search_type="similarity", search_kwargs={"k":3})
llm = ChatOpenAI(model="gpt-3.5-turbo")


qa = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=retriever,
    return_source_documents=True
)

In [29]:
query = "What are the common symptoms of malaria?"
result = qa({"query": query})

print("Answer:", result["result"])
print("\nSource docs:")
for doc in result["source_documents"]:
    print("-", doc.metadata, "\n", doc.page_content[:200], "...")

Answer: The common symptoms of malaria include high fever, chills, sweating, headache, muscle aches, nausea, vomiting, and diarrhea. Severe cases of malaria can also lead to breathing difficulties, confusion, and organ failure. If you suspect you have malaria or any serious illness, it is important to seek medical attention promptly.

Source docs:
- {'disease': 'Malaria'} 
 I have a high fever, severe itching, chills, and vomiting. I also have a headache and have been perspiring a lot. Additionally hurting me have been nausea and muscle soreness. ...
- {'disease': 'Malaria'} 
 I have a high fever, severe itching, chills, and vomiting. I also have a headache and have been perspiring a lot. Additionally hurting me have been nausea and muscle soreness. ...
- {'disease': 'Bronchial Asthma'} 
 Recently, I have had a number of concerning symptoms, including a dry cough, impaired breathing, a high fever, and a lot of mucus. I also feel very weak and tired. ...


In [30]:
from langchain.chains import RetrievalQA
from langchain.chat_models import ChatOpenAI
from langchain.prompts import PromptTemplate

llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0)

prompt_template = """You are a medical assistant.
Use the context below to answer the question.
If asked about symptoms, always give them as bullet points.

Context:
{context}

Question:
{question}

Answer:
"""

prompt = PromptTemplate(
    template=prompt_template,
    input_variables=["context", "question"]
)

qa = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=vectorstore.as_retriever(),
    chain_type="stuff",
    chain_type_kwargs={"prompt": prompt}
)

query = "What are the symptoms of malaria?"
answer = qa.run(query)
print(answer)


- High fever
- Chills
- Severe itching
- Vomiting
- Headache
- Perspiring a lot
- Nausea
- Muscle soreness
