In [2]:
import pandas as pd
from langchain import PromptTemplate
import bs4
from langchain import hub
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.document_loaders.csv_loader import CSVLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from langchain_pinecone import PineconeVectorStore
from langchain_google_genai import ChatGoogleGenerativeAI
import os
import google.generativeai as genai
from langchain_community.document_loaders import DirectoryLoader

In [4]:
from dotenv import load_dotenv
import os

load_dotenv()

True

In [5]:
GOOGLE_API_KEY = os.getenv('GOOGLE_API_KEY')
PINECONE_API_KEY = os.getenv('PINECONE_API_KEY')
INDEX_NAME = os.getenv('INDEX_NAME')

In [7]:
def load_csv(data_directory):
    # Use DirectoryLoader to load all CSV files from the specified directory
    loader = DirectoryLoader(
        data_directory,
        glob="*.csv",        # Pattern to match CSV files
        loader_cls=CSVLoader # Use CSVLoader to load the files
    )
    
    # Load the documents
    documents = loader.load()

    return documents

In [74]:
extracted_data= load_csv("data/")

In [76]:
print(extracted_data[0].page_content)

: 0
label: Psoriasis
text: I have been experiencing a skin rash on my arms, legs, and torso for the past few weeks. It is red, itchy, and covered in dry, scaly patches.


In [77]:
len(extracted_data)

2

In [8]:
def text_split(extracted_data):
    # Initialize the text splitter with desired chunk size and overlap
      text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200, add_start_index=True)
    
    
    # Split the extracted data into text chunks
      all_splits = text_splitter.split_documents(extracted_data)

      return all_splits

In [79]:
text_splits = text_split(extracted_data)

In [80]:
len(text_splits)

2

In [81]:
text_splits[0].page_content

': 0\nlabel: Psoriasis\ntext: I have been experiencing a skin rash on my arms, legs, and torso for the past few weeks. It is red, itchy, and covered in dry, scaly patches.'

In [98]:
type(text_splits)

list

In [7]:
def download_google_embeddings():
    GOOGLE_API_KEY = os.getenv('GOOGLE_API_KEY')
    # Initialize the Google Generative AI Embeddings model
    embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
                                        
    return embeddings

In [8]:
embeddings= download_google_embeddings()

In [9]:
embeddings

GoogleGenerativeAIEmbeddings(client=<google.ai.generativelanguage_v1beta.services.generative_service.client.GenerativeServiceClient object at 0x00000289E375D430>, model='models/embedding-001', task_type=None, google_api_key=None, credentials=None, client_options=None, transport=None, request_options=None)

In [10]:
vector = embeddings.embed_query("hello, world!")
vector[:5]

[0.05168594419956207,
 -0.030764883384108543,
 -0.03062233328819275,
 -0.02802734449505806,
 0.01813092641532421]

In [1]:
# from langchain.vectorstores import Pinecone
from langchain_community.vectorstores import Pinecone
import pinecone

In [None]:
#Initializing the Pinecone
vectorstore = PineconeVectorStore(index_name=INDEX_NAME, embedding=embeddings)
#Creating Embeddings for Each of The Text Chunks & storing
vectorstore.add_documents(text_splits)
# docsearch=Pinecone.from_texts([t.page_content for t in text_splits], embeddings, index_name=index_name)
# Connect to Pinecone index and insert the chunked docs as contents  docsearch==vector_index     Initialize vectorstore while adding records:
# docsearch = PineconeVectorStore.from_documents(all_splits, embeddings, index_name=index_name)

In [99]:
type(vectorstore)

langchain_pinecone.vectorstores.PineconeVectorStore

In [97]:
vectorstore.add_documents(text_splits)

['fd413269-7928-43af-8bf1-cbfbfd0040dd',
 'fee3110e-14bf-4257-aeb9-c16f79ca66b3']

In [11]:
#If we already have an index we can load it like this
docsearch=PineconeVectorStore.from_existing_index(INDEX_NAME, embeddings)

#initializa and push new embeddings
# docsearch = PineconeVectorStore.from_documents(all_splits, embeddings, index_name=index_name)   
query = "What are Allergies"

docs=docsearch.similarity_search(query, k=3)

print("Result", docs)

Result [Document(page_content="question: What is (are) Allergy ?\nanswer: An allergy is a reaction by your immune system to something that does not bother most other people. People who have allergies often are sensitive to more than one thing. Substances that often cause reactions are       - Pollen    - Dust mites    - Mold spores    - Pet dander    - Food    - Insect stings    - Medicines       Normally, your immune system fights germs. It is your body's defense system. In most allergic reactions, however, it is responding to a false alarm. Genes and the environment probably both play a role.    Allergies can cause a variety of symptoms such as a runny nose, sneezing, itching, rashes, swelling, or asthma. Allergies can range from minor to severe. Anaphylaxis is a severe reaction that can be life-threatening. Doctors use skin and blood tests to diagnose allergies. Treatments include medicines, allergy shots, and avoiding the substances that cause the reactions.    NIH: National Instit

In [14]:
# template = """You are an assistant for question-answering tasks. Do not mention about context provided in answer but use it to generate answer.
# Your task is to diagnose common medical conditions based on user symptoms input. Answer considering the most common medical condition possible.
# You will provide suggestions and recommendations for further action. Use the following pieces of retrieved context to answer the question.
# Answer in detail with possible medical condition,suggestions and recommendations for further action.

# Question: {question}

# Context: {context}

# Answer:"""
# QA_CHAIN_PROMPT = PromptTemplate.from_template(template)# Run chain
# #

In [12]:
# Define the prompt template
prompt_template = """You are an assistant for question-answering tasks.
Your task is to diagnose common medical conditions based on user symptoms input, if provided. Answer considering the most common medical condition possible.
You will provide suggestions and recommendations for further action. Use the following pieces of retrieved context to answer the question.
Answer in detail with possible medical condition,suggestions and recommendations for further action.
Do not mention about context provided in answer but use it to generate answer.
If user greets, then only greet back with your proper introduction. 

Question: {question}

Context: {context}

Answer:"""
PROMPT = PromptTemplate(template=prompt_template, input_variables=["context", "question"])
chain_type_kwargs = {"prompt": PROMPT}


In [13]:

# Initialize the language model
model = ChatGoogleGenerativeAI(
    model="gemini-1.5-flash",
    # google_api_key=app.config['GOOGLE_API_KEY'],
    temperature=0.8,
    max_output_tokens=512
)


In [14]:
from langchain.chains import RetrievalQA
# Create QA chain
qa = RetrievalQA.from_chain_type(
    llm=model,
    chain_type="stuff",
    retriever = docsearch.as_retriever(search_type="similarity", search_kwargs={"k": 5}),
    return_source_documents=True,
    chain_type_kwargs=chain_type_kwargs
)

In [15]:
%pip install textwrap3


Note: you may need to restart the kernel to use updated packages.


In [16]:
from IPython.display import display
from IPython.display import Markdown
import textwrap

def to_markdown(text):
  text = text.replace('•', '  *')
  return Markdown(textwrap.indent(text, '> ', predicate=lambda _: True))

In [22]:

user_input=input(f"Input Prompt:")
result=qa.invoke({"query": user_input})
to_markdown("Response : "+ result["result"])

> Response : It sounds like you might be experiencing Chronic Fatigue Syndrome. This condition is characterized by persistent and debilitating fatigue that doesn't improve with rest.  
> 
> Here are some suggestions and recommendations:
> 
> * **See a doctor:** It's crucial to consult a healthcare professional to rule out any underlying medical conditions that could be causing your fatigue. They can perform a physical examination, order blood tests, and potentially recommend further investigations.
> * **Gradual exercise:** While it might seem counterintuitive, gentle exercise can actually help improve energy levels in people with Chronic Fatigue Syndrome. Start with short, low-intensity activities and gradually increase the duration and intensity as you feel better.
> * **Sleep hygiene:** Make sure you're getting enough quality sleep. Establish a regular sleep schedule, create a relaxing bedtime routine, and avoid caffeine and alcohol before bed.
> * **Stress management:** Chronic stress can contribute to fatigue. Explore stress-reducing techniques such as meditation, yoga, or deep breathing exercises.
> * **Diet and nutrition:** A healthy diet rich in fruits, vegetables, and whole grains can provide your body with the nutrients it needs to function optimally. Stay hydrated by drinking plenty of water throughout the day.
> 
> Remember, it's important to seek professional medical advice for a proper diagnosis and personalized treatment plan. 


In [23]:
to_markdown("Response : "+ result["result"])

> Response : It sounds like you might be experiencing Chronic Fatigue Syndrome. This condition is characterized by persistent and debilitating fatigue that doesn't improve with rest.  
> 
> Here are some suggestions and recommendations:
> 
> * **See a doctor:** It's crucial to consult a healthcare professional to rule out any underlying medical conditions that could be causing your fatigue. They can perform a physical examination, order blood tests, and potentially recommend further investigations.
> * **Gradual exercise:** While it might seem counterintuitive, gentle exercise can actually help improve energy levels in people with Chronic Fatigue Syndrome. Start with short, low-intensity activities and gradually increase the duration and intensity as you feel better.
> * **Sleep hygiene:** Make sure you're getting enough quality sleep. Establish a regular sleep schedule, create a relaxing bedtime routine, and avoid caffeine and alcohol before bed.
> * **Stress management:** Chronic stress can contribute to fatigue. Explore stress-reducing techniques such as meditation, yoga, or deep breathing exercises.
> * **Diet and nutrition:** A healthy diet rich in fruits, vegetables, and whole grains can provide your body with the nutrients it needs to function optimally. Stay hydrated by drinking plenty of water throughout the day.
> 
> Remember, it's important to seek professional medical advice for a proper diagnosis and personalized treatment plan. 


In [24]:
print("Response : ", result["source_documents"][0].page_content)

Response :  : 151
label: Chicken pox
text: I'm feeling fatigued and have no energy. I can barely keep my eyes open during the day, and I've been feeling lethargic and unable to motivate myself.


In [25]:
result

{'query': 'i feel tired always',
 'result': "It sounds like you might be experiencing Chronic Fatigue Syndrome. This condition is characterized by persistent and debilitating fatigue that doesn't improve with rest.  \n\nHere are some suggestions and recommendations:\n\n* **See a doctor:** It's crucial to consult a healthcare professional to rule out any underlying medical conditions that could be causing your fatigue. They can perform a physical examination, order blood tests, and potentially recommend further investigations.\n* **Gradual exercise:** While it might seem counterintuitive, gentle exercise can actually help improve energy levels in people with Chronic Fatigue Syndrome. Start with short, low-intensity activities and gradually increase the duration and intensity as you feel better.\n* **Sleep hygiene:** Make sure you're getting enough quality sleep. Establish a regular sleep schedule, create a relaxing bedtime routine, and avoid caffeine and alcohol before bed.\n* **Stress 