In [21]:
import os

# Project folder under the current user's home directory; expanduser()
# resolves "~", so no username is hard-coded here.
project_path = os.path.expanduser("~/Desktop/Medibot/FirstAid.AI")

# Make the project folder the working directory so the relative paths used
# by later cells (e.g. 'Data/') resolve against it.
os.chdir(project_path)

# Echo the working directory as a sanity check.
print("Current working directory:", os.getcwd())


Current working directory: C:\Users\Soumya Sengupta\Desktop\Medibot\FirstAid.AI


In [22]:
# IPython magic: display the kernel's current working directory.
%pwd

'C:\\Users\\Soumya Sengupta\\Desktop\\Medibot\\FirstAid.AI'

In [62]:
from langchain.document_loaders import PyPDFLoader, DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

In [86]:
def load_pdf_file(data):
    """Load the PDF files found in a directory.

    Args:
        data: Path of the directory to scan; files matching ``*.pdf`` are
            read with ``PyPDFLoader``.

    Returns:
        The list of documents produced by ``DirectoryLoader.load()``
        (presumably one document per PDF page -- confirm against the
        PyPDFLoader version in use).
    """
    return DirectoryLoader(data, glob="*.pdf", loader_cls=PyPDFLoader).load()

In [87]:
# 'Data/' is relative to the working directory set at the top of the notebook.
extracted_data = load_pdf_file("Data/")

In [88]:
def text_split(extracted_data, chunk_size=500, chunk_overlap=20):
    """Split loaded documents into overlapping chunks for embedding.

    Args:
        extracted_data: Documents to split (here, the output of
            ``load_pdf_file``).
        chunk_size: Upper bound on the size of each chunk, passed straight
            to ``RecursiveCharacterTextSplitter``. Defaults to 500, the
            value previously hard-coded.
        chunk_overlap: Amount of text shared between consecutive chunks,
            passed straight to the splitter. Defaults to 20, the value
            previously hard-coded.

    Returns:
        The chunked documents returned by ``split_documents``.
    """
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return text_splitter.split_documents(extracted_data)

In [89]:
# Chunk the loaded documents and report how many chunks will be embedded.
text_chunks=text_split(extracted_data)
print(len(text_chunks))

5860


In [147]:
from dotenv import load_dotenv
from langchain.embeddings import HuggingFaceHubEmbeddings

# Load .env *before* the token is read. Without this the cell only worked
# when a later load_dotenv() cell had already been run in the same kernel,
# and a fresh Restart & Run All passed a token of None.
load_dotenv()


def download_embeddings_hfhub():
    """Build the embedding client used for both indexing and querying.

    Reads the Hugging Face token from the ``HUGGINGFACEHUB_API_TOKEN``
    environment variable at call time.

    Returns:
        A ``HuggingFaceHubEmbeddings`` instance configured for the
        ``sentence-transformers/all-MiniLM-L6-v2`` repository.
    """
    return HuggingFaceHubEmbeddings(
        repo_id="sentence-transformers/all-MiniLM-L6-v2",
        huggingfacehub_api_token=os.environ.get("HUGGINGFACEHUB_API_TOKEN"),
    )


embeddings = download_embeddings_hfhub()


In [91]:
# Sanity check: the embedding length printed here must match the `dimension`
# the Pinecone index is created with (384).
query_result = embeddings.embed_query("Hello world")
print(len(query_result))

384


In [140]:
from dotenv import load_dotenv
# Load variables from a .env file into os.environ; the API keys used by this
# notebook are read from the environment.
load_dotenv()

True

In [92]:
# Pinecone key from the environment (None if unset).
# NOTE(review): not referenced by the cells visible here -- the Pinecone
# client below reads the variable again via os.getenv; confirm before removing.
PINECONE_API_KEY=os.environ.get('PINECONE_API_KEY')

In [None]:
from pinecone.grpc import PineconeGRPC as Pinecone
from pinecone import ServerlessSpec
from dotenv import load_dotenv
import os

# Load .env so PINECONE_API_KEY is available even on a fresh kernel.
load_dotenv()
pc = Pinecone(api_key=os.getenv("PINECONE_API_KEY"))

index_name = "medichat"

# Creating an index that already exists raises an error, which made this cell
# fail on every run after the first. Only create it when it is missing so the
# notebook can be re-run end to end.
if index_name not in pc.list_indexes().names():
    pc.create_index(
        name=index_name,
        dimension=384,  # must equal the embedding length (384 for the model used above)
        metric="cosine",
        spec=ServerlessSpec(
            cloud="aws",
            region="us-east-1",
        ),
    )

In [95]:
from langchain_pinecone import PineconeVectorStore

# Embed each chunk and upsert the embeddings into the Pinecone index.
# NOTE(review): re-running this cell presumably uploads the chunks again --
# verify, and skip it once the index is populated.
docsearch = PineconeVectorStore.from_documents(
    documents=text_chunks,
    index_name=index_name,
    embedding=embeddings, 
)

In [96]:
from langchain_pinecone import PineconeVectorStore
# Attach to the already-populated Pinecone index. No chunks are passed in
# here; `embeddings` is supplied so queries can be embedded at search time.
docsearch = PineconeVectorStore.from_existing_index(
    index_name=index_name,
    embedding=embeddings
)

In [134]:
# Similarity-search retriever over the vector store; k=10 requests ten
# results per query.
retriever = docsearch.as_retriever(search_type="similarity", search_kwargs={"k":10})

In [145]:
import os

from dotenv import load_dotenv
from langchain_groq import ChatGroq

# Pull GROQ_API_KEY from .env into the environment, then read it back.
load_dotenv()
groq_api_key = os.getenv("GROQ_API_KEY")

# Chat model used to generate answers. Other model ids can be swapped in
# here (e.g. mistral-7b, llama2-70b).
llm = ChatGroq(
    api_key=groq_api_key,
    model_name="llama3-70b-8192",
    temperature=0.4,
    max_tokens=500,
)


from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate


# System instructions for the answering model. The original literal had lost
# the "Use the following pieces of retrieved context to answer " fragment,
# leaving the broken sentence "...tasks. the question." and never telling the
# model to ground its answer in the retrieved text.
# `{context}` is the template variable that receives the retrieved documents.
system_prompt = (
    "You are an assistant for question-answering tasks. "
    "Use the following pieces of retrieved context to answer "
    "the question. If you don't know the answer, say that you "
    "don't know. Use ten sentences maximum and keep the "
    "answer concise.\n\n{context}"
)


# Two-message chat template: the system instructions, followed by the user's
# question substituted into `{input}`.
prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system_prompt),
        ("human", "{input}"),
    ]
)


In [137]:
# Combine the LLM and prompt into a document-answering chain, then put the
# retriever in front of it to form the full retrieval-augmented chain.
question_answer_chain = create_stuff_documents_chain(llm, prompt)
rag_chain = create_retrieval_chain(retriever, question_answer_chain)

In [146]:
# Run one question through the chain; the question goes in under "input" and
# the generated text is read back from the "answer" key of the result.
response = rag_chain.invoke({"input": "what is chronic kidney disease and what are its stages? What lab tests should be done?"})
print(response["answer"])



Chronic kidney disease (CKD) is a gradual loss of kidney function over time, leading to the kidneys being unable to filter waste and excess fluids from the blood effectively.

The stages of CKD are based on the level of kidney function, which is measured by the glomerular filtration rate (GFR). The stages are:

1. Stage 1: Kidney damage with normal GFR (≥90 mL/min)
2. Stage 2: Mild reduction in GFR (60-89 mL/min)
3. Stage 3: Moderate reduction in GFR (30-59 mL/min)
4. Stage 4: Severe reduction in GFR (15-29 mL/min)
5. Stage 5: End-stage renal disease (ESRD) with GFR <15 mL/min

Lab tests to diagnose and monitor CKD include:

1. Blood tests:
	* Creatinine: measures kidney function
	* Blood urea nitrogen (BUN): measures waste products in the blood
	* Electrolyte panel: measures sodium, potassium, and other electrolytes
2. Urine tests:
	* Urinalysis: examines the physical and chemical properties of urine
	* Proteinuria: measures protein levels in urine
	* Creatinine clearance: measures ki