In [53]:
# Show the kernel's current working directory (the next cell changes it).
%pwd

'd:\\GenAI-end-to-end-Medical-ChatBot\\research'

In [54]:
import os

# The notebook is started from the `research/` folder, while later cells use
# paths relative to its parent (e.g. "Data/", ".env"). Only step up while we
# are still inside `research/`, so re-running this cell does not keep
# climbing the directory tree.
if os.path.basename(os.getcwd()) == "research":
    os.chdir("../")

In [55]:
from langchain.document_loaders import PyPDFLoader, DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

In [56]:
def load_pdf_file(data):
    """Load every PDF directly inside a directory.

    Args:
        data: Path of the directory to scan.

    Returns:
        The list of documents produced by ``DirectoryLoader.load()``, using
        ``PyPDFLoader`` for each matched file.
    """
    # "*.pdf" (with the dot) restricts the match to the .pdf extension; the
    # previous "*pdf" pattern also matched any file whose name merely ended
    # in the letters "pdf".
    loader = DirectoryLoader(data, glob="*.pdf", loader_cls=PyPDFLoader)
    return loader.load()

In [57]:
# Load the PDFs from "Data/" (resolved relative to the current working directory).
extracted_data= load_pdf_file("Data/")

In [58]:
def text_split(extracted_data, chunk_size=500, chunk_overlap=20):
    """Split loaded documents into overlapping chunks.

    Args:
        extracted_data: Documents to split (as returned by ``load_pdf_file``).
        chunk_size: Maximum size of each chunk, passed to
            ``RecursiveCharacterTextSplitter``. Defaults to 500.
        chunk_overlap: Overlap between consecutive chunks, passed to
            ``RecursiveCharacterTextSplitter``. Defaults to 20.

    Returns:
        The list of chunked documents from ``split_documents``.
    """
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return text_splitter.split_documents(extracted_data)

In [59]:
# Chunk the loaded documents and report how many chunks were produced.
text_chunks = text_split(extracted_data)
print("length of text chuncks", len(text_chunks)) 

length of text chuncks 40000


In [60]:
def download_hugging_face_embeddings():
    """Build the sentence-transformers embedding model used by this notebook.

    Returns:
        A ``HuggingFaceEmbeddings`` instance configured for
        ``sentence-transformers/all-MiniLM-L6-v2``.
    """
    model_id = "sentence-transformers/all-MiniLM-L6-v2"

    # Prefer the dedicated package (avoids the deprecation warning); fall back
    # to the legacy location when that package is not installed.
    try:
        from langchain_huggingface import HuggingFaceEmbeddings as embedding_cls
    except ImportError:
        from langchain.embeddings import HuggingFaceEmbeddings as embedding_cls

    return embedding_cls(model_name=model_id)

In [61]:
# Instantiate the embedding model once; later cells reuse this object.
embeddings = download_hugging_face_embeddings()

In [62]:
# Sanity check: embed a short string and print the vector length. The Pinecone
# index created later in this notebook is configured with dimension=384.
query_result = embeddings.embed_query("Hello world")
print("Length", len(query_result))

Length 384


In [63]:
import os

from dotenv import load_dotenv

# Load variables from a .env file into the process environment before the
# Pinecone client is imported and configured.
load_dotenv()

from pinecone.grpc import PineconeGRPC as Pinecone

index_name = "genai-medical-chatbot"
PINECONE_API_KEY = os.getenv("PINECONE_API_KEY")

# gRPC-based Pinecone client; later cells use it to list and create indexes.
pc = Pinecone(api_key=PINECONE_API_KEY)

In [64]:
# Debug: check that the .env file exists and that its variables load.
# Values are never printed in full; only key names and a masked marker are shown.
import os
from dotenv import load_dotenv

print("🔍 Debugging environment variables...")
print(f"Current working directory: {os.getcwd()}")

# Check if .env file exists
env_file_path = ".env"
if os.path.exists(env_file_path):
    print(f"✅ Found .env file at: {os.path.abspath(env_file_path)}")

    # Read with an explicit encoding so the result does not depend on the
    # platform default (e.g. a legacy code page on Windows).
    with open(env_file_path, 'r', encoding="utf-8") as f:
        # splitlines() does not produce a phantom empty entry when the file
        # ends with a newline, so the reported line count is accurate.
        lines = f.read().splitlines()

    print(f"📄 .env file contains {len(lines)} lines:")
    for i, line in enumerate(lines, 1):
        if not line.strip():
            continue  # blank lines are not echoed
        if '=' in line:
            # Mask the value of every assignment, including commented-out
            # ones such as "#OPENAI_API_KEY=...", which would otherwise be
            # echoed verbatim and leak an old key into the notebook output.
            key, value = line.split('=', 1)
            print(f"  Line {i}: {key}={'***' if value else '(empty)'}")
        else:
            print(f"  Line {i}: {line}")
else:
    print(f"❌ No .env file found at: {os.path.abspath(env_file_path)}")

# Reload environment variables (override=True is passed so values from the
# file replace ones already set in the process).
print("\n🔄 Reloading environment variables...")
load_result = load_dotenv(override=True)
print(f"load_dotenv() result: {load_result}")

# Check environment variables
print(f"\n🔍 Environment variable check:")
pinecone_key = os.environ.get("PINECONE_API_KEY")
openai_key = os.environ.get("OPENAI_API_KEY")

print(f"PINECONE_API_KEY: {'✅ Found' if pinecone_key else '❌ Not found'}")
print(f"OPENAI_API_KEY: {'✅ Found' if openai_key else '❌ Not found'}")

if openai_key:
    # Only the first 7 characters are shown, never the full key.
    print(f"OpenAI key starts with: {openai_key[:7]}...")
else:
    print("💡 Try checking if your .env file has the correct format:")
    print("   OPENAI_API_KEY=sk-your-actual-key-here")
    print("   (no spaces around the = sign)")

🔍 Debugging environment variables...
Current working directory: d:\GenAI-end-to-end-Medical-ChatBot
✅ Found .env file at: d:\GenAI-end-to-end-Medical-ChatBot\.env
📄 .env file contains 2 lines:
  Line 1: PINECONE_API_KEY =***
  Line 2: OPENAI_API_KEY =***

🔄 Reloading environment variables...
load_dotenv() result: True

🔍 Environment variable check:
PINECONE_API_KEY: ✅ Found
OPENAI_API_KEY: ✅ Found
OpenAI key starts with: sk-proj...


In [65]:
from pinecone import ServerlessSpec


def _create_aws_serverless_index(region):
    """Create `index_name` as a 384-dimensional cosine index on AWS in `region`."""
    pc.create_index(
        name=index_name,
        dimension=384,  # Dimension of the embeddings (sentence-transformers/all-MiniLM-L6-v2)
        metric="cosine",  # Similarity metric
        spec=ServerlessSpec(cloud="aws", region=region),
    )


# Nothing to do when the index is already present. Otherwise try us-east-1
# first and fall back to us-west-2 if that attempt raises.
if index_name in pc.list_indexes().names():
    print(f"✅ Index '{index_name}' already exists!")
else:
    try:
        _create_aws_serverless_index("us-east-1")
        print(f"✅ Index '{index_name}' created successfully!")
    except Exception as e:
        print(f"❌ Error creating index with us-east-1: {e}")
        print("Trying with us-west-2...")
        try:
            _create_aws_serverless_index("us-west-2")
            print(f"✅ Index '{index_name}' created successfully with us-west-2!")
        except Exception as e2:
            print(f"❌ Error with us-west-2: {e2}")
            print("Please check your Pinecone plan and available regions.")

✅ Index 'genai-medical-chatbot' already exists!


In [66]:
# Re-read the .env file (override=True) and refresh the module-level key variables.
from dotenv import load_dotenv

load_dotenv(override=True)

PINECONE_API_KEY = os.getenv("PINECONE_API_KEY")
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")

# Report presence only; the key values themselves are not printed in full.
print(f"🔄 Reloaded environment variables:")
print(f"PINECONE_API_KEY: {'✅ Found' if PINECONE_API_KEY else '❌ Missing'}")
print(f"OPENAI_API_KEY: {'✅ Found' if OPENAI_API_KEY else '❌ Missing'}")

if OPENAI_API_KEY:
    print(f"OpenAI key starts with: {OPENAI_API_KEY[:7]}...")

🔄 Reloaded environment variables:
PINECONE_API_KEY: ✅ Found
OPENAI_API_KEY: ✅ Found
OpenAI key starts with: sk-proj...


In [67]:
import os

# Write each key into os.environ when it is available; otherwise report it
# as missing (with hints for the OpenAI key).
if not PINECONE_API_KEY:
    print("❌ PINECONE_API_KEY is not available")
else:
    os.environ["PINECONE_API_KEY"] = PINECONE_API_KEY
    print("✅ PINECONE_API_KEY set successfully")

if not OPENAI_API_KEY:
    print("❌ OPENAI_API_KEY is not available")
    print("💡 Please check your .env file and ensure OPENAI_API_KEY is set")
    print("💡 You can also set it manually: OPENAI_API_KEY = 'your-key-here'")
else:
    os.environ["OPENAI_API_KEY"] = OPENAI_API_KEY
    print("✅ OPENAI_API_KEY set successfully")

✅ PINECONE_API_KEY set successfully
✅ OPENAI_API_KEY set successfully


In [68]:
from langchain_pinecone import PineconeVectorStore

# Embedding and uploading every chunk is slow, and re-running this cell against
# an index that already holds the corpus would upload the same chunks again
# (NOTE(review): from_documents is expected to add new records on each call —
# confirm). Only upload while the index is still empty; otherwise attach to it.
index_stats = pc.Index(index_name).describe_index_stats()
if index_stats.total_vector_count == 0:
    docsearch = PineconeVectorStore.from_documents(
        documents=text_chunks,
        embedding=embeddings,
        index_name=index_name,
    )
else:
    docsearch = PineconeVectorStore.from_existing_index(
        embedding=embeddings,
        index_name=index_name,
    )

In [69]:
# Connect to the existing Pinecone index; no documents are passed here.
# Note: this rebinds `docsearch`, replacing the value assigned by the previous cell.

from langchain_pinecone import PineconeVectorStore

docsearch = PineconeVectorStore.from_existing_index(
    embedding=embeddings,
    index_name=index_name,
)

In [70]:
# Display the vector store object to confirm it was created.
docsearch

<langchain_pinecone.vectorstores.PineconeVectorStore at 0x1f2a2ce7940>

In [71]:
# Similarity-search retriever; k=3 is the number of documents to retrieve per query.
# (The misspelled name `retriver` is kept because later cells reference it.)
retriver = docsearch.as_retriever(search_type="similarity", search_kwargs=dict(k=3))

In [72]:
# Smoke-test the retriever with a sample question; the result is stored but not displayed.
retrived_docs = retriver.invoke("What is the treatment for diabetes?")

In [None]:
# The OpenAI API is a paid service, so the OpenAI LLM setup below is left commented out.
#from langchain_openai import OpenAI
#llm= OpenAI(temperature=0.4, max_tokens=500)
#llm = OpenAI(model_name="gpt-3.5-turbo", temperature=0.4, max_tokens=300)

In [73]:
from langchain_community.llms import Ollama
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain

# Initialize the Ollama-backed LLM used by the QA chain.
llm = Ollama(model="llama3")  # another Ollama model name (e.g. "gemma") can be substituted

In [82]:
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate


# System instructions for the QA chain. Adjacent string literals are
# concatenated, so every sentence ends with a trailing space to keep the
# sentences from running together. The two fallback rules ("say I don't know"
# and "answer from basic knowledge") are merged into one consistent
# instruction. `{context}` is the placeholder the documents chain fills with
# the retrieved text.
system_prompt = (
    "You are a helpful medical assistant for question-answering tasks. "
    "Use the provided documents to answer the user's question accurately and concisely. "
    "Answer in about three lines, just enough for the user to understand the answer. "
    "If the information is not available in the documents, respond with 'I don't know' "
    "and then give a brief answer from basic knowledge, "
    "making clear that it does not come from the documents."
    "\n\n"
    "{context}"
)

# Chat prompt: the system message carries the instructions and the {context}
# placeholder; the human message carries the user's question via {input}.
prompt = ChatPromptTemplate.from_messages([
    ("system", system_prompt),
    ("human", "{input}"),
])


In [83]:
# Build the answer-generation chain from the LLM and prompt, then wrap it with
# the retriever so each query is answered against the retrieved documents.
question_answer_chain = create_stuff_documents_chain(llm,prompt)
rag_chain = create_retrieval_chain(retriver, question_answer_chain)

In [80]:
# Sample medical question; the generated text is read from the 'answer' key of the response.
response = rag_chain.invoke({"input" : "What is acne?"}) 
print(response['answer'])

According to Gale Encyclopedia of Medicine, acne is a skin disorder where sebaceous glands become inflamed. This occurs due to an increase in sebum production and clogging of pores, leading to pimples forming on the skin.


In [84]:
# Non-medical question, used to see how the chain answers when the topic is
# presumably not covered by the indexed documents.
response = rag_chain.invoke({"input" : "What is artifical intelligence?"}) 
print(response['answer'])

I don't know. The provided documents do not mention artificial intelligence. However, based on general knowledge, artificial intelligence (AI) refers to the development of computer systems that can perform tasks that typically require human intelligence, such as visual perception, speech recognition, decision-making, and language translation.
