In [1]:
from langchain.document_loaders import PyPDFLoader, DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain_pinecone import PineconeVectorStore
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate
from typing import List
from langchain.schema import Document
from pinecone import Pinecone, ServerlessSpec
from dotenv import load_dotenv
import os

# Load environment variables
load_dotenv()
PINECONE_API_KEY = os.getenv("PINECONE_API_KEY")

# Set environment variables
os.environ["PINECONE_API_KEY"] = PINECONE_API_KEY or ""

def load_pdf_files(data):
    loader = DirectoryLoader(
        data, 
        glob="*.pdf", 
        loader_cls=PyPDFLoader
    )
    documents = loader.load()
    return documents

def filter_to_minimal_docs(documents: List[Document]) -> List[Document]:
    """Filters the documents to only include those with minimal content."""
    return [doc for doc in documents if len(doc.page_content.strip()) > 0]

def text_split(minimal_docs):
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=500, 
        chunk_overlap=20,
        length_function=len
    )
    return text_splitter.split_documents(minimal_docs)

def download_embeddings():
    """Downloads and returns the HuggingFace embeddings model."""
    return HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2",
    )

def setup_better_local_llm():
    """Set up a better local LLM for medical questions"""
    try:
        # Try to use a more capable local model
        from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
        from langchain_huggingface import HuggingFacePipeline
        import torch
        
        # Try different models in order of preference
        models_to_try = [
            "microsoft/DialoGPT-medium",    # Good for conversation
            "microsoft/DialoGPT-large",     # Even better
            "distilgpt2",                   # Fallback
        ]
        
        for model_name in models_to_try:
            try:
                print(f"Trying model: {model_name}")
                
                tokenizer = AutoTokenizer.from_pretrained(model_name)
                model = AutoModelForCausalLM.from_pretrained(model_name)
                
                # Add padding token if it doesn't exist
                if tokenizer.pad_token is None:
                    tokenizer.pad_token = tokenizer.eos_token
                
                pipe = pipeline(
                    "text-generation",
                    model=model,
                    tokenizer=tokenizer,
                    max_new_tokens=200,
                    temperature=0.3,
                    do_sample=True,
                    top_p=0.9,
                    repetition_penalty=1.1,
                    pad_token_id=tokenizer.eos_token_id,
                    device_map="auto" if torch.cuda.is_available() else "cpu",
                )
                
                llm = HuggingFacePipeline(pipeline=pipe)
                
                # Test the model
                test_response = llm.invoke("Hello, how are you?")
                print(f"✓ {model_name} works")
                return llm
                
            except Exception as e:
                print(f"✗ {model_name} failed: {str(e)[:100]}...")
                continue
        
        return None
        
    except Exception as e:
        print(f"Local model setup failed: {e}")
        return None

def create_medical_prompt_template():
    """Create a better prompt template for medical questions"""
    
    system_prompt = """You are a medical expert assistant. Use the retrieved context to answer the question accurately and concisely.

IMPORTANT INSTRUCTIONS:
1. Use ONLY the provided context to answer the question
2. If the context doesn't contain the answer, say "I don't have enough information about this specific condition"
3. Be factual and precise
4. Use simple language that patients can understand
5. Focus on key information: symptoms, causes, treatments
6. Keep your answer to 2-3 sentences maximum

CONTEXT:
{context}

QUESTION:
{input}

ANSWER:"""
    
    prompt = ChatPromptTemplate.from_messages([
        ("system", system_prompt),
    ])
    
    return prompt

# Main execution
if __name__ == "__main__":
    print("Starting medical chatbot setup...")
    
    # Load and process documents
    print("Loading PDF documents...")
    extracted_data = load_pdf_files("data")
    minimal_docs = filter_to_minimal_docs(extracted_data)
    texts_chunk = text_split(minimal_docs)
    print(f"Processed {len(texts_chunk)} text chunks")
    
    # Initialize embeddings
    print("Initializing embeddings...")
    embeddings = download_embeddings()
    print("✓ Embeddings initialized successfully")
    
    # Initialize Pinecone
    print("Initializing Pinecone...")
    pc = Pinecone(api_key=PINECONE_API_KEY)
    index_name = "medical-chatbot"
    
    # Create index if it doesn't exist
    if index_name not in pc.list_indexes().names():
        print("Creating Pinecone index...")
        pc.create_index(
            name=index_name,
            dimension=384,
            metric="cosine",
            spec=ServerlessSpec(cloud="aws", region="us-east-1")
        )
        print("✓ Pinecone index created")
    else:
        print("✓ Pinecone index already exists")
    
    # Create vector store
    print("Creating vector store...")
    docsearch = PineconeVectorStore.from_documents(
        documents=texts_chunk,
        embedding=embeddings,
        index_name=index_name
    )
    print("✓ Vector store created")
    
    # Initialize retriever
    retriever = docsearch.as_retriever(search_type="similarity", search_kwargs={"k": 3})
    
    # Test retriever
    print("Testing retriever...")
    retrieved_docs = retriever.invoke("What is Acne?")
    print(f"✓ Retriever works - retrieved {len(retrieved_docs)} documents")
    
    # Setup better local LLM
    print("Setting up local LLM...")
    chat_model = setup_better_local_llm()
    
    if chat_model is None:
        print("Using simple echo fallback...")
        from langchain.llms import FakeListLLM
        chat_model = FakeListLLM(responses=["Based on the medical context, I can provide information about various conditions."])
    
    # Create better medical prompt template
    prompt = create_medical_prompt_template()
    
    # Create chains
    question_answer_chain = create_stuff_documents_chain(chat_model, prompt)
    rag_chain = create_retrieval_chain(retriever, question_answer_chain)
    
    # Test the chain with better prompting
    try:
        print("Testing RAG chain with medical question...")
        
        # First, let's see what context is retrieved
        retrieved = retriever.invoke("what is Acromegaly and gigantism?")
        print("\nRetrieved context snippets:")
        for i, doc in enumerate(retrieved):
            print(f"{i+1}. {doc.page_content[:100]}...")
        
        # Now test the full RAG chain
        response = rag_chain.invoke({"input": "what is Acromegaly and gigantism?"})
        print(f"\nAnswer: {response['answer']}")
        
    except Exception as e:
        print(f"RAG chain failed: {e}")
        print("Trying manual approach...")
        
        # Manual approach with better prompting
        retrieved = retriever.invoke("what is Acromegaly and gigantism?")
        context = "\n\n".join([doc.page_content for doc in retrieved])
        
        # Create a better manual prompt
        manual_prompt = f"""Based on this medical context: {context}

Please answer: what is Acromegaly and gigantism?

Focus on:
1. What are these conditions?
2. What causes them?
3. Key symptoms

Answer:"""
        
        try:
            manual_response = chat_model.invoke(manual_prompt)
            print("Manual response:", manual_response)
        except Exception as e:
            print(f"Manual generation failed: {e}")
            print("Showing retrieved context instead:")
            for i, doc in enumerate(retrieved):
                print(f"\n--- Document {i+1} ---")
                print(doc.page_content[:200] + "...")

  from .autonotebook import tqdm as notebook_tqdm


Starting medical chatbot setup...
Loading PDF documents...
Processed 5859 text chunks
Initializing embeddings...


  return HuggingFaceEmbeddings(


✓ Embeddings initialized successfully
Initializing Pinecone...
✓ Pinecone index already exists
Creating vector store...
✓ Vector store created
Testing retriever...
✓ Retriever works - retrieved 3 documents
Setting up local LLM...
Trying model: microsoft/DialoGPT-medium


Device set to use mps:0


✓ microsoft/DialoGPT-medium works
Testing RAG chain with medical question...

Retrieved context snippets:
1. Whitehouse Station, NJ: Merck Research Laboratories,
1997.
Larsen, D. E., ed. Mayo Clinic Family Hea...
2. Whitehouse Station, NJ: Merck Research Laboratories,
1997.
Larsen, D. E., ed. Mayo Clinic Family Hea...
3. Whitehouse Station, NJ: Merck Research Laboratories,
1997.
Larsen, D. E., ed. Mayo Clinic Family Hea...

Answer: System: You are a medical expert assistant. Use the retrieved context to answer the question accurately and concisely.

IMPORTANT INSTRUCTIONS:
1. Use ONLY the provided context to answer the question
2. If the context doesn't contain the answer, say "I don't have enough information about this specific condition"
3. Be factual and precise
4. Use simple language that patients can understand
5. Focus on key information: symptoms, causes, treatments
6. Keep your answer to 2-3 sentences maximum

CONTEXT:
Whitehouse Station, NJ: Merck Research Laboratories,
1997.
