In [15]:
import os
from dotenv import load_dotenv
from langchain_community.document_loaders import PyPDFLoader, DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_pinecone import PineconeVectorStore
from langchain_community.llms import LlamaCpp
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate
from pinecone import Pinecone, ServerlessSpec

# Load environment variables (python-dotenv) so the os.getenv() lookups in
# later cells can see them. The call is left as a bare expression, so its
# return value is what the cell displays.
load_dotenv()

True

In [16]:
# Pinecone settings.
# The index name is fixed here and must match the index used by this project;
# the API key is read from the environment and is None when the variable is unset.
PINECONE_INDEX_NAME = "medical-chatbot"
PINECONE_API_KEY = os.environ.get("PINECONE_API_KEY")

In [17]:
# Fail fast with an actionable message when the key is missing or empty.
# Without this guard, slicing None raises an opaque TypeError, and an empty
# string would print a misleading "loaded" confirmation.
if not PINECONE_API_KEY:
    raise RuntimeError(
        "PINECONE_API_KEY is not set. Add it to your .env file or environment "
        "before running this notebook."
    )

# Only the first five characters are echoed, never the full key.
print(f"✓ Pinecone API Key loaded: {PINECONE_API_KEY[:5]}...")

✓ Pinecone API Key loaded: pcsk_...


In [18]:
# --- 1. Load Data ---
def load_pdf(data_path):
    """Load the PDF files found in a directory.

    Args:
        data_path: Directory handed to ``DirectoryLoader``; files are selected
            with the ``*.pdf`` glob and parsed with ``PyPDFLoader``.

    Returns:
        The documents returned by ``DirectoryLoader.load()``.
    """
    pdf_loader = DirectoryLoader(data_path, glob="*.pdf", loader_cls=PyPDFLoader)
    return pdf_loader.load()

In [19]:
# Directory containing the source PDFs.
# Defaults to the original local path, but can be overridden with the
# MEDICAL_CHATBOT_DATA_DIR environment variable (e.g. in .env) so the notebook
# runs on other machines without editing this cell.
data_path = os.getenv("MEDICAL_CHATBOT_DATA_DIR", "D:\\End_to_End_Medical_Chatbot\\data")
extracted_data = load_pdf(data_path)
print(f"Documents loaded: {len(extracted_data)}")

Documents loaded: 637


In [20]:
# --- 2. Create Chunks ---
def text_split(data, chunk_size=500, chunk_overlap=20):
    """Split loaded documents into smaller chunks.

    Args:
        data: Documents to split (passed to ``split_documents``).
        chunk_size: ``chunk_size`` given to ``RecursiveCharacterTextSplitter``.
            Defaults to 500, the value previously hard-coded here.
        chunk_overlap: ``chunk_overlap`` given to the splitter. Defaults to 20,
            the value previously hard-coded here.

    Returns:
        The chunks returned by ``RecursiveCharacterTextSplitter.split_documents``.
    """
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return splitter.split_documents(data)

In [21]:
# Chunk the loaded documents and report how many chunks were produced.
text_chunks = text_split(extracted_data)
chunk_total = len(text_chunks)
print("Length of chunks:", chunk_total)

Length of chunks: 7020


In [22]:
# --- 3. Download Embeddings ---
def download_hugging_face_embeddings():
    """Create the embedding model used to vectorize text.

    Returns:
        A ``HuggingFaceEmbeddings`` instance (from ``langchain_huggingface``)
        configured with ``sentence-transformers/all-MiniLM-L6-v2``.
    """
    return HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

In [23]:
# Build the embedding model once; the same object is later passed to the
# Pinecone vector store as its `embedding`.
embeddings = download_hugging_face_embeddings()

In [24]:
# --- 4. Initialize Pinecone & Store Vectors ---
# Initialize the new Pinecone client
pc = Pinecone(api_key=PINECONE_API_KEY)

# Check if index exists, create if not (Optional - assumes you have Serverless)
existing_indexes = [index.name for index in pc.list_indexes()]
index_was_created = PINECONE_INDEX_NAME not in existing_indexes

if index_was_created:
    print(f"Creating index: {PINECONE_INDEX_NAME}")
    pc.create_index(
        name=PINECONE_INDEX_NAME,
        dimension=384,  # Matches embedding dimension
        metric="cosine",
        spec=ServerlessSpec(
            cloud="aws",
            region="us-east-1"
        )
    )

# Embed and upload the chunks only when the index is new or still empty.
# Unconditionally calling from_documents re-embeds everything and upserts the
# chunks again on every run, which duplicates vectors in an existing index and
# skews retrieval results.
# NOTE: to refresh the index after changing the PDFs, delete the index (or its
# vectors) and re-run this cell.
needs_upload = index_was_created or (
    pc.Index(PINECONE_INDEX_NAME).describe_index_stats().total_vector_count == 0
)

if needs_upload:
    # Create VectorStore from documents
    # Note: Use from_documents instead of from_texts for better metadata handling
    docsearch = PineconeVectorStore.from_documents(
        documents=text_chunks,
        embedding=embeddings,
        index_name=PINECONE_INDEX_NAME
    )
else:
    # Index already populated: just connect to it for querying.
    docsearch = PineconeVectorStore.from_existing_index(
        index_name=PINECONE_INDEX_NAME,
        embedding=embeddings
    )

In [25]:
# --- 5. Setup LLM (LlamaCpp) ---
# Prompt template text with two placeholders, {context} and {question}.
# It instructs the model to say it does not know rather than invent an answer,
# and to return only the answer itself.
prompt_template = """
Use the following pieces of information to answer the user's question.
If you don't know the answer, just say that you don't know, don't try to make up an answer.

Context: {context}
Question: {question}

Only return the helpful answer below and nothing else.
Helpful answer:
"""

In [26]:
# Wrap the template text in a PromptTemplate and expose it under the "prompt"
# key of the kwargs dict handed to the chain.
PROMPT = PromptTemplate(
    input_variables=["context", "question"],
    template=prompt_template,
)
chain_type_kwargs = dict(prompt=PROMPT)

In [27]:
# Load LlamaCpp (Replaces CTransformers for better support)
# Make sure you have downloaded the GGUF model file to this path.
# Defaults to the original local path, but can be overridden with the
# MEDICAL_CHATBOT_MODEL_PATH environment variable (e.g. in .env) so the
# notebook runs on other machines without editing this cell.
local_model_path = os.getenv(
    "MEDICAL_CHATBOT_MODEL_PATH",
    "D:\\End_to_End_Medical_Chatbot\\model\\Meta-Llama-3.1-8B-Instruct-Q4_K_M.gguf",
)

In [28]:
# Settings for the local LlamaCpp model loaded from `local_model_path`.
# NOTE(review): temperature 0.8 is fairly high for factual Q&A - confirm this is intended.
llm_settings = {
    "model_path": local_model_path,
    "n_ctx": 2048,        # Context window
    "temperature": 0.8,
    "max_tokens": 512,
    "verbose": True,
    "n_gpu_layers": 0,    # CPU only - disable GPU acceleration
    "use_mlock": False,   # Disable memory locking for compatibility
}
llm = LlamaCpp(**llm_settings)

llama_model_loader: loaded meta data with 33 key-value pairs and 292 tensors from D:\End_to_End_Medical_Chatbot\model\Meta-Llama-3.1-8B-Instruct-Q4_K_M.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.type str              = model
llama_model_loader: - kv   2:                               general.name str              = Meta Llama 3.1 8B Instruct
llama_model_loader: - kv   3:                           general.finetune str              = Instruct
llama_model_loader: - kv   4:                           general.basename str              = Meta-Llama-3.1
llama_model_loader: - kv   5:                         general.size_label str              = 8B
llama_model_loader: - kv   6:                            general.license str              = llama3.1

In [29]:
# --- 6. Retrieval Chain ---
# Retriever over the vector store, configured with k=2 in its search kwargs.
top_k_retriever = docsearch.as_retriever(search_kwargs={'k': 2})

# Question-answering chain combining the LLM, the retriever and the custom
# prompt; source documents are included in the chain's result.
qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=top_k_retriever,
    chain_type_kwargs=chain_type_kwargs,
    return_source_documents=True,
)

In [30]:
# --- 7. Chat Loop ---
# Keep answering questions until the user types "exit" or "quit" (any case,
# surrounding whitespace ignored), or the input stream ends / is interrupted.
while True:
    try:
        user_input = input("Input Prompt: ").strip()
    except (EOFError, KeyboardInterrupt):
        # Closed stdin or a kernel interrupt: leave the loop cleanly instead
        # of surfacing a traceback.
        break

    if not user_input:
        # Nothing was asked; don't spend an LLM call on an empty query.
        continue
    if user_input.lower() in ("exit", "quit"):
        break

    result = qa.invoke({"query": user_input})
    print("Response : ", result["result"])