In [None]:
import os
from typing import Any, List, Optional

import torch
import numpy as np

from langchain_core.language_models.llms import LLM
from langchain.schema import Document
from langchain.document_loaders import DirectoryLoader, PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_pinecone import PineconeVectorStore
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate

from sentence_transformers import SentenceTransformer
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline

from pinecone import Pinecone, ServerlessSpec

# -------------------------
# Load PDF files
# -------------------------
def load_pdf_files(data_folder: str):
    """Load the PDF files in ``data_folder`` that match the ``*.pdf`` glob.

    Args:
        data_folder: Directory handed to ``DirectoryLoader``; each matching
            file is parsed with ``PyPDFLoader``.

    Returns:
        The result of ``DirectoryLoader.load()`` for that directory.
    """
    return DirectoryLoader(data_folder, glob="*.pdf", loader_cls=PyPDFLoader).load()

# Parse the PDFs under ../data and report how many Document objects the loader
# produced (NOTE(review): with PyPDFLoader this is presumably one per page,
# not one per file - confirm).
extracted_data = load_pdf_files(data_folder="../data")
print(f"Loaded {len(extracted_data)} documents.")


  from .autonotebook import tqdm as notebook_tqdm


Loaded 637 documents.


In [None]:

# -------------------------
# Filter minimal docs
# -------------------------
def filter_to_minimal_docs(docs: List[Document]) -> List[Document]:
    """Return copies of ``docs`` whose metadata holds only the ``source`` key.

    Args:
        docs: Documents to slim down; the input objects are not modified.

    Returns:
        A new list, in the same order, of ``Document`` objects that keep the
        original ``page_content`` and a metadata dict of the form
        ``{"source": ...}``. ``source`` is ``None`` when the original
        metadata has no such key.
    """
    return [
        Document(
            page_content=original.page_content,
            metadata={"source": original.metadata.get("source")},
        )
        for original in docs
    ]

# Drop every metadata field except "source" before chunking.
minimal_docs = filter_to_minimal_docs(docs=extracted_data)

# -------------------------
# Split documents into chunks
# -------------------------
def text_split(
    minimal_docs: List[Document],
    chunk_size: int = 500,
    chunk_overlap: int = 20,
):
    """Split documents into overlapping chunks for embedding.

    Args:
        minimal_docs: Documents to split.
        chunk_size: Value passed to ``RecursiveCharacterTextSplitter`` as
            ``chunk_size``. Defaults to 500, the value previously hard-coded.
        chunk_overlap: Value passed as ``chunk_overlap``. Defaults to 20, the
            value previously hard-coded.

    Returns:
        The result of ``RecursiveCharacterTextSplitter.split_documents`` for
        ``minimal_docs``.
    """
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return text_splitter.split_documents(minimal_docs)

# Chunk the slimmed-down documents and report how many chunks were produced.
texts_chunk = text_split(minimal_docs=minimal_docs)
print(f"Number of chunks: {len(texts_chunk)}")

# -------------------------
# Load HuggingFace embeddings
# -------------------------
# embedding_model_name = "sentence-transformers/all-MiniLM-L6-v2"
# embedding = SentenceTransformer(embedding_model_name)

from langchain.embeddings import HuggingFaceEmbeddings

# Embedding model shared by the vector store and the retriever.
embedding = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

# Smoke test: embed one short string and print the vector length. The Pinecone
# index in the next cell is created with a hard-coded dimension that has to
# match this number.
vector = embedding.embed_query("Hello world")
print("Vector length:", len(vector))


Number of chunks: 5859
Vector length: 384


In [None]:

# -------------------------
# Set up Pinecone
# -------------------------
# Read the Pinecone credential from the environment and fail with a clear
# message when it is missing. The previous code re-assigned the value into
# os.environ, which was a no-op when the variable was set and raised an opaque
# "str expected, not NoneType" TypeError when it was not.
PINECONE_API_KEY = os.getenv("PINECONE_API_KEY")
if not PINECONE_API_KEY:
    raise RuntimeError(
        "PINECONE_API_KEY is not set. Export it in the environment that "
        "starts the notebook kernel before running this cell."
    )

pc = Pinecone(api_key=PINECONE_API_KEY)
index_name = "medical-chatbot"

# Create the serverless index only on first use; later runs reuse it.
if not pc.has_index(index_name):
    pc.create_index(
        name=index_name,
        dimension=384,  # Must match embedding dimension
        metric="cosine",
        spec=ServerlessSpec(cloud="aws", region="us-east-1")
    )

index = pc.Index(index_name)

# -------------------------
# Create vector store
# -------------------------
# Wrap the Pinecone index so LangChain can embed queries and search it.
vectorstore = PineconeVectorStore(
    index=index,
    embedding=embedding
)

# The chunks built earlier were never written to the index, so a freshly
# created index would make every retrieval come back empty. Upload them once,
# when the index reports no vectors; an already-populated index is left alone.
if index.describe_index_stats().total_vector_count == 0:
    vectorstore.add_documents(texts_chunk)

# Return the 3 most similar chunks for each query.
retriever = vectorstore.as_retriever(search_kwargs={"k":3})


In [None]:

# -------------------------
# Load Local Hugging Face LLM (TinyLlama)
# -------------------------
class LocalHFLLM(LLM):
    """LangChain ``LLM`` adapter around a local Hugging Face text-generation pipeline.

    The wrapped pipeline is called once per prompt with fixed sampling
    settings (``max_new_tokens=256``, ``temperature=0.2``, ``do_sample=True``)
    and only the newly generated text is returned.
    """

    # Callable text-generation pipeline. Typed as typing.Any (the builtin
    # function `any` is not a type) so no validation is attempted on it.
    pipeline: Any

    def _call(
        self,
        prompt: str,
        stop: Optional[List[str]] = None,
        run_manager: Optional[Any] = None,
        **kwargs: Any,
    ) -> str:
        """Generate a completion for ``prompt``.

        Args:
            prompt: Full prompt text sent to the pipeline.
            stop: Optional stop sequences. The completion is cut at the
                earliest occurrence of any of them; empty strings are ignored.
            run_manager: Accepted for compatibility with the ``LLM`` base
                class calling convention; not used.
            **kwargs: Accepted so extra arguments forwarded by LangChain do
                not raise ``TypeError``; they are not passed to the pipeline.

        Returns:
            The generated continuation, without the prompt.
        """
        outputs = self.pipeline(
            prompt,
            max_new_tokens=256,
            temperature=0.2,
            do_sample=True,
            # Let the pipeline drop the prompt itself rather than slicing by
            # len(prompt), which assumes the echoed prompt is byte-identical.
            return_full_text=False,
        )
        completion = outputs[0]["generated_text"]

        if stop:
            cut_points = [completion.find(sequence) for sequence in stop if sequence]
            cut_points = [position for position in cut_points if position != -1]
            if cut_points:
                completion = completion[: min(cut_points)]

        return completion

    @property
    def _llm_type(self) -> str:
        """Identifier LangChain uses for this LLM implementation."""
        return "local_hf"

model_id = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"

# Load the tokenizer and weights. Device placement and dtype are both left on
# "auto"; the recorded output of this cell shows some parameters were
# offloaded to CPU and disk as a result.
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto", torch_dtype="auto")

# Wrap model + tokenizer in a text-generation pipeline and expose it to
# LangChain through the LocalHFLLM adapter.
hf_pipeline = pipeline(task="text-generation", model=model, tokenizer=tokenizer)
chatModel = LocalHFLLM(pipeline=hf_pipeline)


Some parameters are on the meta device because they were offloaded to the cpu and disk.


In [None]:

# -------------------------
# Create RAG chain
# -------------------------
# System instructions; the retrieved documents are substituted for {context}.
system_prompt = (
    "You are a medical assistant. Use ONLY the provided context to answer. "
    "If the answer is not in the context, say you do not know. "
    "Use at most three concise sentences.\n\n{context}"
)

# Two-message template: the system instructions, then the user's question.
prompt = ChatPromptTemplate.from_messages(
    [("system", system_prompt), ("human", "{input}")]
)

# qa_chain stuffs the retrieved documents into the prompt and calls the LLM;
# rag_chain runs the retriever first and feeds its results to qa_chain.
qa_chain = create_stuff_documents_chain(chatModel, prompt)
rag_chain = create_retrieval_chain(retriever, qa_chain)


In [11]:

# -------------------------
# Run query
# -------------------------
# Ask one question through the full retrieve-then-generate chain and print the
# generated text stored under the "answer" key of the result.
query = "What is Acromegaly and gigantism?"
response = rag_chain.invoke(dict(input=query))
print("Answer:", response["answer"])


Answer: 
Acromegaly is a disorder in which the abnormal release of a
particular chemical from the pituitary gland in the brain causes
increased growth in bone and soft tissues, as well as a variety
of other disturbances throughout the body. This chemical released
from the pituitary gland

Whitehouse Station, NJ: Merck Research Laboratories, 1997.
Larsen, D. E., ed. Mayo Clinic Family Health Book.New York:
William Morrow and Co., Inc., 1996.
John T. Lohr, PhD
Acromegaly and gigantism
Definition
Acromegaly is a disorder in which the abnormal release of a
particular chemical from the pituitary gland in the brain causes
increased growth in bone and soft tissues, as well as a variety
of other disturbances throughout the body. This chemical released
from the pituitary gland

mone Excess: Acromegaly and Gigantism.” In Harrison’s
Principles of Internal Medicine
