In [1]:
# Step 1: Install Required Libraries

!pip install openai faiss-cpu langchain chromadb




In [2]:
!pip install langchain-community



In [3]:
!pip install tiktoken



In [21]:
!pip install transformers torch accelerate langchain


In [22]:
import torch
from langchain.chains import RetrievalQA
from langchain.chains.question_answering import load_qa_chain
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.llms import HuggingFacePipeline
from langchain.schema import Document
from langchain.vectorstores import FAISS
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer, pipeline

In [23]:
# Embedding model used to vectorize the documents for the FAISS index.
embedding_function = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2"
)

# Small in-memory corpus: each sentence is wrapped in a Document so it can be
# indexed by the vector store.
documents = [
    Document(page_content=text)
    for text in (
        "Diabetes is a chronic disease caused by insulin resistance.",
        "Metformin is commonly used to treat Type 2 Diabetes.",
        "A recent study in 2024 shows GLP-1 agonists help with weight loss.",
        "Insulin therapy is a standard treatment for Type 1 diabetes.",
        "A new drug, Tirzepatide, has shown promise in managing blood sugar levels.",
        "Recent advancements in diabetes research include closed-loop insulin delivery systems.",
    )
]


In [24]:
# Step 2: Embed the sample documents and index them in a FAISS vector store.
vectorstore = FAISS.from_documents(
    documents=documents,
    embedding=embedding_function,
)

In [25]:
# Step 3: Load FLAN-T5 Model
model_name = "google/flan-t5-large"
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Use the GPU when one is available and fall back to CPU otherwise, so the
# notebook also runs on machines without CUDA instead of raising on `.to("cuda")`.
device = "cuda" if torch.cuda.is_available() else "cpu"
model = AutoModelForSeq2SeqLM.from_pretrained(model_name).to(device)

In [26]:
# Step 4: Wrap the loaded model and tokenizer in a text2text-generation pipeline.
# The generation options below are forwarded to the pipeline unchanged.
llm_pipeline = pipeline(
    task="text2text-generation",
    model=model,
    tokenizer=tokenizer,
    # Sampling configuration
    do_sample=True,
    temperature=0.7,
    top_p=0.95,
    repetition_penalty=1.2,
    # Output size / count
    max_length=4096,
    num_return_sequences=1,
)

Device set to use cuda:0


In [27]:

# Step 5: Expose the transformers pipeline through LangChain's LLM interface
# so it can be plugged into the chains created later.
llm = HuggingFacePipeline(
    pipeline=llm_pipeline,
)




In [28]:
# Step 6: Turn the vector store into a retriever that returns the 5 closest
# documents for each query.
retriever = vectorstore.as_retriever(
    search_kwargs=dict(k=5),
)

In [34]:

# Step 7: Define the question once and use it for both retrieval and generation.
# The retrieved passages are deliberately NOT pasted into the question:
# RetrievalQA sends this same string to the retriever as the search query, and
# the `stuff` chain already inserts the retrieved documents into the prompt.
# Appending them here would make the search query contain its own results and
# would feed every document to the model twice.
question = (
    "You are an expert medical assistant. Using the provided context, summarize "
    "the latest treatments for type 2 diabetes in detail. If multiple treatments "
    "are mentioned, explain each one clearly."
)

# Retrieved with the same query the chain will use, and kept only so the
# passages can be printed for debugging. `invoke` replaces the deprecated
# `get_relevant_documents`.
retrieved_docs = retriever.invoke(question)

In [35]:
# Step 8: Build a question-answering chain of type `stuff`, which combines the
# documents it receives into a single prompt for the LLM.
combine_documents_chain = load_qa_chain(
    llm=llm,
    chain_type="stuff",
)


In [36]:

# Step 9: Assemble the RAG chain from the retriever and the document-combining
# QA chain defined above.
rag_chain = RetrievalQA(
    combine_documents_chain=combine_documents_chain,
    retriever=retriever,
)


In [37]:

# Step 10: Get a response
# `Chain.run` is deprecated; `invoke` takes the inputs keyed by the chain's
# input key ("query" for RetrievalQA) and returns a dict whose "result" entry
# holds the generated answer string.
response = rag_chain.invoke({"query": question})["result"]

In [38]:

# Step 11: Show the retrieved passages (debugging aid), then the chain's answer.
print("\n Retrieved Documents:")
# One "- <text>" line per retrieved document; prints nothing if the list is empty.
print(*(f"- {doc.page_content}\n" for doc in retrieved_docs), sep="", end="")

print(f"\n RAG Response: {response}")


 Retrieved Documents:
- Insulin therapy is a standard treatment for Type 1 diabetes.
- Metformin is commonly used to treat Type 2 Diabetes.
- Recent advancements in diabetes research include closed-loop insulin delivery systems.
- Diabetes is a chronic disease caused by insulin resistance.
- A new drug, Tirzepatide, has shown promise in managing blood sugar levels.

 RAG Response: Metformin is commonly used to treat Type 2 Diabetes.
