In [41]:
import os
from getpass import getpass

# Set up OpenAI API key
groq_api_key = getpass("Enter your Groq key: ")
os.environ["GROQ_API_KEY"] = groq_api_key

Enter your Groq key: ··········


In [23]:
!pip install -q langchain openai chromadb tiktoken pypdf langchain_openai

In [4]:
!pip install -q langchain-community

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/2.5 MB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m[90m━━━━━━━━━━━━━━━━━[0m [32m1.4/2.5 MB[0m [31m42.8 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.5/2.5 MB[0m [31m46.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m64.7/64.7 kB[0m [31m7.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m50.9/50.9 kB[0m [31m4.4 MB/s[0m eta [36m0:00:00[0m
[?25h[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
google-colab 1.0.0 requires requests==2.32.4, but you have requests 2.32.5 which is incompatible.[0m[31m
[0m

In [42]:
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_community.vectorstores import Chroma
from langchain.prompts import ChatPromptTemplate
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains import create_retrieval_chain

In [43]:
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS

In [26]:
!pip install langchain-groq langchain duckduckgo-search langchain_community ddgs  --quiet

In [44]:
from langchain_groq import ChatGroq
from langchain.memory import ConversationBufferWindowMemory

llm = ChatGroq(model="llama-3.1-8b-instant", temperature=0.0, max_tokens=1024)
memory = ConversationBufferWindowMemory(memory_key="chat_history", return_messages=True, k=5)

In [45]:
loader = PyPDFLoader("/content/Guidance-Document-on-TB-Mukt-Bharat-Abhiyan_0.pdf")
docs = loader.load()
splitter = RecursiveCharacterTextSplitter(chunk_size=800, chunk_overlap=100)
chunks = splitter.split_documents(docs)

In [46]:
embedding_model = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2"  # lightweight and fast
)

In [18]:
pip install langchain faiss-cpu sentence-transformers

Collecting faiss-cpu
  Downloading faiss_cpu-1.12.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (5.1 kB)
Downloading faiss_cpu-1.12.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl (31.4 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m31.4/31.4 MB[0m [31m55.1 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: faiss-cpu
Successfully installed faiss-cpu-1.12.0


In [47]:
vectorstore = FAISS.from_documents(chunks, embedding_model)
retriever = vectorstore.as_retriever(search_kwargs={"k": 4})

In [52]:
from langchain.chains import RetrievalQA

system_instructions = """
You are TB-AwareGPT, an assistant for NGO volunteers.
Answer using ONLY the provided manual excerpts. Do NOT provide medical advice beyond the manual.
If the answer is not in the manual, say: "I cannot find that in the manual; please refer to the local PHC or call 1800-11-6666."
If the question is not related to TB, please say "Question not related to TB, please ask question related to TB"
When volunteers prefer Hindi, produce answers in Hindi. Keep replies short and actionable.
"""

qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever,
    return_source_documents=True
)

In [53]:
def answer_query(user_name, question, lang="en"):
    # get retrieved docs and answer
    result = qa_chain({"query": question})
    answer = result["result"]  # the LLM-produced answer
    # Localization
    if lang.lower().startswith("hi"):
        # ask LLM to translate/compose in Hindi, but still constrained
        prompt = f"Translate and adapt to conversational Hindi the following factual answer (do not add new info):\n\n{answer}"
        answer_hi = llm(prompt)
        return answer_hi.content
    return answer

In [54]:
def generate_quiz(section_text, n_questions=3, lang="en"):
    prompt = f"""
    Create {n_questions} short quiz items (question, three options, correct option, one-line explanation)
    based ONLY on the following text: {section_text}
    Output as JSON list: [{{"q":"", "opts":["","", ""], "a":0, "ex":"..."}}, ...]
    """
    resp = llm(prompt)
    # parse resp.content into structured quiz (use json.loads after ensuring valid JSON)
    return resp.content

def grade_answer(quiz_item, given_answer_index):
    correct = quiz_item["a"]
    if given_answer_index == correct:
        return True, "Correct: " + quiz_item["ex"]
    return False, f"Incorrect. Correct: option {correct+1}. {quiz_item['ex']}"

In [55]:
query = "What are the common symptoms of TB I should tell people about?"
result = qa_chain.invoke({"query": query})
print("Answer:", result["result"])

Answer: You should tell people that the common symptoms of TB include:

1. Cough
2. Fever
3. Chest pain
4. Blood in sputum
5. Night sweats
6. Loss of appetite
7. Weight loss
8. Weakness or fatigue

Additionally, you should also inform them that organ-specific extrapulmonary TB symptoms may include:

1. Swelling in the neck
2. Joint pain or backache
3. Headache or confusion
4. Blood in urine
5. Abdominal pain or constipation

It's essential to emphasize that TB is curable with timely diagnosis and adherence to medication.


In [56]:
query = "Who is Sachin Tendulkar"
result = qa_chain.invoke({"query": query})
print("Answer:", result["result"])

Answer: Sachin Ramesh Tendulkar is a former Indian international cricketer and a widely regarded as one of the greatest batsmen in the history of cricket. He was born on April 24, 1973, in Mumbai, India.

Tendulkar made his international debut in 1989 and went on to play for the Indian national team for over two decades, earning the nickname "The Master Blaster." He is the highest run-scorer in both Test and One Day International (ODI) cricket, and he holds numerous records in the sport.

Tendulkar's achievements include:

* Scoring 100 international centuries (both Test and ODI)
* Holding the record for the most runs scored in Test cricket (15,921)
* Holding the record for the most runs scored in ODI cricket (18,426)
* Being the first batsman to score a double century in a One Day International (200)
* Being the first batsman to score 50 centuries in Test cricket

Tendulkar was awarded the Bharat Ratna, India's highest civilian honor, in 2014 for his contributions to Indian cricket. H