In [4]:
import os
from dotenv import load_dotenv


In [None]:
os.environ["GOOGLE_API_KEY"]=""

In [6]:
from langchain.document_loaders import  PyPDFLoader


In [8]:
file_path="data/SDG.pdf"
loader=PyPDFLoader(file_path)
data=loader.load()

In [10]:
len(data)

24

In [11]:
question_gen=""
for page in data:
    question_gen+=page.page_content

In [12]:
question_gen

'IN THE YEAR 2015, LEADERS FROM 193 COUNTRIES OF THE WORLD \nCAME TOGETHER TO FACE THE FUTURE.\nAnd what they saw was daunting. Famines. Drought. Wars. Plagues. Poverty. \nNot just in some faraway place, but in their own cities and towns and villages.\nThey knew things didn’t have to be this way. They knew we had enough \nfood to feed the world, but that it wasn’t getting shared. They knew there \nwere medicines for HIV and other diseases, but they cost a lot. They knew \nthat earthquakes and floods were inevitable, but that the high death \ntolls were not. \nThey also knew that billions of people worldwide shared their hope for a \nbetter future.\nSo leaders from these countries created a plan called the Sustainable \nDevelopment Goals (SDGs). This set of 17 goals imagines a future just 15 years \noff that would be rid of poverty and hunger, and safe from the worst effects of \nclimate change. It’s an ambitious plan. \nBut there’s ample evidence that we can succeed. In the past 15 yea

In [None]:
pip install tiktoken

In [21]:
from langchain.text_splitter import TokenTextSplitter
splitter_ques_gen=TokenTextSplitter(
    encoding_name="cl100k_base",
    chunk_size=10000,
    chunk_overlap=200
)

In [22]:
chunk_ques_gen=splitter_ques_gen.split_text(question_gen)

In [23]:
len(chunk_ques_gen)

1

In [24]:
#convert list to document 
from langchain.docstore.document import Document
docs_ques_gen=[Document(page_content=chunk) for chunk in chunk_ques_gen]

In [26]:
type(docs_ques_gen[0])

langchain_core.documents.base.Document

In [28]:
from langchain.text_splitter import TokenTextSplitter
splitter_ans_gen=TokenTextSplitter(
    encoding_name="cl100k_base",
    chunk_size=1000,
    chunk_overlap=100
)

In [30]:
docs_ans_gen=splitter_ans_gen.split_documents(docs_ques_gen)

In [31]:
docs_ans_gen

[Document(metadata={}, page_content='IN THE YEAR 2015, LEADERS FROM 193 COUNTRIES OF THE WORLD \nCAME TOGETHER TO FACE THE FUTURE.\nAnd what they saw was daunting. Famines. Drought. Wars. Plagues. Poverty. \nNot just in some faraway place, but in their own cities and towns and villages.\nThey knew things didn’t have to be this way. They knew we had enough \nfood to feed the world, but that it wasn’t getting shared. They knew there \nwere medicines for HIV and other diseases, but they cost a lot. They knew \nthat earthquakes and floods were inevitable, but that the high death \ntolls were not. \nThey also knew that billions of people worldwide shared their hope for a \nbetter future.\nSo leaders from these countries created a plan called the Sustainable \nDevelopment Goals (SDGs). This set of 17 goals imagines a future just 15 years \noff that would be rid of poverty and hunger, and safe from the worst effects of \nclimate change. It’s an ambitious plan. \nBut there’s ample evidence tha

In [32]:
from langchain_google_genai import ChatGoogleGenerativeAI

In [33]:
llm_ques_gen_pipelinr=ChatGoogleGenerativeAI(
    model="gemini-2.5-flash-lite",
    temperature=0.5,
)

In [34]:
prompt_template = """
You are an expert at creating questions based on coding materials and documentation.
Your goal is to prepare a coder or programmer for their exam and coding tests.
You do this by asking questions about the text below:

------------
{text}
------------

Create questions that will prepare the coders or programmers for their tests.
Make sure not to lose any important information.

QUESTIONS:
"""

In [37]:
from langchain.prompts import PromptTemplate
prompt_ques=PromptTemplate.from_template(prompt_template)

In [38]:
#refining prompt so when model generates irrelevent questions it can be fixed via refined prompt
refine_template = ("""
You are an expert at creating practice questions based on coding material and documentation.
Your goal is to help a coder or programmer prepare for a coding test.
We have received some practice questions to a certain extent: {existing_answer}.
We have the option to refine the existing questions or add new ones.
(only if necessary) with some more context below.
------------
{text}
------------

Given the new context, refine the original questions in English.
If the context is not helpful, please provide the original questions.
QUESTIONS:
"""
)


In [39]:
refine_prompt_ques = PromptTemplate(
    input_variables=["existing_answer", "text"],
    template=refine_template,
)

In [44]:
from langchain.chains.summarize import load_summarize_chain
chain_ques_gen = load_summarize_chain(
    llm_ques_gen_pipelinr,
    chain_type="refine",
    question_prompt=prompt_ques,
    refine_prompt=refine_prompt_ques,
    verbose=True,
)

In [45]:
ques=chain_ques_gen.run(docs_ques_gen)
print(ques)



[1m> Entering new RefineDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3m
You are an expert at creating questions based on coding materials and documentation.
Your goal is to prepare a coder or programmer for their exam and coding tests.
You do this by asking questions about the text below:

------------
IN THE YEAR 2015, LEADERS FROM 193 COUNTRIES OF THE WORLD 
CAME TOGETHER TO FACE THE FUTURE.
And what they saw was daunting. Famines. Drought. Wars. Plagues. Poverty. 
Not just in some faraway place, but in their own cities and towns and villages.
They knew things didn’t have to be this way. They knew we had enough 
food to feed the world, but that it wasn’t getting shared. They knew there 
were medicines for HIV and other diseases, but they cost a lot. They knew 
that earthquakes and floods were inevitable, but that the high death 
tolls were not. 
They also knew that billions of people worldwide shared their hope for a 
b

In [60]:
from langchain_community.embeddings import HuggingFaceEmbeddings

embeddings = HuggingFaceEmbeddings(model_name="BAAI/bge-small-en-v1.5")

  from .autonotebook import tqdm as notebook_tqdm
To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development
Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


In [49]:
from langchain.vectorstores import FAISS


In [61]:
#intilazing faiss,faiss is faster than chroma
vectorstore = FAISS.from_documents(docs_ans_gen, embeddings)

In [57]:
ques

'Here are some questions designed to prepare coders and programmers for exams and coding tests, based on the provided text:\n\n**General Understanding & Goal Identification:**\n\n1.  What year did leaders from 193 countries come together to create the Sustainable Development Goals (SDGs)?\n2.  What is the primary overarching aim of the Sustainable Development Goals (SDGs) as stated in the text?\n3.  What is the target year for achieving the Sustainable Development Goals?\n4.  Which organization is highlighted as a leading entity working to fulfill the SDGs, and in how many countries is it present?\n5.  The text mentions several global challenges that leaders faced. List at least five of these challenges.\n\n**Specific Goal-Oriented Questions:**\n\n6.  **Goal: End Extreme Poverty:**\n    *   What was the previous target for reducing extreme poverty, and by what year was it set to be achieved?\n    *   What percentage of the world\'s population is still living in extreme poverty, and wha

In [59]:
llm_ans_gen=ChatGoogleGenerativeAI(
    model="gemini-2.5-flash-lite",
    temperature=0.5,
)

In [58]:
ques_list=ques.split("\n")
ques_list

['Here are some questions designed to prepare coders and programmers for exams and coding tests, based on the provided text:',
 '',
 '**General Understanding & Goal Identification:**',
 '',
 '1.  What year did leaders from 193 countries come together to create the Sustainable Development Goals (SDGs)?',
 '2.  What is the primary overarching aim of the Sustainable Development Goals (SDGs) as stated in the text?',
 '3.  What is the target year for achieving the Sustainable Development Goals?',
 '4.  Which organization is highlighted as a leading entity working to fulfill the SDGs, and in how many countries is it present?',
 '5.  The text mentions several global challenges that leaders faced. List at least five of these challenges.',
 '',
 '**Specific Goal-Oriented Questions:**',
 '',
 '6.  **Goal: End Extreme Poverty:**',
 '    *   What was the previous target for reducing extreme poverty, and by what year was it set to be achieved?',
 "    *   What percentage of the world's population i

In [70]:
from langchain.chains import RetrievalQA
qa_chain = RetrievalQA.from_chain_type(
    llm=llm_ans_gen,
    chain_type="stuff",
    retriever=vectorstore.as_retriever(),
    return_source_documents=True
)


In [71]:
import time

# Ensure your list has no empty/whitespace questions
ques_list = [q.strip() for q in ques_list if q.strip()]

# Process only the first 10 questions
for idx, question in enumerate(ques_list[:10], start=1):
    print(f"\n[{idx}] Question:", question)

    try:
        # invoke with query input
        response = qa_chain.invoke({"query": question})

        # Extract only the answer
        answer = response["result"]

        print("Answer:", answer)

        # Save to file
        with open("data/answers.txt", "a", encoding="utf-8") as f:
            f.write("Question: " + question + "\n")
            f.write("Answer: " + answer + "\n\n")
            f.write("--------------------------------------------------\n\n")

    except Exception as e:
        print(f"⚠️ Error processing question: {question}")
        print("Error details:", e)
        # Optionally log errors instead of skipping
        with open("data/answers.txt", "a", encoding="utf-8") as f:
            f.write("Question: " + question + "\n")
            f.write("Error: " + str(e) + "\n\n")
            f.write("--------------------------------------------------\n\n")

    # Delay to avoid hitting rate limits
    time.sleep(5)  # adjust (e.g. 4–6 sec) as per quota



[1] Question: Here are some questions designed to prepare coders and programmers for exams and coding tests, based on the provided text:
Answer: I understand you're looking for coding and programming exam preparation questions based on the provided text. However, the text you've provided is about the **Sustainable Development Goals (SDGs)** and doesn't contain any technical information, code snippets, or programming concepts that would be relevant for creating such questions.

To create coding and programming exam questions, I would need text that includes:

*   **Algorithmic descriptions:** Explanations of how certain algorithms work.
*   **Data structures:** Information about different data structures and their use cases.
*   **Programming language syntax or concepts:** Examples of code, explanations of language features, or problem-solving scenarios that require programming knowledge.
*   **Problem-solving scenarios:** Descriptions of technical challenges that can be solved with co

KeyboardInterrupt: 