In [1]:
import os
from dotenv import load_dotenv,find_dotenv

load_dotenv()
GROQ_API_KEY=os.environ['GROQ_API_KEY']

In [2]:
from langchain_groq import ChatGroq

model=ChatGroq(model='qwen/qwen3-32b')

In [3]:
from langchain_community.document_loaders import PyPDFLoader


In [4]:
%pwd

'/Users/garikapatilokesh/Documents/GENAI/langchain_project/experiment'

In [5]:
%cd ..


/Users/garikapatilokesh/Documents/GENAI/langchain_project


In [6]:
file_path='data/SDG.pdf'
data=PyPDFLoader(file_path).load()

In [7]:
data

[Document(metadata={'source': 'data/SDG.pdf', 'page': 0}, page_content='4th SDG Y outh Summer Camp – SDG Resource Document The 2030 Agenda for Sustainable Development’s 17 Sustainable Development Goals (SDGs)   Goal: This document enables 4th SDG Youth Summer Camp participants to i) get to know the 17 SDGs, ii) explore what areas each goal covers under its targets, iii) identify targets of most interest to participants, and iv) identify synergies between the SDGs and chosen target(s).    Goal 1. End poverty in all its forms everywhere  Target 1.1 By 2030, eradicate extreme poverty for all people everywhere, currently measured as people living on less than $1.25 a day  Target 1.2 By 2030, reduce at least by half the proportion of men, women and children of all ages living in poverty in all its dimensions according to national definitions  Target 1.3 Implement nationally appropriate social protection systems and measures for all, including floors, and by 2030 achieve substantial coverage

In [8]:
string_data=''

for d in data:
    string_data+=d.page_content

In [9]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

splitter=RecursiveCharacterTextSplitter(chunk_size=1000,chunk_overlap=50)

splitted_data=splitter.split_text(string_data)

In [10]:
len(splitted_data)

48

In [11]:
type(splitted_data[0])

str

In [12]:
question_llm=ChatGroq(
    model='openai/gpt-oss-120b',
    temperature=0.3
)

In [13]:
prompt_template = """
You are an expert at extracting the MOST IMPORTANT questions from any text.

Your task:
- Generate ONLY the Top 10 most important questions.
- No MCQs, no True/False, no short/long labels.
- Just write the 10 most important and meaningful questions.
- Questions must fully cover the key ideas in the text.
- Do NOT include answers.

--------------
{text}
--------------

Produce ONLY the Top 10 important questions.
QUESTIONS:
"""

In [14]:
from langchain_core.prompts import PromptTemplate

ques_template=PromptTemplate(template=prompt_template,input_variables=['text'])

In [15]:
refined_prompt = """
You are an expert exam-question generator.

We already have some draft questions:
----------------
{existing}
----------------

Here is additional context:
----------------
{text}
----------------

Refine the **existing questions** using the new context.
If the context adds nothing new, return the original questions.
Keep the same questions as the previous

QUESTIONS:
"""

In [16]:
refine_ques_prompt=PromptTemplate(
    template=refined_prompt,
    input_variables=['existing','text'])

In [17]:
from langchain.schema import Document

In [18]:
from langchain.chains.combine_documents.refine import RefineDocumentsChain
from langchain.chains.llm import LLMChain

question_chain = LLMChain(
    llm=question_llm,
    prompt=ques_template
)

refine_chain = LLMChain(
    llm=question_llm,
    prompt=refine_ques_prompt
)

ques_chain = RefineDocumentsChain(
    initial_llm_chain=question_chain,
    refine_llm_chain=refine_chain,
    document_variable_name="text",
    initial_response_name="output_text"
)

docs = [Document(page_content=string_data)]

ques = ques_chain.invoke({"input_documents": docs})

print(ques["output_text"])

  question_chain = LLMChain(


1. What are the 17 Sustainable Development Goals and the overarching objectives they seek to achieve by 2030?  
2. How do the specific targets under each goal work together to eradicate poverty, hunger, and inequality worldwide?  
3. In what ways do the goals promote universal health, quality education, gender equality, and the empowerment of women and girls?  
4. How are sustainable water and sanitation, affordable clean energy, and climate‑action integrated across multiple goals?  
5. What strategies are outlined for protecting and restoring marine and terrestrial ecosystems, biodiversity, and combating desertification?  
6. How do the goals address the creation of inclusive, safe, resilient, and sustainable cities and human settlements?  
7. What mechanisms are proposed for financing, resource mobilization, technology transfer, and capacity‑building to implement the SDGs?  
8. How will progress be monitored, data disaggregated, and accountability ensured for each target?  
9. What r

In [19]:
from langchain_community.embeddings import HuggingFaceBgeEmbeddings
from langchain.vectorstores import FAISS

In [20]:
hugging=HuggingFaceBgeEmbeddings(model_name='BAAI/bge-small-en-v1.5')


  from .autonotebook import tqdm as notebook_tqdm


In [21]:
vectors=FAISS.from_documents(docs,hugging)

In [22]:
llm_answer=ChatGroq(model='openai/gpt-oss-120b',temperature=0.3)

In [23]:
from langchain.chains import RetrievalQA
answer_chain=RetrievalQA.from_chain_type(llm=llm_answer,
                                         chain_type='stuff',
                                         retriever=vectors.as_retriever())

In [24]:
res = answer_chain.run({"query": ques["output_text"]})

  res = answer_chain.run({"query": ques["output_text"]})


In [31]:
for quesn in ques:
    print("Question: ",ques)
    answer=answer_chain.run(ques['output_text'])
    print("Answer: ",answer)
    print("------------------------------------------------\\n\\n")


Answer:  **1. The 17 Sustainable Development Goals (SDGs) and their 2030‑wide overarching ambition**  

| # | Goal (short title) | Core ambition for 2030 (as expressed in the 2030 Agenda) |
|---|--------------------|--------------------------------------------------------|
| 1 | **No Poverty** | End poverty in all its forms everywhere. |
| 2 | **Zero Hunger** | End hunger, achieve food‑security and improved nutrition, and promote sustainable agriculture. |
| 3 | **Good Health & Well‑being** | Ensure healthy lives and promote well‑being for all at all ages. |
| 4 | **Quality Education** | Ensure inclusive and equitable quality education and promote lifelong learning opportunities for all. |
| 5 | **Gender Equality** | Achieve gender equality and empower all women and girls. |
| 6 | **Clean Water & Sanitation** | Ensure availability and sustainable management of water and sanitation for all. |
| 7 | **Affordable & Clean Energy** | Ensure access to affordable, reliable, sustainable and mo