In [6]:
import os
from dotenv import load_dotenv

In [7]:
# Pull settings from the local .env file into the process environment, then
# read the Gemini key back out (the result is None when the variable is absent).
load_dotenv()
GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY")

In [8]:
# Fail fast with a readable message: assigning None into os.environ raises an
# opaque "str expected, not NoneType" TypeError, and an empty key would only
# surface later as an authentication failure.
if not GOOGLE_API_KEY:
    raise RuntimeError(
        "GOOGLE_API_KEY is not set; define it in your .env file or export it "
        "in the environment before running this notebook."
    )
os.environ["GOOGLE_API_KEY"] = GOOGLE_API_KEY

In [9]:
from langchain.document_loaders import PyPDFLoader


In [10]:
# Show the kernel's current working directory (IPython line magic).
%pwd

'/Users/skt/Documents/LLM Projects/Interview-Question-Creator/research'

In [11]:
# Step up to the project root so relative paths such as "data/SDG.pdf" resolve.
# The notebook lives in research/, so only move while the kernel is still
# inside it; this keeps the cell safe to re-run (an unconditional `cd ..`
# climbs one more directory on every execution).
if os.path.basename(os.getcwd()) == "research":
    os.chdir("..")
os.getcwd()

/Users/skt/Documents/LLM Projects/Interview-Question-Creator


In [12]:
# Load the source PDF; the path is relative to the current working directory.
file_path = "data/SDG.pdf"
loader = PyPDFLoader(file_path)
# `data` is the sequence of loaded documents; each item exposes `.page_content`
# (used below) - presumably one entry per PDF page, TODO confirm.
data = loader.load()

In [13]:
# Number of documents returned by the loader (24 in the recorded run).
len(data)


24

In [14]:
# Concatenate the text of every loaded document, in order and with no
# separator, into one string used as the input for question generation.
question_gen = "".join(doc.page_content for doc in data)

In [15]:
# Dump the full concatenated text for a visual check of the extraction.
print(question_gen)


IN THE YEAR 2015, LEADERS FROM 193 COUNTRIES OF THE WORLD 
CAME TOGETHER TO FACE THE FUTURE.
And what they saw was daunting. Famines. Drought. Wars. Plagues. Poverty. 
Not just in some faraway place, but in their own cities and towns and villages.
They knew things didn’t have to be this way. They knew we had enough  
food to feed the world, but that it wasn’t getting shared. They knew there 
were medicines for HIV and other diseases, but they cost a lot. They knew  
that earthquakes and floods were inevitable, but that the high death  
tolls were not. 
They also knew that billions of people worldwide shared their hope for a 
better future.
So leaders from these countries created a plan called the Sustainable 
Development Goals (SDGs). This set of 17 goals imagines a future just 15 years 
off that would be rid of poverty and hunger, and safe from the worst effects of 
climate change. It’s an ambitious plan. 
But there’s ample evidence that we can succeed. In the past 15 years, the 
inte

In [17]:
from langchain.text_splitter import TokenTextSplitter,RecursiveCharacterTextSplitter


In [24]:
# Coarse splitter for question generation: break only on newlines, with large
# chunks (size 20,000, overlap 200) so the question prompt sees as much of the
# document as possible in one piece.
splitter_ques_gen = RecursiveCharacterTextSplitter(
    separators=["\n"],
    chunk_overlap=200,
    chunk_size=20_000,
)

In [25]:
# Split the concatenated text into a list of string chunks.
chunk_question_gen=splitter_ques_gen.split_text(question_gen)

In [26]:
# Number of chunks produced for question generation (1 in the recorded run).
len(chunk_question_gen)

1

In [27]:
from langchain.docstore.document import Document

In [28]:
# Wrap every text chunk in a Document so it can be fed to the LangChain chains.
document_ques_gen = [
    Document(page_content=chunk_text)
    for chunk_text in chunk_question_gen
]


In [29]:
# Display the wrapped documents to verify their content.
document_ques_gen


[Document(page_content='IN THE YEAR 2015, LEADERS FROM 193 COUNTRIES OF THE WORLD \nCAME TOGETHER TO FACE THE FUTURE.\nAnd what they saw was daunting. Famines. Drought. Wars. Plagues. Poverty. \nNot just in some faraway place, but in their own cities and towns and villages.\nThey knew things didn’t have to be this way. They knew we had enough  \nfood to feed the world, but that it wasn’t getting shared. They knew there \nwere medicines for HIV and other diseases, but they cost a lot. They knew  \nthat earthquakes and floods were inevitable, but that the high death  \ntolls were not. \nThey also knew that billions of people worldwide shared their hope for a \nbetter future.\nSo leaders from these countries created a plan called the Sustainable \nDevelopment Goals (SDGs). This set of 17 goals imagines a future just 15 years \noff that would be rid of poverty and hunger, and safe from the worst effects of \nclimate change. It’s an ambitious plan. \nBut there’s ample evidence that we can s

In [30]:
# Finer splitter for answer retrieval: newline-only breaks with smaller chunks
# (size 5,000, overlap 100) so the vector store can return focused passages.
splitter_ans_gen = RecursiveCharacterTextSplitter(
    separators=["\n"],
    chunk_overlap=100,
    chunk_size=5_000,
)

In [31]:
# Re-split the question-generation documents into the smaller answer chunks.
document_answer_gen = splitter_ans_gen.split_documents(document_ques_gen)

In [32]:
# Number of answer-retrieval chunks (4 in the recorded run).
len(document_answer_gen)


4

In [33]:
from langchain_google_genai import ChatGoogleGenerativeAI


  from .autonotebook import tqdm as notebook_tqdm


In [34]:
# Chat model used to generate the questions. No API key is passed here;
# presumably it is taken from the GOOGLE_API_KEY environment variable set
# earlier - TODO confirm.
llm_ques_gen_pipeline  = ChatGoogleGenerativeAI(model="gemini-1.5-pro")

I0000 00:00:1722066320.423608   10650 config.cc:230] gRPC experiments enabled: call_status_override_on_cancellation, event_engine_dns, event_engine_listener, http2_stats_fix, monitoring_experiment, pick_first_new, trace_record_callops, work_serializer_clears_time_cache
I0000 00:00:1722066320.432320   10650 check_gcp_environment_no_op.cc:29] ALTS: Platforms other than Linux and Windows are not supported
I0000 00:00:1722066320.433470   10650 check_gcp_environment_no_op.cc:29] ALTS: Platforms other than Linux and Windows are not supported


In [43]:
# Initial prompt for question generation. `{text}` is the only placeholder and
# is filled with the document chunk the questions should be based on.
prompt_template = """
You are an expert at creating questions based on given materials and documentation.
Your goal is to prepare a sustainable energy engineer for their exam.
You do this by asking questions about the text below:

------------
{text}
------------

Create questions that will prepare the engineer for their tests.
Make sure not to lose any important information.

QUESTIONS:
"""

In [36]:
from langchain.prompts import PromptTemplate


In [45]:
# Prompt object for the first pass of the chain; it exposes the single
# `text` variable used by prompt_template.
PROMPT_QUESTIONS = PromptTemplate(
    input_variables=["text"],
    template=prompt_template,
)


In [44]:
# Follow-up prompt for the "refine" chain. `{existing_answer}` carries the
# questions produced so far and `{text}` the next chunk of context.
refine_template = ("""
You are an expert at creating questions based on given materials and documentation.
Your goal is to prepare a sustainable energy engineer for their exam.
We have received some practice questions to a certain extent: {existing_answer}.
We have the option to refine the existing questions or add new ones
(only if necessary) with some more context below.
------------
{text}
------------

Given the new context, refine the original questions in English.
If the context is not helpful, please provide the original questions.
QUESTIONS:
"""
)

In [46]:
# Prompt object for the refinement passes; it takes the questions generated so
# far (`existing_answer`) and the next chunk of context (`text`).
REFINE_PROMPT_QUESTIONS = PromptTemplate(
    template=refine_template,
    input_variables=["existing_answer", "text"],
)

In [47]:
from langchain.chains.summarize import load_summarize_chain


In [48]:
# Build the question-generation chain: a "refine" chain that applies
# PROMPT_QUESTIONS first and REFINE_PROMPT_QUESTIONS for the refinement passes.
ques_gen_chain = load_summarize_chain(
    llm=llm_ques_gen_pipeline,
    chain_type="refine",
    question_prompt=PROMPT_QUESTIONS,
    refine_prompt=REFINE_PROMPT_QUESTIONS,
    verbose=False,
)

In [49]:
# Run the chain over the question-generation documents; `ques` is the generated
# text (Markdown with numbered questions in the recorded run).
ques = ques_gen_chain.run(document_ques_gen)

print(ques)

## Sustainable Development Goals (SDGs): Exam Prep Questions

**Understanding the SDGs:**

1. **What prompted the creation of the Sustainable Development Goals (SDGs)?** 
2. **What are the SDGs, and what is their intended timeframe?**
3. **How did the international community's success in reducing extreme poverty demonstrate the feasibility of achieving ambitious goals?**
4. **What is the role of the United Nations Development Programme (UNDP) in relation to the SDGs?**

**Goal 1: No Poverty**

5. **Despite halving extreme poverty by 2015, what challenge remains in achieving Goal 1 by 2030?**
6. **How does the scale of extreme poverty still present in 2015 compare to the population of a major region?**

**Goal 2: Zero Hunger**

7. **What progress has been made in addressing hunger in the past two decades?**
8. **What specific actions are crucial for achieving Goal 2, particularly in relation to agricultural practices and support for certain groups?**
9. **What vision does Goal 2 paint f

In [50]:
# Break the generated text into individual lines (headings, blanks and
# questions alike) for filtering below.
ques_list = ques.split("\n")


In [51]:
# Inspect the raw lines before filtering.
ques_list


['## Sustainable Development Goals (SDGs): Exam Prep Questions',
 '',
 '**Understanding the SDGs:**',
 '',
 '1. **What prompted the creation of the Sustainable Development Goals (SDGs)?** ',
 '2. **What are the SDGs, and what is their intended timeframe?**',
 "3. **How did the international community's success in reducing extreme poverty demonstrate the feasibility of achieving ambitious goals?**",
 '4. **What is the role of the United Nations Development Programme (UNDP) in relation to the SDGs?**',
 '',
 '**Goal 1: No Poverty**',
 '',
 '5. **Despite halving extreme poverty by 2015, what challenge remains in achieving Goal 1 by 2030?**',
 '6. **How does the scale of extreme poverty still present in 2015 compare to the population of a major region?**',
 '',
 '**Goal 2: Zero Hunger**',
 '',
 '7. **What progress has been made in addressing hunger in the past two decades?**',
 '8. **What specific actions are crucial for achieving Goal 2, particularly in relation to agricultural practices 

In [62]:
import re

# A question line looks like "<number>. <text>". Match any run of digits so
# that questions numbered 10 and above are kept; checking only the prefixes
# "1. " through "9. " silently drops every question after the ninth.
_NUMBERED_LINE = re.compile(r"^\d+\. ")

# Keep only the numbered question lines, drop the Markdown bold markers and
# trim stray whitespace left at the ends of a line.
filtered_data = [
    line.replace("**", "").strip()
    for line in ques_list
    if _NUMBERED_LINE.match(line)
]
filtered_data

['1. **What prompted the creation of the Sustainable Development Goals (SDGs)?** ',
 '2. **What are the SDGs, and what is their intended timeframe?**',
 "3. **How did the international community's success in reducing extreme poverty demonstrate the feasibility of achieving ambitious goals?**",
 '4. **What is the role of the United Nations Development Programme (UNDP) in relation to the SDGs?**',
 '5. **Despite halving extreme poverty by 2015, what challenge remains in achieving Goal 1 by 2030?**',
 '6. **How does the scale of extreme poverty still present in 2015 compare to the population of a major region?**',
 '7. **What progress has been made in addressing hunger in the past two decades?**',
 '8. **What specific actions are crucial for achieving Goal 2, particularly in relation to agricultural practices and support for certain groups?**',
 '9. **What vision does Goal 2 paint for the future of global food security?**']

In [52]:
# Embedding model used to vectorise the answer chunks for the FAISS index below.
from langchain_google_genai import GoogleGenerativeAIEmbeddings
gemini_embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")

I0000 00:00:1722066929.725057   10650 check_gcp_environment_no_op.cc:29] ALTS: Platforms other than Linux and Windows are not supported


In [53]:
# Build a FAISS vector store from the answer-sized chunks and the Gemini
# embeddings.
from langchain.vectorstores import FAISS
vector_store = FAISS.from_documents(document_answer_gen, gemini_embeddings)


In [54]:
# Chat model used to answer the generated questions (same model name as the
# question-generation LLM, but a separate instance).
llm_ans_gen_pipeline  = ChatGoogleGenerativeAI(model="gemini-1.5-pro")

I0000 00:00:1722067025.946139   10650 check_gcp_environment_no_op.cc:29] ALTS: Platforms other than Linux and Windows are not supported
I0000 00:00:1722067025.947474   10650 check_gcp_environment_no_op.cc:29] ALTS: Platforms other than Linux and Windows are not supported


In [55]:
from langchain.chains import RetrievalQA


In [56]:
# Retrieval-backed QA chain built on the FAISS store's retriever, the answer
# LLM and the "stuff" chain type.
answer_generation_chain = RetrievalQA.from_chain_type(
    llm=llm_ans_gen_pipeline,
    chain_type="stuff",
    retriever=vector_store.as_retriever(),
)

In [63]:
# Answer each question and save to a file
for question in filtered_data:
    print("Question: ", question)
    answer = answer_generation_chain.run(question)
    print("Answer: ", answer)
    print("--------------------------------------------------\\n\\n")
    # Save answer to file
    with open("answers.txt", "a") as f:
        f.write("Question: " + question + "\\n")
        f.write("Answer: " + answer + "\\n")
        f.write("--------------------------------------------------\\n\\n")

Question:  1. **What prompted the creation of the Sustainable Development Goals (SDGs)?** 
Answer:  Leaders from 193 countries recognized the daunting future of famines, drought, wars, plagues, poverty, and more. They knew a better future was possible and that global cooperation was necessary to achieve it, leading to the creation of the Sustainable Development Goals. 

--------------------------------------------------\n\n
Question:  2. **What are the SDGs, and what is their intended timeframe?**
Answer:  The SDGs, or Sustainable Development Goals, are a set of 17 goals agreed upon by 193 countries. They are intended to create a better future for all by the year 2030, addressing issues like poverty, hunger, health, education, gender equality, climate change, and more. 

--------------------------------------------------\n\n
Question:  3. **How did the international community's success in reducing extreme poverty demonstrate the feasibility of achieving ambitious goals?**
Answer:  The 

Retrying langchain_google_genai.chat_models._chat_with_retry.<locals>._chat_with_retry in 2.0 seconds as it raised ResourceExhausted: 429 Resource has been exhausted (e.g. check quota)..


ResourceExhausted: 429 Resource has been exhausted (e.g. check quota).