In [2]:
import os
from dotenv import load_dotenv

In [3]:
load_dotenv()
GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")

In [4]:
os.environ["GOOGLE_API_KEY"] = GOOGLE_API_KEY

In [5]:
from langchain.document_loaders import PyPDFLoader

In [6]:
%pwd

'/workspaces/ai/research'

In [7]:
%cd ..

/workspaces/ai


In [8]:
%pwd

'/workspaces/ai'

In [9]:
file_path = "data/SDG.pdf"
loader = PyPDFLoader(file_path)
data = loader.load()

In [10]:
data

[Document(metadata={'source': 'data/SDG.pdf', 'page': 0}, page_content=''),
 Document(metadata={'source': 'data/SDG.pdf', 'page': 1}, page_content=''),
 Document(metadata={'source': 'data/SDG.pdf', 'page': 2}, page_content='IN THE YEAR 2015, LEADERS FROM 193 COUNTRIES OF THE WORLD \nCAME TOGETHER TO FACE THE FUTURE.\nAnd what they saw was daunting. Famines. Drought. Wars. Plagues. Poverty. \nNot just in some faraway place, but in their own cities and towns and villages.\nThey knew things didn’t have to be this way. They knew we had enough  \nfood to feed the world, but that it wasn’t getting shared. They knew there \nwere medicines for HIV and other diseases, but they cost a lot. They knew  \nthat earthquakes and floods were inevitable, but that the high death  \ntolls were not. \nThey also knew that billions of people worldwide shared their hope for a \nbetter future.\nSo leaders from these countries created a plan called the Sustainable \nDevelopment Goals (SDGs). This set of 17 goal

In [11]:
len(data)

24

In [12]:
question_gen = ""

for page in data:
    question_gen += page.page_content

In [13]:
question_gen

'IN THE YEAR 2015, LEADERS FROM 193 COUNTRIES OF THE WORLD \nCAME TOGETHER TO FACE THE FUTURE.\nAnd what they saw was daunting. Famines. Drought. Wars. Plagues. Poverty. \nNot just in some faraway place, but in their own cities and towns and villages.\nThey knew things didn’t have to be this way. They knew we had enough  \nfood to feed the world, but that it wasn’t getting shared. They knew there \nwere medicines for HIV and other diseases, but they cost a lot. They knew  \nthat earthquakes and floods were inevitable, but that the high death  \ntolls were not. \nThey also knew that billions of people worldwide shared their hope for a \nbetter future.\nSo leaders from these countries created a plan called the Sustainable \nDevelopment Goals (SDGs). This set of 17 goals imagines a future just 15 years \noff that would be rid of poverty and hunger, and safe from the worst effects of \nclimate change. It’s an ambitious plan. \nBut there’s ample evidence that we can succeed. In the past 15 

In [14]:
from langchain.text_splitter import TokenTextSplitter

In [15]:
splitter_ques_gen = TokenTextSplitter(
    model_name= "gpt-3.5-turbo",
    chunk_size= 10000,
    chunk_overlap = 200
)

In [16]:
chunk_ques_gen = splitter_ques_gen.split_text(question_gen)

In [17]:
len(chunk_ques_gen)

1

In [18]:
chunk_ques_gen

['IN THE YEAR 2015, LEADERS FROM 193 COUNTRIES OF THE WORLD \nCAME TOGETHER TO FACE THE FUTURE.\nAnd what they saw was daunting. Famines. Drought. Wars. Plagues. Poverty. \nNot just in some faraway place, but in their own cities and towns and villages.\nThey knew things didn’t have to be this way. They knew we had enough  \nfood to feed the world, but that it wasn’t getting shared. They knew there \nwere medicines for HIV and other diseases, but they cost a lot. They knew  \nthat earthquakes and floods were inevitable, but that the high death  \ntolls were not. \nThey also knew that billions of people worldwide shared their hope for a \nbetter future.\nSo leaders from these countries created a plan called the Sustainable \nDevelopment Goals (SDGs). This set of 17 goals imagines a future just 15 years \noff that would be rid of poverty and hunger, and safe from the worst effects of \nclimate change. It’s an ambitious plan. \nBut there’s ample evidence that we can succeed. In the past 15

In [19]:
from langchain.docstore.document import Document

In [20]:
document_ques_gen = [Document(page_content = t) for t in chunk_ques_gen]

In [21]:
document_ques_gen

[Document(metadata={}, page_content='IN THE YEAR 2015, LEADERS FROM 193 COUNTRIES OF THE WORLD \nCAME TOGETHER TO FACE THE FUTURE.\nAnd what they saw was daunting. Famines. Drought. Wars. Plagues. Poverty. \nNot just in some faraway place, but in their own cities and towns and villages.\nThey knew things didn’t have to be this way. They knew we had enough  \nfood to feed the world, but that it wasn’t getting shared. They knew there \nwere medicines for HIV and other diseases, but they cost a lot. They knew  \nthat earthquakes and floods were inevitable, but that the high death  \ntolls were not. \nThey also knew that billions of people worldwide shared their hope for a \nbetter future.\nSo leaders from these countries created a plan called the Sustainable \nDevelopment Goals (SDGs). This set of 17 goals imagines a future just 15 years \noff that would be rid of poverty and hunger, and safe from the worst effects of \nclimate change. It’s an ambitious plan. \nBut there’s ample evidence 

In [22]:
splitter_ans_gen = TokenTextSplitter(
    model_name = 'gpt-3.5-turbo',
    chunk_size = 1000,
    chunk_overlap = 100
)

In [23]:
document_answer_gen = splitter_ans_gen.split_documents(
    document_ques_gen
)

In [24]:
document_answer_gen

[Document(metadata={}, page_content='IN THE YEAR 2015, LEADERS FROM 193 COUNTRIES OF THE WORLD \nCAME TOGETHER TO FACE THE FUTURE.\nAnd what they saw was daunting. Famines. Drought. Wars. Plagues. Poverty. \nNot just in some faraway place, but in their own cities and towns and villages.\nThey knew things didn’t have to be this way. They knew we had enough  \nfood to feed the world, but that it wasn’t getting shared. They knew there \nwere medicines for HIV and other diseases, but they cost a lot. They knew  \nthat earthquakes and floods were inevitable, but that the high death  \ntolls were not. \nThey also knew that billions of people worldwide shared their hope for a \nbetter future.\nSo leaders from these countries created a plan called the Sustainable \nDevelopment Goals (SDGs). This set of 17 goals imagines a future just 15 years \noff that would be rid of poverty and hunger, and safe from the worst effects of \nclimate change. It’s an ambitious plan. \nBut there’s ample evidence 

In [25]:
len(document_answer_gen)

4

In [26]:
from langchain_google_genai import ChatGoogleGenerativeAI

  from .autonotebook import tqdm as notebook_tqdm


In [27]:
llm_ques_gen_pipeline = ChatGoogleGenerativeAI(
    model = 'gemini-1.5-flash',
    temperature = 0.3
)

In [28]:
prompt_template = """
You are an expert at creating questions based on coding materials and documentation.
Your goal is to prepare a coder or programmer for their exam and coding tests.
You do this by asking questions about the text below:

------------
{text}
------------

Create questions that will prepare the coders or programmers for their tests.
Make sure not to lose any important information.

QUESTIONS:
"""

In [29]:
from langchain.prompts import PromptTemplate

In [30]:
PROMPT_QUESTIONS = PromptTemplate(template=prompt_template, input_variables=['text'])

In [31]:
refine_template = ("""
You are an expert at creating practice questions based on coding material and documentation.
Your goal is to help a coder or programmer prepare for a coding test.
We have received some practice questions to a certain extent: {existing_answer}.
We have the option to refine the existing questions or add new ones.
(only if necessary) with some more context below.
------------
{text}
------------

Given the new context, refine the original questions in English.
If the context is not helpful, please provide the original questions.
QUESTIONS:
"""
)

In [32]:
REFINE_PROMPT_QUESTIONS = PromptTemplate(
    input_variables=["existing_answer", "text"],
    template=refine_template,
)

In [33]:
from langchain.chains.summarize import load_summarize_chain

In [34]:
ques_gen_chain = load_summarize_chain(llm = llm_ques_gen_pipeline, 
                                          chain_type = "refine", 
                                          verbose = True, 
                                          question_prompt=PROMPT_QUESTIONS, 
                                          refine_prompt=REFINE_PROMPT_QUESTIONS)

In [35]:
ques = ques_gen_chain.run(document_ques_gen)

print(ques)

  ques = ques_gen_chain.run(document_ques_gen)




[1m> Entering new RefineDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3m
You are an expert at creating questions based on coding materials and documentation.
Your goal is to prepare a coder or programmer for their exam and coding tests.
You do this by asking questions about the text below:

------------
IN THE YEAR 2015, LEADERS FROM 193 COUNTRIES OF THE WORLD 
CAME TOGETHER TO FACE THE FUTURE.
And what they saw was daunting. Famines. Drought. Wars. Plagues. Poverty. 
Not just in some faraway place, but in their own cities and towns and villages.
They knew things didn’t have to be this way. They knew we had enough  
food to feed the world, but that it wasn’t getting shared. They knew there 
were medicines for HIV and other diseases, but they cost a lot. They knew  
that earthquakes and floods were inevitable, but that the high death  
tolls were not. 
They also knew that billions of people worldwide shared their hope for a

In [36]:
from langchain_google_genai import GoogleGenerativeAIEmbeddings

In [37]:
embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")

In [38]:
from langchain.vectorstores import FAISS
from IPython.display import Markdown

In [39]:
vector_store = FAISS.from_documents(document_answer_gen, embeddings)

In [40]:
llm_answer_gen = ChatGoogleGenerativeAI(model = 'gemini-1.5-flash',temperature = 0.1)

In [41]:
ques

'## Coding Test Preparation Questions:\n\nThese questions are designed to test your understanding of the provided text, focusing on information extraction, analysis, and potential coding applications.\n\n**1. Data Structures:**\n\n* **a)  Representing Goals:**  If you were to create a data structure to store information about each Sustainable Development Goal (SDG), what data fields would you include? Consider factors like goal name, description, target year, progress indicators, etc.\n* **b)  Relationship Mapping:** How would you represent the relationships between different SDGs using data structures? For example, how would you show that "Ensure Availability and Sustainable Management of Water and Sanitation for All" (SDG 6) is related to "End Hunger, Achieve Food Security and Improved Nutrition and Promote Sustainable Agriculture" (SDG 2)?\n\n**2. Algorithms:**\n\n* **a)  Progress Tracking:** Design an algorithm to track the progress of each SDG over time. Consider how you would sto

In [42]:
ques_list = ques.split("\n")

In [43]:
ques_list

['## Coding Test Preparation Questions:',
 '',
 'These questions are designed to test your understanding of the provided text, focusing on information extraction, analysis, and potential coding applications.',
 '',
 '**1. Data Structures:**',
 '',
 '* **a)  Representing Goals:**  If you were to create a data structure to store information about each Sustainable Development Goal (SDG), what data fields would you include? Consider factors like goal name, description, target year, progress indicators, etc.',
 '* **b)  Relationship Mapping:** How would you represent the relationships between different SDGs using data structures? For example, how would you show that "Ensure Availability and Sustainable Management of Water and Sanitation for All" (SDG 6) is related to "End Hunger, Achieve Food Security and Improved Nutrition and Promote Sustainable Agriculture" (SDG 2)?',
 '',
 '**2. Algorithms:**',
 '',
 '* **a)  Progress Tracking:** Design an algorithm to track the progress of each SDG ove

In [44]:
from langchain.chains import RetrievalQA

In [45]:
answer_generation_chain = RetrievalQA.from_chain_type(llm=llm_answer_gen, 
                                               chain_type="stuff", 
                                               retriever=vector_store.as_retriever())

In [46]:
# Answer each question and save to a file
for question in ques_list:
    print("Question: ", question)
    answer = answer_generation_chain.run(question)
    print("Answer: ", answer)
    print("--------------------------------------------------\\n\\n")
    # Save answer to file
    with open("answers.txt", "a") as f:
        f.write("Question: " + question + "\\n")
        f.write("Answer: " + answer + "\\n")
        f.write("--------------------------------------------------\\n\\n")

Question:  ## Coding Test Preparation Questions:


Answer:  Please provide me with the specific coding test you are preparing for. I need more information to give you relevant questions. 

For example, tell me:

* **What company or organization is the test for?** 
* **What programming languages are you expected to use?**
* **What level of experience is the test targeting (entry-level, junior, senior)?**
* **What topics are likely to be covered (data structures, algorithms, etc.)?**

Once I have this information, I can provide you with a list of coding test preparation questions tailored to your specific needs. 

--------------------------------------------------\n\n
Question:  
Answer:  What is the question you would like me to answer? Please provide me with the question so I can use the context to give you the best possible answer. 

--------------------------------------------------\n\n
Question:  These questions are designed to test your understanding of the provided text, focusing on information extraction, analysis, and potential 

Retrying langchain_google_genai.chat_models._chat_with_retry.<locals>._chat_with_retry in 2.0 seconds as it raised ResourceExhausted: 429 Resource has been exhausted (e.g. check quota)..


ResourceExhausted: 429 Resource has been exhausted (e.g. check quota).