In [1]:
import os
from dotenv import load_dotenv

In [2]:
load_dotenv()
key = os.getenv('GEMINI_API_KEY')

In [3]:
from langchain_community.document_loaders import PyPDFLoader

In [4]:
%pwd

'd:\\vscode\\gen ai\\interview questions generator'

In [5]:
loader = PyPDFLoader('data/SDG.pdf')
content = loader.load()

In [6]:
question_gen=""
for page in content:
    question_gen += page.page_content
question_gen    

'IN THE YEAR 2015, LEADERS FROM 193 COUNTRIES OF THE WORLD \nCAME TOGETHER TO FACE THE FUTURE.\nAnd what they saw was daunting. Famines. Drought. Wars. Plagues. Poverty. \nNot just in some faraway place, but in their own cities and towns and villages.\nThey knew things didn’t have to be this way. They knew we had enough \nfood to feed the world, but that it wasn’t getting shared. They knew there \nwere medicines for HIV and other diseases, but they cost a lot. They knew \nthat earthquakes and floods were inevitable, but that the high death \ntolls were not. \nThey also knew that billions of people worldwide shared their hope for a \nbetter future.\nSo leaders from these countries created a plan called the Sustainable \nDevelopment Goals (SDGs). This set of 17 goals imagines a future just 15 years \noff that would be rid of poverty and hunger, and safe from the worst effects of \nclimate change. It’s an ambitious plan. \nBut there’s ample evidence that we can succeed. In the past 15 yea

In [7]:
from langchain_text_splitters import TokenTextSplitter


In [8]:
splitter_ques_gen = TokenTextSplitter(
    chunk_size=10000,chunk_overlap=200
)

In [9]:
chunk_ques_gen = splitter_ques_gen.split_text(question_gen)

In [10]:
len(chunk_ques_gen)

1

In [11]:
type(chunk_ques_gen)

list

In [12]:
from langchain_core.documents import Document

In [13]:
document_ques_gen = [Document(page_content=t) for t in chunk_ques_gen]

In [14]:
document_ques_gen

[Document(metadata={}, page_content='IN THE YEAR 2015, LEADERS FROM 193 COUNTRIES OF THE WORLD \nCAME TOGETHER TO FACE THE FUTURE.\nAnd what they saw was daunting. Famines. Drought. Wars. Plagues. Poverty. \nNot just in some faraway place, but in their own cities and towns and villages.\nThey knew things didn’t have to be this way. They knew we had enough \nfood to feed the world, but that it wasn’t getting shared. They knew there \nwere medicines for HIV and other diseases, but they cost a lot. They knew \nthat earthquakes and floods were inevitable, but that the high death \ntolls were not. \nThey also knew that billions of people worldwide shared their hope for a \nbetter future.\nSo leaders from these countries created a plan called the Sustainable \nDevelopment Goals (SDGs). This set of 17 goals imagines a future just 15 years \noff that would be rid of poverty and hunger, and safe from the worst effects of \nclimate change. It’s an ambitious plan. \nBut there’s ample evidence tha

In [15]:
splitter_ans_gen = TokenTextSplitter(chunk_size=1000,chunk_overlap=100)

In [16]:
document_answer_gen=splitter_ans_gen.split_documents(document_ques_gen)

In [17]:
document_answer_gen

[Document(metadata={}, page_content='IN THE YEAR 2015, LEADERS FROM 193 COUNTRIES OF THE WORLD \nCAME TOGETHER TO FACE THE FUTURE.\nAnd what they saw was daunting. Famines. Drought. Wars. Plagues. Poverty. \nNot just in some faraway place, but in their own cities and towns and villages.\nThey knew things didn’t have to be this way. They knew we had enough \nfood to feed the world, but that it wasn’t getting shared. They knew there \nwere medicines for HIV and other diseases, but they cost a lot. They knew \nthat earthquakes and floods were inevitable, but that the high death \ntolls were not. \nThey also knew that billions of people worldwide shared their hope for a \nbetter future.\nSo leaders from these countries created a plan called the Sustainable \nDevelopment Goals (SDGs). This set of 17 goals imagines a future just 15 years \noff that would be rid of poverty and hunger, and safe from the worst effects of \nclimate change. It’s an ambitious plan. \nBut there’s ample evidence tha

In [18]:
len(document_answer_gen)

5

In [19]:
from langchain_google_genai import ChatGoogleGenerativeAI
llm = ChatGoogleGenerativeAI(model='gemini-2.5-pro',api_key=key)

In [20]:
prompt_template = """
You are an expert at creating questions based on coding materials and documentation.
Your goal is to prepare a coder or programmer for their exam and coding tests.
You do this by asking questions about the text below:

------------
{text}
------------

Create questions that will prepare the coders or programmers for their tests.
Make sure not to lose any important information.

QUESTIONS:
"""

In [21]:
refine_template = ("""
You are an expert at creating practice questions based on coding material and documentation.
Your goal is to help a coder or programmer prepare for a coding test.
We have received some practice questions to a certain extent: {existing_answer}.
We have the option to refine the existing questions or add new ones.
(only if necessary) with some more context below.
------------
{text}
------------

Given the new context, refine the original questions in English.
If the context is not helpful, please provide the original questions.
QUESTIONS:
"""
)


In [22]:
from langchain_classic.prompts import PromptTemplate

In [23]:
PROMPT_QUESTIONS = PromptTemplate(template=prompt_template,input_variables=['text'])

In [24]:
REFINE_PROMPT_QUESTIONS = PromptTemplate(template=refine_template,input_variables=['existing_answer','text'])

In [25]:
from langchain_core.output_parsers import StrOutputParser

In [26]:
initial_chain = PROMPT_QUESTIONS | llm | StrOutputParser()
refine_chain = REFINE_PROMPT_QUESTIONS | llm | StrOutputParser()

# summarize chain

In [27]:
initial_input = {"text": document_answer_gen[0].page_content}
current_questions = initial_chain.invoke(initial_input)
print("Initial Questions:\n", current_questions)
print("---")

# 2. Loop through the rest of the documents
for i, doc in enumerate(document_answer_gen[1:]):
    print(f"Refining with document {i+1}...")
    refine_input = {
        "existing_answer": current_questions, 
        "text": doc.page_content
    }
    current_questions = refine_chain.invoke(refine_input)
    print("Refined Questions:\n", current_questions)
    print("---")

print("Final Questions:\n", current_questions)

Initial Questions:
 Of course. Here are a set of questions designed to test a programmer's skills in data extraction, data modeling, and algorithmic logic, using the provided text as the source material.

### QUESTIONS:

#### **Category 1: Data Extraction & Text Parsing**

1.  **Question:** Write a function `extract_key_numbers(text)` that parses the provided text and returns a dictionary containing the following specific integer values:
    *   `creation_year`: The year the plan was created.
    *   `target_year`: The year the SDGs are to be fulfilled by.
    *   `num_countries`: The number of countries that created the plan.
    *   `num_goals`: The total number of goals in the plan.
    *   `undp_presence`: The number of countries and territories the UNDP is present in.

2.  **Question:** The text describes several specific goals in all-caps. Write a script that reads the text and extracts the full, multi-line titles of each of these goals. The output should be a list of strings, wh

Retrying langchain_google_genai.chat_models._chat_with_retry.<locals>._chat_with_retry in 2.0 seconds as it raised ResourceExhausted: 429 You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits. To monitor your current usage, head to: https://ai.dev/usage?tab=rate-limit.
* Quota exceeded for metric: generativelanguage.googleapis.com/generate_content_free_tier_requests, limit: 2
Please retry in 37.525851018s. [violations {
  quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_requests"
  quota_id: "GenerateRequestsPerMinutePerProjectPerModel-FreeTier"
  quota_dimensions {
    key: "model"
    value: "gemini-2.5-pro"
  }
  quota_dimensions {
    key: "location"
    value: "global"
  }
  quota_value: 2
}
, links {
  description: "Learn more about Gemini API quotas"
  url: "https://ai.google.dev/gemini-api/docs/rate-limits"
}
, retry_delay {
  seconds:

Refined Questions:
 Of course. Based on the new context provided, I have refined the original questions to be more relevant and answerable. Questions that relied on information no longer present in the text have been replaced with new ones that test similar skills using the available material.

Here is the refined set of practice questions.

### **QUESTIONS:**

#### **Category 1: Data Extraction & Text Parsing**

1.  **Question:** Write a function `extract_key_metrics(text)` that parses the provided text and returns a dictionary containing the following specific numerical values:
    *   `primary_education_enrolment_rate`: The total enrolment rate in developing regions for primary education (as a percentage).
    *   `water_scarcity_percentage`: The percentage of people around the world affected by water scarcity.
    *   `electricity_access_increase`: The number of people (in billions) who gained access to electricity between 1990 and 2010.
    *   `sdg_target_year`: The primary targe

Retrying langchain_google_genai.chat_models._chat_with_retry.<locals>._chat_with_retry in 2.0 seconds as it raised ResourceExhausted: 429 You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits. To monitor your current usage, head to: https://ai.dev/usage?tab=rate-limit.
* Quota exceeded for metric: generativelanguage.googleapis.com/generate_content_free_tier_requests, limit: 2
Please retry in 44.282461289s. [violations {
  quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_requests"
  quota_id: "GenerateRequestsPerMinutePerProjectPerModel-FreeTier"
  quota_dimensions {
    key: "model"
    value: "gemini-2.5-pro"
  }
  quota_dimensions {
    key: "location"
    value: "global"
  }
  quota_value: 2
}
, links {
  description: "Learn more about Gemini API quotas"
  url: "https://ai.google.dev/gemini-api/docs/rate-limits"
}
, retry_delay {
  seconds:

ResourceExhausted: 429 You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits. To monitor your current usage, head to: https://ai.dev/usage?tab=rate-limit.
* Quota exceeded for metric: generativelanguage.googleapis.com/generate_content_free_tier_requests, limit: 2
Please retry in 35.392579979s. [violations {
  quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_requests"
  quota_id: "GenerateRequestsPerMinutePerProjectPerModel-FreeTier"
  quota_dimensions {
    key: "model"
    value: "gemini-2.5-pro"
  }
  quota_dimensions {
    key: "location"
    value: "global"
  }
  quota_value: 2
}
, links {
  description: "Learn more about Gemini API quotas"
  url: "https://ai.google.dev/gemini-api/docs/rate-limits"
}
, retry_delay {
  seconds: 35
}
]

In [None]:
from langchain_nomic import NomicEmbeddings

embeddings = NomicEmbeddings(
    model="nomic-embed-text-v1.5",
    # dimensionality=256, # Optional: for Matryoshka-capable models
    # inference_mode="local", # Optional: for local embedding
    # device="gpu", # Optional: for local embedding on a specific device
)

In [31]:
from langchain_community.vectorstores import FAISS

In [32]:
vector_store = FAISS.from_documents(document_answer_gen,embeddings)

ImportError: Could not import faiss python package. Please install it with `pip install faiss-gpu` (for CUDA supported GPU) or `pip install faiss-cpu` (depending on Python version).