# Import Necessary Libraries

In [1]:
import os
from dotenv import load_dotenv

from langchain.document_loaders import PyPDFDirectoryLoader
from langchain.text_splitter import TokenTextSplitter
from langchain.docstore.document import Document

from langchain.chat_models import ChatOpenAI
from langchain.prompts import PromptTemplate
from langchain.chains.summarize import load_summarize_chain

from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import FAISS
from langchain.chains import RetrievalQA

# Load Environment Variables

In [2]:
# Load variables from a .env file (if one is found) into the process environment.
load_dotenv()

OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")

# Fail early with an actionable message: assigning None to os.environ would
# otherwise raise an opaque "TypeError: str expected, not NoneType".
if not OPENAI_API_KEY:
    raise RuntimeError(
        "OPENAI_API_KEY is not set. Add it to your .env file or export it "
        "in the environment before running this notebook."
    )

# Re-export the key so it is present in os.environ for the rest of the notebook.
os.environ["OPENAI_API_KEY"] = OPENAI_API_KEY

# Change Directory

In [3]:
# Move the working directory up one level so that relative paths used below
# (e.g. "data") resolve from the parent directory.
# NOTE: not idempotent - every re-run of this cell moves up one more level.
%cd ..

c:\Projects\GenAI-7-Projects\GenAI-Interview-Question-Creator


# Load PDF Files

In [4]:
# Folder (relative to the current working directory) that holds the source PDFs.
file_path = "data"

# Load every PDF found in the folder into a list of Document objects.
loader = PyPDFDirectoryLoader(file_path)
data = loader.load()

# Stop here if nothing was loaded (wrong working directory, empty folder, ...):
# every later cell depends on this text and would fail with a far less obvious error.
if not data:
    raise FileNotFoundError(
        f"No PDF pages were loaded from {os.path.abspath(file_path)!r}; "
        "check the working directory and that the folder contains PDF files."
    )

In [5]:
# Inspect the loaded Document objects (page text plus source/page metadata).
data

[Document(page_content='', metadata={'source': 'data\\SDG.pdf', 'page': 0}),
 Document(page_content='', metadata={'source': 'data\\SDG.pdf', 'page': 1}),
 Document(page_content='IN THE YEAR 2015, LEADERS FROM 193 COUNTRIES OF THE WORLD \nCAME TOGETHER TO FACE THE FUTURE.\nAnd what they saw was daunting. Famines. Drought. Wars. Plagues. Poverty. \nNot just in some faraway place, but in their own cities and towns and villages.\nThey knew things didn’t have to be this way. They knew we had enough  \nfood to feed the world, but that it wasn’t getting shared. They knew there \nwere medicines for HIV and other diseases, but they cost a lot. They knew  \nthat earthquakes and floods were inevitable, but that the high death  \ntolls were not. \nThey also knew that billions of people worldwide shared their hope for a \nbetter future.\nSo leaders from these countries created a plan called the Sustainable \nDevelopment Goals (SDGs). This set of 17 goals imagines a future just 15 years \noff that w

In [6]:
# Number of Document objects returned by the loader.
len(data)

24

# Combine PDF Content into a Single String

In [7]:
# Concatenate the text of every loaded Document, in order and with no separator.
question_data = "".join(page.page_content for page in data)

In [8]:
# Inspect the combined text built from all loaded pages.
question_data

'IN THE YEAR 2015, LEADERS FROM 193 COUNTRIES OF THE WORLD \nCAME TOGETHER TO FACE THE FUTURE.\nAnd what they saw was daunting. Famines. Drought. Wars. Plagues. Poverty. \nNot just in some faraway place, but in their own cities and towns and villages.\nThey knew things didn’t have to be this way. They knew we had enough  \nfood to feed the world, but that it wasn’t getting shared. They knew there \nwere medicines for HIV and other diseases, but they cost a lot. They knew  \nthat earthquakes and floods were inevitable, but that the high death  \ntolls were not. \nThey also knew that billions of people worldwide shared their hope for a \nbetter future.\nSo leaders from these countries created a plan called the Sustainable \nDevelopment Goals (SDGs). This set of 17 goals imagines a future just 15 years \noff that would be rid of poverty and hunger, and safe from the worst effects of \nclimate change. It’s an ambitious plan. \nBut there’s ample evidence that we can succeed. In the past 15 

# Split the Combined Text into Chunks

In [9]:
# Coarse, token-based splitter: chunks of up to 10,000 tokens that overlap by 200.
splitter = TokenTextSplitter(
    model_name="gpt-3.5-turbo",
    chunk_size=10000,
    chunk_overlap=200,
)

# Split the combined text into a list of plain strings.
chunk = splitter.split_text(question_data)

In [10]:
# Check the container type returned by split_text.
type(chunk)

list

In [11]:
# Number of chunks produced by the coarse splitter.
len(chunk)

1

In [12]:
# Inspect the text chunks produced by the coarse splitter.
chunk

['IN THE YEAR 2015, LEADERS FROM 193 COUNTRIES OF THE WORLD \nCAME TOGETHER TO FACE THE FUTURE.\nAnd what they saw was daunting. Famines. Drought. Wars. Plagues. Poverty. \nNot just in some faraway place, but in their own cities and towns and villages.\nThey knew things didn’t have to be this way. They knew we had enough  \nfood to feed the world, but that it wasn’t getting shared. They knew there \nwere medicines for HIV and other diseases, but they cost a lot. They knew  \nthat earthquakes and floods were inevitable, but that the high death  \ntolls were not. \nThey also knew that billions of people worldwide shared their hope for a \nbetter future.\nSo leaders from these countries created a plan called the Sustainable \nDevelopment Goals (SDGs). This set of 17 goals imagines a future just 15 years \noff that would be rid of poverty and hunger, and safe from the worst effects of \nclimate change. It’s an ambitious plan. \nBut there’s ample evidence that we can succeed. In the past 15

# Create Document Objects from Chunks

In [13]:
# Wrap each text chunk in a Document so it can be passed to split_documents below.
document_data = [
    Document(page_content=piece)
    for piece in chunk
]

document_data

[Document(page_content='IN THE YEAR 2015, LEADERS FROM 193 COUNTRIES OF THE WORLD \nCAME TOGETHER TO FACE THE FUTURE.\nAnd what they saw was daunting. Famines. Drought. Wars. Plagues. Poverty. \nNot just in some faraway place, but in their own cities and towns and villages.\nThey knew things didn’t have to be this way. They knew we had enough  \nfood to feed the world, but that it wasn’t getting shared. They knew there \nwere medicines for HIV and other diseases, but they cost a lot. They knew  \nthat earthquakes and floods were inevitable, but that the high death  \ntolls were not. \nThey also knew that billions of people worldwide shared their hope for a \nbetter future.\nSo leaders from these countries created a plan called the Sustainable \nDevelopment Goals (SDGs). This set of 17 goals imagines a future just 15 years \noff that would be rid of poverty and hunger, and safe from the worst effects of \nclimate change. It’s an ambitious plan. \nBut there’s ample evidence that we can s

In [14]:
# Check the class of the first wrapped chunk.
type(document_data[0])

langchain_core.documents.base.Document

# Split the Documents into Smaller Chunks

In [15]:
# Fine, token-based splitter: chunks of up to 1,000 tokens that overlap by 100.
splitter_answer = TokenTextSplitter(
    model_name="gpt-3.5-turbo",
    chunk_size=1000,
    chunk_overlap=100,
)

# These smaller Document chunks feed both the question chain and the vector store below.
chunk_answer = splitter_answer.split_documents(document_data)

In [16]:
# Number of smaller chunks produced by the fine splitter.
len(chunk_answer)

4

In [17]:
# Inspect the smaller Document chunks.
chunk_answer

[Document(page_content='IN THE YEAR 2015, LEADERS FROM 193 COUNTRIES OF THE WORLD \nCAME TOGETHER TO FACE THE FUTURE.\nAnd what they saw was daunting. Famines. Drought. Wars. Plagues. Poverty. \nNot just in some faraway place, but in their own cities and towns and villages.\nThey knew things didn’t have to be this way. They knew we had enough  \nfood to feed the world, but that it wasn’t getting shared. They knew there \nwere medicines for HIV and other diseases, but they cost a lot. They knew  \nthat earthquakes and floods were inevitable, but that the high death  \ntolls were not. \nThey also knew that billions of people worldwide shared their hope for a \nbetter future.\nSo leaders from these countries created a plan called the Sustainable \nDevelopment Goals (SDGs). This set of 17 goals imagines a future just 15 years \noff that would be rid of poverty and hunger, and safe from the worst effects of \nclimate change. It’s an ambitious plan. \nBut there’s ample evidence that we can s

# Initialize the Chat Model and Prompts

In [18]:
# Chat model passed to the question-generation chain below.
llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=0.4)

  warn_deprecated(


In [19]:
# Initial question-generation prompt; `{text}` is the only placeholder and is
# declared as the input variable when the PromptTemplate is built below.
# NOTE(review): the wording targets coding material, while the PDF loaded in this
# notebook is about the Sustainable Development Goals - confirm this is intended.
prompt_template = """
You are an expert at creating questions based on coding materials and documentation.
Your goal is to prepare a coder or programmer for their exam and coding tests.
You do this by asking questions about the text below:

------------
{text}
------------

Create questions that will prepare the coders or programmers for their tests.
Make sure not to lose any important information.

QUESTIONS:
"""

In [20]:
# Prompt object for the initial question-generation step.
question_prompt = PromptTemplate(
    template=prompt_template,
    input_variables=["text"],
)

In [21]:
# Refinement prompt: `{existing_answer}` carries the questions produced so far and
# `{text}` carries the additional context to refine them with.
refine_template = """
You are an expert at creating practice questions based on coding material and documentation.
Your goal is to help a coder or programmer prepare for a coding test.
We have received some practice questions to a certain extent: {existing_answer}.
We have the option to refine the existing questions or add new ones.
(only if necessary) with some more context below.
------------
{text}
------------

Given the new context, refine the original questions in English.
If the context is not helpful, please provide the original questions.
QUESTIONS:
"""

In [22]:
# Prompt object for the refinement step; it takes the questions so far plus new context.
Refine_question_prompt = PromptTemplate(
    template=refine_template,
    input_variables=["existing_answer", "text"],
)

# Load the Summarize Chain

In [23]:
# Build a "refine" chain that uses the two question prompts instead of summary prompts.
chain = load_summarize_chain(
    llm=llm,
    chain_type="refine",
    question_prompt=question_prompt,
    refine_prompt=Refine_question_prompt,
)

In [24]:
# Run the refine chain over the smaller chunks; the result is one string holding
# the generated questions (a numbered list, one per line, in the recorded output).
query = chain.run(chunk_answer)

print(query)

  warn_deprecated(


1. What is the name of the plan created by leaders from 193 countries in 2015 to tackle global issues such as poverty, hunger, and climate change?
2. What organization is leading the effort to fulfill the Sustainable Development Goals (SDGs) by the year 2030?
3. What is the ambitious goal set by the United Nations Development Programme (UNDP) to be achieved by 2030?
4. How many people around the world still live on less than $1.25 a day, according to the text?
5. What are some of the key components of the goal to end hunger and achieve food security by 2030?
6. What progress has been made in terms of child mortality and maternal mortality in the years leading up to the SDGs?
7. What are some of the challenges that prevent many children around the world from accessing education?
8. What is the total enrollment rate in developing regions for primary education as of recent years?
9. How does the text emphasize the importance of health coverage and access to medicines and vaccines for all 

# Create a Vector Store

In [25]:
# Embedding client (default settings) used to build the FAISS index below.
embedding = OpenAIEmbeddings()

  warn_deprecated(


In [26]:
# Index the smaller chunks in an in-memory FAISS store for retrieval.
vector_store = FAISS.from_documents(
    documents=chunk_answer,
    embedding=embedding,
)
vector_store

<langchain_community.vectorstores.faiss.FAISS at 0x2b1fe722d40>

In [27]:
# Separate chat model, with a slightly lower temperature, for the answering chain.
llm_answer = ChatOpenAI(model="gpt-3.5-turbo", temperature=0.3)

In [28]:
# Inspect the raw generated questions (still a single string at this point).
query

'1. What is the name of the plan created by leaders from 193 countries in 2015 to tackle global issues such as poverty, hunger, and climate change?\n2. What organization is leading the effort to fulfill the Sustainable Development Goals (SDGs) by the year 2030?\n3. What is the ambitious goal set by the United Nations Development Programme (UNDP) to be achieved by 2030?\n4. How many people around the world still live on less than $1.25 a day, according to the text?\n5. What are some of the key components of the goal to end hunger and achieve food security by 2030?\n6. What progress has been made in terms of child mortality and maternal mortality in the years leading up to the SDGs?\n7. What are some of the challenges that prevent many children around the world from accessing education?\n8. What is the total enrollment rate in developing regions for primary education as of recent years?\n9. How does the text emphasize the importance of health coverage and access to medicines and vaccines

# Split Query into Questions

In [29]:
# Turn the generated text into a list with one question per entry.
# - The isinstance guard keeps this cell re-runnable: `query` is rebound from str to
#   list here, so a second run would otherwise raise AttributeError (list has no split).
# - Blank or whitespace-only lines are dropped so no empty "question" reaches the
#   answer chain.
if isinstance(query, str):
    query = [line.strip() for line in query.split("\n") if line.strip()]

In [30]:
# Inspect the list of individual questions.
query

['1. What is the name of the plan created by leaders from 193 countries in 2015 to tackle global issues such as poverty, hunger, and climate change?',
 '2. What organization is leading the effort to fulfill the Sustainable Development Goals (SDGs) by the year 2030?',
 '3. What is the ambitious goal set by the United Nations Development Programme (UNDP) to be achieved by 2030?',
 '4. How many people around the world still live on less than $1.25 a day, according to the text?',
 '5. What are some of the key components of the goal to end hunger and achieve food security by 2030?',
 '6. What progress has been made in terms of child mortality and maternal mortality in the years leading up to the SDGs?',
 '7. What are some of the challenges that prevent many children around the world from accessing education?',
 '8. What is the total enrollment rate in developing regions for primary education as of recent years?',
 '9. How does the text emphasize the importance of health coverage and access 

# Create a RetrievalQA Chain

In [31]:
# Question-answering chain: fetch chunks from the vector store and pass them to the
# answering model using the "stuff" chain type.
answer_chain = RetrievalQA.from_chain_type(
    llm=llm_answer,
    retriever=vector_store.as_retriever(),
    chain_type="stuff",
)

# Generate Answers for Each Question

In [32]:
# Answer every generated question, echo each pair in the notebook, and append it
# to answers.txt.
for question in query:
    # A blank entry is not a question; skip it rather than spend an LLM call on it.
    if not question.strip():
        continue

    print("Question: ", question)
    answer = answer_chain.run(question)

    print("Answer: ",answer)
    print("------------------------------------------")

    # Explicit UTF-8: model output can contain characters that the platform default
    # encoding (a legacy code page on Windows) cannot represent, which would raise
    # UnicodeEncodeError in the middle of the run.
    # Append mode is kept, so re-running this cell adds the pairs again.
    with open("answers.txt", "a", encoding="utf-8") as f:
        f.write("Question: " + question + "\n")
        f.write("Answer: " + answer + "\n")
        f.write("-----------------------------------------\n")

Question:  1. What is the name of the plan created by leaders from 193 countries in 2015 to tackle global issues such as poverty, hunger, and climate change?
Answer:  The plan created by leaders from 193 countries in 2015 to tackle global issues such as poverty, hunger, and climate change is called the Sustainable Development Goals (SDGs).
------------------------------------------
Question:  2. What organization is leading the effort to fulfill the Sustainable Development Goals (SDGs) by the year 2030?
Answer:  The United Nations Development Programme (UNDP) is one of the leading organizations working to fulfill the Sustainable Development Goals (SDGs) by the year 2030.
------------------------------------------
Question:  3. What is the ambitious goal set by the United Nations Development Programme (UNDP) to be achieved by 2030?
Answer:  The ambitious goal set by the United Nations Development Programme (UNDP) to be achieved by 2030 is to end extreme poverty in all forms.
-----------