In [2]:
import os
from dotenv import load_dotenv

# Pull variables from a local .env file (if one exists) into the process environment.
load_dotenv()

OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
if not OPENAI_API_KEY:
    # Without this guard a missing key surfaces as an opaque
    # "TypeError: str expected, not NoneType" on the assignment below.
    raise RuntimeError(
        "OPENAI_API_KEY is not set. Add it to your .env file or export it "
        "before running this notebook."
    )

# Re-export explicitly so code that reads the key from the environment sees it.
os.environ["OPENAI_API_KEY"] = OPENAI_API_KEY

In [4]:
from langchain.document_loaders import PyPDFLoader
# Load the source PDF. The path is relative, so it is resolved against the
# kernel's current working directory.
file_path = "../data/GoogleSearchTips.pdf"
loader = PyPDFLoader(file_path)
# load() returns a list of Document objects: text in `page_content`, plus
# metadata such as 'page' and 'total_pages' (see the cell output).
data = loader.load()
data

[Document(metadata={'producer': 'Microsoft® Word 2019', 'creator': 'Microsoft® Word 2019', 'creationdate': '2022-09-06T10:28:14+01:00', 'title': 'TOP TEN INTERNET SEARCH TIPS', 'author': 'Name', 'moddate': '2022-09-06T10:28:14+01:00', 'source': '../data/GoogleSearchTips.pdf', 'total_pages': 4, 'page': 0, 'page_label': '1'}, page_content='Library and Learning Spaces email: \nAskaLibrarian@hope.ac.uk \n  Last updated September 2022 \n \n \n \nGoogle and the Google logo are registered trademarks of Google Inc., used with permission  \n \n \nTop search tips \n \nSearch Google more effectively by having a go at the following techniques….. \n \n1. Turn off personalised searching \n \nGoogle automatically pushes certain content at you that it assumes you will be \ninterested in. This is based on: \n \ni. Where you are (geographically) \nii. What you have searched for previously \n \nIn order to a do a more objective search of the web, you need to turn off personalised \nsearching by: \n \n1. 

In [6]:
# Concatenate the text of every loaded Document into one string.
# str.join builds the result in a single pass instead of growing it with `+=`.
# The pieces are joined with no separator between them.
pdfText = "".join(page.page_content for page in data)
pdfText

'Library and Learning Spaces email: \nAskaLibrarian@hope.ac.uk \n  Last updated September 2022 \n \n \n \nGoogle and the Google logo are registered trademarks of Google Inc., used with permission  \n \n \nTop search tips \n \nSearch Google more effectively by having a go at the following techniques….. \n \n1. Turn off personalised searching \n \nGoogle automatically pushes certain content at you that it assumes you will be \ninterested in. This is based on: \n \ni. Where you are (geographically) \nii. What you have searched for previously \n \nIn order to a do a more objective search of the web, you need to turn off personalised \nsearching by: \n \n1. Ensure you are signed out of your Google Account \n2. Open the search customization settings page \n3. Turn the settings off. This will turn the setting off for anyone who uses the same \nbrowser and device as you, unless another person signs in \n \n \n2. Make sure Google finds all the words you searched for \n \nIf you search for sever

In [7]:
from langchain.text_splitter import TokenTextSplitter

# Splitter for the question-generation pass. The chunk size is deliberately
# large so the text is handed to the OpenAI model as a single chunk.
splitPDFTextForQuestionGeneration = TokenTextSplitter(
    model_name="gpt-3.5-turbo",
    chunk_size=10000,
    chunk_overlap=200,
)

In [12]:

# Split the full PDF text into string chunks for question generation.
questionGenerationChunk = splitPDFTextForQuestionGeneration.split_text(pdfText)


In [13]:
# Wrap every string chunk in a Document object
from langchain.docstore.document import Document

questionGenerationDocument = [
    Document(page_content=chunk_text) for chunk_text in questionGenerationChunk
]

In [23]:
# Splitter for the answer-generation pass: 1000-token chunks with a
# 100-token overlap between neighbouring chunks.
splitTextForAnswerGeneration = TokenTextSplitter(
    model_name="gpt-3.5-turbo",
    chunk_size=1000,
    chunk_overlap=100,
)

In [24]:
# Re-split the question-generation document(s) into the smaller chunks that
# are later embedded and indexed for answer retrieval.
answerGenerationDocuments = splitTextForAnswerGeneration.split_documents(
    questionGenerationDocument
)

In [25]:
# Sanity check: how many chunks were produced for answer generation.
len(answerGenerationDocuments)

2

In [26]:
from langchain.chat_models import ChatOpenAI

In [27]:
# Chat model used to generate the practice questions.
llm_ques_gen_pipeline = ChatOpenAI(model="gpt-3.5-turbo", temperature=0.3)

  llm_ques_gen_pipeline = ChatOpenAI(


In [28]:
# Initial prompt for question generation; the `{text}` placeholder is filled
# with the document text when the chain runs.
# NOTE(review): the wording targets coding material, but the PDF loaded in this
# notebook is a set of Google search tips — confirm the prompt matches the content.
prompt_template = """
You are an expert at creating questions based on coding materials and documentation.
Your goal is to prepare a coder or programmer for their exam and coding tests.
You do this by asking questions about the text below:

------------
{text}
------------

Create questions that will prepare the coders or programmers for their tests.
Make sure not to lose any important information.

QUESTIONS:
"""

In [29]:
from langchain.prompts import PromptTemplate
# Prompt object for the first question-generation pass; its only placeholder is `text`.
PROMPT_QUESTIONS = PromptTemplate(
    template=prompt_template,
    input_variables=["text"],
)

In [30]:
# Follow-up prompt for the refine step: `{existing_answer}` carries the
# questions produced so far and `{text}` the additional context.
refine_template = """
You are an expert at creating practice questions based on coding material and documentation.
Your goal is to help a coder or programmer prepare for a coding test.
We have received some practice questions to a certain extent: {existing_answer}.
We have the option to refine the existing questions or add new ones.
(only if necessary) with some more context below.
------------
{text}
------------

Given the new context, refine the original questions in English.
If the context is not helpful, please provide the original questions.
QUESTIONS:
"""


In [31]:
# Prompt object for the refinement passes; it takes the questions so far
# (`existing_answer`) and the next piece of context (`text`).
REFINE_PROMPT_QUESTIONS = PromptTemplate(
    template=refine_template,
    input_variables=["existing_answer", "text"],
)

In [32]:
from langchain.chains.summarize import load_summarize_chain

In [33]:
# Refine-type chain for question generation, wired to the two prompts above.
# verbose=True makes the chain echo each formatted prompt while it runs.
ques_gen_chain = load_summarize_chain(
    llm=llm_ques_gen_pipeline,
    chain_type="refine",
    question_prompt=PROMPT_QUESTIONS,
    refine_prompt=REFINE_PROMPT_QUESTIONS,
    verbose=True,
)

In [34]:
# Run the question-generation chain over the document(s); the result is the
# generated questions as one newline-separated string.
# NOTE(review): the cell output echoes this line as part of a warning —
# presumably a LangChain deprecation notice for `.run`; confirm before migrating.
ques = ques_gen_chain.run(questionGenerationDocument)

# print() so the newlines between questions render as line breaks.
print(ques)

  ques = ques_gen_chain.run(questionGenerationDocument)




[1m> Entering new RefineDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3m
You are an expert at creating questions based on coding materials and documentation.
Your goal is to prepare a coder or programmer for their exam and coding tests.
You do this by asking questions about the text below:

------------
Library and Learning Spaces email: 
AskaLibrarian@hope.ac.uk 
  Last updated September 2022 
 
 
 
Google and the Google logo are registered trademarks of Google Inc., used with permission  
 
 
Top search tips 
 
Search Google more effectively by having a go at the following techniques….. 
 
1. Turn off personalised searching 
 
Google automatically pushes certain content at you that it assumes you will be 
interested in. This is based on: 
 
i. Where you are (geographically) 
ii. What you have searched for previously 
 
In order to a do a more objective search of the web, you need to turn off personalised 
searching by: 


In [35]:
from langchain.embeddings.openai import OpenAIEmbeddings

In [36]:
# Embedding model for the answer chunks. No API key is passed here, so it is
# presumably read from the OPENAI_API_KEY environment variable — TODO confirm.
embeddings = OpenAIEmbeddings()

  embeddings = OpenAIEmbeddings()


In [37]:
from langchain.vectorstores import FAISS

In [39]:
# Build a FAISS vector store from the answer-generation chunks, using the
# embedding model created above.
vector_store = FAISS.from_documents(answerGenerationDocuments, embeddings)

In [40]:
# Chat model used to answer the generated questions.
llm_answer_gen = ChatOpenAI(
    model="gpt-3.5-turbo",
    temperature=0.1,
)

In [41]:
# One question per line. Blank or whitespace-only lines are dropped so they
# are not sent to the answer chain as empty questions; surrounding whitespace
# is trimmed from each question.
ques_list = [line.strip() for line in ques.split("\n") if line.strip()]
ques_list

['1. How can you turn off personalized searching on Google for a more objective search of the web?',
 '2. What technique can you use to ensure that all the words you searched for appear in the results on Google?',
 '3. How can you search for an exact phrase on Google?',
 '4. How can you restrict your Google search to a specific web domain?',
 '5. Why is searching for Adobe Acrobat PDF documents useful for academic work?',
 '6. How can you search for information on a specific website using Google?',
 '7. What shortcut can you use to find definitions of technical terms, phrases, or acronyms on Google?',
 '8. Why is it beneficial to search only within the title of a webpage on Google?',
 '9. How can you combine multiple search tips on Google to find relevant and high-quality results?',
 '10. How can you search for websites with information on a topic between a certain date range on Google?',
 '11. In what ways can you use Google for basic math calculations?',
 '12. How can you use Google 

In [42]:
from langchain.chains import RetrievalQA

In [43]:
# Retrieval QA chain: the "stuff" chain type on top of the vector store's retriever.
answer_generation_chain = RetrievalQA.from_chain_type(
    llm=llm_answer_gen,
    chain_type="stuff",
    retriever=vector_store.as_retriever(),
)


In [44]:
# Answer each question and save to a file
for question in ques_list:
    print("Question: ", question)
    answer = answer_generation_chain.run(question)
    print("Answer: ", answer)
    print("--------------------------------------------------\\n\\n")
    # Save answer to file
    with open("answers.txt", "a") as f:
        f.write("Question: " + question + "\\n")
        f.write("Answer: " + answer + "\\n")
        f.write("--------------------------------------------------\\n\\n")




Question:  1. How can you turn off personalized searching on Google for a more objective search of the web?
Answer:  To turn off personalized searching on Google for a more objective search of the web, you need to follow these steps:

1. Ensure you are signed out of your Google Account.
2. Open the search customization settings page.
3. Turn the settings off. This will turn the setting off for anyone who uses the same browser and device as you, unless another person signs in.
--------------------------------------------------\n\n
Question:  2. What technique can you use to ensure that all the words you searched for appear in the results on Google?
Answer:  To ensure that all the words you searched for appear in the results on Google, you can change the search settings to "Verbatim." This setting will make sure that all the words you entered into the search box are included in the results. You can access this setting by choosing "Tools" after getting your search results and then changin