# Importing the Dependencies

In [1]:
import getpass
import os

from langchain.chains import ConversationalRetrievalChain
from langchain.document_loaders import PyPDFLoader
from langchain.embeddings import OpenAIEmbeddings
from langchain.llms import OpenAI
from langchain.memory import ConversationBufferMemory
from langchain.vectorstores import Chroma

**Using OpenAI embeddings to embed a PDF file, and creating 3 separate documents by splitting it.**

Source of the PDF file: https://docs.google.com/document/d/1VDoDY7LgRj7lsh2A1vvUFjitzvRTcWT6DCpNzM8JM_k/edit?usp=sharing

In [2]:
# Path of the source PDF, relative to the notebook's working directory.
# Later cells read this variable both to split the file and to load it for indexing.

pdf_path = "./OOPS.pdf"

In [3]:
from PyPDF2 import PdfReader, PdfWriter

def split_pdf(path, num_parts):
    """Split the PDF at ``path`` into ``num_parts`` consecutive page ranges.

    Every part receives ``total_pages // num_parts`` pages; the last part also
    takes whatever remainder is left. Each part is written to the current
    working directory as ``split_part_<n>.pdf`` (1-based), and its filename is
    printed as it is created.

    Args:
        path: Path of the PDF file to split.
        num_parts: Number of output files to create. Must be at least 1 and
            no larger than the number of pages in the PDF.

    Returns:
        List of the written file paths, in part order.

    Raises:
        ValueError: If ``num_parts`` is smaller than 1 or larger than the
            number of pages in the PDF.
    """
    # Validate before touching the file: 0 would divide by zero below and a
    # negative count would silently produce no output at all.
    if num_parts < 1:
        raise ValueError(f"num_parts must be at least 1, got {num_parts}")

    with open(path, 'rb') as file:
        reader = PdfReader(file)
        total_pages = len(reader.pages)

        # With more parts than pages, pages_per_part would be 0 and every part
        # except the last would be written out as an empty PDF.
        if num_parts > total_pages:
            raise ValueError(
                f"cannot split {total_pages} page(s) into {num_parts} parts"
            )

        pages_per_part = total_pages // num_parts
        split_pdf_paths = []

        for part in range(num_parts):
            start_page = part * pages_per_part

            # The last part absorbs the pages left over by the integer division.
            if part == num_parts - 1:
                stop_page = total_pages
            else:
                stop_page = start_page + pages_per_part

            # Copy this part's page range into a fresh writer.
            writer = PdfWriter()
            for page in range(start_page, stop_page):
                writer.add_page(reader.pages[page])

            # Output name is derived from the 1-based part number.
            split_pdf_path = f"split_part_{part+1}.pdf"
            print(split_pdf_path)

            with open(split_pdf_path, 'wb') as output_file:
                writer.write(output_file)

            split_pdf_paths.append(split_pdf_path)

    return split_pdf_paths

# Split the source PDF (pdf_path is set in an earlier cell) into three files.
num_parts = 3

split_pdf_paths = split_pdf(pdf_path, num_parts)

split_part_1.pdf
split_part_2.pdf
split_part_3.pdf


**The three parts were created successfully and saved in the current working directory, alongside OOPS.pdf.**

# **Adding the OpenAI API key**

In [4]:
# Never hardcode the API key in the notebook: use the value already present in
# the environment, and prompt for it (without echoing) only when it is missing.
# NOTE(review): the key that was previously committed in this cell is exposed
# and should be revoked.
if not os.environ.get('OPENAI_API_KEY'):
    os.environ['OPENAI_API_KEY'] = getpass.getpass('OpenAI API key: ')

In [5]:
# Load the PDF; load_and_split() returns the document pieces that get indexed.
loader = PyPDFLoader(pdf_path)
pages = loader.load_and_split()

# Embed the pieces with OpenAI and store them in a Chroma index that is
# persisted to the current directory.
embeddings = OpenAIEmbeddings()
vectordb = Chroma.from_documents(
    documents=pages,
    embedding=embeddings,
    persist_directory=".",
)
vectordb.persist()

# Conversational retrieval chain: the buffer memory supplies "chat_history",
# and the Chroma index acts as the retriever.
memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
pdf_qa = ConversationalRetrievalChain.from_llm(
    llm=OpenAI(temperature=0.9),
    retriever=vectordb.as_retriever(),
    memory=memory,
)

# Asking Question 1

In [8]:
# Send the first question through the retrieval chain and display its answer.
query = "What is OOPS?"
result = pdf_qa({"question": query})
print("Answer: \n")
result["answer"]

Answer: 



' Object-Oriented Programming is a methodology or paradigm to design a computer programming model using classes and objects rather than functions and logics. It simplifies the software development and maintenance by providing some concepts like inheritance, encapsulation, polymorphism and abstraction.'

# Asking Question 2

In [12]:
# Second question for the retrieval chain (spelling of "Abstraction" corrected
# so the query sent to the chain matches the intended term).
query = "What is Abstraction?"
result = pdf_qa({'question': query})
print("Answer:\n")
result["answer"]

Answer:



' Abstraction in Object-Oriented Programming is the process of simplifying complex real-world entities into objects that contain the essential properties and behaviors of the real-world entities. This allows software developers to reduce the complexity of a problem, making their programs easier to understand, use, and maintain.'

# Asking Question 3

In [16]:
# Send the third question through the retrieval chain and display its answer.
query = "What is Inheritance"
result = pdf_qa({"question": query})
print("Answer:\n")
result["answer"]

Answer:



' \nInheritance in Object-Oriented Programming is the ability of a program or class to inherit attributes and methods from a parent or superclass. Inheritance allows objects to be reused and enables developers to create programs that are easier to maintain and extend.'

# Hence we can see that the OpenAI GPT-3 model answers each question using the relevant document chunks retrieved from the stored vector database.