In [None]:
import os

import openai
from langchain.chains.question_answering import load_qa_chain
from langchain.chat_models import AzureChatOpenAI, ChatOpenAI
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain_community.embeddings import HuggingFaceEmbeddings
from PyPDF2 import PdfReader

In [None]:
# Update with your own PDF files
pdf_files = [
    "Amazon_Annual_Report.pdf",
    "Samsung_Annual_Report.pdf",
    "Apple_Annual_Report.pdf",
]

# Build one string per PDF (same order as `pdf_files`) by concatenating the
# text extracted from each page. Pages for which extract_text() returns
# nothing are skipped; no separator is inserted between pages.
pdf_texts = []
for pdf_path in pdf_files:
    page_texts = (page.extract_text() for page in PdfReader(pdf_path).pages)
    pdf_texts.append("".join(text for text in page_texts if text))

In [None]:
# Split every document's text into overlapping chunks for embedding.
# The splitter breaks on newlines and measures length in characters
# (length_function=len), with chunk_size=800 and chunk_overlap=200.
# Its configuration is the same for every document, so it is built once
# instead of being re-created on each loop iteration.
text_splitter = CharacterTextSplitter(
    separator="\n",
    chunk_size=800,
    chunk_overlap=200,
    length_function=len,
)

# Flat list of chunks across all PDFs, in `pdf_texts` order.
texts = []
for pdf_text in pdf_texts:
    texts.extend(text_splitter.split_text(pdf_text))

In [4]:
# Prefer the key from the OPENAI_API_KEY environment variable so a real secret
# never has to be saved in the notebook; the placeholder is only a fallback.
API_KEY = os.environ.get("OPENAI_API_KEY", "Your_OpenAI_API_Key")

# `openai` is a module and cannot be called; LangChain's ChatOpenAI wrapper is
# the object that accepts a model name and an API key.
llm = ChatOpenAI(
    model="gpt-3.5-turbo",
    openai_api_key=API_KEY,
)

In [5]:
# Load the Hugging Face sentence-transformers model used to embed the text chunks.
model_name = "sentence-transformers/all-mpnet-base-v2"
embeddings = HuggingFaceEmbeddings(model_name=model_name)

In [6]:
# Embed every chunk in `texts` with `embeddings` and build a FAISS vector
# store over them for similarity search.
document_search = FAISS.from_texts(
    texts=texts,
    embedding=embeddings,
)

In [7]:
# Build the question-answering chain with chain_type="stuff", which places the
# supplied documents into a single prompt for the chat model.
# Replace the placeholders with your Azure OpenAI deployment and model names.
# NOTE(review): no endpoint, API version, or key is passed here, so they
# presumably come from environment variables -- confirm for your setup.
chain = load_qa_chain(
    AzureChatOpenAI(
        deployment_name="Your_deployment_name",
        model_name="Your_model_name",
    ),
    chain_type="stuff",
)

In [None]:
# Ask a question: retrieve the chunks most similar to the query from the
# vector store, then let the QA chain answer using those chunks.
query = "What is the trend analysis of Apple and Samsung revenue by product segment in the year 2021?"

docs = document_search.similarity_search(query)
answer = chain.run(question=query, input_documents=docs)

print(answer)

In [None]:
# Same flow as a plain query, but the question sent to the model is prefixed
# with an instruction to answer as a quantitative financial analyst.
query1 = "Generate the trend analysis of Apple and Samsung revenue.Provide the following information in the form of bullet points."

# Retrieval uses the raw query so the instruction text does not skew the
# similarity search.
docs = document_search.similarity_search(query1)

# The chain only consumes `input_documents` and `question`; an extra `prompt`
# keyword is not one of its inputs and would never reach the model, so the
# instruction is folded into the question instead. The ". " separator keeps
# the instruction and the query from running together.
prompt_template = "As an expert financial analyst please be quantitative and specific. {}"
answer = chain.run(input_documents=docs, question=prompt_template.format(query1))
print(answer)