In [8]:
import getpass
import os

# Ask for the OpenAI API key only when the environment does not already
# provide one. Prompting unconditionally would overwrite a key that was
# exported before the kernel started and would force an interactive prompt
# on every re-run of this cell.
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass.getpass()

from langchain_openai import ChatOpenAI

# Chat model used by the cells below.
llm = ChatOpenAI(model="gpt-4o-mini")

In [9]:
import pickle
import pandas as pd
from langchain import hub
from langchain_chroma import Chroma
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_openai import OpenAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter

from langchain.schema import Document
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_chroma import Chroma
from langchain_openai import OpenAIEmbeddings

# Load the pickled collection of source texts.
# NOTE(review): unpickling can execute arbitrary code, so data.pkl must come
# from a trusted source.
with open('data.pkl', 'rb') as file:
    arr = pickle.load(file)

# Wrap each loaded item in a Document (presumably every item is a plain
# string -- TODO confirm), then cut the documents into overlapping chunks.
docs = [Document(page_content=text) for text in arr]
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
)
splits = text_splitter.split_documents(docs)

def format_docs(docs):
    """Join the ``page_content`` of every document into a single string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` attribute.

    Returns:
        The contents in iteration order, separated by one blank line
        (``"\\n\\n"``); an empty string when ``docs`` yields nothing.
    """
    contents = [entry.page_content for entry in docs]
    separator = "\n\n"
    return separator.join(contents)

In [10]:
# Embed the chunks with OpenAI embeddings and index them in a Chroma store.
vectorstore = Chroma.from_documents(
    documents=splits,
    embedding=OpenAIEmbeddings(),
)

# Retriever view over the store, used as the first stage of the RAG chain.
retriever = vectorstore.as_retriever()

# Prompt template pulled from the LangChain hub.
prompt = hub.pull("rlm/rag-prompt")



In [11]:
# Inputs handed to the prompt: "context" is the retriever output passed
# through format_docs, "question" is the chain input forwarded unchanged.
rag_inputs = {"context": retriever | format_docs, "question": RunnablePassthrough()}

# Fill the prompt, call the chat model, and parse the reply into a string.
rag_chain = rag_inputs | prompt | llm | StrOutputParser()


In [12]:
# Ad-hoc query about net sales; the leading and trailing newlines are part
# of the text sent to the chain.
question = (
    "\n"
    "How has Apple's total net sales changed over time?"
    "\n"
)

rag_chain.invoke(question)

"Apple's total net sales showed fluctuations over time, with an increase of 8% in the first nine months of 2022 compared to the same period in 2021. However, in 2023, total net sales decreased by 3% in the first nine months compared to 2022. This trend highlights variability in sales performance across different periods."

In [17]:
# Ad-hoc query about the Services segment; the leading and trailing newlines
# are part of the text sent to the chain.
question = (
    "\n"
    "Can any trends be identified in Apple's Services segment revenue over the reported periods?"
    "\n"
)

rag_chain.invoke(question)

"Yes, trends in Apple's Services segment revenue show consistent year-over-year growth in net sales, primarily driven by increases in advertising, cloud services, and the App Store. This growth was observed in the third quarter of both 2022 and 2023, as well as during the first nine months of those years. However, despite revenue growth, there has been a decrease in the Services gross margin percentage in 2023 compared to 2022."

In [18]:
# Ad-hoc query about liquidity and cash flows; the leading and trailing
# newlines are part of the text sent to the chain.
question = (
    "\n"
    "Are there any notable changes in Apple's liquidity position or cash flows as reported in these 10-Qs?"
    "\n"
)

rag_chain.invoke(question)

"The context provided does not contain specific information regarding notable changes in Apple's liquidity position or cash flows as reported in the 10-Qs. Therefore, I don't know the answer to the question."

In [22]:
# Read the question sheet, answer every row with the RAG chain, and write the
# result back to the same CSV (the file is replaced in place).
file_path = 'questions_with_answers.csv'
df = pd.read_csv(file_path)

# One chain invocation per value of the 'Question' column.
questions = df['Question']
df['Generated Answers'] = questions.apply(rag_chain.invoke)

df.to_csv(file_path, index=False)