In [1]:
import os
from dotenv import load_dotenv

load_dotenv()
openai_api_key = os.getenv("OPENAI_API_KEY")

In [11]:
from langchain_community.document_loaders import PyPDFLoader

pdf_path = "apple_10K.pdf"
loader = PyPDFLoader(pdf_path, mode = "single")
docs = loader.load()

In [12]:
# 2. split into coherent chunks
from langchain_text_splitters import RecursiveCharacterTextSplitter
splitter = RecursiveCharacterTextSplitter(
    chunk_size=10000,  
    chunk_overlap=1500, 
    separators=["\n\n", "\n", ".", "!", "?", " "],  
)
chunks = splitter.split_documents(docs)

In [13]:
len(chunks)

49

In [None]:
from langchain_openai import OpenAIEmbeddings

emb = OpenAIEmbeddings(model="text-embedding-3-small")

In [14]:
from langchain_community.vectorstores import FAISS

vectordb = FAISS.from_documents(chunks, emb)

In [15]:
vectordb.index.ntotal

49

In [19]:
vector =vectordb.index.reconstruct(0)
vector.shape

(1536,)

In [25]:
vectordb.save_local("apple_faiss_idx")

VECTOR RETRIEVAL

In [20]:
query =" What is apple's total revenue in 2024"

In [21]:
top_docs = vectordb.similarity_search(query, k=3)

In [22]:
print(top_docs[0].page_content)

Periods
Total Number
of Shares 
Purchased
Average 
Price
Paid Per 
Share
Total Number 
of Shares
Purchased as 
Part of Publicly
Announced 
Plans or 
Programs
Approximate 
Dollar Value of
Shares That May 
Yet Be Purchased
Under the Plans 
or Programs (1)
June 30, 2024 to August 3, 2024:
Open market and privately negotiated purchases  35,697 $ 224.11  35,697 
August 4, 2024 to August 31, 2024:
Open market and privately negotiated purchases  42,910 $ 221.39  42,910 
September 1, 2024 to September 28, 2024:
Open market and privately negotiated purchases  33,653 $ 222.86  33,653 
Total  112,260 $ 89,074 
(1) As of September  28, 2024, the Company was authorized by the Board to purchase up to $110 billion of the Company’s 
common stock under a share repurchase program announced on May 2, 2024, of which $20.9 billion had been util ized. 
During the fourth quarter of 2024, the Company also utilized the final $4.1 billion under its previous repurchase program, 
which was authorized in May 2023.

In [23]:
retriever = vectordb.as_retriever(search_type='similarity', search_kwargs={'k': 3})

In [24]:
top_docs = retriever.invoke(query)

Augmented Text Generation

In [28]:
# LLM
from langchain.chat_models import init_chat_model
llm = init_chat_model(
    model="gpt-4o-mini",
    temperature=0,
    model_provider="openai", 
)

# Prompt‑Template
from langchain_core.prompts import ChatPromptTemplate
qa_prompt = ChatPromptTemplate.from_messages([
    ("system", "You are a business analyst and helpful assistant."),
    ("human", "Answer the following question as accurate and diligent as possible."
              "Do not speculate or invent facts, rely only on the provided text."
              "\n\nContext:\n{context}\n\nQuestion: {input}\nAnswer:")
])

# Helper‑Function: Documents → Context‑String
def concat_docs(context_docs):
    return "\n\n---\n\n".join(d.page_content for d in context_docs)

In [26]:
len(top_docs)

3

In [30]:
context = concat_docs(top_docs)
print(context[:500])

Periods
Total Number
of Shares 
Purchased
Average 
Price
Paid Per 
Share
Total Number 
of Shares
Purchased as 
Part of Publicly
Announced 
Plans or 
Programs
Approximate 
Dollar Value of
Shares That May 
Yet Be Purchased
Under the Plans 
or Programs (1)
June 30, 2024 to August 3, 2024:
Open market and privately negotiated purchases  35,697 $ 224.11  35,697 
August 4, 2024 to August 31, 2024:
Open market and privately negotiated purchases  42,910 $ 221.39  42,910 
September 1, 2024 to September 2


In [31]:
relevant_char = len(context)
relevant_char

29898

In [32]:
total_char = len(docs[0].page_content)
total_char 

414567

In [33]:
ratio = relevant_char / total_char * 100
ratio

7.211862015066322

In [1]:
from langchain.chat_models import init_chat_model
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough, RunnableLambda

In [5]:
from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores import FAISS

emb = OpenAIEmbeddings(model="text-embedding-3-small")
vectordb = FAISS.load_local("apple_faiss_idx", emb, allow_dangerous_deserialization=True)
retriever = vectordb.as_retriever(search_kwargs={"k": 4})

In [6]:
type(retriever)

langchain_core.vectorstores.base.VectorStoreRetriever

In [11]:
llm = init_chat_model(
    model="gpt-4o-mini",
    temperature=0,
    model_provider="openai", 
)

qa_prompt = ChatPromptTemplate.from_messages([
    ("system", "You are a business analyst and helpful assistant."),
    ("human", "Answer the following question as accurate and diligent as possible."
              "Do not speculate or invent facts, rely only on the provided text."
              "\n\nContext:\n{context}\n\nQuestion: {input}\nAnswer:")
])

def concat_docs(context_docs):
    return "\n\n---\n\n".join(d.page_content for d in context_docs)

# 7. LEL‑Chain (Retrieval + Prompt + LLM + Parsing)
rag_chain = (
    {
        "context": retriever | RunnableLambda(concat_docs),
        "input": RunnablePassthrough()
    }
    | qa_prompt
    | llm
    | StrOutputParser()
)

In [12]:
rag_chain

{
  context: VectorStoreRetriever(tags=['FAISS', 'OpenAIEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x0000024D1E9339D0>, search_kwargs={'k': 4})
           | RunnableLambda(concat_docs),
  input: RunnablePassthrough()
}
| ChatPromptTemplate(input_variables=['context', 'input'], input_types={}, partial_variables={}, messages=[SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=[], input_types={}, partial_variables={}, template='You are a business analyst and helpful assistant.'), additional_kwargs={}), HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'input'], input_types={}, partial_variables={}, template='Answer the following question as accurate and diligent as possible.Do not speculate or invent facts, rely only on the provided text.\n\nContext:\n{context}\n\nQuestion: {input}\nAnswer:'), additional_kwargs={})])
| ChatOpenAI(client=<openai.resources.chat.completions.completions.Completions object at 0x00

In [13]:
result = rag_chain.invoke(input("Questions: "))
print(result)

The total net sales for Apple Inc. for the fiscal years 2024, 2023, and 2022 are as follows (in millions):

- **2024**: $391,035
- **2023**: $383,285
- **2022**: $394,328

The breakdown of net sales by significant products and services for 2024 is:

- iPhone: $201,183
- Mac: $29,984
- iPad: $26,694
- Wearables, Home and Accessories: $37,005
- Services: $96,169

Total net sales include revenue recognized from deferred revenue as well, with $7.7 billion recognized in 2024 that was included in deferred revenue as of September 30, 2023.


In [14]:
query = "What is Apple´s total revenue in 2024?"

In [15]:
retriever = vectordb.as_retriever(
    search_type="similarity",          
    search_kwargs={"k": 4}             
)
top_docs = retriever.invoke(query)

In [17]:
print(top_docs[0].page_content)

Periods
Total Number
of Shares 
Purchased
Average 
Price
Paid Per 
Share
Total Number 
of Shares
Purchased as 
Part of Publicly
Announced 
Plans or 
Programs
Approximate 
Dollar Value of
Shares That May 
Yet Be Purchased
Under the Plans 
or Programs (1)
June 30, 2024 to August 3, 2024:
Open market and privately negotiated purchases  35,697 $ 224.11  35,697 
August 4, 2024 to August 31, 2024:
Open market and privately negotiated purchases  42,910 $ 221.39  42,910 
September 1, 2024 to September 28, 2024:
Open market and privately negotiated purchases  33,653 $ 222.86  33,653 
Total  112,260 $ 89,074 
(1) As of September  28, 2024, the Company was authorized by the Board to purchase up to $110 billion of the Company’s 
common stock under a share repurchase program announced on May 2, 2024, of which $20.9 billion had been util ized. 
During the fourth quarter of 2024, the Company also utilized the final $4.1 billion under its previous repurchase program, 
which was authorized in May 2023.

####################Challenge 1##########################

In [18]:
import os
from dotenv import load_dotenv

load_dotenv()
openai_api_key = os.getenv("OPENAI_API_KEY")

In [19]:
from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores import FAISS 
from langchain.chat_models import init_chat_model
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough, RunnableLambda

In [20]:
embeddings = OpenAIEmbeddings(model = "text-embedding-3-small")

vectordb = FAISS.load_local("apple_faiss_idx", embeddings, allow_dangerous_deserialization=True)

retriever = vectordb.as_retriever(search_type="similarity_score_threshold", search_kwargs={"k": 4, "score_threshold": 0.4})

llm = init_chat_model(model="gpt-4o-mini", temperature=0, model_provider="openai")

qa_prompt = ChatPromptTemplate.from_messages([
    ("system", "You are a business analyst and helpful assistant."),
    ("human", "Answer the following question as accurate and diligent as possible."
              "Do not speculate or invent facts, rely only on the provided text."
              "\n\nContext:\n{context}\n\nQuestion: {input}\nAnswer:")
])

# 6. Helper‑Lambda: Documents → Context‑String
def concat_docs(context_docs):
    return "\n\n---\n\n".join(d.page_content for d in context_docs)

# 7. LEL‑Chain (Retrieval + Prompt + LLM + Parsing)
rag_chain = (
    {
        "context": retriever | RunnableLambda(concat_docs),
        "input": RunnablePassthrough()
    }
    | qa_prompt
    | llm
    | StrOutputParser()
)



In [21]:
result = rag_chain.invoke("What was Apple’s gross margin percentage for fiscal 2024, and how did it change vs 2023? Cite the exact line")

In [22]:
print(result)

Apple’s gross margin percentage for fiscal 2024 was 46.2%. It increased from 44.1% in 2023. The exact line is: 

"Total gross margin percentage  46.2%  44.1%  43.3%"


In [23]:
top_docs = retriever.invoke("What was Apple’s gross margin percentage for fiscal 2024, and how did it change vs 2023? Cite the exact line.")

In [24]:
len(top_docs)

3