In [1]:
import re
from sentence_transformers import SentenceTransformer, util
import statistics
import os
import requests
import pandas as pd
from datetime import datetime, timedelta

# from langchain.embeddings import HuggingFaceEmbeddings
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import FAISS
from langchain.docstore.document import Document
from langchain.text_splitter import RecursiveCharacterTextSplitter
import openai
import pandas as pd
import nltk
from nltk.tokenize import sent_tokenize

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Load OPENAPI Key
import os


In [19]:
# Helper function to display markdown
from IPython.display import Markdown, display
def md(text):
    display(Markdown(text))

## Web scrape Apple

In [3]:
import requests
from bs4 import BeautifulSoup
import os

def fetch_and_save_transcript(url, filename='apple_q4_2024_transcript.txt'):
    # Send a GET request to the URL
    response = requests.get(url)
    response.raise_for_status()

    # Parse the HTML content
    soup = BeautifulSoup(response.text, 'html.parser')

    # Find the article-body div
    article_body = soup.find('div', class_='article-body')
    if not article_body:
        raise ValueError("Could not find the article body.")

    # Remove all h2 tags from the article body
    for h2 in article_body.find_all('h2'):
        h2.decompose()

    # Extract all paragraph text
    paragraphs = article_body.find_all('p')
    transcript = '\n'.join([para.get_text() for para in paragraphs])

    # Save to a .txt file
    with open(filename, 'w', encoding='utf-8') as file:
        file.write(transcript)

    print(f"Transcript saved to '{filename}'")

# URL of the earnings call transcript
url = 'https://www.fool.com/earnings/call-transcripts/2024/10/31/apple-aapl-q4-2024-earnings-call-transcript/'

# Run the function
fetch_and_save_transcript(url)


Transcript saved to 'apple_q4_2024_transcript.txt'


In [4]:
file_path = "apple_q4_2024_transcript.txt"

with open(file_path, 'r', encoding='utf-8') as file:
    transcript = file.read()

print(transcript[:5000])  # Print first 500 characters for preview


Image source: The Motley Fool.
Apple (AAPL 1.49%)Q4 2024 Earnings CallOct 31, 2024, 5:00 p.m. ET
Suhasini Chandramouli -- Director, Investor Relations
Good afternoon, and welcome to the Apple Q4 fiscal year 2024 earnings conference call. My name is Suhasini Chandramouli, director of investor relations. Today's call is being recorded. Speaking first today are Apple's CEO, Tim Cook; and CFO, Luca Maestri; and they'll be joined by Kevan Parekh, vice president of financial planning and analysis.
After that, we'll open the call to questions from analysts. Please note that some of the information you'll hear during our discussion today will consist of forward-looking statements, including, without limitation, those regarding revenue, gross margin, operating expenses, other income and expense, taxes, capital allocation, and future business outlook, including the potential impact of macroeconomic conditions on the company's business and results of operations. These statements involve risks and

In [23]:
import re
import json

def chunk_transcript(text):
    # Pattern to detect speakers including Operator
    speaker_pattern = re.compile(r'^(?:[A-Z][a-z]+(?: [A-Z][a-z]+)* -- .+|Operator)$', re.MULTILINE)
    chunks = []

    matches = list(speaker_pattern.finditer(text))
    pending_question = None
    question_speaker = None
    chunk_id = 1

    for i, match in enumerate(matches):
        start = match.end()
        end = matches[i + 1].start() if i + 1 < len(matches) else len(text)
        speaker_line = match.group(0).strip()
        speaker_text = text[start:end].strip().replace('\n', ' ')

        # Skip empty text
        if not speaker_text:
            continue

        # Skip Operator chunks
        if speaker_line == "Operator":
            continue

        # Store analyst question temporarily
        if "Analyst" in speaker_line:
            pending_question = speaker_text
            question_speaker = speaker_line
            continue

        # Build final chunk with or without question
        chunk = {
            "chunk_id": f"chunk_{chunk_id:03}",
            "speaker": speaker_line,
            "text": speaker_text,
            "question": pending_question if pending_question else ""
        }

        chunks.append(chunk)
        chunk_id += 1
        pending_question = None  # Clear after attaching to next answer

    return chunks


# Example usage
chunks = chunk_transcript(transcript)

# Save to JSON
with open("apple_q4_2024_chunks.json", "w", encoding="utf-8") as f:
    json.dump(chunks, f, indent=2)

print(chunks[0])


{'chunk_id': 'chunk_001', 'speaker': 'Suhasini Chandramouli -- Director, Investor Relations', 'text': "Good afternoon, and welcome to the Apple Q4 fiscal year 2024 earnings conference call. My name is Suhasini Chandramouli, director of investor relations. Today's call is being recorded. Speaking first today are Apple's CEO, Tim Cook; and CFO, Luca Maestri; and they'll be joined by Kevan Parekh, vice president of financial planning and analysis. After that, we'll open the call to questions from analysts. Please note that some of the information you'll hear during our discussion today will consist of forward-looking statements, including, without limitation, those regarding revenue, gross margin, operating expenses, other income and expense, taxes, capital allocation, and future business outlook, including the potential impact of macroeconomic conditions on the company's business and results of operations. These statements involve risks and uncertainties that may cause actual results or 

## RAG

### Convert into FAISS Database for semantic search

In [15]:
import json
from langchain.schema import Document
from langchain.vectorstores import FAISS
from langchain.embeddings import OpenAIEmbeddings


# Convert documents
documents = [
    Document(
        page_content=entry["text"],
        metadata={
            "chunk_id": entry["chunk_id"],
            "speaker": entry["speaker"]
        }
    )
    for entry in chunks if entry["text"].strip()  
]

# Initialize embedding model
embedding_model = OpenAIEmbeddings(model="text-embedding-ada-002")

# Build FAISS index
faiss_index = FAISS.from_documents(documents, embedding_model)



## Using Cross Encoder 

In [9]:
from sentence_transformers import CrossEncoder

# Load the cross-encoder model
cross_encoder = CrossEncoder('cross-encoder/ms-marco-MiniLM-L-6-v2')

# Retrieving and reranking


# Retrieve relevant doc
This function performs semantic search using a vector store (like FAISS). It takes a query, searches for the top-k most similar document chunks based on vector similarity (e.g., cosine similarity), and returns those documents. It uses embeddings to find documents that are meaningfully related to the query, not just keyword matches.

#  Rerank documents 
This function re-ranks retrieved documents using a cross-encoder, which evaluates each (query, document) pair for relevance.
It first prepares inputs as pairs: [query, doc]
Then uses the cross_encoder.predict() method to assign a relevance score to each pair
After scoring, it attaches those scores to the docs and sorts them by score (highest first)
Finally, it returns the reordered list of documents, now ranked by the cross-encoder’s more accurate assessment of relevance

# Combined pipleline

It combines speed (bi-encoder) and accuracy (cross-encoder) — a common best practice in modern RAG pipelines. 

## Sequentially
1. Does fast initial retrieval using the vector store (bi-encoder), returns top k rough matches.

2. rerank_documents(...), takes those results and re-scores them using a cross-encoder. Returns the same docs, but sorted by deep relevance.

In [24]:
def retrieve_relevant_docs(query, vector_store, k=5):
    # Retrieve top-k similar document chunks for the query
    retrieved_docs = vector_store.similarity_search(query, k=k)
    return retrieved_docs



def retrieve_and_rerank(query, vector_store, cross_encoder, initial_k=10, final_k=5):
    # Step 1: Initial retrieval using bi-encoder
    initial_retrieved_docs = retrieve_relevant_docs(query, vector_store, k=initial_k)

    # Step 2: Re-rank the initially retrieved documents using cross-encoder
    reranked_docs = rerank_documents(query, initial_retrieved_docs, cross_encoder)

    # Step 3: Select the top-k documents after re-ranking
    top_k_docs = reranked_docs[:final_k]

    return top_k_docs



def rerank_documents(query, retrieved_docs, cross_encoder):
    # Prepare the inputs for the cross-encoder
    cross_encoder_inputs = [[query, doc.page_content] for doc in retrieved_docs]

    # Compute relevance scores
    relevance_scores = cross_encoder.predict(cross_encoder_inputs)

    # Attach scores to documents
    pairs_list = []
    for idx, doc in enumerate(retrieved_docs):
        pairs_list.append((doc, relevance_scores[idx]))

    # Sort documents by relevance score in descending order
    sorted_docs = sorted(pairs_list, key=lambda x: x[1], reverse=True)

    # Final output
    reranked_docs = [doc for doc, _ in sorted_docs]

    return reranked_docs


In [32]:
def build_prompt(retrieved_docs, query, prompt_engineering):
    context_chunks = []

    for i, doc in enumerate(retrieved_docs, start=1):
        # Use only speaker metadata now
        speaker = doc.metadata.get('speaker', 'Unknown')
        source_info = f"\n[Source: {speaker}]"

        # Add question if it exists
        question = doc.metadata.get("question", "")
        if question:
            chunk_text = f"Q: {question}\nA: {doc.page_content}"
        else:
            chunk_text = doc.page_content

        chunk_text = chunk_text.replace("\n", " ")
        context_chunks.append(f"{i}. {chunk_text}{source_info}")

    # Join all context chunks
    context = "\n\n".join(context_chunks)

    # Prompt template
    prompt = (
        "You are a financial analyst reviewing an earnings call transcript. Your task is to extract and summarize key factual claims and sentiment-related insights expressed by management that can later be verified against the company's 10-K filing."
        " The 10-K contains detailed data in sections such as the Management Discussion and Analysis (MD&A), Financial Statements, Business Overview, and Risk Factors.\n\n"

        "Instructions:\n\n"
        "Based on the retrieved context from earnings call transcripts below:\n\n"
        "#### Context/Retrieved Statements:\n{context}\n\n"
        "Identify and extract factual claims regarding the company’s performance, outlook, and any sentiment indications (for example, optimism about growth, caution regarding risks, or confidence in future performance).\n\n"
        "For each claim, provide the following details in a structured format:\n\n"
        "Claim Text: A concise statement of the fact or sentiment.\n\n"
        "[Source: speaker]\n\n"
        "Metric/Detail (if applicable): Any quantifiable data (e.g., “revenue growth of 15%”, “EBITDA margin improvement”).\n\n"
        "Relevant Reporting Period: Indicate the fiscal year or quarter mentioned (e.g., “FY2023”).\n\n"
        "Target 10-K Section: Suggest which section of the 10-K is most appropriate to verify this claim (for example, “MD&A”, “Financial Statements”, “Risk Factors”, “Business Overview”).\n\n"
        "#### Query: {query}\n\n"
    ).format(context=context, query=query, prompt_engineering=prompt_engineering)

    return prompt



prompt_engineering = ""

query = "Analyze Apples's sentiment level."
# Retrieve and rerank documents using our FAISS index (assumed to be defined elsewhere as faiss_index)
retrieved_docs = retrieve_and_rerank(query, faiss_index, cross_encoder, initial_k=10, final_k=5)

# Build the prompt using the retrieved documents
prompt = build_prompt(retrieved_docs, query, prompt_engineering)
md("======================== Constructed Prompt: ========================")
md(prompt)


======================== Constructed Prompt: ========================

You are a financial analyst reviewing an earnings call transcript. Your task is to extract and summarize key factual claims and sentiment-related insights expressed by management that can later be verified against the company's 10-K filing. The 10-K contains detailed data in sections such as the Management Discussion and Analysis (MD&A), Financial Statements, Business Overview, and Risk Factors.

Instructions:

Based on the retrieved context from earnings call transcripts below:

#### Context/Retrieved Statements:
1. Q (from Wamsi Mohan -- Analyst): OK. Thanks, Tim. And then as a follow-up, maybe this is a little premature. But how is Apple, at a high level, prepared to potentially deal with any tariffs that might come post this election cycle? And if not exactly, help -- perhaps you can just help investors think about some of the things Apple has done already to try to insulate from some of these impacts -- potential impacts? A: You know, I wouldn't want to speculate about those sorts of things, and so I'm going to punt on that one.
[Source: Timothy Donald Cook -- Chief Executive Officer]

2. Q (from Samik Chatterjee -- Analyst): OK. OK. And for my follow-up, Tim, during the quarter or, I think, over the last 90 days, we had the quotes come out in relation to the DOJ relative to the Google sort of revenue-sharing agreement that you have with them. How do you sort of look at it going forward in terms of emphasizing the role that Apple has in that ecosystem with Safari and sort of the potential outcome that you're looking at? Thank you. A: You know, I don't want to speculate on that from a legal point of view. It's an ongoing case. And I will save that for another day.
[Source: Timothy Donald Cook -- Chief Executive Officer]

3. Q (from Ben Reitzes -- Melius Research -- Analyst): Hey, thanks a lot. And I'll echo those comments about Luca. We'll miss you and good luck. And my question is with regard to iPhone again. And with regard to the fourth quarter is my first question -- or sorry, the fourth calendar quarter, your first quarter. When you look at mid- to low single-digit revenue growth, do you expect the iPhone to grow faster? And what are you thinking about in the answer to that question with regard to China, which keeps improving each quarter? And then I have just a follow-up. Thanks. A: Yeah. You know, Ben, we are not providing that level of color today. Yes, we've said that we expect total company revenue to grow low to mid-single digits. Keep in mind, Apple Intelligence, as Tim said, is rolling out over time, both features and languages. And we just had a number of exciting launches just this week from the Apple Intelligence feature to the new Mac. So, we'll leave it at that. We've given you the total for the company and some pretty good direction on services, which we expect to continue to grow at a similar rate than what we've seen in fiscal '24.
[Source: Luca Maestri -- Senior Vice President, Chief Financial Officer]

4. Q (from Krish Sankar -- Analyst): Two of those. One, can the Apple Intelligence actually help? And how the services growth rate -- A: Keep in mind that we have released a lot of APIs, and developers will be taking advantage of those APIs. That release has occurred as well, and of course, more are coming. And so, I definitely believe that a lot of developers will be taking advantage of Apple Intelligence in a big way. And what that does to services, I'll not forecast, but I would say that from an ecosystem point of view, I think it will be great for the user and the user experience.
[Source: Timothy Donald Cook -- Chief Executive Officer]

5. Q (from Michael Ng -- Goldman Sachs -- Analyst): Hey. Good afternoon. I just have two. The first one is for Tim on Apple Intelligence. I was wondering if you could just expand a little bit on some of the early feedback to Apple Intelligence, both for iOS 18.1 but also the developer beta so far, and whether you would attribute Apple Intelligence to any of the strong iPhone performance that we've seen to date. Thanks. A: Thanks, Michael. As I noted in my comments, just this week on Monday, we made the first set of Apple Intelligence features available in U.S. English for iPhone, iPad, and Mac. And this includes things like systemwide writing tools that help you refine your writing, a more natural conversational Siri, more intelligent Photos app, including the ability to create movies simply by typing a description, which is really cool, and new ways to prioritize and stay in the moment with notification summaries and priority messages. There's also email summaries and email priority. We're getting a lot of positive feedback from developers and customers. And in fact, if you just look at the first three days, which is all we have obviously from Monday, the 18.1 adoption is twice as fast as the 17.1 adoption was in the year-ago quarter. And so, there's definitely interest out there for Apple Intelligence. Carrying on in the quarter, we are looking forward to bringing even more features in December, and this will include even more powerful writing tools and visual intelligence experience that builds on Apple Intelligence and ChatGPT integration in addition to other features as well as we'll bring localized English to several countries that include the U.K., Australia, and Canada. So, it's going to be quite a software quarter between the release on Monday and the release in December. And then as we turn the corner to '25, we'll have more languages rolling out in -- starting in April as well and more features as well. And so, it's a very, very strong drumbeat, and we couldn't be more excited about it.
[Source: Timothy Donald Cook -- Chief Executive Officer]

Identify and extract factual claims regarding the company’s performance, outlook, and any sentiment indications (for example, optimism about growth, caution regarding risks, or confidence in future performance).

For each claim, provide the following details in a structured format:

Claim Text: A concise statement of the fact or sentiment.

[Source: speaker]

Metric/Detail (if applicable): Any quantifiable data (e.g., “revenue growth of 15%”, “EBITDA margin improvement”).

Relevant Reporting Period: Indicate the fiscal year or quarter mentioned (e.g., “FY2023”).

Target 10-K Section: Suggest which section of the 10-K is most appropriate to verify this claim (for example, “MD&A”, “Financial Statements”, “Risk Factors”, “Business Overview”).

#### Query: Analyze Apples's sentiment level.



In [33]:

def generate_insight(prompt, model="gpt-4-turbo", temperature=0.1, max_tokens=512):
    """
    Generate analysis using the given prompt via the OpenAI ChatCompletion API.
    """
    response = openai.chat.completions.create(
        model=model,
        messages=[
            {"role": "system", "content": "You are interested in analyzing a company's sentiment level."},
            {"role": "user", "content": prompt}
        ],
        temperature=temperature,
        max_tokens=max_tokens,
    )
    return response.choices[0].message.content


# Generate an insight using GPT
insight = generate_insight(prompt)
md("======================== Generated Analysis: ========================")
md(insight)


======================== Generated Analysis: ========================

### Analysis of Apple's Sentiment Level Based on Earnings Call Transcript

#### Positive Sentiment Indicators:
1. **Expectation of Revenue Growth:**
   - **Claim Text:** Apple expects total company revenue to grow low to mid-single digits.
   - **Source:** Luca Maestri -- Senior Vice President, Chief Financial Officer
   - **Metric/Detail:** Low to mid-single-digit revenue growth
   - **Relevant Reporting Period:** Fiscal Year 2024
   - **Target 10-K Section:** Management Discussion and Analysis (MD&A)

2. **Strong Adoption of New Features:**
   - **Claim Text:** The adoption of iOS 18.1 is twice as fast as the 17.1 adoption in the year-ago quarter.
   - **Source:** Timothy Donald Cook -- Chief Executive Officer
   - **Metric/Detail:** Adoption rate of iOS 18.1 is twice that of iOS 17.1
   - **Relevant Reporting Period:** Quarter following the release of iOS 18.1
   - **Target 10-K Section:** Management Discussion and Analysis (MD&A)

3. **Positive Feedback and Upcoming Features:**
   - **Claim Text:** Receiving a lot of positive feedback from developers and customers on Apple Intelligence features; more powerful features planned for release.
   - **Source:** Timothy Donald Cook -- Chief Executive Officer
   - **Metric/Detail:** Positive feedback and planned feature releases
   - **Relevant Reporting Period:** Fiscal Year 2024
   - **Target 10-K Section:** Management Discussion and Analysis (MD&A)

#### Neutral to Cautious Sentiment Indicators:
1. **Non-disclosure on Specific Growth Metrics for iPhone:**
   - **Claim Text:** Apple is not providing specific growth metrics for iPhone in the current disclosure.
   - **Source:** Luca Maestri -- Senior Vice President, Chief Financial Officer
   - **Metric/Detail:** Non-disclosure of specific growth metrics
   - **Relevant Reporting Period:** Fiscal Year 2024
   - **Target 10-K Section:** Management Discussion and Analysis (MD&A)

2. **Avoidance of Legal and Tariff Speculation:**
   - **Claim Text:** Tim Cook avoids speculating on potential impacts of tariffs and ongoing legal cases.
   - **Source:** Timothy Donald Cook -- Chief Executive Officer
   - **Metric/Detail:** Non-speculative stance on legal and economic uncertainties
   - **Relevant Reporting Period:** Not

In [53]:
def evaluate_citation(generated_text, retrieved_docs):

    pattern = r"- \*\*Source:\*\*\s*(.+)"
    citations = re.findall(pattern, generated_text)

    if not citations:
        print("No citations found in the generated text.")
        return 0.0

    matched = 0
    for cited in citations:
        cited_speaker = cited.strip().split("--")[0].strip().lower()

        for doc in retrieved_docs:
            doc_speaker = doc.metadata.get("speaker", "").split("--")[0].strip().lower()

            if cited_speaker == doc_speaker:
                matched += 1
                break

    citation_score = matched / len(citations)
    return citation_score



# Generation evaluation using Groundedness Check
citation_score = evaluate_citation(insight, retrieved_docs)
md("### Generation Citation score:")
print(citation_score)


### Generation Citation score:

1.0


# Cosine similarity test

Computes the semantic similarity between a generated claim and a retrieved evidence passage using a pre-trained sentence transformer model. This verifies whether the generated content is truly supported by the retrieved documents, helping detect hallucinations and ensuring factual accuracy by grounding model outputs in actual source evidence.


In [58]:
# Markdown output
def md(text):
    display(Markdown(text))

# Load embedding model
model = SentenceTransformer('all-MiniLM-L6-v2')

def compute_cosine_similarity(claim, evidence):
    embeddings = model.encode([claim, evidence], convert_to_tensor=True)
    cosine_sim = util.cos_sim(embeddings[0], embeddings[1])
    return cosine_sim.item()

def evaluate_groundedness_cosine(generated_text, retrieved_docs, threshold=0.5):
    """
    Evaluates groundedness of each claim in generated output using cosine similarity.
    Matches claims to retrieved docs via speaker metadata and compares to (question + answer) content.
    """
    # Extract all Claim Texts and Sources from the markdown format
    claim_pattern = r"- \*\*Claim Text:\*\*\s*(.+)"
    source_pattern = r"- \*\*Source:\*\*\s*(.+)"
    claims = re.findall(claim_pattern, generated_text)
    sources = re.findall(source_pattern, generated_text)

    total_claims = len(claims)
    supported_claims = 0
    sim_scores = []

    md("### Generation Groundedness Check:")

    for claim_text, cited_speaker in zip(claims, sources):
        claim_text = claim_text.strip()
        cited_speaker = cited_speaker.split("--")[0].strip().lower()

        # Match with the most relevant doc based on speaker
        matched_evidence = None
        for doc in retrieved_docs:
            doc_speaker = doc.metadata.get("speaker", "").split("--")[0].strip().lower()
            if cited_speaker == doc_speaker:
                question = doc.metadata.get("question", "").strip()
                answer = doc.page_content.strip()
                matched_evidence = f"Q: {question} A: {answer}" if question else answer
                break

        if matched_evidence:
            similarity = compute_cosine_similarity(claim_text, matched_evidence)
            print(f"\nClaim: {claim_text}\nEvidence: {matched_evidence}\nSimilarity: {similarity:.4f}")
            sim_scores.append(similarity)
            if similarity >= threshold:
                supported_claims += 1
        else:
            print(f"\nClaim: {claim_text}\nNo matching evidence found for: {cited_speaker}")

    groundedness_score = supported_claims / total_claims if total_claims > 0 else 0.0

    md("#### Average Similarity Score:")
    print(statistics.mean(sim_scores) if sim_scores else "N/A")

    md("#### Generation Groundedness Score:")
    print(f"{groundedness_score:.2f}")

    return groundedness_score

evaluate_groundedness_cosine(insight, retrieved_docs)

### Generation Groundedness Check:


Claim: Apple expects total company revenue to grow low to mid-single digits.
Evidence: Q (from Ben Reitzes -- Melius Research -- Analyst): Hey, thanks a lot. And I'll echo those comments about Luca. We'll miss you and good luck. And my question is with regard to iPhone again. And with regard to the fourth quarter is my first question -- or sorry, the fourth calendar quarter, your first quarter. When you look at mid- to low single-digit revenue growth, do you expect the iPhone to grow faster? And what are you thinking about in the answer to that question with regard to China, which keeps improving each quarter? And then I have just a follow-up. Thanks. A: Yeah. You know, Ben, we are not providing that level of color today. Yes, we've said that we expect total company revenue to grow low to mid-single digits. Keep in mind, Apple Intelligence, as Tim said, is rolling out over time, both features and languages. And we just had a number of exciting launches just this week from the Apple In

#### Average Similarity Score:

0.4210689067840576


#### Generation Groundedness Score:

0.40


0.4

# Add on retrieval evaluation