Prompt Chaining

In [1]:
from langchain.vectorstores import FAISS
from langchain_openai import OpenAIEmbeddings

# Directory (relative to the working directory) that holds the saved FAISS index.
VECTOR_STORE_DIR = "vectorstores/business_intel_faiss"

# Embedding client handed to the vector store.
# NOTE(review): presumably this must be the same model the index was built with — confirm.
embeddings = OpenAIEmbeddings(model="text-embedding-3-small", chunk_size=32)
# SECURITY: allow_dangerous_deserialization=True explicitly opts in to an unsafe
# deserialization step (pickle, per the LangChain docs — confirm). Only load
# index directories you created yourself or otherwise trust.
vector_store = FAISS.load_local(VECTOR_STORE_DIR, embeddings, allow_dangerous_deserialization=True)

In [None]:
from langchain_openai import ChatOpenAI
from langchain.prompts import ChatPromptTemplate
from langchain.schema.runnable import RunnableLambda, RunnablePassthrough

# Planning prompt: asks the model to split the user's task ({question}) into
# 2–4 sub-questions and to output nothing but that list.
planner = ChatPromptTemplate.from_messages([
    ("system","You are a research planner. Break the task into 2–4 atomic sub-questions."),
    ("human","Task: {question}\nList the sub-questions only.")
])
# Chat model used by the chains below for both the planning and the answering step.
llm = ChatOpenAI(model="gpt-4o-mini", temperature=0)

def retrieve_per_subq(inputs):
    """Retrieve supporting documents for every sub-question in the planner output.

    Args:
        inputs: Mapping with ``"plan"`` (the planner's raw text, one
            sub-question per line) and ``"question"`` (passed through untouched).

    Returns:
        ``{"steps": [{"subq": str, "docs": list}, ...], "question": ...}`` with
        one step per non-empty sub-question, in plan order.
    """
    import re  # local import keeps this cell runnable on its own

    # Bullet/numbering prefixes the planner may emit ("* ", "• ", "1. ", "2) ").
    # Whitespace is required after the marker so text such as "10.5% ..." survives.
    marker = re.compile(r"^\s*(?:[*•]\s+|\d{1,2}[.)]\s+)")

    subs = []
    for line in inputs["plan"].splitlines():
        cleaned = marker.sub("", line.strip("- ")).strip()
        # A line that was only a marker ("-", "---") cleans to "". Skip it:
        # an empty query would still trigger a vector search and add a
        # meaningless step to the final context.
        if cleaned:
            subs.append(cleaned)

    results = [
        {"subq": sub, "docs": vector_store.similarity_search(sub, k=4)}
        for sub in subs
    ]
    return {"steps": results, "question": inputs["question"]}

# Final-answer prompt: restricts the model to the retrieved excerpts ({ctx})
# and asks for citations by filename alongside the original {question}.
composer = ChatPromptTemplate.from_messages([
    ("system","You are a precise financial assistant. Use only the provided excerpts. Cite sources by filename."),
    ("human","Original question: {question}\n\nContext:\n{ctx}\n\nAnswer concisely with citations.")
])

# Pipeline: normalise input -> plan sub-questions -> retrieve per sub-question
# -> flatten excerpts into one context string -> compose the cited answer.
#
# The input is first normalised to {"question": <text>} (a bare string or a
# {"question": ...} dict are both accepted). Chaining two plain dict literals
# with `|` is evaluated by Python as a dict merge (3.9+; TypeError earlier)
# before LangChain sees it, and mapping "question" to a bare
# RunnablePassthrough() forwards the *whole* input dict, so the composer prompt
# would render "{'question': '...'}" instead of the question text.
chain = (
    RunnableLambda(lambda x: {"question": x["question"] if isinstance(x, dict) else x})
    # Adds "plan" (the planner's text output) next to the untouched "question".
    | RunnablePassthrough.assign(plan=planner | llm | (lambda msg: msg.content))
    | RunnableLambda(retrieve_per_subq)
    | RunnableLambda(lambda x: {
        "question": x["question"],
        # One numbered section per sub-question; every excerpt is capped at
        # 600 characters and tagged with its source for citation.
        "ctx": "\n\n".join(
            f"[{i+1}] {step['subq']}\n" + "\n".join(
                f"- {d.page_content[:600]} (src={d.metadata.get('source','?')})"
                for d in step["docs"]
            )
            for i, step in enumerate(x["steps"])
        ),
    })
    | composer
    | llm
)

In [3]:
# Run the prompt-chaining pipeline end to end and display the answer text.
chain.invoke({"question": "What guidance did Tesla change last quarter? Cite sources."}).content

'Tesla changed its guidance in the most recent quarter by recognizing a significant increase in services and other revenue, which rose by $558 million, or 44%, compared to the same period the previous year. This increase was primarily driven by higher used vehicle revenue and growth in various service-related revenues (source: data\\processed_data\\TSLA\\10-Q_2023-04-24.md). Additionally, Tesla initiated restructuring actions to reduce costs and improve efficiency, which included recognizing $583 million in employee termination expenses (source: data\\processed_data\\TSLA\\10-Q_2024-07-24.md).'

Meta-Prompting

In [7]:
# House style that is prepended to the system prompt of the meta-prompted chain.
STYLE_GUIDE = """
- Audience: equity analysts.
- Format: 3 bullets max + a 'Sources' list with files.
- Be conservative; if unsure, say so and ask for the exact filing/doc.
- Never invent numbers; quote with units and period (e.g., $B, %, FY2024).
"""
# Two system messages (style guide + rule for numeric questions) followed by
# the human message carrying {question} and the retrieved {context}.
meta = ChatPromptTemplate.from_messages([
    ("system", "Follow this style guide:\n" + STYLE_GUIDE),
    ("system", "If the question is numeric, include a short calc note."),
    ("human",
     "Question: {question}\n\n"
     "Context:\n{context}\n\n"
     "Write the answer following the style guide.")
])

In [8]:
# Same plan -> retrieve -> flatten pipeline as the prompt-chaining section, but
# the final prompt is `meta` (style guide) and the context key is "context".
#
# The input is first normalised to {"question": <text>} (a bare string or a
# {"question": ...} dict are both accepted). Chaining two plain dict literals
# with `|` is evaluated by Python as a dict merge (3.9+; TypeError earlier)
# before LangChain sees it, and mapping "question" to a bare
# RunnablePassthrough() forwards the *whole* input dict, so the prompt would
# render "{'question': '...'}" instead of the question text.
chain = (
    RunnableLambda(lambda x: {"question": x["question"] if isinstance(x, dict) else x})
    # Adds "plan" (the planner's text output) next to the untouched "question".
    | RunnablePassthrough.assign(plan=planner | llm | (lambda msg: msg.content))
    | RunnableLambda(retrieve_per_subq)
    | RunnableLambda(lambda x: {
        "question": x["question"],
        # One numbered section per sub-question; every excerpt is capped at
        # 600 characters and tagged with its source for citation.
        "context": "\n\n".join(
            f"[{i+1}] {step['subq']}\n" +
            "\n".join(
                f"- {d.page_content[:600]} (src={d.metadata.get('source','?')})"
                for d in step["docs"]
            )
            for i, step in enumerate(x["steps"])
        ),
    })
    | meta
    | llm
)

In [9]:
# Ask the meta-prompted chain for a summary that follows the style guide.
result = chain.invoke({
    "question": "Summarize Tesla’s FY2024 guidance update in 3 bullet points. Include exact numbers and cite the source files."
})

# print() renders the answer's line breaks and bullets instead of the string repr.
print(result.content)

- Tesla projects FY2024 revenue to exceed $ 100 billion, with vehicle deliveries anticipated to reach approximately 2 million units (source: data\processed_data\TSLA\10-K_2025-01-30.md).
- The company aims to enhance manufacturing capacity, focusing on new models like the Cybertruck and Tesla Semi, while ramping production at Gigafactories to their installed capacities (source: data\processed_data\TSLA\10-K_2025-01-30.md).
- Tesla has initiated restructuring actions expected to incur costs exceeding $ 350 million, primarily related to employee termination expenses, to improve operational efficiency (source: data\processed_data\TSLA\10-Q_2024-04-24.md).

**Sources:**
- data\processed_data\TSLA\10-K_2025-01-30.md
- data\processed_data\TSLA\10-Q_2024-04-24.md


RAG-Fusion

In [16]:
from langchain_openai import ChatOpenAI
from langchain.prompts import ChatPromptTemplate

# Model handle used only for generating alternative search queries.
query_llm = ChatOpenAI(model="gpt-4o-mini", temperature=0)

# Prompt asking for 3–5 varied search queries derived from the user's {question}.
multi_query_prompt = ChatPromptTemplate.from_messages([
    ("system",
     "Generate 3–5 diverse search queries that would help retrieve relevant passages "
     "from earnings reports, 10-K/10-Q filings, and financial news."),
    ("human", "User question:\n{question}")
])

def generate_search_queries(question: str, max_n=5):
    """Ask the LLM for alternative search queries for ``question``.

    Args:
        question: The user's question, inserted into ``multi_query_prompt``.
        max_n: Maximum number of queries to return.

    Returns:
        Up to ``max_n`` distinct, non-empty query strings in the order the
        model produced them, with bullet and numbering prefixes removed.
    """
    import re  # local import keeps this cell runnable on its own

    # "1. " / "2) " prefixes; whitespace is required after the marker so a
    # query that merely starts with a number ("10.5% margin ...") is untouched.
    numbering = re.compile(r"^\d{1,2}[.)]\s+")

    msgs = multi_query_prompt.format_messages(question=question)
    resp = query_llm.invoke(msgs).content

    queries = []
    for line in resp.splitlines():
        # Drop bullet characters first, then any list numbering, so the text
        # that gets embedded is the query itself and "1. foo" / "- foo"
        # de-duplicate to a single entry.
        q = numbering.sub("", line.strip("-• ")).strip()
        if q and q not in queries:
            queries.append(q)

    return queries[:max_n]

In [17]:
from collections import defaultdict

def rag_fusion_retrieve(question: str, k_per_query=4, k_final=12):
    """Retrieve with several query phrasings and merge via Reciprocal Rank Fusion.

    Args:
        question: The user's question; it is searched as-is in addition to the
            LLM-generated alternatives.
        k_per_query: Number of documents requested from the vector store per query.
        k_final: Maximum number of fused documents to return.

    Returns:
        Documents ordered by descending fused score, at most ``k_final`` of them.
    """
    smoothing = 60  # constant added to every rank in the RRF denominator

    # Original question first, then the generated variants.
    search_queries = [question, *generate_search_queries(question)]
    ranked_lists = [
        vector_store.similarity_search(query, k=k_per_query)
        for query in search_queries
    ]

    fused_score = {}
    first_seen = {}
    for ranked in ranked_lists:
        for position, doc in enumerate(ranked, start=1):
            meta = doc.metadata
            # A document is identified by source, page/loc and the first 200
            # characters of its text.
            identity = (
                meta.get("source"),
                meta.get("page") or meta.get("loc"),
                doc.page_content[:200],
            )
            first_seen.setdefault(identity, doc)
            fused_score[identity] = fused_score.get(identity, 0.0) + 1 / (smoothing + position)

    # Highest fused score first; ties keep first-seen order.
    best_first = sorted(fused_score, key=fused_score.get, reverse=True)
    return [first_seen[identity] for identity in best_first[:k_final]]

In [18]:
def build_context(docs, max_chars=6000):
    """Join retrieved docs into a source-tagged bullet list of at most ``max_chars`` characters.

    Args:
        docs: Iterable of objects exposing ``page_content`` (str) and
            ``metadata`` (dict), in priority order.
        max_chars: Upper bound on the length of the returned string.

    Returns:
        One ``- <text> (src=<source>)`` line per document, joined with
        newlines. Each text is stripped, has its newlines replaced by spaces
        and is cut to 800 characters. Iteration stops at the first document
        that would push the result past ``max_chars``; an empty input gives "".
    """
    lines, used = [], 0
    for d in docs:
        # Fall back to "file_path" so chunks without a "source" key still cite a file.
        src = d.metadata.get("source") or d.metadata.get("file_path") or "unknown"
        txt = d.page_content.strip().replace("\n", " ")
        snippet = f"- {txt[:800]} (src={src})"
        # Charge the "\n" that join() inserts before every line but the first;
        # without it the result can overshoot max_chars by len(lines) - 1.
        cost = len(snippet) + (1 if lines else 0)
        if used + cost > max_chars:
            break
        lines.append(snippet)
        used += cost
    return "\n".join(lines)


In [19]:
answer_llm = ChatOpenAI(model="gpt-4o-mini", temperature=0)

def rag_fusion_answer(question: str):
    """Answer ``question`` using only context retrieved through RAG-Fusion.

    Retrieves fused documents for the question, flattens them into a context
    string, fills the answer prompt and returns the content of the model reply.
    """
    template = """
You are a financial analysis assistant. Use ONLY the context below.
If information is missing, say so. Cite sources by filename.

Question:
{question}

Context:
{context}

Answer:
"""
    grounded_context = build_context(rag_fusion_retrieve(question))
    filled = template.format(question=question, context=grounded_context)
    return answer_llm.invoke(filled).content


In [22]:
# Run the RAG-Fusion pipeline on a broad question; the cell displays the returned answer string.
rag_fusion_answer("Summarize Tesla's Performance with citations.")


"Tesla's performance in recent quarters has shown significant developments, particularly in revenue growth and operational restructuring.\n\n1. **Financial Performance**: In the third quarter of 2023, Tesla reported a notable increase in services and other revenue, which rose by $558 million (44%) compared to the same period in 2022. This growth was primarily driven by increased used vehicle sales, maintenance services, and other revenue streams such as paid Supercharging and insurance services (src=data\\processed_data\\TSLA\\8-K_2023-10-18.md).\n\n2. **Energy Generation and Storage**: The energy generation and storage segment experienced a remarkable revenue increase of $913 million (148%) in the first quarter of 2023 compared to the same quarter in 2022. This surge was attributed to higher deployments of Megapack and increased solar cash and loan deployments (src=data\\processed_data\\TSLA\\10-Q_2023-04-24.md).\n\n3. **Restructuring Efforts**: In the second quarter of 2024, Tesla in

Self-Consistency

In [23]:
import numpy as np
from langchain_openai import ChatOpenAI

# Chat model that answers each self-consistency candidate.
sc_llm = ChatOpenAI(
    model="gpt-4o-mini",
    temperature=0,   # keep each call as deterministic as possible; diversity comes from varying the MMR lambda_mult per candidate
)

def build_context(docs, max_chars: int = 6000) -> str:
    """Turn retrieved docs into a compact, source-tagged context string.

    Args:
        docs: Iterable of objects exposing ``page_content`` (str) and
            ``metadata`` (dict), in priority order.
        max_chars: Upper bound on the length of the returned string.

    Returns:
        One ``- <text> (src=<source>)`` line per document, joined with
        newlines. Each text is stripped, has its newlines replaced by spaces
        and is cut to 800 characters. Iteration stops at the first document
        that would push the result past ``max_chars``; an empty input gives "".
    """
    lines, used = [], 0
    for d in docs:
        src = d.metadata.get("source") or d.metadata.get("file_path") or "unknown"
        txt = d.page_content.strip().replace("\n", " ")
        snippet = f"- {txt[:800]} (src={src})"
        # Charge the "\n" that join() inserts before every line but the first;
        # without it the result can overshoot max_chars by len(lines) - 1.
        cost = len(snippet) + (1 if lines else 0)
        if used + cost > max_chars:
            break
        lines.append(snippet)
        used += cost
    return "\n".join(lines)

In [24]:
def self_consistent_answer(question: str, n: int = 5, seed: "int | None" = None):
    """
    Self-consistency over RAG:
    - sample 'n' different retrieval contexts via MMR
    - answer each
    - pick the answer with the most citations as a simple consensus proxy

    Args:
        question: The question to answer.
        n: Number of candidate answers to generate; must be at least 1.
        seed: Optional seed for the ``lambda_mult`` sampling. When given, the
            sampled values are reproducible across runs; when ``None`` the
            global NumPy random state is used, as before.

    Returns:
        ``(best_answer, candidates)`` where ``candidates`` is a list of dicts
        with keys ``"answer"``, ``"lambda_mult"`` and ``"docs"``.

    Raises:
        ValueError: If ``n`` is smaller than 1 (there would be nothing to pick from).
    """
    # Fail early with a clear message instead of the opaque error max() raises
    # on an empty candidate list.
    if n < 1:
        raise ValueError(f"n must be at least 1 to produce a candidate, got {n}")

    # A private generator keeps seeded runs independent of global NumPy state.
    rng = np.random.default_rng(seed) if seed is not None else None
    candidates = []

    for _ in range(n):
        # Sample a slightly different lambda_mult for MMR and clamp it
        noise = rng.standard_normal() if rng is not None else np.random.randn()
        lam = float(np.clip(0.6 + 0.08 * noise, 0.1, 0.9))

        # 1) Retrieve with MMR
        docs = vector_store.max_marginal_relevance_search(
            question,
            k=6,
            lambda_mult=lam,
        )

        # 2) Build context
        ctx = build_context(docs)

        # 3) Ask the LLM using that context
        prompt = f"""
You are a careful financial analysis assistant. Use ONLY the context below.
If the context is insufficient to answer part of the question, say so explicitly.
Always cite sources in parentheses like (src=FILENAME).

Question:
{question}

Context:
{ctx}

Answer (with citations):
"""
        resp = sc_llm.invoke(prompt).content

        candidates.append({
            "answer": resp,
            "lambda_mult": lam,
            "docs": docs,
        })

    # 4) Naive "consensus": pick answer with most citations (first one wins ties)
    best = max(candidates, key=lambda c: c["answer"].count("(src="))

    return best["answer"], candidates

In [26]:
# Demo question for the self-consistency routine.
q = "Calculate Tesla's car manufacturing revenue growth QoQ and YoY for the last two quarters."

# Generate 7 candidates; returns the selected answer plus every candidate for inspection.
final_answer, all_candidates = self_consistent_answer(q, n=7)

print("=== FINAL (SELF-CONSISTENT) ANSWER ===\n")
print(final_answer)

# Optional: inspect candidates (numbered from 1, with the lambda_mult each one used)
for i, c in enumerate(all_candidates, 1):
    print(f"\n--- Candidate {i} (lambda_mult={c['lambda_mult']:.3f}) ---")
    print(c["answer"])

=== FINAL (SELF-CONSISTENT) ANSWER ===

To calculate Tesla's car manufacturing revenue growth quarter-over-quarter (QoQ) and year-over-year (YoY) for the last two quarters, we need the revenue figures for those periods.

1. **YoY Growth**:
   - For the three months ended March 31, 2023, automotive sales revenue increased by $3.36 billion, or 22%, compared to the same period in 2022 (src=data\processed_data\TSLA\10-Q_2023-04-24.md).
   - For the three months ended June 30, 2024, automotive sales revenue decreased by $1.89 billion, or 9%, compared to the same period in 2023 (src=data\processed_data\TSLA\10-Q_2024-07-24.md).

2. **QoQ Growth**:
   - The revenue for the three months ended March 31, 2023, is not explicitly stated in the context, so we cannot calculate the QoQ growth for the quarter ending June 30, 2024, without knowing the revenue for the quarter ending March 31, 2024.

In summary, we have the following information:
- YoY growth for March 31, 2023: +22%
- YoY growth for Jun