#***FinExplain***

FinExplain: An AI analyst that explains daily stock price movements by combining market data (price action) with news and generating a human-style narrative.

#### Data Preprocessing

In [1]:
pip install yfinance pandas requests



In [2]:
from ast import YieldFrom
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os
import yfinance as yf
from datetime import datetime, timedelta
import warnings
warnings.filterwarnings("ignore")

#### *Stocks Data Fetcher*

In [3]:
#Function to pull data for a ticker for a particular number of days from yahoo finance
def get_price_df(ticker:str,days_back:int=20)->pd.DataFrame:
  end = datetime.today().date()
  start = end-timedelta(days=days_back)
  df = yf.download(ticker, start=start, end=end, auto_adjust=True, progress=False)
  df = df.reset_index()
  return df

In [4]:
#Function to see % change compared to previous day
def get_daily_move(df: pd.DataFrame, date_str: str):
    target = pd.to_datetime(date_str)
    day_row = df[df["Date"] == target]

    if day_row.empty:
        return None

    idx = day_row.index[0]
    if idx == 0:
        return None

    # get scalars
    close_today = day_row["Close"].iloc[0]      # scalar
    close_prev = df.iloc[idx - 1]["Close"]      # scalar

    pct_change = (close_today - close_prev) / close_prev * 100

    return {
        "date": date_str,
        "close": float(close_today),
        "prev_close": float(close_prev),
        "pct_change": round(float(pct_change), 2)
    }

In [5]:
#Function that combines both retrieving the data for a ticker and giving the price change from previous day
def get_price_move_for(ticker: str, date_str: str, days_back: int = 30):
    df = get_price_df(ticker, days_back=days_back)
    move = get_daily_move(df, date_str)
    return move

#### *News Fetcher*

In [6]:
from datetime import datetime

def get_news_for_ticker_mock(ticker: str, date_str: str):
    """
    Mock news fetcher for development.
    Always returns 2â€“3 headlines so the rest of the pipeline can run.
    """
    return [
        {
            "title": f"{ticker} posts stronger-than-expected results",
            "description": f"Investors reacted to {ticker}'s better performance.",
            "source": {"name": "MockWire"},
            "url": "https://example.com/article1",
            "publishedAt": f"{date_str}T14:00:00Z"
        },
        {
            "title": f"Sector peers rally, lifting {ticker}",
            "description": "Broader sector strength supported the stock.",
            "source": {"name": "MockFinance"},
            "url": "https://example.com/article2",
            "publishedAt": f"{date_str}T09:30:00Z"
        }
    ]

In [7]:
news = get_news_for_ticker_mock("AAPL", "2025-11-03")
news

[{'title': 'AAPL posts stronger-than-expected results',
  'description': "Investors reacted to AAPL's better performance.",
  'source': {'name': 'MockWire'},
  'url': 'https://example.com/article1',
  'publishedAt': '2025-11-03T14:00:00Z'},
 {'title': 'Sector peers rally, lifting AAPL',
  'description': 'Broader sector strength supported the stock.',
  'source': {'name': 'MockFinance'},
  'url': 'https://example.com/article2',
  'publishedAt': '2025-11-03T09:30:00Z'}]

#### *Ranking Helper*

In [8]:
# Scores articles based on if appeared in the title,description and based on the source of the article)
IMPORTANT_SOURCES = {"Reuters", "Bloomberg", "WSJ", "Financial Times", "CNBC"}

def score_article(article, ticker):
    score = 0
    title = (article.get("title") or "").lower()
    desc = (article.get("description") or "").lower()

    if ticker.lower() in title:
        score += 3
    if ticker.lower() in desc:
        score += 1
    if article.get("source", {}).get("name") in IMPORTANT_SOURCES:
        score += 2
    return score



In [9]:
def rank_news(articles, ticker):
    return sorted(articles, key=lambda a: score_article(a, ticker), reverse=True)

In [10]:
articles = get_news_for_ticker_mock("AAPL", "2025-11-03")
ranked = rank_news(articles, "AAPL")
ranked

[{'title': 'AAPL posts stronger-than-expected results',
  'description': "Investors reacted to AAPL's better performance.",
  'source': {'name': 'MockWire'},
  'url': 'https://example.com/article1',
  'publishedAt': '2025-11-03T14:00:00Z'},
 {'title': 'Sector peers rally, lifting AAPL',
  'description': 'Broader sector strength supported the stock.',
  'source': {'name': 'MockFinance'},
  'url': 'https://example.com/article2',
  'publishedAt': '2025-11-03T09:30:00Z'}]

#### *Generate Explaination*

In [11]:
def generate_explanation(ticker: str, move: dict, ranked_articles: list):
    # 1. Handle missing price data
    if move is None:
        return f"Could not find market data for {ticker} on the selected date."

    date = move["date"]
    pct = move["pct_change"]
    direction = "rose" if pct > 0 else "fell"
    pct_abs = abs(pct)

    # 2. If no news, fall back to generic
    if not ranked_articles:
        return (
            f"On {date}, {ticker} {direction} {pct_abs:.2f}%. "
            f"No major company-specific headlines were found for this date, "
            f"so the move may have been driven by broader market or sector sentiment."
        )

    # 3. Use the top article as main driver
    top = ranked_articles[0]
    title = top.get("title", "a news report")
    source = top.get("source", {}).get("name", "a financial outlet")

    explanation = (
        f"On {date}, {ticker} {direction} {pct_abs:.2f}%. "
        f"The move appears to be linked to '{title}' reported by {source}. "
    )

    # 4. If we have more articles, acknowledge them
    if len(ranked_articles) > 1:
        explanation += "Additional coverage on the same day may have reinforced investor sentiment."

    return explanation

In [12]:
def generate_explanation_v2(ticker: str, move: dict, ranked_articles: list):
    # 1. Handle missing price data
    if move is None:
        return f"Could not find market data for {ticker} on the selected date."

    date = move["date"]
    pct = move["pct_change"]

    # Ensure pct is a scalar value and explicitly cast to float
    pct_value = float(pct.iloc[0]) if isinstance(pct, pd.Series) else float(pct)


    direction = "rose" if pct_value > 0 else "fell"
    pct_abs = abs(pct_value)

    # 2. If no news, fall back to generic
    if not ranked_articles:
        return (
            f"On {date}, {ticker} {direction} {pct_abs:.2f}%. "
            f"No major company-specific headlines were found for this date, "
            f"so the move may have been driven by broader market or sector sentiment."
        )

    # 3. Use the top article as main driver
    top = ranked_articles[0]
    title = top.get("title", "a news report")
    source = top.get("source", {}).get("name", "a financial outlet")

    explanation = (
        f"On {date}, {ticker} {direction} {pct_abs:.2f}%. "
        f"The move appears to be linked to '{title}' reported by {source}. "
    )

    # 4. If we have more articles, acknowledge them
    if len(ranked_articles) > 1:
        explanation += "Additional coverage on the same day may have reinforced investor sentiment."

    return explanation

In [13]:
def generate_explanation_structured(ticker: str, move: dict, ranked_articles: list):
    # base object
    result = {
        "ticker": ticker,
        "date": move["date"] if move else None,
        "price_move": None,
        "primary_driver": None,
        "articles_used": ranked_articles[:3],  # keep top 3 for UI
        "confidence": 0.4,  # start low
        "explanation": ""
    }

    # handle no price data
    if move is None:
        result["explanation"] = f"Could not find market data for {ticker} on the selected date."
        return result

    # we have price data
    pct = move["pct_change"]
    direction = "up" if pct > 0 else "down"
    pct_abs = abs(pct)

    result["price_move"] = {
        "pct_change": pct,
        "direction": direction
    }
    result["confidence"] += 0.2  # we have solid price data

    date = move["date"]

    # no news case
    if not ranked_articles:
        result["explanation"] = (
            f"On {date}, {ticker} {'rose' if pct>0 else 'fell'} {pct_abs:.2f}%. "
            f"No major company-specific headlines were found, so the move may reflect broader market or sector factors."
        )
        # confidence stays moderate
        return result

    # we have news
    top = ranked_articles[0]
    title = top.get("title", "a news report")
    source = top.get("source", {}).get("name", "a financial outlet")
    result["primary_driver"] = title
    result["confidence"] += 0.2  # we have at least one headline

    # if ticker in title, boost confidence
    if ticker.lower() in (title or "").lower():
        result["confidence"] += 0.2

    # cap at 1.0
    result["confidence"] = min(result["confidence"], 1.0)

    # build final text
    explanation = (
        f"On {date}, {ticker} {'rose' if pct>0 else 'fell'} {pct_abs:.2f}%. "
        f"The move appears to be linked to '{title}' reported by {source}. "
    )

    if len(ranked_articles) > 1:
        explanation += "Additional coverage on the same day may have reinforced investor sentiment."

    result["explanation"] = explanation
    return result

In [14]:
move = get_price_move_for("AAPL", "2025-11-03", days_back=30)
articles = get_news_for_ticker_mock("AAPL", "2025-11-03")
ranked = rank_news(articles, "AAPL")

structured = generate_explanation_structured("AAPL", move, ranked)
structured

{'ticker': 'AAPL',
 'date': '2025-11-03',
 'price_move': {'pct_change': -0.49, 'direction': 'down'},
 'primary_driver': 'AAPL posts stronger-than-expected results',
 'articles_used': [{'title': 'AAPL posts stronger-than-expected results',
   'description': "Investors reacted to AAPL's better performance.",
   'source': {'name': 'MockWire'},
   'url': 'https://example.com/article1',
   'publishedAt': '2025-11-03T14:00:00Z'},
  {'title': 'Sector peers rally, lifting AAPL',
   'description': 'Broader sector strength supported the stock.',
   'source': {'name': 'MockFinance'},
   'url': 'https://example.com/article2',
   'publishedAt': '2025-11-03T09:30:00Z'}],
 'confidence': 1.0,
 'explanation': "On 2025-11-03, AAPL fell 0.49%. The move appears to be linked to 'AAPL posts stronger-than-expected results' reported by MockWire. Additional coverage on the same day may have reinforced investor sentiment."}

#### *LLM prompt from structured result*

In [15]:
def llm_prompt_structured(structured:dict):
  ticker=structured.get("ticker")
  date=structured.get("date")
  explanation=structured.get("explanation")
  price_move=structured.get("price_move") or {}
  pct=structured.get("pct_change")
  direction=structured.get("direction")
  primary_driver=structured.get("primary_driver",[])
  articles = structured.get("articles_used", [])
  confidence=structured.get("confidence")

  # format articles
  article_lines = []
  for i, a in enumerate(articles, start=1):
    article_lines.append(
    f"{i}. {a.get('title')} ({a.get('source', {}).get('name','unknown')}) - {a.get('publishedAt','')}"
    )
    article_block = "\n".join(article_lines) if article_lines else "No articles found."

    prompt = f"""
You are an equity analyst. You will write a concise explanation for a portfolio manager.

Stock: {ticker}
Date: {date}
Price move: {direction} {pct}% (confidence {confidence:.2f})
Primary driver detected: {primary_driver}

News reviewed:
{article_block}

System-generated draft explanation:
{explanation}

Task:
- Rewrite the draft explanation to be clear, professional, and specific.
- If the news is weak, explicitly say the move may have been driven by sector/macro flows.
- Keep it to 2â€“3 sentences.
"""
    return prompt.strip()

In [16]:
def refine_with_llm(prompt: str, client=None, model="gpt-4o-mini"):
    """
    Takes a prompt and returns the LLM-refined explanation.
    If no client is provided, just return the prompt end as-is.
    """
    if client is None:
        # fallback: just return the prompt so the pipeline doesn't break
        return "LLM not configured. Here is the draft prompt I would send:\n\n" + prompt

    resp = client.chat.completions.create(
        model=model,
        messages=[
            {"role": "system", "content": "You are a senior equity research analyst. Be precise and concise."},
            {"role": "user", "content": prompt}
        ],
        temperature=0.3,
    )
    return resp.choices[0].message.content.strip()

In [17]:
# 1) get data
move = get_price_move_for("AAPL", "2025-11-03", days_back=30)
articles = get_news_for_ticker_mock("AAPL", "2025-11-03")
ranked = rank_news(articles, "AAPL")

# 2) structured draft
structured = generate_explanation_structured("AAPL", move, ranked)

# 3) build prompt
prompt = llm_prompt_structured(structured)

# 4) send to LLM (or fallback)
final_explanation = refine_with_llm(prompt, client=None)  # put your client instead of None
print(final_explanation)

LLM not configured. Here is the draft prompt I would send:

You are an equity analyst. You will write a concise explanation for a portfolio manager.

Stock: AAPL
Date: 2025-11-03
Price move: None None% (confidence 1.00)
Primary driver detected: AAPL posts stronger-than-expected results

News reviewed:
1. AAPL posts stronger-than-expected results (MockWire) - 2025-11-03T14:00:00Z

System-generated draft explanation:
On 2025-11-03, AAPL fell 0.49%. The move appears to be linked to 'AAPL posts stronger-than-expected results' reported by MockWire. Additional coverage on the same day may have reinforced investor sentiment.

Task:
- Rewrite the draft explanation to be clear, professional, and specific.
- If the news is weak, explicitly say the move may have been driven by sector/macro flows.
- Keep it to 2â€“3 sentences.


#### *Market Agent*

In [18]:
def market_agent(ticker: str, move: dict):
    if move is None:
        return {
            "has_data": False,
            "summary": f"No market data for {ticker}.",
            "impact": "unknown"
        }

    pct = move["pct_change"]
    direction = "up" if pct > 0 else "down"
    magnitude = abs(pct)

    if magnitude >= 5:
        impact = "very_large"
    elif magnitude >= 2:
        impact = "notable"
    else:
        impact = "mild"

    return {
        "has_data": True,
        "ticker": ticker,
        "date": move["date"],
        "direction": direction,
        "pct_change": round(magnitude, 2),
        "impact": impact,
        "summary": f"{ticker} was {direction} {magnitude:.2f}% on {move['date']} ({impact} move)."
    }

#### *News Agent*

In [19]:
def news_agent(ticker: str, date: str, ranked_articles: list):
    if not ranked_articles:
        return {
            "has_news": False,
            "summary": "No relevant company-specific headlines found.",
            "drivers": []
        }

    # pick top 3
    top_articles = ranked_articles[:3]
    drivers = []

    for art in top_articles:
        title = art.get("title", "")
        source = art.get("source", {}).get("name", "unknown")
        drivers.append({
            "title": title,
            "source": source,
            "type": "company-specific"  # we can improve this later with LLM classification
        })

    summary_lines = [f"- {d['title']} ({d['source']})" for d in drivers]
    summary = f"Top headlines for {ticker} on {date}:\n" + "\n".join(summary_lines)

    return {
        "has_news": True,
        "summary": summary,
        "drivers": drivers,
        "articles_used": top_articles
    }

#### *Analyst Agent*

In [20]:
def analyst_agent(ticker: str,
                  market_view: dict,
                  news_view: dict,
                  structured: dict,
                  llm_client=None,
                  model="gpt-4o-mini"):
    """
    Combine all views and (optionally) let an LLM polish it.
    """
    # base draft from your earlier logic
    base_explanation = structured.get("explanation", "")
    date = structured.get("date")

    # build context text
    context_text = f"""
Market view:
{market_view.get('summary', '')}

News view:
{news_view.get('summary', '')}

System draft:
{base_explanation}
""".strip()

    # If no LLM, return a stitched version
    if llm_client is None:
        return (
            f"{base_explanation} "
            f"(Context: {market_view.get('impact','')} move; "
            f"{'news found' if news_view.get('has_news') else 'no strong news found'}.)"
        )

    # If LLM available, build a nicer prompt (reuse step 6 function style)
    prompt = f"""
You are a senior equity research analyst.

Stock: {ticker}
Date: {date}

{context_text}

Write a 2â€“3 sentence explanation for a portfolio manager.
- Start with the move.
- Attribute it to the most plausible driver(s).
- If news is weak, mention sector/macro flows.
- Be concise, professional.
"""
    resp = llm_client.chat.completions.create(
        model=model,
        messages=[
            {"role": "system", "content": "You write professional equity notes."},
            {"role": "user", "content": prompt}
        ],
        temperature=0.3,
    )
    return resp.choices[0].message.content.strip()

#### *Agentic Flow*

In [21]:
def finexplain_run(ticker: str, date_str: str, llm_client=None):
    # 1) price
    move = get_price_move_for(ticker, date_str, days_back=30)

    # 2) news
    articles = get_news_for_ticker_mock(ticker, date_str)
    ranked = rank_news(articles, ticker)

    # 3) structured draft (rule-based)
    structured = generate_explanation_structured(ticker, move, ranked)

    # 4) build LLM prompt from structured
    prompt = llm_prompt_structured(structured)

    # 5) refine with LLM
    if llm_client is not None:
        llm_refined = refine_with_llm(prompt, client=llm_client)
    else:
        llm_refined = structured["explanation"]  # fall back to rule-based

    # 6) agent views (market + news)
    market_view = market_agent(ticker, move)
    news_view = news_agent(ticker, date_str, ranked)

    # 7) final analyst note: we can just use the LLM-refined text
    # or we can let analyst_agent stitch context too.
    final_note = llm_refined

    return {
        "ticker": ticker,
        "date": date_str,
        "move": move,
        "market_view": market_view,
        "news_view": news_view,
        "structured": structured,
        "prompt_sent": prompt,
        "final_note": final_note
    }

In [22]:
result = finexplain_run("AAPL", "2025-11-03", llm_client=None)
print(result["final_note"])


On 2025-11-03, AAPL fell 0.49%. The move appears to be linked to 'AAPL posts stronger-than-expected results' reported by MockWire. Additional coverage on the same day may have reinforced investor sentiment.


In [23]:
def run_batch(tickers, dates, llm_client=None):
    rows = []
    for t in tickers:
        for d in dates:
            out = finexplain_run(t, d, llm_client=llm_client)
            move = out["move"]
            structured = out["structured"]
            news_view = out["news_view"]

            pct_change = move["pct_change"] if move else None
            magnitude = abs(pct_change) if pct_change is not None else None

            rows.append({
                "ticker": t,
                "date": d,
                "pct_change": pct_change,
                "abs_pct_change": magnitude,
                "confidence": structured["confidence"],
                "has_news": news_view["has_news"],
                "primary_driver": structured["primary_driver"],
                "final_note": out["final_note"],
                # naive quality flags
                "is_meaningful_move": magnitude is not None and magnitude >= 1.0,
                "is_high_confidence": structured["confidence"] >= 0.7,
            })
    return rows

In [24]:
!pip install streamlit pyngrok --quiet

In [30]:
%%writefile app.py
import streamlit as st
from datetime import date

# ---------------------------
# PASTE / IMPORT YOUR FUNCTIONS HERE
# (finexplain_run, etc.)
# ---------------------------

st.set_page_config(page_title="FinExplain", page_icon="ðŸ“ˆ", layout="wide")

st.title("ðŸ“ˆ FinExplain â€“ AI Stock Move Explainer")

# Sidebar inputs
st.sidebar.header("Inputs")
ticker = st.sidebar.text_input("Ticker", value="AAPL")
selected_date = st.sidebar.date_input("Date", value=date(2025, 11, 3))
run_button = st.sidebar.button("Explain")

# Main area
if run_button:
    with st.spinner("Analyzing..."):
        result = finexplain_run(ticker.upper(), selected_date.strftime("%Y-%m-%d"), llm_client=None)

    st.subheader("Final Explanation")
    st.write(result["final_note"])

    col1, col2, col3 = st.columns(3)

    with col1:
        st.markdown("**Price Move**")
        move = result["move"]
        if move:
            st.metric(
                label=f"{ticker.upper()} on {move['date']}",
                value=f"{move['pct_change']}%",
                delta=None
            )
        else:
            st.write("No market data.")

    with col2:
        st.markdown("**Confidence**")
        conf = result["structured"]["confidence"]
        st.progress(min(conf, 1.0))
        st.write(f"{conf:.2f}")

    with col3:
        st.markdown("**Primary driver**")
        st.write(result["structured"].get("primary_driver") or "No strong driver detected")

    st.markdown("---")
    st.markdown("### News considered")
    news_view = result["news_view"]
    if news_view["has_news"]:
        for art in news_view["articles_used"]:
            st.markdown(f"**{art.get('title')}**  \n{art.get('source', {}).get('name','')} â€” {art.get('publishedAt','')}")
    else:
        st.write("No relevant company-specific headlines found.")
else:
    st.info("Enter a ticker and date, then click **Explain**.")

Overwriting app.py


In [31]:
!nohup streamlit run app.py --server.port 8501 --server.address 0.0.0.0 >/content/streamlit.log 2>&1 &
print("âœ… Streamlit started on port 8501")

âœ… Streamlit started on port 8501


In [32]:
from pyngrok import ngrok

In [33]:
ngrok.set_auth_token("357WdiTbqbeoeoHQiH3Cu43zMc0_87pFXuKufjefrQ7hxSmsk")


In [34]:
public_url = ngrok.connect(8501, "http")
print("ðŸ”— Public URL:", public_url)

ðŸ”— Public URL: NgrokTunnel: "https://densimetrically-steamless-kortney.ngrok-free.dev" -> "http://localhost:8501"
