In [7]:
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
FinGPT-Forecaster (GPT-4 edition)
Local test script – no Gradio required
"""

import os, re, json, time, random
from collections import defaultdict
from datetime import date, datetime, timedelta
from dotenv import load_dotenv

import pandas as pd
import yfinance as yf
import finnhub
from openai import OpenAI

# Load environment variables from .env file
load_dotenv()

# ---------- 0  CONFIG ---------------------------------------------------------

# Chat model name; can be overridden through the OPENAI_MODEL environment variable.
OPENAI_MODEL  = os.getenv("OPENAI_MODEL", "gpt-4o-mini")
# Finnhub API key, read from the environment (load_dotenv() has already run).
FINNHUB_KEY   = os.getenv("FINNHUB_API_KEY")

# Fail fast when the Finnhub key is missing instead of failing on the first request.
if not FINNHUB_KEY:
    raise RuntimeError("FINNHUB_API_KEY not set")

# Module-level API clients shared by the helper functions below.
# NOTE(review): OPENAI_API_KEY is not validated here the way FINNHUB_API_KEY is.
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
finnhub_client = finnhub.Client(api_key=FINNHUB_KEY)


# System message sent with every chat completion; it fixes the three-section
# layout the model is asked to produce.
SYSTEM_PROMPT = (
    "You are a seasoned stock-market analyst. "
    "Given recent company news and optional basic financials, "
    "return:\n"
    "[Positive Developments] – 2-4 bullets\n"
    "[Potential Concerns] – 2-4 bullets\n"
    "[Prediction & Analysis] – a one-week price outlook with rationale."
)


# ---------- 1  DATE / UTILITY HELPERS ----------------------------------------

def today() -> str:
    """Return the current local date formatted as ``YYYY-MM-DD``."""
    current = date.today()
    return f"{current:%Y-%m-%d}"

def n_weeks_before(date_string: str, n: int) -> str:
    """Shift a ``YYYY-MM-DD`` date back by ``n`` weeks.

    A negative ``n`` moves the date forward instead. The result uses the same
    ``YYYY-MM-DD`` format as the input.
    """
    anchor = datetime.strptime(date_string, "%Y-%m-%d")
    shifted = anchor - timedelta(weeks=n)
    return shifted.strftime("%Y-%m-%d")


# ---------- 2  DATA FETCHING --------------------------------------------------

def get_stock_data(symbol: str, steps: list[str]) -> pd.DataFrame:
    """Download closing prices and summarise them as consecutive periods.

    Args:
        symbol: Ticker passed to ``yf.download``.
        steps: ``YYYY-MM-DD`` boundary dates in ascending order. The first and
            last entries are used as the download window.

    Returns:
        A DataFrame with one row per period and the columns ``Start Date``,
        ``End Date``, ``Start Price`` and ``End Price``. Each boundary is
        mapped to the first downloaded row dated on or after it; the final
        period ends at the last downloaded row.

    Raises:
        RuntimeError: If the download returns no rows.
        KeyError: If the downloaded frame has multi-level columns and none of
            the ``Close`` columns matches ``symbol`` (ignoring case).
    """
    start, end = steps[0], steps[-1]
    df = yf.download(symbol, start=start, end=end,
                     auto_adjust=False, progress=False, threads=False)

    if df.empty:
        raise RuntimeError(f"Cannot download price data for {symbol}")

    close = df["Close"]
    if isinstance(df.columns, pd.MultiIndex):
        # With multi-level columns, ``close`` is a frame keyed by ticker.
        # Resolve the ticker case-insensitively so a lower-case symbol still
        # finds a column whose label was upper-cased by the downloader; fall
        # back to the raw symbol so a genuine miss still raises KeyError.
        by_upper = {str(col).upper(): col for col in close.columns}
        close_ser = close[by_upper.get(symbol.upper(), symbol)]
    else:
        close_ser = close

    dates, prices = [], []
    avail = df.index.astype(str)

    for d in steps[:-1]:
        # First downloaded row dated on or after this boundary.
        pos = next((i for i, day in enumerate(avail) if day >= d), None)
        if pos is None:
            # No data on/after this boundary: the boundary is skipped, so the
            # result simply has fewer rows.
            continue
        dates.append(df.index[pos])
        prices.append(float(close_ser.iloc[pos]))   # plain float, not a numpy scalar

    # The last period always closes on the most recent downloaded row.
    dates.append(df.index[-1])
    prices.append(float(close_ser.iloc[-1]))

    return pd.DataFrame({
        "Start Date": dates[:-1], "End Date": dates[1:],
        "Start Price": prices[:-1], "End Price": prices[1:]
    })



def current_basics(symbol: str, curday: str) -> dict:
    """Return the latest quarterly metrics reported on or before ``curday``.

    Args:
        symbol: Ticker passed to ``finnhub_client.company_basic_financials``.
        curday: Cut-off date; reporting periods are compared to it as strings,
            so both are assumed to share the same ``YYYY-MM-DD`` layout.

    Returns:
        A dict mapping metric name to value for the most recent period that is
        ``<= curday``, plus a ``"period"`` key holding that period. An empty
        dict is returned when the response carries no quarterly series or when
        every period is later than ``curday``.
    """
    raw = finnhub_client.company_basic_financials(symbol, "all")
    # The response may lack "series" entirely or contain only non-quarterly
    # data; treat both the same way as "no data" rather than raising KeyError.
    quarterly = ((raw or {}).get("series") or {}).get("quarterly") or {}
    if not quarterly:
        return {}

    # Regroup from metric -> [points] into period -> {metric: value}.
    merged = defaultdict(dict)
    for metric, vals in quarterly.items():
        for v in vals:
            merged[v["period"]][metric] = v["v"]

    latest = max((p for p in merged if p <= curday), default=None)
    if latest is None:
        return {}
    snapshot = dict(merged[latest])
    snapshot["period"] = latest
    return snapshot

def attach_news(symbol: str, df: pd.DataFrame) -> pd.DataFrame:
    """Add a ``News`` column holding each period's company news as JSON.

    For every row, news between ``Start Date`` and ``End Date`` is requested
    from ``finnhub_client.company_news``, reduced to ``date`` / ``headline`` /
    ``summary``, sorted by ``date`` and serialised with ``json.dumps``.

    The frame is modified in place and the same object is returned. One second
    of sleep precedes every request to stay under the Finnhub rate limit.
    """
    def _slim(article: dict) -> dict:
        # Keep only the fields the prompt needs; the timestamp becomes a
        # sortable local-time string.
        stamp = datetime.fromtimestamp(article["datetime"])
        return {
            "date"    : stamp.strftime("%Y%m%d%H%M%S"),
            "headline": article["headline"],
            "summary" : article["summary"],
        }

    serialised = []
    for period_start, period_end in zip(df["Start Date"], df["End Date"]):
        from_day = period_start.strftime("%Y-%m-%d")
        to_day = period_end.strftime("%Y-%m-%d")
        time.sleep(1)                                        # Finnhub QPM guard
        articles = finnhub_client.company_news(symbol, _from=from_day, to=to_day)
        ordered = sorted((_slim(a) for a in articles), key=lambda a: a["date"])
        serialised.append(json.dumps(ordered))

    df["News"] = serialised
    return df


# ---------- 3  PROMPT CONSTRUCTION -------------------------------------------

def sample_news(news: list[str], k: int = 5) -> list[str]:
    """Pick at most ``k`` items from ``news`` at random, keeping their order.

    When ``news`` has ``k`` items or fewer, the list itself is returned
    unchanged; otherwise ``k`` distinct positions are drawn and the matching
    items are returned in their original relative order.
    """
    total = len(news)
    if total > k:
        chosen = random.sample(range(total), k)
        chosen.sort()
        return [news[pos] for pos in chosen]
    return news


def make_prompt(symbol: str, df: pd.DataFrame, curday: str, use_basics=False) -> str:
    """Assemble the user message sent to the model for a one-week forecast.

    Args:
        symbol: Ticker being analysed.
        df: One row per past period with ``Start Date``, ``End Date``,
            ``Start Price``, ``End Price`` and a ``News`` column containing a
            JSON list of ``{"headline", "summary", ...}`` items.
        curday: Reference date (``YYYY-MM-DD``); the forecast window runs from
            this date to one week later.
        use_basics: When true, include the latest basic financials returned by
            ``current_basics``.

    Returns:
        The prompt text: company introduction, per-period price moves with up
        to five sampled news items each, the basic-financials section, and the
        closing instruction.
    """
    # Company profile
    prof = finnhub_client.company_profile2(symbol=symbol)
    company_blurb = (
        f"[Company Introduction]:\n{prof['name']} operates in the "
        f"{prof['finnhubIndustry']} sector ({prof['country']}). "
        f"Founded {prof['ipo']}, market cap {prof['marketCapitalization']:.1f} "
        f"{prof['currency']}; ticker {symbol} on {prof['exchange']}.\n"
    )

    # Past weeks block
    past_block = ""
    for _, row in df.iterrows():
        term = "increased" if row["End Price"] > row["Start Price"] else "decreased"
        head = (f"From {row['Start Date']:%Y-%m-%d} to {row['End Date']:%Y-%m-%d}, "
                f"{symbol}'s stock price {term} from "
                f"{row['Start Price']:.2f} to {row['End Price']:.2f}.")
        news_items = json.loads(row["News"])
        # Drop items whose summary is the known boilerplate teaser text.
        summaries  = [
            f"[Headline] {n['headline']}\n[Summary] {n['summary']}\n"
            for n in news_items
            if not n["summary"].startswith("Looking for stock market analysis")
        ]
        past_block += "\n" + head + "\n" + "".join(sample_news(summaries, 5))

    # Optional basic financials
    if use_basics:
        basics = current_basics(symbol, curday)
        if basics:
            basics_txt = "\n".join(f"{k}: {v}" for k, v in basics.items() if k != "period")
            basics_block = (f"\n[Basic Financials] (reported {basics['period']}):\n{basics_txt}\n")
        else:
            basics_block = "\n[Basic Financials]: not available\n"
    else:
        basics_block = "\n[Basic Financials]: not requested\n"

    # n = -1 moves one week forward, giving the end of the forecast window.
    horizon = f"{curday} to {n_weeks_before(curday, -1)}"
    # Every fragment that contains a placeholder must be an f-string; the
    # middle one previously was not, so "{symbol}" reached the model verbatim.
    final_user_msg = (
        company_blurb
        + past_block
        + basics_block
        + f"\nBased on all information before {curday}, analyse positive "
          f"developments and potential concerns for {symbol}, then predict its "
          f"price movement for next week ({horizon})."
    )
    return final_user_msg


# ---------- 4  LLM CALL -------------------------------------------------------

def chat_completion(prompt: str,
                    model: str = OPENAI_MODEL,
                    temperature: float = 0.3,
                    stream: bool = False) -> str:
    """Send ``prompt`` to the chat model together with ``SYSTEM_PROMPT``.

    Args:
        prompt: User message content.
        model: Chat model name.
        temperature: Sampling temperature forwarded to the API.
        stream: When true, print the reply incrementally to stdout as it
            arrives and return the concatenated text once the stream ends.

    Returns:
        The assistant's reply text.
    """
    response = client.chat.completions.create(
        model=model,
        temperature=temperature,
        stream=stream,
        messages=[
            {"role": "system", "content": SYSTEM_PROMPT},
            {"role": "user",   "content": prompt}
        ],
    )

    if stream:
        collected = []
        for chunk in response:
            # A streamed chunk can arrive with an empty ``choices`` list;
            # skip it rather than fail with IndexError in the middle of a reply.
            if not chunk.choices:
                continue
            # ``delta.content`` may be None (e.g. on role-only or final chunks).
            delta = chunk.choices[0].delta.content or ""
            print(delta, end="", flush=True)
            collected.append(delta)
        print()
        return "".join(collected)

    # without stream
    return response.choices[0].message.content


# ---------- 5  MAIN ENTRY (CLI test) -----------------------------------------

def predict(symbol: str = "AAPL",
            curday: "str | None" = None,
            n_weeks: int = 3,
            use_basics: bool = False,
            stream: bool = False) -> tuple[str, str]:
    """Run the full pipeline: prices -> news -> prompt -> model reply.

    Args:
        symbol: Ticker to forecast.
        curday: Reference date (``YYYY-MM-DD``). ``None`` (the default) means
            "today", resolved when the function is called.
        n_weeks: Number of past weekly periods to include in the prompt.
        use_basics: Forwarded to ``make_prompt``.
        stream: Forwarded to ``chat_completion``.

    Returns:
        ``(prompt, answer)``: the prompt that was sent and the model's reply.
    """
    # A ``today()`` default in the signature would be evaluated once, when the
    # cell defining this function runs, and go stale in a long-lived kernel.
    if curday is None:
        curday = today()

    # Weekly boundaries, oldest first, ending at ``curday``.
    steps = [n_weeks_before(curday, n) for n in range(n_weeks + 1)][::-1]
    df    = get_stock_data(symbol, steps)
    df    = attach_news(symbol, df)

    prompt_info = make_prompt(symbol, df, curday, use_basics)
    answer      = chat_completion(prompt_info, stream=stream)

    return prompt_info, answer

In [8]:
# Demo run: forecast NVDA with the default look-back, printing the model's
# reply as it streams; `info` keeps the prompt and `ans` the full reply text.
info, ans = predict(symbol='NVDA', stream=True)  

**[Positive Developments]**
- **AI Collaboration with Schaeffler**: Nvidia's partnership with Schaeffler to advance digital manufacturing highlights its commitment to innovation and expanding its influence in the AI sector, potentially leading to new revenue streams.
- **Investment in TerraPower**: Nvidia's $650 million investment in TerraPower indicates strong confidence in the future of energy and AI technologies, which could enhance its portfolio and market position.
- **Market Momentum**: Nvidia's recent stock performance, including a notable increase to record highs, reflects strong investor sentiment and confidence in its growth prospects, particularly in AI and semiconductor markets.
- **Dominance in AI Chip Market**: Nvidia remains a key player in the AI chip market, with significant demand from major clients like OpenAI, solidifying its competitive edge despite emerging competition from AMD.

**[Potential Concerns]**
- **Increased Competition from AMD**: AMD's unveiling of new