# Project Yield - Data Exploration & Ratio Testing

This notebook provides interactive examples for:
1. Loading parquet data (prices, quarterly & annual fundamentals)
2. Computing all ratios for a single stock via `RatioCalculator.get_all_ratios`
3. Batch ratio computation via `MetricsEngine.calculate_all_ratios`
4. Visualizing prices, quarterly revenue, and ratio comparisons with Plotly

## 1. Setup

In [None]:
import sys
from pathlib import Path

def find_project_root(start: Path) -> Path:
    """Locate the project root relative to ``start``.

    Checks ``start`` and then each of its ancestors, nearest first, and
    returns the first directory that contains ``src/project_yield`` (the
    package this notebook imports). If no such directory exists, falls back
    to ``start.parent``, i.e. the assumption that the notebook lives exactly
    one level below the project root.

    Args:
        start: Directory to begin the search from (normally the kernel's
            working directory).

    Returns:
        The detected project root, or ``start.parent`` when nothing matches.
    """
    for candidate in (start, *start.parents):
        if (candidate / "src" / "project_yield").is_dir():
            return candidate
    return start.parent


# Ensure <project_root>/src is on the path so the project imports work,
# regardless of whether the kernel was started in the notebook folder, a
# sub-folder of it, or the project root itself.
project_root = find_project_root(Path.cwd())
if str(project_root / "src") not in sys.path:
    sys.path.insert(0, str(project_root / "src"))

import polars as pl
from project_yield.config import Settings
from project_yield.data.reader import DataReader
from project_yield.analysis.ratios import RatioCalculator
from project_yield.analysis.metrics import MetricsEngine

# Build Settings around <project_root>/data, then a DataReader on top of it
data_dir = project_root / "data"
settings = Settings(data_path=data_dir)
reader = DataReader(settings)

# Report the configured data path and the first ten tickers of each dataset
print(f"Data path: {settings.data_path}")
print(
    f"Available price tickers: {reader.list_tickers('prices')[:10]} ..."
)
print(
    f"Available quarterly tickers: {reader.list_tickers('quarterly')[:10]} ..."
)

## 2. Load Price Data

In [None]:
TICKER = "MSFT"  # Swap in another symbol to explore a different stock

# Materialize the full price history for TICKER (the query is collected once)
prices_df = (
    reader
    .get_prices(ticker=TICKER)
    .collect()
)

# Row count, covered date span and column names, one per line
print(
    f"{TICKER} price rows: {len(prices_df)}",
    f"Date range: {prices_df['date'].min()} to {prices_df['date'].max()}",
    f"Columns: {prices_df.columns}\n",
    sep="\n",
)
prices_df.head(10)

In [None]:
# Latest price: ask the reader for the most recent price entry for TICKER.
# NOTE(review): the return type of get_latest_price is not visible from this
# notebook (frame, row dict or scalar) — confirm against DataReader.
latest = reader.get_latest_price(TICKER)
print(f"Latest price for {TICKER}:")
# Bare trailing expression so the notebook renders the value under the label.
latest

In [None]:
# Filter by date range
from datetime import date

# Bounds for calendar year 2024, handed to get_prices as start_date / end_date
date_window = {
    "start_date": date(2024, 1, 1),
    "end_date": date(2024, 12, 31),
}
prices_2024 = reader.get_prices(ticker=TICKER, **date_window).collect()

print(f"{TICKER} 2024 price rows: {len(prices_2024)}")
# Show the last ten rows of the filtered frame
prices_2024.tail(10)

## 3. Load Fundamentals Data

In [None]:
# Quarterly fundamentals for TICKER, collected into an in-memory frame
quarterly_df = (
    reader
    .get_fundamentals_quarterly(ticker=TICKER)
    .collect()
)

# Row count plus the number and names of the available columns
print(
    f"{TICKER} quarterly rows: {len(quarterly_df)}",
    f"Columns ({len(quarterly_df.columns)}): {quarterly_df.columns}\n",
    sep="\n",
)

# First eight rows after sorting by fiscal_period in descending order
(
    quarterly_df
    .sort("fiscal_period", descending=True)
    .head(8)
)

In [None]:
# Annual fundamentals for TICKER, collected into an in-memory frame
annual_df = (
    reader
    .get_fundamentals_annual(ticker=TICKER)
    .collect()
)
print(f"{TICKER} annual rows: {len(annual_df)}")

# Default-sized head() after sorting by fiscal_period in descending order
(
    annual_df
    .sort("fiscal_period", descending=True)
    .head()
)

In [None]:
# TTM (Trailing Twelve Months) fundamentals
# Unlike the quarterly/annual getters, this call is not followed by .collect().
# NOTE(review): presumably get_ttm_fundamentals already returns a materialized
# result — confirm against DataReader.
ttm_df = reader.get_ttm_fundamentals(TICKER)
print(f"{TICKER} TTM fundamentals:")
# Bare trailing expression so the notebook renders the result under the label.
ttm_df

In [None]:
# Key financial columns worth eyeballing; any that are absent from the
# quarterly frame are skipped instead of raising on select().
key_cols = [
    "fiscal_period",
    "revenue",
    "gross_profit",
    "operating_income",
    "net_income",
    "rd_expense",
    "shares_outstanding",
    "free_cash_flow",
]
available = list(filter(lambda name: name in quarterly_df.columns, key_cols))

# First eight rows of the narrowed frame, sorted by fiscal_period descending
(
    quarterly_df
    .select(available)
    .sort("fiscal_period", descending=True)
    .head(8)
)

## 4. RatioCalculator - `get_all_ratios` for a Single Stock

In [None]:
# Build a RatioCalculator from the shared settings; `calc` is reused by the
# individual-ratio cell further down.
calc = RatioCalculator(settings)

# get_all_ratios is consumed via .items() below, so it is treated as a mapping
# of ratio name -> value.
ratios = calc.get_all_ratios(TICKER)
print(f"All ratios for {TICKER}:")
# One line per ratio: the name is left-aligned and padded to 20 characters so
# the values line up in a column. The `s` format spec requires string keys.
for key, value in ratios.items():
    print(f"  {key:20s}: {value}")

In [None]:
# View as a single-row DataFrame for easy comparison:
# wrapping the ratios mapping in a one-element list yields exactly one row,
# with one column per ratio name.
ratios_df = pl.DataFrame([ratios])
# Bare trailing expression so the notebook renders the frame.
ratios_df

In [None]:
# Individual ratios are also exposed as dedicated calculator methods
pe = calc.get_pe_ratio(TICKER)
margin = calc.get_operating_margin(TICKER)

# Print both values, one per line, with aligned labels
print(
    f"{TICKER} PE Ratio:         {pe}",
    f"{TICKER} Operating Margin:  {margin}",
    sep="\n",
)

## 5. MetricsEngine - `calculate_all_ratios` for Multiple Stocks

In [None]:
# Small batch of tickers used throughout the MetricsEngine examples
tickers = [
    "AAPL",
    "MSFT",
    "GOOG",
    "NVDA",
    "AMZN",
]

# Engine built from the shared settings; compute the ratios for the batch
engine = MetricsEngine(settings)
all_ratios_df = engine.calculate_all_ratios(tickers)

print(f"Batch ratios shape: {all_ratios_df.shape}")
all_ratios_df

In [None]:
# Side-by-side view of a chosen subset of metrics for the ticker batch
comparison_metrics = ["pe_ratio", "operating_margin", "revenue_growth"]
comparison = engine.compare_tickers(tickers, metrics=comparison_metrics)
comparison

In [None]:
# Rank by a metric
# ascending=True is requested so that the lowest PE ratio comes first, which
# is what the label printed below promises.
# NOTE(review): how rank_by_metric treats tickers with a missing pe_ratio is
# not visible here — confirm against MetricsEngine.
ranked = engine.rank_by_metric("pe_ratio", tickers=tickers, ascending=True)
print("Ranked by PE ratio (lowest first):")
# Bare trailing expression so the notebook renders the ranking.
ranked

In [None]:
# Valuation summary for a single ticker
# The result is consumed via .items() below, so it is treated as a mapping of
# field name -> value.
summary = engine.get_valuation_summary(TICKER)
print(f"Valuation summary for {TICKER}:")
# One line per field: the name is left-aligned and padded to 20 characters so
# the values line up in a column. The `s` format spec requires string keys.
for key, value in summary.items():
    print(f"  {key:20s}: {value}")

## 6. Visualize DataFrames

In [None]:
import plotly.express as px

# Closing-price line chart; rows are sorted by date before the frame is
# converted to pandas for Plotly Express.
prices_pd = (
    prices_df
    .sort("date")
    .to_pandas()
)
fig = px.line(
    data_frame=prices_pd,
    x="date",
    y="close",
    title=f"{TICKER} Closing Price",
)
fig.show()

In [None]:
# Revenue per fiscal period as a bar chart, ordered by fiscal_period
rev_df = (
    quarterly_df
    .select(["fiscal_period", "revenue"])
    .sort("fiscal_period")
    .to_pandas()
)
fig = px.bar(
    data_frame=rev_df,
    x="fiscal_period",
    y="revenue",
    title=f"{TICKER} Quarterly Revenue",
)
fig.show()

In [None]:
# Grouped bar chart of the margin ratios: one group per ticker, one bar per metric
metrics_to_plot = ["operating_margin", "net_profit_margin", "gross_margin"]
plot_df = all_ratios_df.select(["ticker", *metrics_to_plot])

# Reshape wide -> long so each row is a (ticker, metric, value) triple
plot_pd = plot_df.to_pandas().melt(
    id_vars="ticker",
    var_name="metric",
    value_name="value",
)

bar_options = dict(
    x="ticker",
    y="value",
    color="metric",
    barmode="group",
    title="Margin Comparison Across Tickers",
)
fig = px.bar(plot_pd, **bar_options)
fig.show()

In [None]:
# PE ratio per ticker; rows with a null in either selected column are dropped
# before plotting.
pe_df = (
    all_ratios_df
    .select(["ticker", "pe_ratio"])
    .drop_nulls()
    .to_pandas()
)
fig = px.bar(
    data_frame=pe_df,
    x="ticker",
    y="pe_ratio",
    title="PE Ratio Comparison",
    text_auto=".1f",  # bar labels with one decimal place
)
fig.show()