# 04 — Forward Returns & Direction Bias
**SweetReturns Golden City** data pipeline.

This notebook reads `stock_tickets.parquet` and computes:
1. **Direction bias** — buy / call / put / short probability weights derived from the 60-day forward-return skew (`fwd_60d_skew`) and the median forward return (`fwd_median`)
2. **Store dimensions** — width, depth, height scaled by `golden_score` (platinum bonus for height)
3. **Agent density** — number of simulated agents per store, driven by score and platinum status

Output: `stock_directions.parquet`

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
# Notebook-wide setup: silence every warning and use seaborn's dark grid theme.
warnings.filterwarnings('ignore')
sns.set_style('darkgrid')

# Load the input table that every later cell works on.
df = pd.read_parquet('stock_tickets.parquet')

# Quick summary of what was loaded: size, schema, ticker count, date span.
for summary_line in (
    f"Shape: {df.shape}",
    f"Columns: {list(df.columns)}",
    f"Tickers: {df['ticker'].nunique()}",
    f"Date range: {df['Date'].min()} — {df['Date'].max()}",
):
    print(summary_line)

# Last expression of the cell, so the notebook renders the first rows.
df.head()

In [None]:
# ---- Direction Bias ----
# Assign buy / call / put / short probability weights based on
# the 60-day forward return skew and median forward return.

def compute_direction_bias(row):
    """Map one row's forward-return statistics to direction weights.

    Parameters
    ----------
    row : object exposing ``.get(key, default)``
        Read for ``"fwd_60d_skew"`` and ``"fwd_median"``; either key
        defaults to 0 when absent. In this notebook it is a DataFrame row
        supplied by ``df.apply(..., axis=1)``.

    Returns
    -------
    dict
        Keys ``"buy"``, ``"call"``, ``"put"``, ``"short"`` (in that order)
        mapped to weights that sum to 1. The first matching rule wins:

        * either input is missing (NaN/None)   -> uniform 0.25 each
        * skew > 0.5 and median > 0.02         -> buy-heavy
        * skew > 0.3 and |median| < 0.02       -> call-heavy
        * skew < -0.3                          -> put/short-heavy
        * anything else                        -> mild buy tilt
    """
    skew_60d = row.get("fwd_60d_skew", 0)
    fwd_med = row.get("fwd_median", 0)

    # Weights are listed in (buy, call, put, short) order.
    if pd.isna(skew_60d) or pd.isna(fwd_med):
        weights = (0.25, 0.25, 0.25, 0.25)
    elif skew_60d > 0.5 and fwd_med > 0.02:
        weights = (0.50, 0.30, 0.10, 0.10)
    elif skew_60d > 0.3 and -0.02 < fwd_med < 0.02:
        # Strict bounds on both sides: a median of exactly +/-0.02 falls through.
        weights = (0.20, 0.50, 0.20, 0.10)
    elif skew_60d < -0.3:
        weights = (0.10, 0.10, 0.45, 0.35)
    else:
        weights = (0.30, 0.25, 0.25, 0.20)

    return dict(zip(("buy", "call", "put", "short"), weights))

# Evaluate the rule once per row; result_type='expand' spreads each returned
# dict into one column per key (buy / call / put / short).
bias = df.apply(compute_direction_bias, axis=1, result_type='expand')

# Copy each weight onto df under its "<direction>_pct" column name.
for pct_col, bias_key in (
    ("buy_pct", "buy"),
    ("call_pct", "call"),
    ("put_pct", "put"),
    ("short_pct", "short"),
):
    df[pct_col] = bias[bias_key]

print("Direction bias distribution:")
pct_summary = df[["buy_pct", "call_pct", "put_pct", "short_pct"]].describe().round(3)
print(pct_summary)

In [None]:
# ---- Store Dimensions ----
# Width and depth grow modestly with golden_score;
# height grows more aggressively and gets a 1.5x platinum bonus.

base_width = 3
base_depth = 3

golden = df["golden_score"]
platinum_mask = df["is_platinum"]

# Footprint: width and depth share the same gentle 0.5-per-point slope.
df["store_width"] = base_width + golden * 0.5
df["store_depth"] = base_depth + golden * 0.5

# Height: steeper 2.5-per-point slope on a base of 2, then platinum rows
# are scaled by 1.5. The column is rebuilt first, so re-running the cell
# does not compound the bonus.
df["store_height"] = 2 + golden * 2.5
df.loc[platinum_mask, "store_height"] = df.loc[platinum_mask, "store_height"] * 1.5

print("Store dimension statistics:")
dimension_stats = df[["store_width", "store_depth", "store_height"]].describe().round(2)
print(dimension_stats)
print(f"\nPlatinum stores: {df['is_platinum'].sum()}")
print(f"Max height (platinum): {df.loc[df['is_platinum'], 'store_height'].max():.1f}")
print(f"Max height (non-plat): {df.loc[~df['is_platinum'], 'store_height'].max():.1f}")

In [None]:
# ---- Agent Density ----
# Agents per store scale super-linearly with golden_score,
# and platinum stores receive a 3x multiplier.

base_agents = 200

# Score-driven head-count: floor(score ** 2.5 * 800), added to the base.
score_agents = np.floor(df["golden_score"] ** 2.5 * 800)
df["agent_density"] = base_agents + score_agents

# Platinum rows get their whole total (base included) tripled.
is_plat = df["is_platinum"]
df.loc[is_plat, "agent_density"] = df.loc[is_plat, "agent_density"] * 3

print("Agent density statistics:")
print(df["agent_density"].describe().round(0))
print(f"\nTop 10 agent-dense stores (latest date):")

# Rank only the rows from the most recent date in the table.
latest = df.loc[df["Date"] == df["Date"].max()]
top_cols = ["ticker", "golden_score", "is_platinum", "agent_density"]
print(latest.nlargest(10, "agent_density")[top_cols])

In [None]:
# ---- Visualization: Direction Bias Breakdown by Sector ----

# Work on an independent copy of the most recent date's rows.
latest = df.loc[df["Date"] == df["Date"].max()].copy()

# Mean direction weights per sector.
bias_cols = ["buy_pct", "call_pct", "put_pct", "short_pct"]
sector_bias = latest.groupby("sector")[bias_cols].mean()

fig, axes = plt.subplots(1, 2, figsize=(16, 6))
bias_ax, height_ax = axes

# Left panel: stacked horizontal bars, one segment per direction.
sector_bias.plot(
    kind="barh",
    stacked=True,
    ax=bias_ax,
    color=["#00BFFF", "#7FFF00", "#FF69B4", "#FF4500"],
    edgecolor="#333",
)
bias_ax.set_title("Average Direction Bias by Sector")
bias_ax.set_xlabel("Probability Weight")
bias_ax.legend(title="Direction", loc="lower right")

# Right panel: store-height box plots, sectors ordered by ascending median.
median_height = latest.groupby("sector")["store_height"].median()
sector_order = median_height.sort_values().index
sns.boxplot(
    data=latest,
    x="store_height",
    y="sector",
    order=sector_order,
    palette="viridis",
    ax=height_ax,
)
height_ax.set_title("Store Height Distribution by Sector")
height_ax.set_xlabel("Store Height")

plt.tight_layout()
plt.savefig("04_direction_bias_by_sector.png", dpi=150)
plt.show()

# Second figure: agent density against score, platinum rows drawn in gold.
fig, ax = plt.subplots(figsize=(10, 6))
colors = latest["is_platinum"].map({True: "#FFD700", False: "#666"})
ax.scatter(
    latest["golden_score"],
    latest["agent_density"],
    c=colors,
    s=40,
    alpha=0.6,
    edgecolor="#333",
)
ax.set_xlabel("Golden Score")
ax.set_ylabel("Agent Density")
ax.set_title("Agent Density vs Golden Score (gold = platinum)")
plt.tight_layout()
plt.savefig("04_agent_density.png", dpi=150)
plt.show()

In [None]:
# ---- Save to stock_directions.parquet ----

# Write the full frame (every input column plus the ones added above);
# index=False keeps the DataFrame index out of the file.
df.to_parquet("stock_directions.parquet", index=False)

print(f"Saved stock_directions.parquet: {df.shape}")
print("New columns added: buy_pct, call_pct, put_pct, short_pct, "
      "store_width, store_depth, store_height, agent_density")
print(f"\nTotal columns: {len(df.columns)}")
print("Sample (latest date):")

# Preview up to five rows from the most recent date, restricted to the
# identifying columns and a selection of the derived ones.
latest_sample = df[df['Date'] == df['Date'].max()].head(5)
print(latest_sample[[
    'ticker', 'golden_score', 'is_platinum',
    'buy_pct', 'call_pct', 'put_pct', 'short_pct',
    'store_width', 'store_height', 'agent_density'
]].to_string(index=False))

## Summary

| Output | Description |
|---|---|
| `buy_pct`, `call_pct`, `put_pct`, `short_pct` | Direction bias weights based on forward skew & median |
| `store_width`, `store_depth`, `store_height` | 3D store dimensions scaled by `golden_score` (platinum bonus on height) |
| `agent_density` | Simulated agents per store (`200 + floor(score^2.5 * 800)`; the whole total is multiplied by 3 for platinum stores) |

**Saved:** `stock_directions.parquet`

**Next:** `05_export_json.ipynb` — assemble the final JSON payload for the React frontend.