
# 🌦️ Alt-Data: Weather → Market Signals

_Date generated: 2025-09-03_

This notebook builds **weather-derived trading signals** and tests whether they have **predictive power** for assets
(equities/commodities/FX) that are mapped to geographic regions.

**What’s inside**
- Data loaders: prices (CSV) and weather (Open‑Meteo API) with **offline synthetic fallback**
- Feature engineering: **temperature anomaly**, **HDD/CDD**, **precipitation / wind / storm** signals
- Region mapper: map tickers to geo locations
- Cross‑sectional backtest: rank long/short with costs, risk targeting
- Diagnostics: IC/IR, factor regression vs returns, rolling performance


## 0) Parameters

In [None]:

# --- Input files -----------------------------------------------------------
# The loaders below check whether each path exists and use built-in
# defaults when it does not.
PATH_PRICES = "data/prices.csv"            # wide: date, tickers...
PATH_MAP    = "data/asset_geo_map.csv"     # ticker, region, lat, lon

# --- Default ticker -> region mapping ---------------------------------------
# One entry per region: a representative city with its coordinates and the
# tickers assigned to that region.
DEFAULT_REGIONS = [
    {
        "region": "US",
        "city": "New York",
        "lat": 40.7128,
        "lon": -74.0060,
        "tickers": ["AAPL", "MSFT", "SPY", "CORN"],
    },
    {
        "region": "EU",
        "city": "Frankfurt",
        "lat": 50.1109,
        "lon": 8.6821,
        "tickers": ["SAP", "DAX", "HEAT"],
    },
    {
        "region": "IN",
        "city": "Mumbai",
        "lat": 19.0760,
        "lon": 72.8777,
        "tickers": ["RELIANCE.NS", "NIFTY", "SUGR"],
    },
]

# --- Signal / portfolio settings --------------------------------------------
DAYS = 365                 # NOTE(review): not referenced by the cells visible here
LOOKBACK_T_ANOM  = 30      # same value as weather_features' lb_anom default
LOOKBACK_HDDCDD  = 7       # same value as weather_features' lb_hddcdd default
TOP_N = 5                  # names per leg of the long/short book
TARGET_VOL_ANNUAL = 0.10   # NOTE(review): not referenced by the cells visible here

# --- Trading costs, in basis points -----------------------------------------
COMMISSION_BPS = 0.2
SLIPPAGE_BPS   = 1.0


## 1) Setup & Helpers

In [None]:

import os, math, warnings
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Render floats with thousands separators and six decimals in DataFrame output.
pd.options.display.float_format = "{:,.6f}".format
# Suppress every warning for the rest of the session.
warnings.filterwarnings("ignore")

# `requests` is optional: record whether the import succeeded instead of failing.
try:
    import requests
except Exception:
    HAS_REQ = False
else:
    HAS_REQ = True

def drawdown_curve(nav: pd.Series) -> pd.Series:
    """Return the fractional drawdown of ``nav`` from its running maximum.

    Each value is ``nav / running_max - 1``: 0.0 at a new high and negative
    while the series is below its previous peak.
    """
    running_peak = nav.cummax()
    return nav.div(running_peak).sub(1.0)

def pct_change_safe(x: pd.DataFrame) -> pd.DataFrame:
    """Row-over-row percentage change with infinite values turned into NaN.

    Infinite changes (e.g. from a zero previous value) are replaced by NaN so
    they cannot propagate into later sums or products.
    """
    infinities = [np.inf, -np.inf]
    raw_change = x.pct_change()
    return raw_change.replace(infinities, np.nan)

def cs_rank(df: pd.DataFrame) -> pd.DataFrame:
    """Rank each row across columns and rescale the percentile rank.

    The percentile rank lies in (0, 1], so the result ``2 * (rank - 0.5)``
    lies in (-1, 1]; the largest value in a row maps to exactly 1.0.
    NaN inputs stay NaN.
    """
    pct_ranks = df.rank(axis=1, pct=True)
    return (pct_ranks - 0.5) * 2.0


## 2) Load Prices (CSV or synthetic)

In [None]:

def load_prices(path=PATH_PRICES, n_assets=12, n_days=500, seed=7):
    """Load a wide price table from CSV, or simulate one if the file is absent.

    Parameters
    ----------
    path : CSV file with a ``date`` column and one price column per ticker.
    n_assets : accepted for compatibility; the synthetic branch always builds
        the same fixed list of ten tickers and does not read this value.
    n_days : number of business days to simulate, starting 2023-01-01.
    seed : seed for the random generator used by the synthetic branch.

    Returns
    -------
    DataFrame indexed by date (ascending for the CSV branch) with one column
    per ticker.
    """
    if os.path.exists(path):
        raw = pd.read_csv(path, parse_dates=['date'])
        return raw.set_index('date').sort_index()

    rng = np.random.default_rng(seed)
    dates = pd.bdate_range("2023-01-01", periods=n_days)
    n_obs = len(dates)

    # Draw order (market, then US/EU/IN, then one draw per ticker) fixes the
    # simulated paths for a given seed, so keep it unchanged.
    market = rng.normal(0.0003, 0.009, size=n_obs)
    region_params = (("US", 0.0001, 0.006), ("EU", 0.0001, 0.006), ("IN", 0.0002, 0.008))
    regions = {code: rng.normal(mu, sigma, size=n_obs) for code, mu, sigma in region_params}

    universe = {
        "US": ["AAPL", "MSFT", "SPY", "CORN"],
        "EU": ["SAP", "DAX", "HEAT"],
        "IN": ["RELIANCE.NS", "NIFTY", "SUGR"],
    }

    data = {}
    for reg, names in universe.items():
        for name in names:
            idio = rng.normal(0, 0.01, size=n_obs)
            # Return = market factor + half regional factor + half idiosyncratic noise.
            ret = market + 0.5*regions[reg] + 0.5*idio
            data[name] = 100 * (1 + pd.Series(ret, index=dates)).cumprod()
    return pd.DataFrame(data, index=dates)

prices = load_prices(path=PATH_PRICES)
# Simple returns; the first row and any infinite changes become 0.0.
returns = pct_change_safe(prices)
returns = returns.fillna(0.0)
prices.head()


## 3) Asset to Region Mapping

In [None]:

def load_asset_geo_map(path=PATH_MAP, defaults=DEFAULT_REGIONS):
    """Return the ticker-to-region table.

    Reads ``path`` as CSV when it exists. Otherwise builds the table from
    ``defaults``: one row per ticker, carrying the ``region``, ``lat`` and
    ``lon`` of the entry that lists it.
    """
    if not os.path.exists(path):
        records = [
            {"ticker": ticker, "region": entry["region"], "lat": entry["lat"], "lon": entry["lon"]}
            for entry in defaults
            for ticker in entry["tickers"]
        ]
        return pd.DataFrame(records)
    return pd.read_csv(path)

# Ticker -> region table (CSV if present, otherwise the built-in defaults).
asset_geo = load_asset_geo_map(path=PATH_MAP, defaults=DEFAULT_REGIONS)
asset_geo.head()


## 4) Weather Data (synthetic fallback)

In [None]:

def synthetic_weather(region, start, end, seed=0):
    """Simulate daily temperature, precipitation and wind for one region.

    Parameters
    ----------
    region : region code; "US", "EU" and "IN" have their own mean temperature,
        any other value uses 15.0.
    start, end : inclusive bounds of the daily date range.
    seed : seed for the random generator.

    Returns
    -------
    DataFrame indexed by calendar day with columns ``temperature``,
    ``precipitation`` (never negative) and ``wind`` (never negative).
    """
    rng = np.random.default_rng(seed)
    days = pd.date_range(start, end, freq="D")
    n_days = len(days)

    regional_means = {"US": 12.0, "EU": 10.0, "IN": 26.0}
    mean_temp = regional_means.get(region, 15.0)

    # Two full sine cycles over the requested window plus Gaussian noise.
    seasonal = 10*np.sin(np.linspace(0, 4*np.pi, n_days))
    temperature = mean_temp + seasonal + rng.normal(0, 3, n_days)

    # Shifted gamma draw floored at zero, so many days are dry.
    rainfall = (rng.gamma(1.2, 2.0, n_days) - 1.0).clip(min=0)
    wind_speed = np.abs(rng.normal(5.0, 2.0, n_days))

    columns = {
        "temperature": temperature,
        "precipitation": rainfall,
        "wind": wind_speed,
    }
    return pd.DataFrame(columns, index=days)

# One synthetic weather frame per distinct region, spanning the price history.
# The seed is the region's position in the mapping, so regions get different draws.
weather_panel = dict(
    (reg, synthetic_weather(reg, prices.index.min(), prices.index.max(), seed=i))
    for i, reg in enumerate(asset_geo["region"].unique())
)
list(weather_panel)


## 5) Feature Engineering

In [None]:

def weather_features(df, lb_anom=30, lb_hddcdd=7, base_temp=18.0, lb_stats=30):
    """Derive weather signals from raw temperature / precipitation / wind.

    Parameters
    ----------
    df : DataFrame with ``temperature``, ``precipitation`` and ``wind``
        columns; it is copied, not modified.
    lb_anom : rolling window (rows) for the temperature baseline.
    lb_hddcdd : rolling window (rows) over which degree days are accumulated.
    base_temp : reference temperature for heating/cooling degree days.
    lb_stats : rolling window (rows) for the precipitation mean and the wind
        mean/standard deviation.

    Returns
    -------
    Copy of ``df`` with these columns added:
    ``temp_anom`` (temperature minus its rolling mean), ``HDD`` / ``CDD``
    (degree days below / above ``base_temp``, floored at 0), ``HDD_acc`` /
    ``CDD_acc`` (rolling sums), ``precip_surprise`` (precipitation minus its
    rolling mean) and ``wind_z`` (rolling z-score of wind).

    Rolling windows include the current row, and each rolling column is NaN
    until its window is full.
    """
    x = df.copy()

    temp = x["temperature"]
    x["temp_anom"] = temp - temp.rolling(lb_anom).mean()

    x["HDD"] = (base_temp - temp).clip(lower=0.0)
    x["CDD"] = (temp - base_temp).clip(lower=0.0)
    x["HDD_acc"] = x["HDD"].rolling(lb_hddcdd).sum()
    x["CDD_acc"] = x["CDD"].rolling(lb_hddcdd).sum()

    precip = x["precipitation"]
    x["precip_surprise"] = precip - precip.rolling(lb_stats).mean()

    wind_window = x["wind"].rolling(lb_stats)
    # The small constant avoids division by zero when wind is constant in the window.
    x["wind_z"] = (x["wind"] - wind_window.mean()) / (wind_window.std(ddof=1) + 1e-9)
    return x

# Build the feature frame for every region using the look-backs declared in
# the parameters cell, so that changing them there actually takes effect.
features_by_region = {
    reg: weather_features(df, lb_anom=LOOKBACK_T_ANOM, lb_hddcdd=LOOKBACK_HDDCDD)
    for reg, df in weather_panel.items()
}
list(features_by_region.keys())


## 6) Map Features to Tickers

In [None]:

feat_cols = ["temp_anom","HDD_acc","CDD_acc","precip_surprise","wind_z"]
signal_frames = {}
for col in feat_cols:
    # Start from a float NaN frame: tickers with no mapped region stay numeric
    # NaN (an untyped empty frame would be object dtype and break ranking).
    sig = pd.DataFrame(np.nan, index=prices.index, columns=prices.columns, dtype=float)
    for _, row in asset_geo.iterrows():
        tkr, reg = row["ticker"], row["region"]
        # Skip mapped tickers with no price history (they would otherwise add
        # columns absent from `returns`) and regions with no weather data.
        if tkr not in sig.columns or reg not in features_by_region:
            continue
        # Weather is daily; keep only the dates on which prices exist.
        sig[tkr] = features_by_region[reg][col].reindex(prices.index).to_numpy()
    signal_frames[col] = sig

# Composite score: weighted sum of cross-sectional ranks. Any date on which a
# component is NaN (e.g. during rolling-window warm-up) ends up as 0.0.
weather_signal = (
    0.4*cs_rank(signal_frames["temp_anom"]) +
    0.3*cs_rank(signal_frames["CDD_acc"]-signal_frames["HDD_acc"]) +
    0.2*cs_rank(signal_frames["precip_surprise"]) +
    0.1*cs_rank(signal_frames["wind_z"])
).fillna(0.0)

weather_signal.tail()


## 7) Backtest

In [None]:

def build_ls_weights(signal, top_n=5, bottom_n=5):
    """Turn a signal panel into daily dollar-neutral long/short weights.

    For each row the ``top_n`` highest-signal names are held long and the
    ``bottom_n`` lowest-signal names short, equally weighted within each leg.
    The row is then de-meaned and scaled so absolute weights sum to 1.

    A row is left at all-zero weights when:
    - fewer than ``top_n + bottom_n`` names have a non-NaN signal;
    - every available signal value is identical (no ranking information,
      e.g. an all-zero warm-up period);
    - removing names selected for both legs leaves a leg empty.

    Names that tie into both the long and the short selection are dropped
    from both legs rather than silently ending up short.

    Parameters
    ----------
    signal : DataFrame, dates x tickers.
    top_n, bottom_n : number of names in the long / short leg.

    Returns
    -------
    DataFrame of weights with the same index and columns as ``signal``.
    """
    w = pd.DataFrame(0.0, index=signal.index, columns=signal.columns)
    for dt, row in signal.iterrows():
        s = row.dropna()
        if len(s) < (top_n + bottom_n):
            continue
        # A flat cross-section carries no ranking; nlargest/nsmallest would
        # then both return the first names and create an arbitrary position.
        if s.nunique() < 2:
            continue

        longs = s.nlargest(top_n).index
        shorts = s.nsmallest(bottom_n).index
        overlap = longs.intersection(shorts)
        if len(overlap) > 0:
            longs = longs.difference(overlap)
            shorts = shorts.difference(overlap)
        if len(longs) == 0 or len(shorts) == 0:
            continue

        day = pd.Series(0.0, index=signal.columns)
        day.loc[longs] = 1.0 / len(longs)
        day.loc[shorts] = -1.0 / len(shorts)
        day -= day.mean()
        gross = day.abs().sum()
        if gross > 0:
            day /= gross
        w.loc[dt] = day
    return w

def backtest(weights, returns, commission_bps=0.2, slippage_bps=1.0):
    """Compute daily net strategy returns for a weight panel.

    Weights set on one row earn the next row's returns (one-row lag, so no
    look-ahead). Trading costs are charged on the row where weights change,
    proportional to the sum of absolute weight changes. The portfolio is
    assumed flat before the first row, so establishing the initial position
    is charged as turnover as well.

    Parameters
    ----------
    weights : DataFrame, dates x tickers.
    returns : DataFrame of simple returns, aligned with ``weights``.
    commission_bps, slippage_bps : per-unit-turnover costs in basis points.

    Returns
    -------
    Series of net returns indexed by date, with NaN replaced by 0.0.
    """
    # Holdings carried into each row; all-zero before the first row.
    prev = weights.shift(1, fill_value=0.0)
    gross = (prev * returns).sum(axis=1)
    turnover = (weights - prev).abs().sum(axis=1)
    cost_rate = (commission_bps + slippage_bps) / 10000.0
    return (gross - turnover * cost_rate).fillna(0.0)

w0 = build_ls_weights(weather_signal, TOP_N, TOP_N)
# Use the cost assumptions declared in the parameters cell rather than the
# function defaults, so that editing them there changes the result.
r0 = backtest(w0, returns, commission_bps=COMMISSION_BPS, slippage_bps=SLIPPAGE_BPS)
equity = (1+r0).cumprod()
dd = drawdown_curve(equity)

# Annualise with 252 trading days; Sharpe is NaN when returns have no dispersion.
daily_vol = r0.std()
summary = {
    "CAGR": (equity.iloc[-1])**(252/len(r0))-1,
    "Vol": daily_vol*np.sqrt(252),
    "Sharpe": (r0.mean()/daily_vol)*np.sqrt(252) if daily_vol > 0 else np.nan,
}
summary


### Equity & Drawdown

In [None]:

# One figure per series: the equity curve first, then its drawdown.
charts = (
    (equity, (10, 4), "Weather Signal L/S Equity"),
    (dd, (10, 3), "Drawdown"),
)
for series, size, title in charts:
    plt.figure(figsize=size)
    series.plot()
    plt.title(title)
    plt.show()
