# 02 — Préprocessing CO₂ & Baseline équipondéré

In [None]:
import pandas as pd, numpy as np, json
from pathlib import Path
from src.metrics import annualized_return, annualized_volatility, cvar, max_drawdown
# Project directories, expressed relative to the notebook's working directory.
DATA_RAW = Path('../data/raw')
DATA_PROC = Path('../data/processed')
REPORTS = Path('../reports/results')

# Create the output directory (and any missing parents) up front so later
# cells can write into it; a pre-existing directory is not an error.
REPORTS.mkdir(parents=True, exist_ok=True)

In [None]:
# Load the three inputs: the returns table (first column used as a parsed
# date index), the CO2 proxy table and the ticker list.
returns = pd.read_csv(DATA_PROC / 'returns.csv', index_col=0, parse_dates=True)
co2 = pd.read_csv(DATA_RAW / 'co2_proxy.csv')
tickers = pd.read_csv('../tickers.csv')

# The investable universe is the set of tickers present in all three sources,
# sorted so that column order (and therefore weight order) is deterministic.
common_tickers = set(returns.columns).intersection(tickers['Ticker'], co2['Ticker'])
universe = sorted(common_tickers)

# Restrict returns to the universe and drop dates on which every asset is NaN.
returns = returns.loc[:, universe].dropna(how='all')

# Keep one CO2 row per universe ticker (the first occurrence wins).
in_universe = co2['Ticker'].isin(universe)
co2 = co2.loc[in_universe].drop_duplicates(subset='Ticker')

# Notebook display: universe size and a preview of its first ten tickers.
len(universe), universe[:10]

In [None]:
# Equal-weighted baseline: every asset of the universe gets weight 1/N.
if not universe:
    # Fail with an explicit message instead of an opaque ZeroDivisionError.
    raise ValueError(
        "universe is empty: no ticker is common to the returns file, "
        "tickers.csv and the CO2 proxy file"
    )
w = np.repeat(1 / len(universe), len(universe))

# pandas' sum() skips NaN, so a plain weighted sum would count a missing
# return as a 0% return while the asset still keeps its 1/N weight. Dividing
# by the total weight of the assets actually observed on each date
# renormalises the weights so they sum to 1 over the available assets.
# Dates where every asset is NaN were already dropped when `returns` was
# built, so the denominator is expected to be strictly positive.
weighted_sum = returns.mul(w, axis=1).sum(axis=1)
observed_weight = returns.notna().mul(w, axis=1).sum(axis=1)
port_ret = weighted_sum / observed_weight

# Risk/return statistics come from the project's src.metrics helpers.
# NOTE(review): their exact conventions (periods per year, sign of CVaR and
# drawdown) are not visible here — confirm against src/metrics.py.
ann_ret = annualized_return(port_ret)
ann_vol = annualized_volatility(port_ret)
cvar95 = cvar(port_ret, alpha=0.95)

# Compounded value of one unit invested; this treats port_ret as simple
# (not log) returns — TODO confirm against how returns.csv was produced.
cum = (1 + port_ret).cumprod()
mdd = max_drawdown(cum)

# CO2 intensity aligned on the universe order; missing intensities are
# imputed with the median intensity of the (universe-filtered) CO2 table.
co2_map = (
    co2.set_index('Ticker')['CO2_intensity']
    .reindex(universe)
    .fillna(co2['CO2_intensity'].median())
)
port_co2 = float(np.dot(w, co2_map.values))

# Plain Python floats/ints so the dict is directly JSON-serialisable.
metrics = {
    "n_assets": len(universe),
    "annualized_return": float(ann_ret),
    "annualized_volatility": float(ann_vol),
    "cvar_95": float(cvar95),
    "max_drawdown": float(mdd),
    "co2_intensity_weighted": port_co2,
}
metrics

In [None]:
# Persist the baseline metrics as pretty-printed JSON in the reports folder.
metrics_path = REPORTS / 'baseline_metrics.json'
with metrics_path.open('w') as fh:
    json.dump(metrics, fh, indent=2)

# Notebook display: show the same metrics as a pandas Series.
pd.Series(metrics)