In [4]:
# Imports for the whole notebook.
#
# The project modules (utils, data, features, ...) are imported by bare name,
# so the directory holding them must be on sys.path *before* the imports run.
# Doing the bootstrap here keeps the notebook working under
# "Restart Kernel -> Run All", where this cell is executed first.
import sys
from pathlib import Path

import numpy as np
import pandas as pd

# Same values as the path-setup cell further down; the membership check makes
# the insertion idempotent, so running both cells is harmless.
ROOT = Path("..").resolve()
SRC = ROOT / "src"
if str(SRC) not in sys.path:
    sys.path.insert(0, str(SRC))

from utils import get_logger

from data import load_ohlc_from_xlsx
from features import build_features
from labels import add_target_20d_score, fit_score_scaler, apply_score
from split import time_series_splits
from models import get_baselines
from metrics import mse, directional_accuracy, information_coefficient

In [1]:
import pandas as pd
import sys
from pathlib import Path

# Locate the project root (parent of the current working directory) and make
# the project's src/ directory importable.
ROOT = Path("..").resolve()
SRC = ROOT / "src"
if str(SRC) not in sys.path:
    sys.path.insert(0, str(SRC))

# Each worksheet of the training workbook is treated as one asset.
# The workbook is opened in a `with` block so the file handle is released as
# soon as the sheet names have been read (an open handle keeps the file locked
# on Windows).
with pd.ExcelFile(ROOT / "dataset_train.xlsx", engine="openpyxl") as xls:
    assets = xls.sheet_names
assets


['MSCI world',
 'S&P 500 index',
 'Euro stoxx 50',
 'Dax',
 'CAC 40',
 'Gold',
 'Pétrole',
 'Dollar Index',
 'EURUSD']

In [5]:
# Evaluate every baseline model on every asset with time-ordered splits.
#
# For each asset: load its sheet, build features and the forward-return target,
# then for each (train, test) split fit every baseline on the training rows and
# score it on the test rows. One result row is produced per
# (asset, split, model) with the IC, directional accuracy and MSE.
DATASET_PATH = ROOT / "dataset_train.xlsx"
HORIZON = 20      # forward-return horizon passed to add_target_20d_score
MIN_ROWS = 300    # assets with fewer usable rows are skipped
N_SPLITS = 5      # number of splits requested from time_series_splits
STD_MULT = 2.0    # std multiplier passed to fit_score_scaler

# Columns that are raw prices, the date, or targets -- never model inputs.
NON_FEATURE_COLS = {"Date", "Open", "High", "Low", "Close", "fut_ret_20", "y_score"}

all_results = []

for asset in assets:
    # Rows are staged per asset and merged only if the whole asset succeeds.
    # Otherwise an asset that fails part-way would leave a partial set of
    # splits behind and silently bias the per-asset averages computed later.
    asset_results = []
    try:
        df = load_ohlc_from_xlsx(DATASET_PATH, sheet_name=asset)
        df = add_target_20d_score(build_features(df), horizon=HORIZON)
        df = df.dropna().reset_index(drop=True)

        if len(df) < MIN_ROWS:
            continue  # not enough data

        feature_cols = [c for c in df.columns if c not in NON_FEATURE_COLS]

        X = df[feature_cols].to_numpy()
        future_ret = df["fut_ret_20"].to_numpy()

        # NOTE(review): the target looks HORIZON rows ahead, so unless
        # time_series_splits leaves a gap between train and test, the last
        # training labels overlap the test window -- confirm.
        for tr, te in time_series_splits(len(df), n_splits=N_SPLITS):
            # The scale is fitted on training returns only, then applied to
            # both train and test; scores are clipped to [-1, 1].
            scale = fit_score_scaler(pd.Series(future_ret[tr]), std_mult=STD_MULT)
            y_tr = (future_ret[tr] / scale).clip(-1, 1)
            y_te = (future_ret[te] / scale).clip(-1, 1)

            for spec in get_baselines():
                model = spec.model
                model.fit(X[tr], y_tr)
                pred = model.predict(X[te])

                asset_results.append({
                    "asset": asset,
                    "model": spec.name,
                    "ic": information_coefficient(y_te, pred),
                    "dir_acc": directional_accuracy(y_te, pred),
                    "mse": mse(y_te, pred)
                })

    except Exception as e:
        # Best effort: report the failure and carry on with the next asset.
        print(f"❌ {asset} failed: {e}")
    else:
        # Reached only when the asset was processed without error.
        all_results.extend(asset_results)

results_all = pd.DataFrame(all_results)
results_all


2025-12-15 16:37:14,316 | INFO | data | Loading sheet=MSCI world from C:\Users\fayca\Downloads\hackathon_gold_project\hackathon_gold_project\dataset_train.xlsx
2025-12-15 16:37:16,114 | INFO | data | Loaded 12387 rows, columns=['Date', 'Open', 'High', 'Low', 'Close', 'smavg_50', 'smavg_100', 'smavg_240']
2025-12-15 16:37:16,116 | INFO | features | Building features...
2025-12-15 16:37:16,150 | INFO | features | Features built. Total columns=34
2025-12-15 16:37:16,181 | INFO | labels | Fitted score scale=0.079461 (std_mult=2.00, std=0.039730)
2025-12-15 16:37:24,723 | INFO | labels | Fitted score scale=0.082479 (std_mult=2.00, std=0.041239)
2025-12-15 16:37:41,123 | INFO | labels | Fitted score scale=0.078186 (std_mult=2.00, std=0.039093)
2025-12-15 16:38:07,047 | INFO | labels | Fitted score scale=0.081490 (std_mult=2.00, std=0.040745)
2025-12-15 16:38:42,563 | INFO | labels | Fitted score scale=0.085597 (std_mult=2.00, std=0.042798)
2025-12-15 16:39:27,603 | INFO | data | Loading shee

Unnamed: 0,asset,model,ic,dir_acc,mse
0,MSCI world,ridge,0.058053,0.430693,0.685453
1,MSCI world,random_forest,0.008372,0.394554,0.946715
2,MSCI world,gbrt,-0.075678,0.373762,1.505200
3,MSCI world,ridge,-0.053449,0.568317,0.200035
4,MSCI world,random_forest,0.164298,0.477723,0.805504
...,...,...,...,...,...
130,EURUSD,random_forest,0.029951,0.498368,0.288372
131,EURUSD,gbrt,0.088569,0.494015,0.287685
132,EURUSD,ridge,-0.005709,0.500000,0.129678
133,EURUSD,random_forest,0.065297,0.497280,0.140354


In [6]:
# Display the per-split results frame built by the evaluation loop.
results_all


Unnamed: 0,asset,model,ic,dir_acc,mse
0,MSCI world,ridge,0.058053,0.430693,0.685453
1,MSCI world,random_forest,0.008372,0.394554,0.946715
2,MSCI world,gbrt,-0.075678,0.373762,1.505200
3,MSCI world,ridge,-0.053449,0.568317,0.200035
4,MSCI world,random_forest,0.164298,0.477723,0.805504
...,...,...,...,...,...
130,EURUSD,random_forest,0.029951,0.498368,0.288372
131,EURUSD,gbrt,0.088569,0.494015,0.287685
132,EURUSD,ridge,-0.005709,0.500000,0.129678
133,EURUSD,random_forest,0.065297,0.497280,0.140354


In [7]:
# Average every metric over the splits of each (asset, model) pair, then list
# the models of each asset from highest to lowest mean IC.
summary = (
    results_all
    .groupby(["asset", "model"], as_index=False)  # keep the keys as columns
    .mean()
    .sort_values(by=["asset", "ic"], ascending=[True, False])
)

summary


Unnamed: 0,asset,model,ic,dir_acc,mse
0,CAC 40,gbrt,0.104343,0.540435,0.36738
1,CAC 40,random_forest,0.054719,0.524883,0.410935
2,CAC 40,ridge,0.013629,0.471073,0.705627
4,Dax,random_forest,0.030571,0.478231,0.442123
3,Dax,gbrt,0.021556,0.468191,0.522334
5,Dax,ridge,-0.010995,0.465308,0.480299
7,Dollar Index,random_forest,0.106363,0.50095,0.313029
6,Dollar Index,gbrt,0.093039,0.503948,0.365078
8,Dollar Index,ridge,0.067493,0.506947,0.267195
9,EURUSD,gbrt,0.088047,0.51012,0.306273


In [8]:
# Write the per-(asset, model) summary to <ROOT>/outputs as a CSV file.
out_path = ROOT / "outputs" / "results_all_assets_models.csv"
# to_csv does not create missing directories, so make sure the target folder
# exists (a fresh checkout may not have an outputs/ directory yet).
out_path.parent.mkdir(parents=True, exist_ok=True)
summary.to_csv(out_path, index=False)
print("Exported:", out_path)


Exported: C:\Users\fayca\Downloads\hackathon_gold_project\hackathon_gold_project\outputs\results_all_assets_models.csv
