# 05 — Export prédictions

On entraîne un modèle final (baseline GBRT) sur les 80 % les plus anciens des données, puis on exporte ses prédictions sur les 20 % restants dans un fichier CSV pour la soutenance.

In [1]:
import sys
from pathlib import Path

# Project root is the parent of the current working directory; its `src`
# folder is put at the front of the import path (once) so the project
# modules imported below can be found.
ROOT = Path("..").resolve()
SRC = ROOT.joinpath("src")
if sys.path.count(str(SRC)) == 0:
    sys.path.insert(0, str(SRC))

import numpy as np
import pandas as pd

from utils import get_logger
# Logger used by this notebook; <ROOT>/logs/run.log is passed as the log file.
# NOTE(review): presumably get_logger attaches a file handler for that path and
# creates the logs/ directory if needed - confirm in utils.
logger = get_logger("notebook", log_file=str(ROOT/"logs"/"run.log"))

from data import load_ohlc_from_xlsx
from features import build_features
from labels import add_target_20d_score, fit_score_scaler
from models import get_baselines
from sklearn.model_selection import train_test_split

# --- Data preparation --------------------------------------------------------
# Load the "Gold" sheet, build the features, attach the 20-day target, then
# drop every row that still contains a missing value.
XLSX = str(ROOT / "dataset_train.xlsx")
df = load_ohlc_from_xlsx(XLSX, sheet_name="Gold")
df = add_target_20d_score(build_features(df), horizon=20)
df = df.dropna().reset_index(drop=True)

# Every column except the date, the raw OHLC prices and the fut_ret_20 /
# y_score columns is used as a model input.
exclude = {"Date", "Open", "High", "Low", "Close", "fut_ret_20", "y_score"}
feature_cols = [c for c in df.columns if c not in exclude]

# --- Train / test split ------------------------------------------------------
# Positional split: first 80% of the rows for training, last 20% for the export.
# NOTE(review): this is only a chronological split if the loader returns rows
# sorted by Date - confirm in data.load_ohlc_from_xlsx.
# NOTE(review): with horizon=20 the targets of the last training rows
# presumably look ahead into the test period; consider dropping the last
# `horizon` training rows if a strictly leak-free evaluation is needed.
cut = int(len(df) * 0.8)
if cut == 0:
    # Fail early with a clear message instead of an obscure error inside fit().
    raise ValueError(
        f"Not enough usable rows ({len(df)}) to build a train/test split"
    )
train_df, test_df = df.iloc[:cut], df.iloc[cut:]

# The score scale is fitted on the training slice only; both targets are then
# divided by it and clipped to [-1, 1].
scale = fit_score_scaler(train_df["fut_ret_20"], std_mult=2.0)
y_train = np.clip(train_df["fut_ret_20"].to_numpy() / scale, -1.0, 1.0)
y_test = np.clip(test_df["fut_ret_20"].to_numpy() / scale, -1.0, 1.0)

X_train = train_df[feature_cols].to_numpy()
X_test = test_df[feature_cols].to_numpy()

# --- Model -------------------------------------------------------------------
# Pick the "gbrt" baseline; raise an explicit error if it is not registered
# rather than an anonymous IndexError.
spec = next((s for s in get_baselines() if s.name == "gbrt"), None)
if spec is None:
    raise LookupError('No baseline named "gbrt" returned by get_baselines()')
model = spec.model
model.fit(X_train, y_train)
pred = model.predict(X_test)

# --- Export ------------------------------------------------------------------
out = test_df[["Date", "Close"]].copy()
out["y_true"] = y_test
out["y_pred"] = pred

out_path = ROOT / "outputs" / "gold_predictions.csv"
# to_csv does not create missing directories; make sure outputs/ exists.
out_path.parent.mkdir(parents=True, exist_ok=True)
out.to_csv(out_path, index=False)
logger.info("Exported: %s (%d rows)", out_path, len(out))
out.head()


2025-12-16 09:52:38,524 | INFO | data | Loading sheet=Gold from C:\Users\fayca\Downloads\hackathon_gold_project\hackathon_gold_project\dataset_train.xlsx
2025-12-16 09:52:40,473 | INFO | data | Loaded 11340 rows, columns=['Date', 'Open', 'High', 'Low', 'Close', 'smavg_50', 'smavg_100', 'smavg_240']
2025-12-16 09:52:40,475 | INFO | features | Building features...
2025-12-16 09:52:40,527 | INFO | features | Features built. Total columns=34
2025-12-16 09:52:40,554 | INFO | labels | Fitted score scale=0.118067 (std_mult=2.00, std=0.059033)
2025-12-16 09:53:06,522 | INFO | notebook | Exported: C:\Users\fayca\Downloads\hackathon_gold_project\hackathon_gold_project\outputs\gold_predictions.csv (2217 rows)


Unnamed: 0,Date,Close,y_true,y_pred
8864,2010-05-27,1212.63,0.214568,0.120887
8865,2010-05-28,1214.73,0.285527,0.226574
8866,2010-05-31,1216.3,0.157655,0.21862
8867,2010-06-01,1225.78,0.102264,0.18512
8868,2010-06-02,1223.55,0.130347,0.353332


✅ Le fichier exporté se trouve dans `outputs/gold_predictions.csv`.