In [4]:
import pandas as pd
import numpy as np
import statsmodels.api as sm

# =========================
# 1. CHARGEMENT DU DATASET
# =========================

df = pd.read_csv(
    "DATASET_1000DAYS_VARIATION.csv",
    parse_dates=["date"]
).set_index("date")

# On ne garde que les colonnes utiles
df = df[["GC=F", "^NDX", "AAPL", "URW.PA"]].dropna()

# =========================
# 2. FEATURES & TARGET
# =========================

X = df[["GC=F", "^NDX", "AAPL"]]
y = df["URW.PA"].shift(-1)  # URW.PA à J+1

data = pd.concat([X, y], axis=1).dropna()
data.columns = ["GC=F", "^NDX", "AAPL", "URW_PRED_TARGET"]

# =========================
# 3. WALK-FORWARD PREDICTION
# =========================

predictions = []

MIN_TRAIN_SIZE = 60  # minimum historique pour entraîner

for i in range(MIN_TRAIN_SIZE, len(data)):

    train = data.iloc[:i]

    X_train = sm.add_constant(train[["GC=F", "^NDX", "AAPL"]])
    y_train = train["URW_PRED_TARGET"]

    model = sm.OLS(y_train, X_train).fit()

    X_today = pd.DataFrame(
        [data.iloc[i][["GC=F", "^NDX", "AAPL"]]],
        columns=["GC=F", "^NDX", "AAPL"]
    )

    X_today = sm.add_constant(X_today, has_constant="add")

    pred = model.predict(X_today).iloc[0]

    predictions.append(pred)

# =========================
# 4. AJOUT DE LA COLONNE DE PRÉDICTION
# =========================

# Aligner avec les dates
pred_series = pd.Series(
    predictions,
    index=data.index[MIN_TRAIN_SIZE:],
    name="URW.PA_PRED_TOMORROW"
)

final_dataset = df.copy()
final_dataset["URW.PA_PRED_TOMORROW"] = pred_series

# =========================
# 5. RÉSULTAT FINAL
# =========================

# Création du signal d'achat / vente
final_dataset["signal"] = np.where(
    final_dataset["URW.PA_PRED_TOMORROW"] > 0.15,
    "buy",
    np.where(
        final_dataset["URW.PA_PRED_TOMORROW"] < -0.15,
        "sell",
        "keep"
    )
)

# Vérification
print(final_dataset[[
    "URW.PA",
    "URW.PA_PRED_TOMORROW",
    "signal"
]].tail(10))


print(final_dataset.tail(10))

# Sauvegarde
final_dataset.to_csv(
    "URW_PREDICTION_DATASET.csv"
)


              URW.PA  URW.PA_PRED_TOMORROW signal
date                                             
2025-12-23 -0.388938              0.329079    buy
2025-12-24  0.108466              0.158736    buy
2025-12-30  0.777027              0.156251    buy
2025-12-31 -0.663949             -0.268578   sell
2026-01-02 -0.258738             -0.040918   keep
2026-01-05  0.734977              0.434830    buy
2026-01-06 -0.472097              0.117453   keep
2026-01-07  2.112979             -0.129670   keep
2026-01-08  0.443410             -0.077007   keep
2026-01-09 -0.945976                   NaN   keep
                GC=F      ^NDX      AAPL    URW.PA  URW.PA_PRED_TOMORROW  \
date                                                                       
2025-12-23  0.859463  0.495375  0.512966 -0.388938              0.329079   
2025-12-24 -0.049070  0.267003  0.532388  0.108466              0.158736   
2025-12-30  1.040438 -0.246811 -0.248401  0.777027              0.156251   
2025-12-31 -1.018283