In [1]:
import pandas as pd
import taceconomics
from datetime import datetime

In [80]:
# APIKEY
taceconomics.api_key = "sk_o24BhJRqVpIvxVSXX3yiKRGgpDEejmyJ8pfLFX2q22s"  
start_date = '2023-01-01'
end_date = datetime.today().strftime("%Y-%m-%d")

In [81]:
# --- Importation des données ---

# Taux de change EUR/USD
usd_eur = taceconomics.getdata(f"EXR/EUR/WLD?start_date={start_date}")
usd_eur.columns = ["usd_eur"]
usd_eur = usd_eur.dropna()
eur_usd = 1 / usd_eur  # Inversion pour avoir EUR/USD
eur_usd.columns = ["eur_usd"]
eur_usd.index = pd.to_datetime(eur_usd.index)

print(eur_usd.head(10))
print(eur_usd.shape)

             eur_usd
timestamp           
2023-01-01  1.070452
2023-01-02  1.067737
2023-01-03  1.054652
2023-01-04  1.060839
2023-01-05  1.052192
2023-01-06  1.066382
2023-01-07  1.066382
2023-01-08  1.065598
2023-01-09  1.073445
2023-01-10  1.073860
(994, 1)


In [83]:
# --- Indicateurs quantitatifs ---

# Taux de croissance
eur_usd["taux_croissance"] = eur_usd["eur_usd"].pct_change() * 100

# Vol
eur_usd["vol"] = eur_usd["taux_croissance"].rolling(window=30).std()

# rendement à 10j (2 semaines en jours ouvrés)
eur_usd["rendement_10j"] = eur_usd["eur_usd"].pct_change(periods=10) * 100

print(eur_usd.head(10))
print(eur_usd.shape)

             eur_usd  taux_croissance  vol  rendement_10j
timestamp                                                
2023-01-01  1.070452              NaN  NaN            NaN
2023-01-02  1.067737        -0.253588  NaN            NaN
2023-01-03  1.054652        -1.225506  NaN            NaN
2023-01-04  1.060839         0.586644  NaN            NaN
2023-01-05  1.052192        -0.815133  NaN            NaN
2023-01-06  1.066382         1.348654  NaN            NaN
2023-01-07  1.066382         0.000000  NaN            NaN
2023-01-08  1.065598        -0.073526  NaN            NaN
2023-01-09  1.073445         0.736383  NaN            NaN
2023-01-10  1.073860         0.038659  NaN            NaN
(994, 4)


In [84]:
# --- Cible --- 

# Seuil de choc (±2σ)
eur_usd["seuil_haut"] = 2 * eur_usd["vol"]
eur_usd["seuil_bas"] = -2 * eur_usd["vol"]

# Variable cible : 1 si choc (hausse ou baisse), 0 sinon
eur_usd["target"] = ((eur_usd["taux_croissance"] >= eur_usd["seuil_haut"]) | (eur_usd["taux_croissance"] <= eur_usd["seuil_bas"])).astype(int)

print(eur_usd[["taux_croissance", "vol", "target"]].head(10))
print(eur_usd.shape)

            taux_croissance  vol  target
timestamp                               
2023-01-01              NaN  NaN       0
2023-01-02        -0.253588  NaN       0
2023-01-03        -1.225506  NaN       0
2023-01-04         0.586644  NaN       0
2023-01-05        -0.815133  NaN       0
2023-01-06         1.348654  NaN       0
2023-01-07         0.000000  NaN       0
2023-01-08        -0.073526  NaN       0
2023-01-09         0.736383  NaN       0
2023-01-10         0.038659  NaN       0
(994, 7)


In [85]:
# --- Indicateurs chartistes pour visualisation ---

# Moyennes mobiles
eur_usd["moyenne_mobile_7j"] = eur_usd["eur_usd"].rolling(window=7).mean()
eur_usd["moyenne_mobile_21j"] = eur_usd["eur_usd"].rolling(window=21).mean()

# Bandes de Bollinger
eur_usd["bollinger_moyenne"] = eur_usd["eur_usd"].rolling(window=20).mean()
eur_usd["bollinger_haut"] = eur_usd["bollinger_moyenne"] + 2 * eur_usd["eur_usd"].rolling(window=20).std()
eur_usd["bollinger_bas"] = eur_usd["bollinger_moyenne"] - 2 * eur_usd["eur_usd"].rolling(window=20).std()

print(eur_usd.head(10))
print(eur_usd.shape)


# RSI (Relative Strength Index)
def calculer_rsi(series, window=14):
    delta = series.diff()
    gains = (delta.where(delta > 0, 0)).rolling(window=window).mean()
    pertes = (-delta.where(delta < 0, 0)).rolling(window=window).mean()
    rs = gains / pertes
    rsi = 100 - (100 / (1 + rs))
    return rsi

eur_usd["rsi_14j"] = calculer_rsi(eur_usd["eur_usd"])

print(eur_usd.shape)
print(eur_usd.tail(10))

             eur_usd  taux_croissance  vol  rendement_10j  seuil_haut  \
timestamp                                                               
2023-01-01  1.070452              NaN  NaN            NaN         NaN   
2023-01-02  1.067737        -0.253588  NaN            NaN         NaN   
2023-01-03  1.054652        -1.225506  NaN            NaN         NaN   
2023-01-04  1.060839         0.586644  NaN            NaN         NaN   
2023-01-05  1.052192        -0.815133  NaN            NaN         NaN   
2023-01-06  1.066382         1.348654  NaN            NaN         NaN   
2023-01-07  1.066382         0.000000  NaN            NaN         NaN   
2023-01-08  1.065598        -0.073526  NaN            NaN         NaN   
2023-01-09  1.073445         0.736383  NaN            NaN         NaN   
2023-01-10  1.073860         0.038659  NaN            NaN         NaN   

            seuil_bas  target  moyenne_mobile_7j  moyenne_mobile_21j  \
timestamp                                          

In [None]:
# --- Indicateur de sentiment ---
from gdeltdoc import GdeltDoc, Filters
from datetime import datetime, timedelta
import pandas as pd
from nltk.sentiment.vader import SentimentIntensityAnalyzer
import nltk

# Télécharger VADER
nltk.download('vader_lexicon')

def recuperer_tous_articles_gdelt(start_date, end_date, keyword="EUR/USD", language='eng', chunk_days=30):
    """
    Récupère tous les articles GDELT entre start_date et end_date
    en faisant des requêtes par tranches de `chunk_days` jours.
    """
    # Initialiser VADER
    sid = SentimentIntensityAnalyzer()

    # Convertir les dates en objets datetime
    start = datetime.strptime(start_date, "%Y-%m-%d")
    end = datetime.strptime(end_date, "%Y-%m-%d")

    # Initialiser GdeltDoc
    gd = GdeltDoc()
    all_articles = []

    # Parcourir par tranches de `chunk_days` jours
    current_start = start
    while current_start < end:
        current_end = min(current_start + timedelta(days=chunk_days), end)

        print(f"Récupération des articles entre {current_start.date()} et {current_end.date()}")

        # Créer les filtres pour la tranche actuelle
        f = Filters(
            start_date=current_start.strftime("%Y-%m-%d"),
            end_date=current_end.strftime("%Y-%m-%d"),
            num_records=250,
            keyword=keyword,
            language=language
        )

        # Récupérer les articles
        articles_df = gd.article_search(f)

        # Vérifier si le DataFrame n'est pas vide
        if not articles_df.empty:
            all_articles.append(articles_df)

        # Passer à la tranche suivante
        current_start = current_end + timedelta(days=1)

    # Concaténer tous les DataFrames
    if not all_articles:
        return pd.DataFrame()

    df = pd.concat(all_articles, ignore_index=True)

    # Nettoyer les colonnes utiles et convertir la date
    def convertir_date_gdelt(date_str):
        # Le format est AAAAMMJJTHHMMSSZ, on extrait AAAAMMJJ
        return datetime.strptime(date_str.split('T')[0], "%Y%m%d").date()

    df['date'] = df['seendate'].apply(convertir_date_gdelt)
    df['date'] = pd.to_datetime(df['date'])

    # Calculer le sentiment pour chaque article
    def calculer_sentiment(texte):
        if isinstance(texte, str):
            return sid.polarity_scores(texte)['compound']
        return 0

    df['sentiment'] = df['title'].apply(calculer_sentiment)

    return df

In [86]:
# --- Variables PCA ---

from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans

df = eur_usd.copy().dropna().reset_index()

X = df.select_dtypes(include=['float64','int64'])  # uniquement variables numériques
X_scaled = StandardScaler().fit_transform(X)

pca = PCA(n_components=0.9)  # garder assez de composantes pour expliquer 90% de la variance
X_pca = pca.fit_transform(X_scaled)

# Créer un DataFrame avec les nouvelles variables PCA
df_pca = pd.DataFrame(X_pca, columns=[f'PC{i+1}' for i in range(X_pca.shape[1])])

df_extended = pd.concat([df, df_pca], axis=1)

kmeans = KMeans(n_clusters=3, random_state=42)  # exemple avec 3 clusters
df_extended['cluster_kmeans'] = kmeans.fit_predict(X_pca)


print(df_extended.shape)
print(df_extended.head(10))

(964, 19)
   timestamp   eur_usd  taux_croissance       vol  rendement_10j  seuil_haut  \
0 2023-02-01  1.101249         1.394181  0.535094       1.361144    1.070189   
1 2023-02-02  1.090477        -0.978158  0.566828       0.299881    1.133656   
2 2023-02-03  1.082778        -0.705972  0.532736      -0.551134    1.065472   
3 2023-02-04  1.082778         0.000000  0.524549      -0.859726    1.049097   
4 2023-02-05  1.079051        -0.344217  0.503796      -0.954313    1.007592   
5 2023-02-07  1.072938        -0.566511  0.457390      -1.316495    0.914780   
6 2023-02-08  1.071639        -0.121095  0.458120      -1.435996    0.916239   
7 2023-02-09  1.074056         0.225552  0.459325      -1.200795    0.918650   
8 2023-02-10  1.069919        -0.385171  0.445048      -1.385010    0.890097   
9 2023-02-11  1.070148         0.021403  0.444995      -1.469313    0.889989   

   seuil_bas  target  moyenne_mobile_7j  moyenne_mobile_21j  \
0  -1.070189       1           1.089052       

In [79]:
#----Variables macro----

# Inflation

start_date = df_extended["timestamp"].iloc[0].strftime("%Y-%m-%d")
print(start_date)
end_date = df_extended["timestamp"].iloc[-1].strftime("%Y-%m-%d")
print(end_date)

infl_eur = taceconomics.getdata(f"EUROSTAT/EI_CPHI_M_CP-HI00_NSA_HICP2015/EUZ?collapse=D&transform=growth_yoy&start_date={start_date}")
infl_us = taceconomics.getdata(f"FRED/CPIAUCSL/USA?collapse=D&transform=growth_yoy&start_date={start_date}")

# infl_eur = taceconomics.getdata(f"IFS/PCPIHA_IX_M/EUZ?start_date={start_date}")
# infl_us = taceconomics.getdata(f"IFS/PCPI_IX_M/USA?start_date={start_date}")

df_macro = pd.DataFrame()
df_macro["inflation_eur"] = infl_eur
df_macro["inflation_us"] = infl_us

 # Taux interets

ti_eur = taceconomics.getdata(f"ECB/FM_D_EUR_4F_KR_DFR_LEV/EUZ?collapse=D&collapse_mode=end_of_period&start_date={start_date}")
ti_us = taceconomics.getdata(f"DS/USPRATE./WLD?collapse=D&start_date={start_date}")

# ti_eur = taceconomics.getdata(f"IFS/FPOLM_PA_M/EUZ?start_date={start_date}")
# ti_us = taceconomics.getdata(f"IFS/FPOLM_PA_M/USA?start_date={start_date}")

df_macro["interest_rate_eur"] = ti_eur
df_macro["interest_rate_us"] = ti_us

print(df_macro.tail(10))
print(df_macro.shape)

2023-02-01
2025-10-05
            inflation_eur  inflation_us  interest_rate_eur  interest_rate_us
timestamp                                                                   
2025-09-21       2.227488           NaN                2.0              4.25
2025-09-22       2.227488           NaN                2.0              4.25
2025-09-23       2.227488           NaN                2.0              4.25
2025-09-24       2.227488           NaN                2.0              4.25
2025-09-25       2.227488           NaN                2.0              4.25
2025-09-26       2.227488           NaN                2.0              4.25
2025-09-27       2.227488           NaN                2.0              4.25
2025-09-28       2.227488           NaN                2.0              4.25
2025-09-29       2.227488           NaN                2.0              4.25
2025-09-30       2.227488           NaN                2.0              4.25
(973, 4)


In [61]:
df_final = pd.concat([df_extended, df_macro], axis=1)
print(df_final.tail(10))
print(df_final.columns)

           timestamp  eur_usd  taux_croissance  vol  rendement_10j  \
2025-09-21       NaT      NaN              NaN  NaN            NaN   
2025-09-22       NaT      NaN              NaN  NaN            NaN   
2025-09-23       NaT      NaN              NaN  NaN            NaN   
2025-09-24       NaT      NaN              NaN  NaN            NaN   
2025-09-25       NaT      NaN              NaN  NaN            NaN   
2025-09-26       NaT      NaN              NaN  NaN            NaN   
2025-09-27       NaT      NaN              NaN  NaN            NaN   
2025-09-28       NaT      NaN              NaN  NaN            NaN   
2025-09-29       NaT      NaN              NaN  NaN            NaN   
2025-09-30       NaT      NaN              NaN  NaN            NaN   

            seuil_haut  seuil_bas  target  moyenne_mobile_7j  \
2025-09-21         NaN        NaN     NaN                NaN   
2025-09-22         NaN        NaN     NaN                NaN   
2025-09-23         NaN        NaN    

In [87]:
# MODELISATION

# xgb_single_model_timeseries.py
"""
XGBoost focalisé pour prédiction d'un choc à 2 semaines sur EUR/USD.
Sorties :
- AUC, Gini, courbe ROC (test),
- seuil optimal (Youden) et matrice de confusion + métriques associées,
- modèle final enregistré (joblib).
"""

import warnings
warnings.filterwarnings("ignore")

import pandas as pd
import numpy as np
from sklearn.impute import SimpleImputer
from sklearn.model_selection import TimeSeriesSplit, GridSearchCV
from sklearn.metrics import roc_auc_score, roc_curve, confusion_matrix, precision_score, recall_score, f1_score, accuracy_score, balanced_accuracy_score
from xgboost import XGBClassifier
import matplotlib.pyplot as plt
import seaborn as sns
import joblib
from datetime import datetime

# -----------------------
# Config 
# -----------------------
RANDOM_STATE = 42
# Fractions pour split contigu (doivent sommer à 1.0)
TRAIN_FRAC = 0.70
VAL_FRAC   = 0.15
TEST_FRAC  = 0.15

# TimeSeries CV splits (pour GridSearch)
TS_SPLITS = 5

# Grid search params 
XGB_PARAM_GRID = {
    'n_estimators': [100, 300],
    'max_depth': [3, 5],
    'learning_rate': [0.01, 0.05, 0.1],
    'subsample': [0.8, 1.0],
    'colsample_bytree': [0.6, 0.8, 1.0]
}

# Metric d'optimisation pour la recherche d'hyperparamètres (ici F1, tu peux changer en 'roc_auc')
GRID_SCORING = 'f1'

# Early stopping rounds pour re-entrainement final
EARLY_STOPPING_ROUNDS = 50

# Fichiers de sortie
MODEL_OUTPATH = "xgb_final_model.joblib"
IMPUTER_OUTPATH = "imputer.joblib"

# ===============================
# Décalage de la variable cible pour prédire le choc à 2 semaines
# ===============================

SHIFT_DAYS = 10  # environ 2 semaines ouvrées

# On crée une copie pour éviter les erreurs d’alignement
df_final = df_extended.copy()

# Décaler la target vers le passé : les features du jour t servent à prédire le choc à t+10
df_final["target_future"] = df_final["target"].shift(-SHIFT_DAYS)

# Supprimer les lignes où la cible future est manquante (en fin de série)
df_final = df_final.dropna(subset=["target_future"])

# Définir X (features actuelles) et y (choc futur)
X = df_final.drop(columns=["target", "target_future"])  # on garde uniquement les variables explicatives
y = df_final["target_future"].astype(int)

print("✅ Variable cible correctement décalée de 2 semaines.")
print(f"Shape finale : {X.shape}, target positive rate = {y.mean():.3f}")

# -----------------------
# Chargement & checks
# -----------------------
print("Chargement des données...")

df = df_final.copy().dropna().sort_values("timestamp").reset_index(drop=True)

# Features & target
X = df.drop(columns=["target", "target_future"])
y = df["target_future"].astype(int)

# On suppose tout numérique — sinon adapter types/catégoriques
num_cols = X.columns.tolist()

# -----------------------
# Split contigu (train / val / test)
# -----------------------
n = len(df)
if not abs(TRAIN_FRAC + VAL_FRAC + TEST_FRAC - 1.0) < 1e-8:
    raise ValueError("TRAIN_FRAC + VAL_FRAC + TEST_FRAC doit être égal à 1.0")

train_end = int(n * TRAIN_FRAC)
val_end = train_end + int(n * VAL_FRAC)

X_train = X.iloc[:train_end].copy()
y_train = y.iloc[:train_end].copy()

X_val = X.iloc[train_end:val_end].copy()
y_val = y.iloc[train_end:val_end].copy()

X_test = X.iloc[val_end:].copy()
y_test = y.iloc[val_end:].copy()

print(f"Tailles -> train: {X_train.shape}, val: {X_val.shape}, test: {X_test.shape}")

# -----------------------
# Imputation (median) - fit uniquement sur train
# -----------------------
imputer = SimpleImputer(strategy='median')
imputer.fit(X_train[num_cols])

X_train_imp = pd.DataFrame(imputer.transform(X_train[num_cols]), index=X_train.index, columns=num_cols)
X_val_imp   = pd.DataFrame(imputer.transform(X_val[num_cols]), index=X_val.index, columns=num_cols)
X_test_imp  = pd.DataFrame(imputer.transform(X_test[num_cols]), index=X_test.index, columns=num_cols)

# Save imputer for reproducibility / production
joblib.dump(imputer, IMPUTER_OUTPATH)
print(f"Imputer sauvegardé -> {IMPUTER_OUTPATH}")

# -----------------------
# GridSearchCV (TimeSeriesSplit) pour chercher les meilleurs hyperparams
# -----------------------
print("\nLancement GridSearchCV (TimeSeriesSplit) sur XGBoost...")

xgb = XGBClassifier(use_label_encoder=False, eval_metric='logloss', random_state=RANDOM_STATE, n_jobs=-1)

tscv = TimeSeriesSplit(n_splits=TS_SPLITS)

# GridSearchCV sur les features imputées (pas de scaling nécessaire pour XGBoost)
gscv = GridSearchCV(
    estimator=xgb,
    param_grid=XGB_PARAM_GRID,
    scoring=GRID_SCORING,
    cv=tscv,
    n_jobs=-1,
    verbose=1,
    refit=True
)

gscv.fit(X_train_imp, y_train)
print("Meilleurs paramètres trouvés (GridSearchCV):")
print(gscv.best_params_)
print(f"Best CV {GRID_SCORING}: {gscv.best_score_:.4f}")

# -----------------------
# Ré-entraînement final avec early stopping sur l'échantillon de validation
# -----------------------
best_params = gscv.best_params_.copy()

# Conserver paramètres choisis et activer early stopping via eval_set
xgb_final = XGBClassifier(use_label_encoder=False, eval_metric='logloss', random_state=RANDOM_STATE,
                          n_jobs=-1, **best_params)

print("\nRé-entrainement final avec early stopping sur validation (eval_set)...")
xgb_final.fit(
    X_train_imp, y_train,
    eval_set=[(X_val_imp, y_val)],
    early_stopping_rounds=EARLY_STOPPING_ROUNDS,
    verbose=False
)

# Sauvegarde modèle
joblib.dump(xgb_final, MODEL_OUTPATH)
print(f"Modèle final sauvegardé -> {MODEL_OUTPATH}")

# -----------------------
# Prédiction out-of-sample (test) + évaluation
# -----------------------
print("\nÉvaluation out-of-sample (test)...")
probs_test = xgb_final.predict_proba(X_test_imp)[:, 1]
auc = roc_auc_score(y_test, probs_test)
gini = 2*auc - 1
fpr, tpr, thresholds = roc_curve(y_test, probs_test)

# Seuil optimal - Youden (TPR - FPR maximisé)
youden_idx = np.argmax(tpr - fpr)
opt_threshold_youden = thresholds[youden_idx]

# Seuil optimisant F1 (pour info)
f1_scores = [f1_score(y_test, (probs_test >= t).astype(int), zero_division=0) for t in thresholds]
opt_threshold_f1 = thresholds[np.argmax(f1_scores)]

# Choix du seuil final : tu peux choisir Youden ou F1 ; ici on utilise Youden tout en reportant F1-opt
threshold = opt_threshold_youden

preds = (probs_test >= threshold).astype(int)
tn, fp, fn, tp = confusion_matrix(y_test, preds).ravel()

precision = precision_score(y_test, preds, zero_division=0)
recall = recall_score(y_test, preds, zero_division=0)            # sensitivity
accuracy = accuracy_score(y_test, preds)
f1 = f1_score(y_test, preds, zero_division=0)
specificity = tn / (tn + fp) if (tn + fp) > 0 else 0.0
bal_acc = balanced_accuracy_score(y_test, preds)

# -----------------------
# Affichage résultats (out-of-sample uniquement)
# -----------------------
print(f"\nRésultats (test) - XGBoost")
print(f"AUC (ROC): {auc:.4f}")
print(f"Gini: {gini:.4f}")
print(f"Seuil Youden: {opt_threshold_youden:.4f} | Seuil F1-opt: {opt_threshold_f1:.4f}")
print("Matrice de confusion (tn, fp, fn, tp):", (int(tn), int(fp), int(fn), int(tp)))
print(f"Precision: {precision:.4f}")
print(f"Recall (Sensitivity): {recall:.4f}")
print(f"Specificity: {specificity:.4f}")
print(f"F1: {f1:.4f}")
print(f"Accuracy: {accuracy:.4f}")
print(f"Balanced Accuracy: {bal_acc:.4f}")

# -----------------------
# Tracer ROC (test)
# -----------------------
plt.figure(figsize=(7,6))
plt.plot(fpr, tpr, label=f"XGBoost (AUC={auc:.3f})")
plt.plot([0,1],[0,1], linestyle='--', alpha=0.6)
plt.xlabel("False Positive Rate")
plt.ylabel("True Positive Rate")
plt.title("ROC - Out-of-sample (test)")
plt.legend()
plt.grid(True)
plt.show()

# -----------------------
# Tracer Matrice de confusion (test)
# -----------------------
cm = np.array([[tn, fp],[fn, tp]])
plt.figure(figsize=(4,3))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues')
plt.title(f"Matrice de confusion (test) - seuil={threshold:.4f}")
plt.ylabel("Vraie classe")
plt.xlabel("Classe prédite")
plt.show()

# -----------------------
# Importance des features (optionnel, utile pour interprétation)
# -----------------------
try:
    imp = pd.Series(xgb_final.feature_importances_, index=num_cols).sort_values(ascending=False)
    print("\nTop 10 features par importance (XGBoost):")
    print(imp.head(10))
    plt.figure(figsize=(6,4))
    imp.head(15).plot(kind='bar')
    plt.title("Feature importances (XGBoost)")
    plt.tight_layout()
    plt.show()
except Exception:
    pass

# -----------------------
# Résumé final en dictionary (pratique pour reporting programmatique)
# -----------------------
report = {
    'auc': float(auc),
    'gini': float(gini),
    'threshold_youden': float(opt_threshold_youden),
    'threshold_f1': float(opt_threshold_f1),
    'threshold_used': float(threshold),
    'confusion': {'tn': int(tn), 'fp': int(fp), 'fn': int(fn), 'tp': int(tp)},
    'precision': float(precision),
    'recall': float(recall),
    'specificity': float(specificity),
    'f1': float(f1),
    'accuracy': float(accuracy),
    'balanced_accuracy': float(bal_acc),
    'best_params': best_params,
    'trained_at': datetime.utcnow().isoformat() + 'Z'
}

# Enregistrer le reporting si souhaité
pd.Series(report).to_json("xgb_report_test.json")
print("\nReport JSON sauvegardé -> xgb_report_test.json")
print("\nTerminé. Seuls les résultats out-of-sample (test) ont été affichés.")



✅ Variable cible correctement décalée de 2 semaines.
Shape finale : (954, 18), target positive rate = 0.059
Chargement des données...
Tailles -> train: (667, 18), val: (143, 18), test: (144, 18)


DTypePromotionError: The DType <class 'numpy.dtypes.DateTime64DType'> could not be promoted by <class 'numpy.dtypes.Float64DType'>. This means that no common DType exists for the given inputs. For example they cannot be stored in a single array unless the dtype is `object`. The full list of DTypes is: (<class 'numpy.dtypes.DateTime64DType'>, <class 'numpy.dtypes.Float64DType'>, <class 'numpy.dtypes.Float64DType'>, <class 'numpy.dtypes.Float64DType'>, <class 'numpy.dtypes.Float64DType'>, <class 'numpy.dtypes.Float64DType'>, <class 'numpy.dtypes.Float64DType'>, <class 'numpy.dtypes.Float64DType'>, <class 'numpy.dtypes.Float64DType'>, <class 'numpy.dtypes.Float64DType'>, <class 'numpy.dtypes.Float64DType'>, <class 'numpy.dtypes.Float64DType'>, <class 'numpy.dtypes.Float64DType'>, <class 'numpy.dtypes.Float64DType'>, <class 'numpy.dtypes.Float64DType'>, <class 'numpy.dtypes.Float64DType'>, <class 'numpy.dtypes.Float64DType'>, <class 'numpy.dtypes.Int32DType'>)