In [1]:
import pandas as pd
from preprocessing import clean_raw_data, create_features, prepare_training_data
from fetch_data import fetch_weather


# Load the weather history for the chosen city into `df_raw`.
# The recorded run printed that the data was read from a local CSV
# (data/_genève_730d.csv), and the cleaned frame has one row per calendar day.
# NOTE(review): `past_days=730` presumably asks for about two years of history;
# what fetch_weather does when no local CSV exists is not visible from this
# notebook — confirm in fetch_data.
city = "Genève"
df_raw = fetch_weather(city, past_days=730)

Chargement des données locales depuis data/_genève_730d.csv


In [2]:
# Run the project's cleaning step on the raw frame, print the resulting shape,
# and display the last rows as the cell's rich output.
# NOTE(review): in the recorded output the final row (2025-04-08) carries
# fractional values in every column, including weathercode = 33.772603. That
# looks like missing values filled with column averages — confirm in
# clean_raw_data, and consider dropping the incomplete day instead, because a
# fractional weather code is not a meaningful category.
df_clean = clean_raw_data(df_raw)
print(f"🔍 Données nettoyées : {df_clean.shape}")
df_clean.tail()

🔍 Données nettoyées : (731, 13)


Unnamed: 0,time,temperature_2m_max,temperature_2m_min,precipitation_sum,rain_sum,showers_sum,snowfall_sum,windspeed_10m_max,windgusts_10m_max,sunshine_duration,shortwave_radiation_sum,et0_fao_evapotranspiration,weathercode
726,2025-04-04,19.1,6.0,0.0,0.0,0.0,0.0,8.0,18.4,42218.01,20.7,3.2,3.0
727,2025-04-05,19.2,6.8,0.0,0.0,0.0,0.0,8.4,20.9,42840.75,20.43,3.24,1.0
728,2025-04-06,14.6,7.0,0.0,0.0,0.0,0.0,25.1,52.2,43402.93,22.08,3.39,3.0
729,2025-04-07,6.2,5.0,0.0,0.0,0.0,0.0,16.5,38.9,22948.084,7.364,0.88,0.0
730,2025-04-08,16.604795,8.356164,3.643073,3.555418,0.0,0.061358,12.748082,27.706301,30844.581043,13.699684,2.373882,33.772603


In [3]:
# Derive the modelling table from the cleaned frame, print its shape, and
# display the last rows. `n_lags` is forwarded to create_features.
# In the recorded output the result contains *_lag_1 .. *_lag_3 columns,
# 3-day rolling statistics, and target_* columns whose values equal the
# following day's measurements; the row count drops from 731 to 727.
# NOTE(review): presumably the dropped rows are those lacking full lag history
# or a next-day target — confirm in create_features.
n_lags = 3
df_features = create_features(df_clean, n_lags=n_lags)
print(f"🔍 Données après `create_features` : {df_features.shape}")
df_features.tail()

🔍 Données après `create_features` : (727, 49)


Unnamed: 0,time,weathercode,target_temperature_2m_max,target_temperature_2m_min,target_temperature_2m_mean,target_will_rain,target_windspeed_10m_max,temp_amplitude,rain_to_precip_ratio,wind_diff,...,sunshine_duration_lag_2,sunshine_duration_lag_3,shortwave_radiation_sum_lag_1,shortwave_radiation_sum_lag_2,shortwave_radiation_sum_lag_3,et0_fao_evapotranspiration_lag_1,et0_fao_evapotranspiration_lag_2,et0_fao_evapotranspiration_lag_3,temp_max_rolling_mean_3,rain_sum_rolling_std_3
725,2025-04-03,3.0,19.1,6.0,12.55,0,8.0,13.3,0.0,12.7,...,40030.78,41654.4,19.21,21.26,20.88,2.48,2.96,2.95,13.7,0.057735
726,2025-04-04,3.0,19.2,6.8,13.0,0,8.4,13.1,0.0,10.4,...,42056.66,40030.78,21.11,19.21,21.26,3.02,2.48,2.96,16.5,0.0
727,2025-04-05,1.0,14.6,7.0,10.8,0,25.1,12.4,0.0,12.5,...,42429.77,42056.66,20.7,21.11,19.21,3.2,3.02,2.48,18.5,0.0
728,2025-04-06,3.0,6.2,5.0,5.6,0,16.5,7.6,0.0,27.1,...,42218.01,42429.77,20.43,20.7,21.11,3.24,3.2,3.02,17.633333,0.0
729,2025-04-07,0.0,16.604795,8.356164,12.480479,1,12.748082,1.2,0.0,22.4,...,42840.75,42218.01,22.08,20.43,20.7,3.39,3.24,3.2,13.333333,0.0


In [4]:
# Columns the model is trained to predict. They all share the "target_" prefix,
# so the list is assembled from the bare quantity names.
target_columns = [
    f"target_{quantity}"
    for quantity in (
        "temperature_2m_max",
        "temperature_2m_min",
        "temperature_2m_mean",
        "will_rain",
        "windspeed_10m_max",
    )
]

# Split the feature table into model inputs (X) and targets (Y). The third
# return value is kept as `scaler`.
# NOTE(review): the recorded X values look standardized, so `scaler` is
# presumably the fitted feature scaler — confirm in prepare_training_data.
X, Y, scaler = prepare_training_data(df_features, target_columns)
print(f"✅ X.shape = {X.shape}, Y.shape = {Y.shape}")

✅ X.shape = (727, 43), Y.shape = (727, 5)


In [5]:
# Show the most recent training sample, or report that X came back empty.
# NOTE(review): in the recorded run Y[-1] equals the values of the last cleaned
# row (2025-04-08), which themselves look like filled-in averages — confirm
# that this sample should be part of the training set.
if len(X) == 0:
    print("❌ Aucune donnée disponible dans X")
else:
    print("Dernier échantillon (X):", X[-1])
    print("Dernier échantillon (Y):", Y[-1])

Dernier échantillon (X): [-1.21404492 -2.26789553 -1.11048165  1.293002   -0.53578781 -0.80137232
  1.41973134 -0.14232693 -0.24620503  0.31313321  0.30196802 -0.20645037
 -0.2358748  -0.35452919 -0.52538156 -0.52538156 -0.52538156 -0.5202875
 -0.5202875  -0.5202875   0.          0.          0.         -0.12018852
 -0.12018852 -0.12018852  2.15866561 -0.75855203 -0.82974436  2.19845087
 -0.60873896 -0.83442234  0.85113847  0.81472233  0.77235931  1.06537832
  0.85749395  0.89113625  0.62005369  0.5291801   0.50515634 -0.40959977
 -0.79015599]
Dernier échantillon (Y): [16.60479452  8.35616438 12.48047945  1.         12.74808219]


In [6]:
# List only the columns of df_features that still contain missing values.
# The per-column NaN counts are computed once and filtered through a callable
# indexer, instead of running isna().sum() over the whole frame twice.
print("Colonnes avec NaN dans df_features :")
print(df_features.isna().sum().loc[lambda counts: counts > 0])

Colonnes avec NaN dans df_features :
Series([], dtype: int64)
