In [12]:
import pandas as pd
import numpy as np
import pickle
import time
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, GRU, SimpleRNN, Dense, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import Huber
from sklearn.metrics import mean_absolute_error, mean_squared_error
from prophet import Prophet
import xgboost as xgb
import lightgbm as lgb
from lightgbm import early_stopping, log_evaluation
import warnings

# Suppress every Python warning for the rest of the session and fix the
# TensorFlow global seed so repeated runs start from the same random state.
warnings.filterwarnings('ignore')
tf.random.set_seed(42)

# Input files, one pickle per house (generated by the previous step).
INPUT_FILES = [
    'processed_data_House1.pkl',
    'processed_data_House2.pkl',
]

print("Librarii incarcate. Configurare GATA.")

Librarii incarcate. Configurare GATA.


In [13]:
def _forecast_with_prophet(df_1H, train_size):
    """Fit Prophet on the first ``train_size`` rows and forecast the remaining rows.

    Args:
        df_1H: DataFrame whose index is named ``dt`` and which contains the
            columns ``Aggregate``, ``Cluster``, ``Hour_Sin``, ``Hour_Cos``
            and ``IsWeekend``.
        train_size: Number of leading rows used for fitting.

    Returns:
        NumPy array with the ``yhat`` forecast, one value per held-out row.
    """
    regressors = ['Cluster', 'Hour_Sin', 'Hour_Cos', 'IsWeekend']

    # Prophet expects the timestamp in 'ds' and the target in 'y'.
    df_prophet = df_1H.reset_index()[['dt', 'Aggregate'] + regressors]
    df_prophet.columns = ['ds', 'y'] + regressors

    df_prophet_train = df_prophet.iloc[:train_size]
    df_prophet_test = df_prophet.iloc[train_size:]

    m = Prophet(daily_seasonality=True, weekly_seasonality=True, uncertainty_samples=0)
    m.add_seasonality(name='monthly', period=30.5, fourier_order=5)
    for regressor in regressors:
        m.add_regressor(regressor)

    m.fit(df_prophet_train)

    # Predict directly on the held-out rows: this keeps every regressor value
    # attached to its real timestamp (no positional re-assembly of a synthetic
    # future frame) and avoids re-predicting the whole training history.
    forecast = m.predict(df_prophet_test.drop(columns='y'))
    return forecast['yhat'].values


def _fit_and_predict_sequence_model(model, X_train, y_train, X_test, scaler,
                                    epochs, batch_size):
    """Train a compiled Keras model and return its test predictions, un-scaled.

    The raw model output is passed through ``scaler.inverse_transform`` and
    flattened to 1-D.
    """
    model.fit(X_train, y_train, epochs=epochs, batch_size=batch_size, verbose=1)
    scaled_pred = model.predict(X_test, verbose=0)
    # NOTE(review): assumes `scaler` was fitted on the target column only, so a
    # single-column prediction array can be inverted - confirm in preprocessing.
    return scaler.inverse_transform(scaled_pred).flatten()


def _build_tabular_dataset(df_1H):
    """Create the lag / rolling-window feature matrix used by the tree models.

    Returns:
        Tuple ``(X, y)`` of NumPy arrays; rows containing NaN (introduced by
        the shifts and rolling windows, or already present) are dropped.
    """
    df_ml = df_1H.copy()
    for lag in [1, 24, 168]:
        df_ml[f'lag_{lag}'] = df_ml['Aggregate'].shift(lag)

    # shift(1) first so the rolling statistics only use values before the
    # current row (no leakage of the target into its own features).
    df_ml['rolling_mean'] = df_ml['Aggregate'].shift(1).rolling(24).mean()
    df_ml['rolling_std'] = df_ml['Aggregate'].shift(1).rolling(24).std()
    df_ml.dropna(inplace=True)

    feats = [c for c in df_ml.columns if 'lag' in c or 'rolling' in c or c in ['Cluster', 'Hour_Sin', 'Hour_Cos','IsWeekend', 'DayOfWeek']]

    return df_ml[feats].values, df_ml['Aggregate'].values


def train_models_for_house(filename, epochs=20):
    """Train all forecasting models for one house and pickle their predictions.

    Loads the preprocessed bundle from ``filename``, then trains Prophet, a
    stacked LSTM, a GRU, a SimpleRNN, XGBoost and LightGBM, recording each
    model's test-set predictions and wall-clock time. The results are written
    to ``results_<house>.pkl`` in the current directory.

    Args:
        filename: Path to the pickle produced by the preprocessing step. It
            must contain the keys ``X_train``, ``y_train``, ``X_test``,
            ``scaler``, ``df_1H``, ``train_size``, ``WINDOW_SIZE`` and
            ``test_data``.
        epochs: Number of training epochs for each recurrent network.

    Returns:
        None. If ``filename`` does not exist a message is printed and the
        house is skipped.

    Raises:
        KeyError: If the loaded bundle lacks one of the expected keys.
    """
    house_name = filename.replace('processed_data_', '').replace('.pkl', '')
    print(f"\n{'='*50}\n[START] Antrenare pentru: {house_name}\n{'='*50}")

    # 1. Load data
    # NOTE(review): pickle.load can execute arbitrary code; only use files
    # produced by the trusted preprocessing step.
    try:
        with open(filename, 'rb') as f:
            data = pickle.load(f)
    except FileNotFoundError:
        print(f"Nu gasesc {filename}. Sari peste.")
        return

    X_train = data['X_train']
    y_train = data['y_train']
    X_test = data['X_test']
    scaler = data['scaler']
    df_1H = data['df_1H']
    train_size = data['train_size']
    WINDOW_SIZE = data['WINDOW_SIZE']
    test_data = data['test_data']
    # X_train is indexed on its third axis, so it is a 3-D array; the last
    # axis is used as the per-timestep feature count of the recurrent models.
    n_features = X_train.shape[2]

    rezultate = {}
    timpi = {}

    # --- 1. PROPHET ---
    print("\n--- 1. PROPHET ---")
    start = time.time()
    rezultate['Prophet'] = _forecast_with_prophet(df_1H, train_size)
    timpi['Prophet'] = time.time() - start
    print(f" -> Gata ({timpi['Prophet']:.1f}s)")

    # --- 2. LSTM (Stacked) ---
    print("\n--- 2. LSTM (Stacked) ---")
    start = time.time()

    model_lstm = Sequential()
    model_lstm.add(LSTM(64, return_sequences=True, input_shape=(WINDOW_SIZE, n_features)))
    model_lstm.add(Dropout(0.3))
    model_lstm.add(LSTM(32, return_sequences=False))
    model_lstm.add(Dropout(0.3))
    model_lstm.add(Dense(16, activation='relu'))
    model_lstm.add(Dense(1))
    model_lstm.compile(optimizer='adam', loss=Huber(delta=1.0))

    rezultate['LSTM'] = _fit_and_predict_sequence_model(
        model_lstm, X_train, y_train, X_test, scaler, epochs=epochs, batch_size=64
    )
    timpi['LSTM'] = time.time() - start
    print(f" -> Gata ({timpi['LSTM']:.1f}s)")

    # --- 3. GRU ---
    print("\n--- 3. GRU ---")
    start = time.time()

    model_gru = Sequential()
    model_gru.add(GRU(64, return_sequences=True, input_shape=(WINDOW_SIZE, n_features)))
    model_gru.add(Dropout(0.3))
    model_gru.add(GRU(32, return_sequences=False))
    model_gru.add(Dense(1))
    model_gru.compile(optimizer='adam', loss=Huber(delta=1.0))

    rezultate['GRU'] = _fit_and_predict_sequence_model(
        model_gru, X_train, y_train, X_test, scaler, epochs=epochs, batch_size=64
    )
    timpi['GRU'] = time.time() - start
    print(f" -> Gata ({timpi['GRU']:.1f}s)")

    # --- 4. SIMPLE RNN ---
    print("\n--- 4. SIMPLE RNN ---")
    start = time.time()

    model_rnn = Sequential()
    model_rnn.add(SimpleRNN(32, input_shape=(WINDOW_SIZE, n_features), activation='tanh'))
    model_rnn.add(Dropout(0.2))
    model_rnn.add(Dense(1))
    # Low learning rate plus gradient clipping for the plain RNN.
    opt = Adam(learning_rate=0.0001, clipvalue=1.0)
    model_rnn.compile(optimizer=opt, loss='mse')

    rezultate['RNN'] = _fit_and_predict_sequence_model(
        model_rnn, X_train, y_train, X_test, scaler, epochs=epochs, batch_size=128
    )
    timpi['RNN'] = time.time() - start
    print(f" -> Gata ({timpi['RNN']:.1f}s)")

    # --- 6. XGBOOST & LIGHTGBM ---
    print("\n--- 6. ML Clasic (XGBoost & LightGBM) ---")
    # Tabular feature engineering for the tree-based models
    X_ml, y_ml = _build_tabular_dataset(df_1H)

    # Split: the last len(test_data) rows are held out.
    # NOTE(review): this assumes test_data is the tail of df_1H so that the
    # predictions line up with y_true saved below - confirm in preprocessing.
    test_len = len(test_data)
    train_len_ml = len(y_ml) - test_len

    X_train_ml, y_train_ml = X_ml[:train_len_ml], y_ml[:train_len_ml]
    X_test_ml = X_ml[train_len_ml:]

    # XGB
    start = time.time()
    xgb_model = xgb.XGBRegressor(n_estimators=500, learning_rate=0.05, max_depth=6, n_jobs=-1, random_state=42)
    xgb_model.fit(X_train_ml, y_train_ml)
    rezultate['XGBoost'] = xgb_model.predict(X_test_ml)
    timpi['XGBoost'] = time.time() - start
    print(f" -> XGBoost Gata ({timpi['XGBoost']:.1f}s)")

    # LGBM
    start = time.time()
    lgb_model = lgb.LGBMRegressor(n_estimators=500, learning_rate=0.05, num_leaves=31, n_jobs=-1, random_state=42, verbosity=-1)
    lgb_model.fit(X_train_ml, y_train_ml)
    rezultate['LightGBM'] = lgb_model.predict(X_test_ml)
    timpi['LightGBM'] = time.time() - start
    print(f" -> LightGBM Gata ({timpi['LightGBM']:.1f}s)")

    # --- SAVE RESULTS ---
    save_name = f'results_{house_name}.pkl'
    package = {
        'rezultate': rezultate,
        'timpi': timpi,
        'y_true': test_data['Aggregate'].values,
        'test_index': test_data.index
    }

    with open(save_name, 'wb') as f:
        pickle.dump(package, f)

    print(f"\n[SALVAT] Rezultate salvate in {save_name}")

In [14]:
# Run the whole pipeline: train every model for each configured house.
for input_path in INPUT_FILES:
    train_models_for_house(input_path)

# Closing banner shown once all houses have been processed.
separator = "=" * 50
print("\n" + separator)
print("--- \n\n[FINAL] Toate modelele pentru toate casele au fost antrenate! ---")
print("GATA! Fisierele .pkl sunt generate.")
print("Poti trece acum la urmatorul notebook: '03_Analysis_Comparison.ipynb'")
print(separator)


[START] Antrenare pentru: House1

--- 1. PROPHET ---


19:12:44 - cmdstanpy - INFO - Chain [1] start processing
19:12:46 - cmdstanpy - INFO - Chain [1] done processing


 -> Gata (2.3s)

--- 2. LSTM (Stacked) ---
Epoch 1/20
[1m118/118[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 25ms/step - loss: 0.3586
Epoch 2/20
[1m118/118[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 25ms/step - loss: 0.3290
Epoch 3/20
[1m118/118[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 24ms/step - loss: 0.3159
Epoch 4/20
[1m118/118[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 23ms/step - loss: 0.3084
Epoch 5/20
[1m118/118[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 24ms/step - loss: 0.3049
Epoch 6/20
[1m118/118[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 25ms/step - loss: 0.3005
Epoch 7/20
[1m118/118[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 24ms/step - loss: 0.2970
Epoch 8/20
[1m118/118[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 24ms/step - loss: 0.2958
Epoch 9/20
[1m118/118[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 24ms/step - loss: 0.2926
Epoch 10/20
[1m118/118[0m [32m━━━━━━

19:15:07 - cmdstanpy - INFO - Chain [1] start processing
19:15:07 - cmdstanpy - INFO - Chain [1] done processing


 -> Gata (3.1s)

--- 2. LSTM (Stacked) ---
Epoch 1/20
[1m118/118[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 24ms/step - loss: 0.4507
Epoch 2/20
[1m118/118[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 25ms/step - loss: 0.4200
Epoch 3/20
[1m118/118[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 25ms/step - loss: 0.4006
Epoch 4/20
[1m118/118[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 25ms/step - loss: 0.3815
Epoch 5/20
[1m118/118[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 24ms/step - loss: 0.3695
Epoch 6/20
[1m118/118[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 25ms/step - loss: 0.3629
Epoch 7/20
[1m118/118[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 26ms/step - loss: 0.3562
Epoch 8/20
[1m118/118[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 25ms/step - loss: 0.3526
Epoch 9/20
[1m118/118[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 26ms/step - loss: 0.3465
Epoch 10/20
[1m118/118[0m [32m━━━━━━