# Crop Yield Prediction – Final Model Pipeline (Rev9)

## 1. Setup & Imports

In [53]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
import joblib
import os
from scipy.signal import detrend
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import TimeSeriesSplit
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression
import xgboost as xgb
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim.lr_scheduler import ReduceLROnPlateau
from torch.utils.data import DataLoader, TensorDataset
import optuna
import shap

# Seed NumPy's and PyTorch's global random number generators with the same fixed value.
np.random.seed(42)
torch.manual_seed(42)
# Use seaborn's "whitegrid" style for plots.
sns.set_style("whitegrid")
# Suppress every warning for the rest of the session (this also hides deprecation notices).
warnings.filterwarnings('ignore')
print("All libraries loaded.")

All libraries loaded.


## 2. Model Selection Menu

In [54]:
# Per-model on/off switches, keyed by short model name.
# Set an entry to False to leave that model out.
RUN_MODELS = {
    'LR': True,   # Baseline Linear Regression
    'RF': True,   # Random Forest
    'XGB': True,  # XGBoost
    'LSTM': True, # LSTM
    'CNN': True   # CNN
}

# Master switch for hyperparameter tuning with Optuna.
RUN_OPTUNA = True

## 3. Load & Preprocess Data (Corrected)

In [55]:
# Corrected Data Preprocessing
# Every fitted object in this cell (label encoders, trend models, scalers) is fitted on the
# training years only and then applied unchanged to the validation and test years.
try:
    df = pd.read_csv("cleaned_crop_data.csv")
    print(f"Loaded initial data: {df.shape}")
except FileNotFoundError:
    raise FileNotFoundError("Ensure 'cleaned_crop_data.csv' is present. Run the EDA notebook first.")

TARGET = 'hg/ha_yield'
TIME_COL = 'Year'
CAT_COLS = ['Area', 'Item']
NUMERIC_COLS = ['average_rain_fall_mm_per_year', 'pesticides_tonnes', 'avg_temp', 'fertilizer_kg/ha', 'solar_radiation_MJ/m2-day']
TARGET_DET = 'yield_detrended'

# 1. Split data chronologically
TRAIN_END = 2007
VAL_END = 2010
train_df_orig = df[df[TIME_COL] <= TRAIN_END].copy()
val_df_orig = df[(df[TIME_COL] > TRAIN_END) & (df[TIME_COL] <= VAL_END)].copy()
test_df_orig = df[df[TIME_COL] > VAL_END].copy()
print(f"1. Initial data split: Train: {train_df_orig.shape}, Val: {val_df_orig.shape}, Test: {test_df_orig.shape}")

# 2. Fit encoders ON TRAINING DATA ONLY
le_area = LabelEncoder().fit(train_df_orig['Area'])
le_item = LabelEncoder().fit(train_df_orig['Item'])


def _encode_labels(labels, encoder):
    """Return the encoder's integer code for each label in the Series `labels`.

    Labels the encoder was not fitted on receive the reserved code
    len(encoder.classes_). That code is a valid row of an embedding table sized
    len(encoder.classes_) + 1, whereas a negative code is not a valid embedding index.
    One dictionary lookup per row replaces one `encoder.transform` call per row.
    """
    code_by_label = {label: code for code, label in enumerate(encoder.classes_)}
    unknown_code = len(encoder.classes_)
    return labels.map(code_by_label).fillna(unknown_code).astype('int64')


for d in [train_df_orig, val_df_orig, test_df_orig]:
    d['Area_Encoded'] = _encode_labels(d['Area'], le_area)
    d['Item_Encoded'] = _encode_labels(d['Item'], le_item)
print("2. Encoders fitted on train set and applied to all sets.")

# 3. Fit trend models ON TRAINING DATA ONLY
# One linear yield-vs-year trend per (Area, Item) group, plus a global trend used as the
# fallback for groups that do not occur in the training years.
print("3. Fitting trend models on training data...")
trend_models = {}
for group, group_df in train_df_orig.groupby(CAT_COLS):
    trend_model = LinearRegression()
    trend_model.fit(group_df[[TIME_COL]], group_df[TARGET])
    trend_models[group] = trend_model

global_trend_model = LinearRegression().fit(train_df_orig[[TIME_COL]], train_df_orig[TARGET])
print(f"   Fitted {len(trend_models)} group-specific trend models and 1 global model.")

# 4. Apply detrending to all datasets
for df_set in [train_df_orig, val_df_orig, test_df_orig]:
    df_set['yield_trend'] = 0.0
    for group, group_df in df_set.groupby(CAT_COLS):
        model = trend_models.get(group, global_trend_model)
        trend_prediction = model.predict(group_df[[TIME_COL]])
        df_set.loc[group_df.index, 'yield_trend'] = trend_prediction
    # The modelling target is the residual around the fitted trend.
    df_set['yield_detrended'] = df_set[TARGET] - df_set['yield_trend']
print("   Detrending applied to all datasets.")

# 5. Create lags and finalize split for ML models
# Lags are computed on the re-joined, time-sorted frame so that validation/test rows can
# take their lagged values from the preceding rows of the same group.
full_df_ml = pd.concat([train_df_orig, val_df_orig, test_df_orig]).sort_values(CAT_COLS + [TIME_COL])
lag_cols = ['yield_detrended'] + NUMERIC_COLS
for col in lag_cols:
    for lag in [1, 2]:
        full_df_ml[f'{col}_lag{lag}'] = full_df_ml.groupby(CAT_COLS)[col].shift(lag)

# Rows without a full lag history (the first rows of each group) are dropped here.
df_ml = full_df_ml.dropna().copy()
train_df = df_ml[df_ml[TIME_COL] <= TRAIN_END].copy()
val_df = df_ml[(df_ml[TIME_COL] > TRAIN_END) & (df_ml[TIME_COL] <= VAL_END)].copy()
test_df = df_ml[df_ml[TIME_COL] > VAL_END].copy()
print(f"4. Lags created for ML models: Train: {train_df.shape}, Val: {val_df.shape}, Test: {test_df.shape}")

# 6. Fit scalers for ML models ON TRAINING DATA ONLY
lagged_cols = [c for c in df_ml.columns if '_lag' in c]
ml_features = NUMERIC_COLS + lagged_cols + ['Area_Encoded', 'Item_Encoded']
scale_cols = NUMERIC_COLS + lagged_cols

x_scaler = StandardScaler()
train_df[scale_cols] = x_scaler.fit_transform(train_df[scale_cols])
val_df[scale_cols] = x_scaler.transform(val_df[scale_cols])
test_df[scale_cols] = x_scaler.transform(test_df[scale_cols])

y_scaler = StandardScaler()
train_df[TARGET_DET] = y_scaler.fit_transform(train_df[[TARGET_DET]])
val_df[TARGET_DET] = y_scaler.transform(val_df[[TARGET_DET]])
test_df[TARGET_DET] = y_scaler.transform(test_df[[TARGET_DET]])
print("5. X and y scalers for ML models fitted and applied.")

# 7. Save transformers
joblib.dump(x_scaler, 'scaler.joblib')
joblib.dump(y_scaler, 'y_scaler.joblib')
joblib.dump(le_area, 'le_area.joblib')
joblib.dump(le_item, 'le_item.joblib')
joblib.dump(trend_models, 'trend_models.joblib')
joblib.dump(global_trend_model, 'global_trend_model.joblib')
print("6. All transformers saved to disk.")

# Embedding table sizes: one row per training category plus the reserved row that
# _encode_labels assigns to categories absent from the training years.
N_AREAS = len(le_area.classes_) + 1
N_ITEMS = len(le_item.classes_) + 1

Loaded initial data: (25932, 9)
1. Initial data split: Train: (19032, 9), Val: (3424, 9), Test: (3476, 9)
2. Encoders fitted on train set and applied to all sets.
3. Fitting trend models on training data...
   Fitted 588 group-specific trend models and 1 global model.
   Detrending applied to all datasets.
4. Lags created for ML models: Train: (17857, 25), Val: (3421, 25), Test: (3459, 25)
5. X and y scalers for ML models fitted and applied.
6. All transformers saved to disk.


## 4. Prepare Inputs (Corrected)

In [56]:
# ML Inputs: feature matrix / scaled detrended target for each chronological split
X_train_ml, y_train_ml = train_df[ml_features], train_df[TARGET_DET]
X_val_ml, y_val_ml = val_df[ml_features], val_df[TARGET_DET]
X_test_ml, y_test_ml = test_df[ml_features], test_df[TARGET_DET]
print("ML inputs prepared.")

# DL Inputs
LOOKBACK = 5
DL_FEATS = NUMERIC_COLS + ['Area_Encoded', 'Item_Encoded']

# The DL feature scaler is fitted on the training frame only. The target reuses the
# already fitted y_scaler.
# NOTE: the *_orig frames are overwritten in place, so running this cell a second time
# without re-running the preprocessing cell scales already-scaled values again.
scaler_dl_x = StandardScaler().fit(train_df_orig[NUMERIC_COLS])
for frame in (train_df_orig, val_df_orig, test_df_orig):
    frame[NUMERIC_COLS] = scaler_dl_x.transform(frame[NUMERIC_COLS])
    frame[TARGET_DET] = y_scaler.transform(frame[[TARGET_DET]])
print("DL features and target scaled.")

def create_sequences(data, lookback, feats, target, group_cols=None, time_col=None):
    """Build fixed-length sliding windows per group, in chronological order.

    Args:
        data: DataFrame holding the grouping columns, the time column, `feats` and `target`.
        lookback: number of consecutive rows in each window.
        feats: list of feature column names placed in each window.
        target: name of the target column; a window's label is the target of its last row.
        group_cols: columns identifying one series. Defaults to the notebook's CAT_COLS.
        time_col: column used to order rows inside a group. Defaults to the notebook's TIME_COL.

    Returns:
        (X, y, y_indices) where X has shape (n_windows, lookback, len(feats)), y has shape
        (n_windows,), and y_indices lists the index label in `data` of each window's last row.
        Groups with fewer than `lookback` rows contribute no windows.
    """
    if group_cols is None:
        group_cols = CAT_COLS
    if time_col is None:
        time_col = TIME_COL

    X, y = [], []
    # Keep track of original index
    y_indices = []
    for _, group in data.groupby(group_cols):
        if len(group) < lookback:
            continue
        # groupby keeps the frame's row order inside each group, so the rows are ordered by
        # time explicitly. The stable sort keeps rows sharing a time value in input order.
        group = group.sort_values(time_col, kind='stable')
        gf = group[feats].values
        gt = group[target].values
        indices = group.index
        for i in range(len(group) - lookback + 1):
            X.append(gf[i:i+lookback])
            y.append(gt[i+lookback-1])
            y_indices.append(indices[i+lookback-1])

    if not X:
        # Keep the documented 3-D shape even when no group is long enough.
        return np.empty((0, lookback, len(feats))), np.empty((0,)), y_indices
    return np.array(X), np.array(y), y_indices

# Window each split separately; only the test split keeps the row labels of its targets.
X_train_seq, y_train_seq, _ = create_sequences(
    data=train_df_orig, lookback=LOOKBACK, feats=DL_FEATS, target=TARGET_DET
)
X_val_seq, y_val_seq, _ = create_sequences(
    data=val_df_orig, lookback=LOOKBACK, feats=DL_FEATS, target=TARGET_DET
)
X_test_seq, y_test_seq, y_test_indices = create_sequences(
    data=test_df_orig, lookback=LOOKBACK, feats=DL_FEATS, target=TARGET_DET
)
print("DL sequences created.")

# Test rows whose targets the sequences predict, in sequence order (for evaluation).
test_df_dl_seq_ref = test_df_orig.loc[y_test_indices]

def split_dl(X):
    """Split a window array along its last axis into the three model inputs.

    The first len(NUMERIC_COLS) entries of the last axis become a float32 tensor; the next
    two entries become one long tensor each (passed to the models as `area` and `item`).

    Returns:
        [numeric_tensor, area_tensor, item_tensor]
    """
    n_numeric = len(NUMERIC_COLS)
    numeric_part = torch.tensor(X[..., :n_numeric], dtype=torch.float32)
    area_codes, item_codes = (
        torch.tensor(X[..., column], dtype=torch.long)
        for column in (n_numeric, n_numeric + 1)
    )
    return [numeric_part, area_codes, item_codes]

# Model inputs per split: [numeric, area codes, item codes]
X_train_dl, X_val_dl, X_test_dl = (
    split_dl(windows) for windows in (X_train_seq, X_val_seq, X_test_seq)
)

# Targets as float32 column vectors of shape (n_windows, 1)
y_train_t, y_val_t, y_test_t = (
    torch.tensor(labels, dtype=torch.float32).unsqueeze(1)
    for labels in (y_train_seq, y_val_seq, y_test_seq)
)
print("DL tensors created.")

ML inputs prepared.
DL features and target scaled.
DL sequences created.
DL tensors created.


## 5. Metrics

In [57]:
def rmspe(y_true, y_pred):
    """Root mean squared percentage error, expressed in percent.

    Args:
        y_true: array-like of reference values.
        y_pred: array-like of predictions, same length as `y_true`.

    Returns:
        sqrt(mean(((y_true - y_pred) / denom) ** 2)) * 100, where
        denom = max(|y_true|, 1e-8).

    The error is squared, so only the magnitude of the denominator matters. Flooring
    |y_true| keeps the denominator away from zero for negative targets as well, whereas
    `y_true + 1e-8` is exactly zero when y_true == -1e-8. Inputs are converted to float
    arrays, so lists are accepted and values are paired by position.
    """
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)
    denom = np.maximum(np.abs(y_true), 1e-8)
    return np.sqrt(np.mean(((y_true - y_pred) / denom) ** 2)) * 100

def mape(y_true, y_pred):
    """Mean absolute percentage error, expressed in percent.

    Args:
        y_true: array-like of reference values.
        y_pred: array-like of predictions, same length as `y_true`.

    Returns:
        mean(|y_true - y_pred| / denom) * 100, where denom = max(|y_true|, 1e-8).

    The absolute value is taken, so only the magnitude of the denominator matters. Flooring
    |y_true| keeps the denominator away from zero for negative targets as well, whereas
    `y_true + 1e-8` is exactly zero when y_true == -1e-8. Inputs are converted to float
    arrays, so lists are accepted and values are paired by position.
    """
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)
    denom = np.maximum(np.abs(y_true), 1e-8)
    return np.mean(np.abs(y_true - y_pred) / denom) * 100

## 6. Optuna Objectives (Corrected)

In [58]:
def objective_lr(trial):
    """Optuna objective for the baseline: validation RMSE of a plain LinearRegression.

    `trial` is accepted only to satisfy the objective signature; nothing is suggested
    because the model has no hyperparameters to tune.
    """
    baseline = LinearRegression().fit(X_train_ml, y_train_ml)
    val_mse = mean_squared_error(y_val_ml, baseline.predict(X_val_ml))
    return np.sqrt(val_mse)

def objective_rf(trial):
    """Optuna objective for the random forest: returns RMSE on the validation split.

    Searches the number of trees, the maximum depth, the minimum samples per leaf and the
    fraction of features considered per split.
    """
    n_estimators = trial.suggest_int('n_estimators', 50, 400)
    max_depth = trial.suggest_int('max_depth', 5, 30)
    min_samples_leaf = trial.suggest_int('min_samples_leaf', 1, 10)
    max_features = trial.suggest_float('max_features', 0.5, 1.0)

    forest = RandomForestRegressor(
        random_state=42,
        n_jobs=-1,
        n_estimators=n_estimators,
        max_depth=max_depth,
        min_samples_leaf=min_samples_leaf,
        max_features=max_features,
    )
    forest.fit(X_train_ml, y_train_ml)
    val_mse = mean_squared_error(y_val_ml, forest.predict(X_val_ml))
    return np.sqrt(val_mse)

def objective_xgb(trial):
    """Optuna objective for XGBoost: returns RMSE on the validation split.

    The validation split is also the early-stopping evaluation set (50 rounds of patience,
    chosen to suit the low learning rates in the search space).
    """
    n_estimators = trial.suggest_int('n_estimators', 500, 2000)
    max_depth = trial.suggest_int('max_depth', 3, 12)
    learning_rate = trial.suggest_float('learning_rate', 0.001, 0.05, log=True)
    subsample = trial.suggest_float('subsample', 0.6, 1.0)
    colsample_bytree = trial.suggest_float('colsample_bytree', 0.6, 1.0)
    gamma = trial.suggest_float('gamma', 0, 5)

    booster = xgb.XGBRegressor(
        random_state=42,
        n_estimators=n_estimators,
        max_depth=max_depth,
        learning_rate=learning_rate,
        subsample=subsample,
        colsample_bytree=colsample_bytree,
        gamma=gamma,
        early_stopping_rounds=50,
        eval_metric='rmse',
    )
    booster.fit(X_train_ml, y_train_ml, eval_set=[(X_val_ml, y_val_ml)], verbose=False)

    val_mse = mean_squared_error(y_val_ml, booster.predict(X_val_ml))
    return np.sqrt(val_mse)

class LSTMModel(nn.Module):
    """LSTM regressor over windows of numeric features plus area/item embeddings.

    Args:
        n_areas: number of rows in the area embedding table.
        n_items: number of rows in the item embedding table.
        lstm_units: hidden size of the LSTM.
        dense_units: width of the hidden dense layer.
        dropout: dropout probability applied to the last LSTM output.
        area_embed_dim: width of the area embedding (default 10).
        item_embed_dim: width of the item embedding (default 5).
        n_numeric: number of numeric features per time step; defaults to len(NUMERIC_COLS).
    """

    def __init__(self, n_areas, n_items, lstm_units, dense_units, dropout,
                 area_embed_dim=10, item_embed_dim=5, n_numeric=None):
        super().__init__()
        if n_numeric is None:
            n_numeric = len(NUMERIC_COLS)
        self.embed_area = nn.Embedding(n_areas, area_embed_dim)
        self.embed_item = nn.Embedding(n_items, item_embed_dim)
        # The LSTM input is the numeric features concatenated with both embeddings.
        self.lstm = nn.LSTM(n_numeric + area_embed_dim + item_embed_dim, lstm_units, batch_first=True)
        self.drop = nn.Dropout(dropout)
        self.fc1 = nn.Linear(lstm_units, dense_units)
        self.fc2 = nn.Linear(dense_units, 1)

    def forward(self, num, area, item):
        """Return one prediction per sequence, shape (batch, 1).

        Args:
            num: float tensor (batch, seq_len, n_numeric).
            area: long tensor (batch, seq_len) of area codes.
            item: long tensor (batch, seq_len) of item codes.
        """
        e_area = self.embed_area(area)
        e_item = self.embed_item(item)
        x = torch.cat([num, e_area, e_item], dim=-1)
        out, _ = self.lstm(x)
        # Only the output of the final time step feeds the dense head.
        out = self.drop(out[:, -1])
        out = torch.relu(self.fc1(out))
        return self.fc2(out)

def train_dl(model, opt, loss_fn, train_loader, val_loader, target_scaler, epochs=100, patience=10, is_final=False):
    """Train `model` with early stopping on validation RMSE and restore the best weights.

    Args:
        model: module called as model(x1, x2, x3).
        opt: optimizer over the model's parameters.
        loss_fn: loss applied to (prediction, target); its square root is tracked as RMSE.
        train_loader: yields (x1, x2, x3, y) batches.
        val_loader: loader whose `.dataset.tensors` holds (x1, x2, x3, y); the whole
            validation set is evaluated in a single forward pass each epoch.
        target_scaler: unused; kept so existing calls remain valid.
        epochs: maximum number of epochs.
        patience: number of consecutive non-improving epochs tolerated before stopping.
        is_final: when True, the state dict is saved to 'model_<ClassName>.pth' on every
            improvement of the validation RMSE.

    Returns:
        (train_losses, val_losses, best_val_rmse): mean training loss per epoch, validation
        loss per epoch (not square-rooted), and the lowest validation RMSE observed.
        On return the model holds the weights of the epoch that achieved best_val_rmse.
    """
    scheduler = ReduceLROnPlateau(opt, 'min', patience=5, factor=0.5)
    device = next(model.parameters()).device

    # Move the validation inputs AND targets to the model's device once.
    val_tensors = [t.to(device) for t in val_loader.dataset.tensors]
    val_inputs, val_y = val_tensors[:3], val_tensors[3]

    best_val_rmse = float('inf') # Optimize for RMSE
    best_state = None
    wait = 0
    train_losses = []
    val_losses = []
    for epoch in range(epochs):
        model.train()
        train_loss = 0
        for x1, x2, x3, y in train_loader:
            # Keep every batch on the same device as the model.
            x1, x2, x3, y = x1.to(device), x2.to(device), x3.to(device), y.to(device)
            opt.zero_grad()
            pred = model(x1, x2, x3)
            loss = loss_fn(pred, y)
            loss.backward()
            opt.step()
            train_loss += loss.item()
        train_losses.append(train_loss / len(train_loader))

        model.eval()
        with torch.no_grad():
            val_pred = model(*val_inputs)
            val_mse = loss_fn(val_pred, val_y).item()
            val_rmse = np.sqrt(val_mse) # Calculate RMSE
            val_losses.append(val_mse)

        scheduler.step(val_rmse) # Step based on validation RMSE

        if val_rmse < best_val_rmse:
            best_val_rmse = val_rmse
            # state_dict() shares storage with the live parameters, so snapshot by cloning.
            best_state = {key: value.detach().clone() for key, value in model.state_dict().items()}
            wait = 0
            if is_final:
                 torch.save(model.state_dict(), f'model_{model.__class__.__name__}.pth')
        else:
            wait += 1
            if wait >= patience:
                break

    # Without this the model would keep the weights of the last (non-improving) epoch.
    if best_state is not None:
        model.load_state_dict(best_state)
    return train_losses, val_losses, best_val_rmse

def objective_lstm(trial):
    """Optuna objective for the LSTM: returns the best validation RMSE seen in training.

    Searches the LSTM and dense layer widths, the dropout rate, and Adam's learning rate
    and weight decay.
    """
    lstm_units = trial.suggest_categorical('lstm_units', [64, 128])
    dense_units = trial.suggest_categorical('dense_units', [32, 64])
    dropout = trial.suggest_float('dropout', 0.1, 0.4)
    learning_rate = trial.suggest_float('lr', 1e-4, 1e-2, log=True)
    decay = trial.suggest_float('weight_decay', 1e-5, 1e-3, log=True)

    net = LSTMModel(N_AREAS, N_ITEMS, lstm_units=lstm_units, dense_units=dense_units, dropout=dropout)
    optimizer = optim.Adam(net.parameters(), lr=learning_rate, weight_decay=decay)

    train_batches = DataLoader(TensorDataset(*X_train_dl, y_train_t), batch_size=64, shuffle=True)
    val_batches = DataLoader(TensorDataset(*X_val_dl, y_val_t), batch_size=64)

    # train_dl returns (train_losses, val_losses, best_val_rmse); only the last is the score.
    return train_dl(net, optimizer, nn.MSELoss(), train_batches, val_batches, y_scaler)[2]

class CNNModel(nn.Module):
    """1-D CNN regressor over windows of numeric features plus area/item embeddings.

    Args:
        n_areas: number of rows in the area embedding table.
        n_items: number of rows in the item embedding table.
        filters: number of convolution output channels.
        kernel: convolution kernel size along the time axis.
        dense_units: width of the hidden dense layer.
        area_embed_dim: width of the area embedding (default 10).
        item_embed_dim: width of the item embedding (default 5).
        n_numeric: number of numeric features per time step; defaults to len(NUMERIC_COLS).
    """

    def __init__(self, n_areas, n_items, filters, kernel, dense_units,
                 area_embed_dim=10, item_embed_dim=5, n_numeric=None):
        super().__init__()
        if n_numeric is None:
            n_numeric = len(NUMERIC_COLS)
        self.embed_area = nn.Embedding(n_areas, area_embed_dim)
        self.embed_item = nn.Embedding(n_items, item_embed_dim)
        # Input channels are the numeric features concatenated with both embeddings.
        self.conv = nn.Conv1d(n_numeric + area_embed_dim + item_embed_dim, filters, kernel)
        self.pool = nn.AdaptiveMaxPool1d(1)
        self.fc1 = nn.Linear(filters, dense_units)
        self.fc2 = nn.Linear(dense_units, 1)

    def forward(self, num, area, item):
        """Return one prediction per sequence, shape (batch, 1).

        Args:
            num: float tensor (batch, seq_len, n_numeric).
            area: long tensor (batch, seq_len) of area codes.
            item: long tensor (batch, seq_len) of item codes.
        """
        e_area = self.embed_area(area)
        e_item = self.embed_item(item)
        # Conv1d expects (batch, channels, time), hence the transpose.
        x = torch.cat([num, e_area, e_item], dim=-1).transpose(1, 2)
        x = torch.relu(self.conv(x))
        # Max over the time axis leaves one value per filter.
        x = self.pool(x).squeeze(-1)
        x = torch.relu(self.fc1(x))
        return self.fc2(x)

def objective_cnn(trial):
    """Optuna objective for the CNN: returns the best validation RMSE seen in training.

    Searches the number of filters, the kernel size, the dense layer width, and Adam's
    learning rate and weight decay.
    """
    filters = trial.suggest_categorical('filters', [64, 128])
    kernel = trial.suggest_categorical('kernel', [2, 3])
    dense_units = trial.suggest_categorical('dense_units', [32, 64])
    learning_rate = trial.suggest_float('lr', 1e-4, 1e-2, log=True)
    decay = trial.suggest_float('weight_decay', 1e-5, 1e-3, log=True)

    net = CNNModel(N_AREAS, N_ITEMS, filters=filters, kernel=kernel, dense_units=dense_units)
    optimizer = optim.Adam(net.parameters(), lr=learning_rate, weight_decay=decay)

    train_batches = DataLoader(TensorDataset(*X_train_dl, y_train_t), batch_size=64, shuffle=True)
    val_batches = DataLoader(TensorDataset(*X_val_dl, y_val_t), batch_size=64)

    # train_dl returns (train_losses, val_losses, best_val_rmse); only the last is the score.
    return train_dl(net, optimizer, nn.MSELoss(), train_batches, val_batches, y_scaler)[2]

## 7. Run Optuna

In [59]:
# Start from previously saved results so that models not tuned in this run keep their
# stored parameters.
# NOTE: joblib.load unpickles the file; only load a file written by this notebook.
if os.path.exists('best_params_optuna.joblib'):
    best_params = joblib.load('best_params_optuna.joblib')
else:
    best_params = {}

if RUN_OPTUNA:
    studies = {}
    objectives = {
        'LR': objective_lr,
        'RF': objective_rf,
        'XGB': objective_xgb,
        'LSTM': objective_lstm,
        'CNN': objective_cnn
    }
    for name, run in RUN_MODELS.items():
        if run:
            print(f'--- Tuning {name} ---')
            # A seeded sampler makes the sequence of suggested hyperparameters repeatable;
            # the default sampler is unseeded and proposes different trials on every run.
            study = optuna.create_study(
                direction='minimize',
                sampler=optuna.samplers.TPESampler(seed=42)
            )
            # Tree models get 25 trials, the neural networks 15; the baseline has nothing
            # to tune, so a single trial just records its validation RMSE.
            n_trials = 25 if name in ['RF', 'XGB'] else 15
            if name == 'LR':
                n_trials = 1
            study.optimize(objectives[name], n_trials=n_trials, show_progress_bar=True)
            best_params[name] = study.best_params
            studies[name] = study
            joblib.dump(best_params, 'best_params_optuna.joblib') # Save after each study
            print(f'Best params for {name}: {study.best_params}')
else:
    print('Skipping Optuna tuning.')

[I 2025-11-17 11:18:03,058] A new study created in memory with name: no-name-33436a35-2e23-41f8-af52-f92267fd65c0


--- Tuning LR ---


Best trial: 0. Best value: 1.18775: 100%|██████████| 1/1 [00:00<00:00, 81.89it/s]
[I 2025-11-17 11:18:03,074] A new study created in memory with name: no-name-0b73434d-1b7d-48ad-909a-98177b8a79a2


[I 2025-11-17 11:18:03,069] Trial 0 finished with value: 1.1877468914521914 and parameters: {}. Best is trial 0 with value: 1.1877468914521914.
Best params for LR: {}
--- Tuning RF ---


Best trial: 0. Best value: 1.32379:   4%|▍         | 1/25 [00:03<01:18,  3.28s/it]

[I 2025-11-17 11:18:06,350] Trial 0 finished with value: 1.3237878823351072 and parameters: {'n_estimators': 127, 'max_depth': 14, 'min_samples_leaf': 4, 'max_features': 0.7953033103358108}. Best is trial 0 with value: 1.3237878823351072.


Best trial: 1. Best value: 1.31195:   8%|▊         | 2/25 [00:11<02:21,  6.16s/it]

[I 2025-11-17 11:18:14,536] Trial 1 finished with value: 1.3119478296928748 and parameters: {'n_estimators': 310, 'max_depth': 19, 'min_samples_leaf': 4, 'max_features': 0.8108813840377675}. Best is trial 1 with value: 1.3119478296928748.


Best trial: 1. Best value: 1.31195:  12%|█▏        | 3/25 [00:20<02:45,  7.51s/it]

[I 2025-11-17 11:18:23,655] Trial 2 finished with value: 1.331065843254153 and parameters: {'n_estimators': 368, 'max_depth': 24, 'min_samples_leaf': 3, 'max_features': 0.7469197316050981}. Best is trial 1 with value: 1.3119478296928748.


Best trial: 3. Best value: 1.30302:  16%|█▌        | 4/25 [00:24<02:07,  6.09s/it]

[I 2025-11-17 11:18:27,560] Trial 3 finished with value: 1.3030187249856495 and parameters: {'n_estimators': 304, 'max_depth': 9, 'min_samples_leaf': 4, 'max_features': 0.8482974246181636}. Best is trial 3 with value: 1.3030187249856495.


Best trial: 3. Best value: 1.30302:  20%|██        | 5/25 [00:26<01:29,  4.48s/it]

[I 2025-11-17 11:18:29,196] Trial 4 finished with value: 1.3054598149104342 and parameters: {'n_estimators': 88, 'max_depth': 12, 'min_samples_leaf': 6, 'max_features': 0.9528407639199832}. Best is trial 3 with value: 1.3030187249856495.


Best trial: 5. Best value: 1.30165:  24%|██▍       | 6/25 [00:31<01:31,  4.81s/it]

[I 2025-11-17 11:18:34,643] Trial 5 finished with value: 1.3016511225537162 and parameters: {'n_estimators': 248, 'max_depth': 16, 'min_samples_leaf': 6, 'max_features': 0.9445279166656848}. Best is trial 5 with value: 1.3016511225537162.


Best trial: 5. Best value: 1.30165:  28%|██▊       | 7/25 [00:34<01:18,  4.35s/it]

[I 2025-11-17 11:18:38,058] Trial 6 finished with value: 1.3645510000072751 and parameters: {'n_estimators': 192, 'max_depth': 27, 'min_samples_leaf': 2, 'max_features': 0.5052496931052741}. Best is trial 5 with value: 1.3016511225537162.


Best trial: 5. Best value: 1.30165:  32%|███▏      | 8/25 [00:40<01:18,  4.63s/it]

[I 2025-11-17 11:18:43,280] Trial 7 finished with value: 1.310126980885065 and parameters: {'n_estimators': 382, 'max_depth': 9, 'min_samples_leaf': 7, 'max_features': 0.8784818858239706}. Best is trial 5 with value: 1.3016511225537162.


Best trial: 5. Best value: 1.30165:  36%|███▌      | 9/25 [00:41<00:58,  3.68s/it]

[I 2025-11-17 11:18:44,860] Trial 8 finished with value: 1.3218879050282537 and parameters: {'n_estimators': 52, 'max_depth': 24, 'min_samples_leaf': 10, 'max_features': 0.9717924020872124}. Best is trial 5 with value: 1.3016511225537162.


Best trial: 5. Best value: 1.30165:  40%|████      | 10/25 [00:47<01:02,  4.18s/it]

[I 2025-11-17 11:18:50,161] Trial 9 finished with value: 1.3206485781738786 and parameters: {'n_estimators': 235, 'max_depth': 17, 'min_samples_leaf': 10, 'max_features': 0.8793196734562245}. Best is trial 5 with value: 1.3016511225537162.


Best trial: 5. Best value: 1.30165:  44%|████▍     | 11/25 [00:48<00:46,  3.33s/it]

[I 2025-11-17 11:18:51,577] Trial 10 finished with value: 1.3377873987280264 and parameters: {'n_estimators': 198, 'max_depth': 6, 'min_samples_leaf': 8, 'max_features': 0.6373375886399812}. Best is trial 5 with value: 1.3016511225537162.


Best trial: 5. Best value: 1.30165:  48%|████▊     | 12/25 [00:50<00:38,  2.98s/it]

[I 2025-11-17 11:18:53,760] Trial 11 finished with value: 1.3124835559123338 and parameters: {'n_estimators': 290, 'max_depth': 5, 'min_samples_leaf': 5, 'max_features': 0.8915300158486481}. Best is trial 5 with value: 1.3016511225537162.


Best trial: 5. Best value: 1.30165:  52%|█████▏    | 13/25 [00:55<00:41,  3.42s/it]

[I 2025-11-17 11:18:58,175] Trial 12 finished with value: 1.3884372251913286 and parameters: {'n_estimators': 284, 'max_depth': 12, 'min_samples_leaf': 1, 'max_features': 0.702842467821634}. Best is trial 5 with value: 1.3016511225537162.


Best trial: 5. Best value: 1.30165:  56%|█████▌    | 14/25 [01:01<00:46,  4.26s/it]

[I 2025-11-17 11:19:04,381] Trial 13 finished with value: 1.3100765057644737 and parameters: {'n_estimators': 243, 'max_depth': 20, 'min_samples_leaf': 8, 'max_features': 0.9373150464070276}. Best is trial 5 with value: 1.3016511225537162.


Best trial: 5. Best value: 1.30165:  60%|██████    | 15/25 [01:07<00:49,  4.96s/it]

[I 2025-11-17 11:19:10,958] Trial 14 finished with value: 1.3063150974965747 and parameters: {'n_estimators': 337, 'max_depth': 15, 'min_samples_leaf': 6, 'max_features': 0.8202679219356137}. Best is trial 5 with value: 1.3016511225537162.


Best trial: 5. Best value: 1.30165:  64%|██████▍   | 16/25 [01:09<00:35,  3.97s/it]

[I 2025-11-17 11:19:12,625] Trial 15 finished with value: 1.3125565110853123 and parameters: {'n_estimators': 157, 'max_depth': 9, 'min_samples_leaf': 4, 'max_features': 0.6821652795957311}. Best is trial 5 with value: 1.3016511225537162.


Best trial: 16. Best value: 1.29347:  68%|██████▊   | 17/25 [01:13<00:31,  3.99s/it]

[I 2025-11-17 11:19:16,656] Trial 16 finished with value: 1.2934699421966933 and parameters: {'n_estimators': 269, 'max_depth': 9, 'min_samples_leaf': 5, 'max_features': 0.9821874749184043}. Best is trial 16 with value: 1.2934699421966933.


Best trial: 16. Best value: 1.29347:  72%|███████▏  | 18/25 [01:20<00:34,  4.99s/it]

[I 2025-11-17 11:19:23,971] Trial 17 finished with value: 1.301562546506108 and parameters: {'n_estimators': 256, 'max_depth': 22, 'min_samples_leaf': 7, 'max_features': 0.9967517590245661}. Best is trial 16 with value: 1.2934699421966933.


Best trial: 16. Best value: 1.29347:  76%|███████▌  | 19/25 [01:26<00:31,  5.27s/it]

[I 2025-11-17 11:19:29,908] Trial 18 finished with value: 1.3071013519950847 and parameters: {'n_estimators': 203, 'max_depth': 29, 'min_samples_leaf': 8, 'max_features': 0.9984857597110453}. Best is trial 16 with value: 1.2934699421966933.


Best trial: 16. Best value: 1.29347:  80%|████████  | 20/25 [01:30<00:24,  4.89s/it]

[I 2025-11-17 11:19:33,920] Trial 19 finished with value: 1.3419417160469826 and parameters: {'n_estimators': 265, 'max_depth': 22, 'min_samples_leaf': 7, 'max_features': 0.5624964194242656}. Best is trial 16 with value: 1.2934699421966933.


Best trial: 16. Best value: 1.29347:  84%|████████▍ | 21/25 [01:39<00:24,  6.07s/it]

[I 2025-11-17 11:19:42,745] Trial 20 finished with value: 1.3151756854201597 and parameters: {'n_estimators': 327, 'max_depth': 26, 'min_samples_leaf': 9, 'max_features': 0.9967824067205963}. Best is trial 16 with value: 1.2934699421966933.


Best trial: 16. Best value: 1.29347:  88%|████████▊ | 22/25 [01:45<00:18,  6.12s/it]

[I 2025-11-17 11:19:48,964] Trial 21 finished with value: 1.3007729535715478 and parameters: {'n_estimators': 257, 'max_depth': 16, 'min_samples_leaf': 5, 'max_features': 0.9204851554646619}. Best is trial 16 with value: 1.2934699421966933.


Best trial: 16. Best value: 1.29347:  92%|█████████▏| 23/25 [01:53<00:13,  6.56s/it]

[I 2025-11-17 11:19:56,571] Trial 22 finished with value: 1.3011374770221806 and parameters: {'n_estimators': 269, 'max_depth': 20, 'min_samples_leaf': 5, 'max_features': 0.9163543099170801}. Best is trial 16 with value: 1.2934699421966933.


Best trial: 16. Best value: 1.29347:  96%|█████████▌| 24/25 [01:59<00:06,  6.44s/it]

[I 2025-11-17 11:20:02,740] Trial 23 finished with value: 1.304328721849824 and parameters: {'n_estimators': 214, 'max_depth': 19, 'min_samples_leaf': 5, 'max_features': 0.9066785877575121}. Best is trial 16 with value: 1.2934699421966933.


Best trial: 16. Best value: 1.29347: 100%|██████████| 25/25 [02:06<00:00,  5.08s/it]
[I 2025-11-17 11:20:10,036] A new study created in memory with name: no-name-58a442bb-1ae7-453d-a43e-a86bd3036a8f


[I 2025-11-17 11:20:10,030] Trial 24 finished with value: 1.316771018535357 and parameters: {'n_estimators': 349, 'max_depth': 13, 'min_samples_leaf': 3, 'max_features': 0.924170223976238}. Best is trial 16 with value: 1.2934699421966933.
Best params for RF: {'n_estimators': 269, 'max_depth': 9, 'min_samples_leaf': 5, 'max_features': 0.9821874749184043}
--- Tuning XGB ---


Best trial: 0. Best value: 1.41262:   4%|▍         | 1/25 [00:01<00:37,  1.55s/it]

[I 2025-11-17 11:20:11,584] Trial 0 finished with value: 1.4126246074782591 and parameters: {'n_estimators': 1183, 'max_depth': 6, 'learning_rate': 0.005666911064757407, 'subsample': 0.9459901659419461, 'colsample_bytree': 0.9239210663423749, 'gamma': 1.3773467133132304}. Best is trial 0 with value: 1.4126246074782591.


Best trial: 1. Best value: 1.38685:   8%|▊         | 2/25 [00:03<00:37,  1.61s/it]

[I 2025-11-17 11:20:13,245] Trial 1 finished with value: 1.3868454997152093 and parameters: {'n_estimators': 1470, 'max_depth': 5, 'learning_rate': 0.002707125159094296, 'subsample': 0.6044122947953006, 'colsample_bytree': 0.9567752639488263, 'gamma': 3.4441813507200854}. Best is trial 1 with value: 1.3868454997152093.


Best trial: 1. Best value: 1.38685:  12%|█▏        | 3/25 [00:03<00:21,  1.02it/s]

[I 2025-11-17 11:20:13,462] Trial 2 finished with value: 1.4023572376138884 and parameters: {'n_estimators': 1563, 'max_depth': 4, 'learning_rate': 0.036093456857605406, 'subsample': 0.7867477713446525, 'colsample_bytree': 0.7144268922356528, 'gamma': 2.0000163355044016}. Best is trial 1 with value: 1.3868454997152093.


Best trial: 1. Best value: 1.38685:  16%|█▌        | 4/25 [00:04<00:20,  1.05it/s]

[I 2025-11-17 11:20:14,382] Trial 3 finished with value: 1.4157671826266554 and parameters: {'n_estimators': 1651, 'max_depth': 7, 'learning_rate': 0.0105517031610205, 'subsample': 0.8856288469750362, 'colsample_bytree': 0.7315765956030157, 'gamma': 1.7506565239211453}. Best is trial 1 with value: 1.3868454997152093.


Best trial: 1. Best value: 1.38685:  20%|██        | 5/25 [00:08<00:45,  2.26s/it]

[I 2025-11-17 11:20:18,969] Trial 4 finished with value: 1.4491816977538434 and parameters: {'n_estimators': 1993, 'max_depth': 10, 'learning_rate': 0.003716429314394576, 'subsample': 0.9780690687570613, 'colsample_bytree': 0.6087223451585403, 'gamma': 0.9970079871990262}. Best is trial 1 with value: 1.3868454997152093.


Best trial: 1. Best value: 1.38685:  24%|██▍       | 6/25 [00:09<00:31,  1.64s/it]

[I 2025-11-17 11:20:19,390] Trial 5 finished with value: 1.427800445770593 and parameters: {'n_estimators': 1310, 'max_depth': 8, 'learning_rate': 0.030816528764209934, 'subsample': 0.9941470574410317, 'colsample_bytree': 0.8453576532806993, 'gamma': 2.0710611831584185}. Best is trial 1 with value: 1.3868454997152093.


Best trial: 1. Best value: 1.38685:  28%|██▊       | 7/25 [00:11<00:31,  1.72s/it]

[I 2025-11-17 11:20:21,291] Trial 6 finished with value: 1.4371831899871672 and parameters: {'n_estimators': 802, 'max_depth': 11, 'learning_rate': 0.0054219048848085285, 'subsample': 0.6719265894483845, 'colsample_bytree': 0.6685351433293973, 'gamma': 3.0622025120522585}. Best is trial 1 with value: 1.3868454997152093.


Best trial: 1. Best value: 1.38685:  32%|███▏      | 8/25 [00:13<00:31,  1.85s/it]

[I 2025-11-17 11:20:23,427] Trial 7 finished with value: 1.4568276318181912 and parameters: {'n_estimators': 629, 'max_depth': 11, 'learning_rate': 0.0035139123472721486, 'subsample': 0.8471958632737626, 'colsample_bytree': 0.8624707188016052, 'gamma': 3.583781417729168}. Best is trial 1 with value: 1.3868454997152093.


Best trial: 1. Best value: 1.38685:  36%|███▌      | 9/25 [00:19<00:48,  3.04s/it]

[I 2025-11-17 11:20:29,063] Trial 8 finished with value: 1.4285482188646528 and parameters: {'n_estimators': 1752, 'max_depth': 11, 'learning_rate': 0.0018027676099492535, 'subsample': 0.7404489446073645, 'colsample_bytree': 0.9297770835522645, 'gamma': 2.782352043059718}. Best is trial 1 with value: 1.3868454997152093.


Best trial: 1. Best value: 1.38685:  40%|████      | 10/25 [00:19<00:33,  2.24s/it]

[I 2025-11-17 11:20:29,512] Trial 9 finished with value: 1.4092316439445032 and parameters: {'n_estimators': 1314, 'max_depth': 8, 'learning_rate': 0.027208552063069647, 'subsample': 0.64349877875749, 'colsample_bytree': 0.816397046896, 'gamma': 1.777635935088293}. Best is trial 1 with value: 1.3868454997152093.


Best trial: 1. Best value: 1.38685:  44%|████▍     | 11/25 [00:20<00:25,  1.85s/it]

[I 2025-11-17 11:20:30,491] Trial 10 finished with value: 1.5576634066793031 and parameters: {'n_estimators': 992, 'max_depth': 3, 'learning_rate': 0.0010771454513609214, 'subsample': 0.6175159793692363, 'colsample_bytree': 0.998775636099762, 'gamma': 4.9180388472986065}. Best is trial 1 with value: 1.3868454997152093.


Best trial: 1. Best value: 1.38685:  48%|████▊     | 12/25 [00:20<00:18,  1.42s/it]

[I 2025-11-17 11:20:30,928] Trial 11 finished with value: 1.3961835601203718 and parameters: {'n_estimators': 1531, 'max_depth': 4, 'learning_rate': 0.014022926203464679, 'subsample': 0.754791209073915, 'colsample_bytree': 0.7507631164358838, 'gamma': 4.039078098999485}. Best is trial 1 with value: 1.3868454997152093.


Best trial: 1. Best value: 1.38685:  52%|█████▏    | 13/25 [00:21<00:13,  1.13s/it]

[I 2025-11-17 11:20:31,397] Trial 12 finished with value: 1.3975281144410947 and parameters: {'n_estimators': 1462, 'max_depth': 5, 'learning_rate': 0.011535876767580585, 'subsample': 0.7327618468107024, 'colsample_bytree': 0.7473381761053016, 'gamma': 4.177755999435998}. Best is trial 1 with value: 1.3868454997152093.


Best trial: 1. Best value: 1.38685:  56%|█████▌    | 14/25 [00:21<00:09,  1.13it/s]

[I 2025-11-17 11:20:31,716] Trial 13 finished with value: 1.3921960974756782 and parameters: {'n_estimators': 1863, 'max_depth': 3, 'learning_rate': 0.01638411028569211, 'subsample': 0.7003699945976607, 'colsample_bytree': 0.7679598548569144, 'gamma': 3.9283105350157372}. Best is trial 1 with value: 1.3868454997152093.


Best trial: 1. Best value: 1.38685:  60%|██████    | 15/25 [00:23<00:10,  1.09s/it]

[I 2025-11-17 11:20:33,259] Trial 14 finished with value: 1.3893373241002696 and parameters: {'n_estimators': 1998, 'max_depth': 3, 'learning_rate': 0.0020039724904930304, 'subsample': 0.6858698302077438, 'colsample_bytree': 0.8890993256164056, 'gamma': 4.965347167030881}. Best is trial 1 with value: 1.3868454997152093.


Best trial: 1. Best value: 1.38685:  64%|██████▍   | 16/25 [00:25<00:12,  1.40s/it]

[I 2025-11-17 11:20:35,405] Trial 15 finished with value: 1.3902088162770052 and parameters: {'n_estimators': 1899, 'max_depth': 5, 'learning_rate': 0.001965249175873605, 'subsample': 0.6105449510084581, 'colsample_bytree': 0.9967880348968271, 'gamma': 0.06837136643057073}. Best is trial 1 with value: 1.3868454997152093.


Best trial: 1. Best value: 1.38685:  68%|██████▊   | 17/25 [00:27<00:11,  1.49s/it]

[I 2025-11-17 11:20:37,103] Trial 16 finished with value: 1.4228331253134257 and parameters: {'n_estimators': 1118, 'max_depth': 6, 'learning_rate': 0.0021099018241667504, 'subsample': 0.6690915407092289, 'colsample_bytree': 0.9015662467033445, 'gamma': 4.9813205974168655}. Best is trial 1 with value: 1.3868454997152093.


Best trial: 1. Best value: 1.38685:  72%|███████▏  | 18/25 [00:28<00:10,  1.55s/it]

[I 2025-11-17 11:20:38,780] Trial 17 finished with value: 1.4546827141691465 and parameters: {'n_estimators': 1781, 'max_depth': 3, 'learning_rate': 0.0011107030614044707, 'subsample': 0.6063489811746231, 'colsample_bytree': 0.9504130467436934, 'gamma': 3.308412499194212}. Best is trial 1 with value: 1.3868454997152093.


Best trial: 1. Best value: 1.38685:  76%|███████▌  | 19/25 [00:30<00:09,  1.62s/it]

[I 2025-11-17 11:20:40,552] Trial 18 finished with value: 1.4133962805399398 and parameters: {'n_estimators': 931, 'max_depth': 5, 'learning_rate': 0.0029481801168205492, 'subsample': 0.6839400500890325, 'colsample_bytree': 0.8700443632732279, 'gamma': 4.481890448915032}. Best is trial 1 with value: 1.3868454997152093.


Best trial: 1. Best value: 1.38685:  80%|████████  | 20/25 [00:32<00:08,  1.63s/it]

[I 2025-11-17 11:20:42,232] Trial 19 finished with value: 1.4173545079806171 and parameters: {'n_estimators': 1445, 'max_depth': 9, 'learning_rate': 0.007182240317672763, 'subsample': 0.8347676015777074, 'colsample_bytree': 0.950166249554744, 'gamma': 2.5408223001522905}. Best is trial 1 with value: 1.3868454997152093.


Best trial: 1. Best value: 1.38685:  84%|████████▍ | 21/25 [00:33<00:06,  1.62s/it]

[I 2025-11-17 11:20:43,830] Trial 20 finished with value: 1.4180645623335095 and parameters: {'n_estimators': 1690, 'max_depth': 4, 'learning_rate': 0.0014967663669485402, 'subsample': 0.6462584375926912, 'colsample_bytree': 0.81756676361547, 'gamma': 4.521961240668922}. Best is trial 1 with value: 1.3868454997152093.


Best trial: 1. Best value: 1.38685:  88%|████████▊ | 22/25 [00:35<00:04,  1.66s/it]

[I 2025-11-17 11:20:45,559] Trial 21 finished with value: 1.3927641216310311 and parameters: {'n_estimators': 1992, 'max_depth': 5, 'learning_rate': 0.002271243288360474, 'subsample': 0.6017827949042933, 'colsample_bytree': 0.9980391507496155, 'gamma': 0.1723086153460768}. Best is trial 1 with value: 1.3868454997152093.


Best trial: 1. Best value: 1.38685:  92%|█████████▏| 23/25 [00:37<00:03,  1.78s/it]

[I 2025-11-17 11:20:47,633] Trial 22 finished with value: 1.3969709259456278 and parameters: {'n_estimators': 1901, 'max_depth': 6, 'learning_rate': 0.002763179037318324, 'subsample': 0.6341398886790031, 'colsample_bytree': 0.9669928203305969, 'gamma': 0.31307210461160395}. Best is trial 1 with value: 1.3868454997152093.


Best trial: 1. Best value: 1.38685:  96%|█████████▌| 24/25 [00:39<00:01,  1.78s/it]

[I 2025-11-17 11:20:49,417] Trial 23 finished with value: 1.4121103725771291 and parameters: {'n_estimators': 1850, 'max_depth': 4, 'learning_rate': 0.0014510626593219032, 'subsample': 0.7237863971335747, 'colsample_bytree': 0.906945434946494, 'gamma': 0.7554964139881655}. Best is trial 1 with value: 1.3868454997152093.


Best trial: 1. Best value: 1.38685: 100%|██████████| 25/25 [00:41<00:00,  1.65s/it]
[I 2025-11-17 11:20:51,228] A new study created in memory with name: no-name-44c492fc-808e-41f8-8678-15dcda19caa2


[I 2025-11-17 11:20:51,224] Trial 24 finished with value: 1.4090271940050099 and parameters: {'n_estimators': 1655, 'max_depth': 7, 'learning_rate': 0.004526335425755974, 'subsample': 0.7001556496588544, 'colsample_bytree': 0.9748693450839212, 'gamma': 3.5585819531079275}. Best is trial 1 with value: 1.3868454997152093.
Best params for XGB: {'n_estimators': 1470, 'max_depth': 5, 'learning_rate': 0.002707125159094296, 'subsample': 0.6044122947953006, 'colsample_bytree': 0.9567752639488263, 'gamma': 3.4441813507200854}
--- Tuning LSTM ---


Best trial: 0. Best value: 1.67768:   7%|▋         | 1/15 [00:41<09:42, 41.61s/it]

[I 2025-11-17 11:21:32,836] Trial 0 finished with value: 1.6776844945382643 and parameters: {'lstm_units': 64, 'dense_units': 32, 'dropout': 0.17575043254224332, 'lr': 0.0004294105118937871, 'weight_decay': 1.0627320213368103e-05}. Best is trial 0 with value: 1.6776844945382643.


Best trial: 0. Best value: 1.67768:  13%|█▎        | 2/15 [01:33<10:17, 47.49s/it]

[I 2025-11-17 11:22:24,449] Trial 1 finished with value: 1.695263347704261 and parameters: {'lstm_units': 128, 'dense_units': 32, 'dropout': 0.2926060942625537, 'lr': 0.002145708920990918, 'weight_decay': 0.0001820490258352781}. Best is trial 0 with value: 1.6776844945382643.


Best trial: 0. Best value: 1.67768:  20%|██        | 3/15 [01:48<06:32, 32.68s/it]

[I 2025-11-17 11:22:39,505] Trial 2 finished with value: 1.7454551536024363 and parameters: {'lstm_units': 128, 'dense_units': 32, 'dropout': 0.3765145253151616, 'lr': 0.00022705571314674908, 'weight_decay': 0.00031611015952137596}. Best is trial 0 with value: 1.6776844945382643.


Best trial: 0. Best value: 1.67768:  27%|██▋       | 4/15 [02:13<05:28, 29.84s/it]

[I 2025-11-17 11:23:04,997] Trial 3 finished with value: 1.7451139769180386 and parameters: {'lstm_units': 128, 'dense_units': 32, 'dropout': 0.30455953626877763, 'lr': 0.005777904411815902, 'weight_decay': 4.6629260568758854e-05}. Best is trial 0 with value: 1.6776844945382643.


Best trial: 0. Best value: 1.67768:  33%|███▎      | 5/15 [02:31<04:13, 25.32s/it]

[I 2025-11-17 11:23:22,311] Trial 4 finished with value: 1.734802262199146 and parameters: {'lstm_units': 64, 'dense_units': 64, 'dropout': 0.3689918542050725, 'lr': 0.0009678290962017685, 'weight_decay': 1.0843575272177972e-05}. Best is trial 0 with value: 1.6776844945382643.


Best trial: 0. Best value: 1.67768:  40%|████      | 6/15 [02:54<03:42, 24.69s/it]

[I 2025-11-17 11:23:45,764] Trial 5 finished with value: 1.729663187767574 and parameters: {'lstm_units': 64, 'dense_units': 64, 'dropout': 0.27304496242944193, 'lr': 0.004252891943366674, 'weight_decay': 2.7089896216735266e-05}. Best is trial 0 with value: 1.6776844945382643.


Best trial: 6. Best value: 1.67179:  47%|████▋     | 7/15 [03:56<04:54, 36.79s/it]

[I 2025-11-17 11:24:47,478] Trial 6 finished with value: 1.671792001535755 and parameters: {'lstm_units': 128, 'dense_units': 32, 'dropout': 0.19542703532748512, 'lr': 0.0004437280938446898, 'weight_decay': 0.00026714365996161467}. Best is trial 6 with value: 1.671792001535755.


Best trial: 6. Best value: 1.67179:  53%|█████▎    | 8/15 [04:49<04:54, 42.03s/it]

[I 2025-11-17 11:25:40,733] Trial 7 finished with value: 1.6875423143521118 and parameters: {'lstm_units': 128, 'dense_units': 64, 'dropout': 0.3018037322036329, 'lr': 0.0016799381451982865, 'weight_decay': 0.00019228419512275644}. Best is trial 6 with value: 1.671792001535755.


Best trial: 6. Best value: 1.67179:  60%|██████    | 9/15 [05:46<04:40, 46.68s/it]

[I 2025-11-17 11:26:37,638] Trial 8 finished with value: 1.7448161185150168 and parameters: {'lstm_units': 64, 'dense_units': 64, 'dropout': 0.23430304233182203, 'lr': 0.00010223579819587528, 'weight_decay': 5.092144622370558e-05}. Best is trial 6 with value: 1.671792001535755.


Best trial: 6. Best value: 1.67179:  67%|██████▋   | 10/15 [05:57<02:58, 35.77s/it]

[I 2025-11-17 11:26:48,970] Trial 9 finished with value: 1.7449112884357467 and parameters: {'lstm_units': 64, 'dense_units': 32, 'dropout': 0.1783151066801978, 'lr': 0.002443251353443344, 'weight_decay': 0.0008442965178437776}. Best is trial 6 with value: 1.671792001535755.


Best trial: 6. Best value: 1.67179:  73%|███████▎  | 11/15 [06:11<01:56, 29.11s/it]

[I 2025-11-17 11:27:02,985] Trial 10 finished with value: 1.7455796545294078 and parameters: {'lstm_units': 128, 'dense_units': 32, 'dropout': 0.11669585052562781, 'lr': 0.0005234466510409959, 'weight_decay': 0.0009263224793399571}. Best is trial 6 with value: 1.671792001535755.


Best trial: 6. Best value: 1.67179:  80%|████████  | 12/15 [06:23<01:11, 23.76s/it]

[I 2025-11-17 11:27:14,505] Trial 11 finished with value: 1.7459315965019067 and parameters: {'lstm_units': 64, 'dense_units': 32, 'dropout': 0.18017483183730515, 'lr': 0.00043806489824542986, 'weight_decay': 1.0260282543590158e-05}. Best is trial 6 with value: 1.671792001535755.


Best trial: 6. Best value: 1.67179:  87%|████████▋ | 13/15 [06:50<00:49, 24.68s/it]

[I 2025-11-17 11:27:41,311] Trial 12 finished with value: 1.7433477395789316 and parameters: {'lstm_units': 64, 'dense_units': 32, 'dropout': 0.18738416852391837, 'lr': 0.00027781085041593507, 'weight_decay': 0.0003503566614732333}. Best is trial 6 with value: 1.671792001535755.


Best trial: 13. Best value: 1.63478:  93%|█████████▎| 14/15 [07:49<00:35, 35.27s/it]

[I 2025-11-17 11:28:41,057] Trial 13 finished with value: 1.6347789695495891 and parameters: {'lstm_units': 128, 'dense_units': 32, 'dropout': 0.10229058258091793, 'lr': 0.0006881935229186687, 'weight_decay': 7.466224843062375e-05}. Best is trial 13 with value: 1.6347789695495891.


Best trial: 13. Best value: 1.63478: 100%|██████████| 15/15 [08:42<00:00, 34.81s/it]
[I 2025-11-17 11:29:33,351] A new study created in memory with name: no-name-e35d5ec9-54ae-465a-af88-6c041553f975


[I 2025-11-17 11:29:33,345] Trial 14 finished with value: 1.6888845909353971 and parameters: {'lstm_units': 128, 'dense_units': 32, 'dropout': 0.11227145703485898, 'lr': 0.0008625518636271353, 'weight_decay': 9.739400462859756e-05}. Best is trial 13 with value: 1.6347789695495891.
Best params for LSTM: {'lstm_units': 128, 'dense_units': 32, 'dropout': 0.10229058258091793, 'lr': 0.0006881935229186687, 'weight_decay': 7.466224843062375e-05}
--- Tuning CNN ---


  0%|          | 0/15 [00:19<?, ?it/s]


[W 2025-11-17 11:29:52,649] Trial 0 failed with parameters: {'filters': 128, 'kernel': 3, 'dense_units': 32, 'lr': 0.0007431836616249293, 'weight_decay': 0.0006689897869381488} because of the following error: KeyboardInterrupt().
Traceback (most recent call last):
  File "C:\Users\PavinP\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.12_qbz5n2kfra8p0\LocalCache\local-packages\Python312\site-packages\optuna\study\_optimize.py", line 205, in _run_trial
    value_or_values = func(trial)
                      ^^^^^^^^^^^
  File "C:\Users\PavinP\AppData\Local\Temp\ipykernel_17128\1364281096.py", line 157, in objective_cnn
    _, _, best_val_rmse = train_dl(model, opt, nn.MSELoss(), train_loader, val_loader, y_scaler)
                          ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\PavinP\AppData\Local\Temp\ipykernel_17128\1364281096.py", line 73, in train_dl
    for x1, x2, x3, y in train_loader:
                         ^^^^^^^^^^^

KeyboardInterrupt: 

## 7a. Visualize Optuna Results

In [None]:
from optuna.visualization import plot_optimization_history, plot_parallel_coordinate, plot_slice, plot_param_importances

# One entry per Optuna figure:
#   (plot function, title suffix, file-name suffix, best_effort)
# When best_effort is True a ValueError/RuntimeError raised while building,
# saving or showing the figure is reported and skipped; otherwise it propagates.
optuna_plot_specs = [
    (plot_optimization_history, 'Optimization History', 'history', False),
    (plot_parallel_coordinate, 'Parallel Coordinate', 'parallel_coordinate', False),
    (plot_slice, 'Slice Plot', 'slice', False),
    (plot_param_importances, 'Parameter Importance', 'param_importance', True),
]

if RUN_OPTUNA and 'studies' in locals():
    for name, study in studies.items():
        # 'LR' is always skipped, as is any study that recorded no trials.
        if name != 'LR' and study.trials:
            print(f'--- Visualizing Optuna results for {name} ---')

            for plot_fn, title_suffix, file_suffix, best_effort in optuna_plot_specs:
                try:
                    fig = plot_fn(study)
                    fig.update_layout(title=f'{name} {title_suffix}')
                    # Each figure is written next to the notebook as a PNG and then displayed.
                    fig.write_image(f'optuna_{name}_{file_suffix}.png')
                    fig.show()
                except (ValueError, RuntimeError) as e:
                    if not best_effort:
                        raise
                    print(f'Could not plot parameter importance for {name}: {e}')

## 8. Final Training (Corrected)

In [None]:
# Final training: every enabled model is refit on train+val using the tuned
# hyperparameters when available, otherwise the per-model defaults below.

# Combine train+val for final ML model training
X_train_full_ml = pd.concat([X_train_ml, X_val_ml])
y_train_full_ml = pd.concat([y_train_ml, y_val_ml])

# Tuned hyperparameters are optional: fall back to an empty mapping (so the
# per-model defaults apply) when no `best_params` exists in this session.
tuned_params = best_params if 'best_params' in globals() else {}

models = {}
print("--- Final Model Training ---")

if RUN_MODELS['LR']:
    print("Training Linear Regression...")
    model_lr = LinearRegression()
    model_lr.fit(X_train_full_ml, y_train_full_ml)
    models['LR'] = model_lr
    joblib.dump(model_lr, 'model_lr.joblib')

if RUN_MODELS['RF']:
    print("Training Random Forest...")
    # Use best params from Optuna, or default if not run
    rf_params = tuned_params.get('RF', {'n_estimators': 100, 'max_depth': 10})
    model_rf = RandomForestRegressor(random_state=42, n_jobs=-1, **rf_params)
    model_rf.fit(X_train_full_ml, y_train_full_ml)
    models['RF'] = model_rf
    joblib.dump(model_rf, 'model_rf.joblib')

if RUN_MODELS['XGB']:
    print("Training XGBoost...")
    xgb_params = tuned_params.get('XGB', {'n_estimators': 200, 'learning_rate': 0.05})
    model_xgb = xgb.XGBRegressor(random_state=42, **xgb_params)
    model_xgb.fit(X_train_full_ml, y_train_full_ml)
    models['XGB'] = model_xgb
    joblib.dump(model_xgb, 'model_xgb.joblib')

# Combine train+val for final DL model training
X_train_full_seq = np.concatenate([X_train_seq, X_val_seq])
y_train_full_seq = np.concatenate([y_train_seq, y_val_seq])
X_train_full_dl = split_dl(X_train_full_seq)
y_train_full_t = torch.tensor(y_train_full_seq, dtype=torch.float32).unsqueeze(1)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
train_full_ds = TensorDataset(*[x.to(device) for x in X_train_full_dl], y_train_full_t.to(device))
test_ds = TensorDataset(*[x.to(device) for x in X_test_dl], y_test_t.to(device))
train_loader = DataLoader(train_full_ds, batch_size=64, shuffle=True)
test_loader = DataLoader(test_ds, batch_size=64)

# NOTE(review): `test_loader` is passed to train_dl in the slot the tuning code
# uses for the validation loader. If train_dl early-stops or selects weights on
# that loader, the test metrics reported later are optimistically biased -
# confirm against train_dl's `is_final` handling.

if RUN_MODELS['LSTM']:
    print("Training LSTM...")
    # Copy before popping: popping from the dict stored inside `best_params`
    # would strip 'lr'/'weight_decay' from it, so re-running this cell would
    # silently train with the fallback optimizer settings instead of the tuned ones.
    lstm_params = dict(tuned_params.get('LSTM', {'lstm_units': 64, 'dense_units': 32, 'dropout': 0.2}))
    lr = lstm_params.pop('lr', 0.001)
    weight_decay = lstm_params.pop('weight_decay', 1e-4)
    # The remaining keys are forwarded to the model constructor.
    model_lstm = LSTMModel(N_AREAS, N_ITEMS, **lstm_params).to(device)
    opt_lstm = optim.Adam(model_lstm.parameters(), lr=lr, weight_decay=weight_decay)
    train_losses_lstm, val_losses_lstm, _ = train_dl(model_lstm, opt_lstm, nn.MSELoss(), train_loader, test_loader, y_scaler, epochs=150, patience=15, is_final=True)
    models['LSTM'] = model_lstm

if RUN_MODELS['CNN']:
    print("Training CNN...")
    # Same copy-before-pop rule as for the LSTM parameters above.
    cnn_params = dict(tuned_params.get('CNN', {'filters': 64, 'kernel': 2, 'dense_units': 32}))
    lr = cnn_params.pop('lr', 0.001)
    weight_decay = cnn_params.pop('weight_decay', 1e-4)
    model_cnn = CNNModel(N_AREAS, N_ITEMS, **cnn_params).to(device)
    opt_cnn = optim.Adam(model_cnn.parameters(), lr=lr, weight_decay=weight_decay)
    train_losses_cnn, val_losses_cnn, _ = train_dl(model_cnn, opt_cnn, nn.MSELoss(), train_loader, test_loader, y_scaler, epochs=150, patience=15, is_final=True)
    models['CNN'] = model_cnn

## 9. Plot DL Loss Curves

In [None]:
# Plot the training curves of whichever DL models were trained. Each model is
# enabled by its own RUN_MODELS flag, so a single enabled model still gets its
# curve instead of the whole figure being skipped.
loss_histories = {}
if RUN_MODELS['LSTM']:
    loss_histories['LSTM'] = (train_losses_lstm, val_losses_lstm)
if RUN_MODELS['CNN']:
    loss_histories['CNN'] = (train_losses_cnn, val_losses_cnn)

if loss_histories:
    n_panels = len(loss_histories)
    # 10 inches per panel reproduces the (20, 7) two-panel layout;
    # squeeze=False keeps the axes array 2-D even when only one panel is drawn.
    fig, axes = plt.subplots(1, n_panels, figsize=(10 * n_panels, 7), squeeze=False)
    for ax, (model_name, (train_curve, val_curve)) in zip(axes[0], loss_histories.items()):
        ax.plot(train_curve, label='Train Loss')
        # The second curve comes from the loader passed as validation during
        # final training, which is the test loader - hence the label.
        ax.plot(val_curve, label='Validation (Test) Loss')
        ax.set_title(f'{model_name} Model Loss', fontsize=16)
        ax.set_xlabel('Epoch')
        ax.set_ylabel('Mean Squared Error')
        ax.legend()
    plt.suptitle('Deep Learning Training Curves', fontsize=20)
    plt.savefig("loss_curves.png")
    plt.show()

## 10. Final Evaluation (Corrected)

In [None]:
def reconstruct_yield(y_pred_scaled, df_ref, y_scaler_obj):
    """Map scaled, detrended predictions back onto the original yield scale.

    Args:
        y_pred_scaled: Array of model outputs; a 1-D array is promoted to a
            single column before being handed to the scaler.
        df_ref: Frame supplying the 'yield_trend' column that is added back,
            row by row, to the un-scaled predictions.
        y_scaler_obj: Object exposing ``inverse_transform`` for the target.

    Returns:
        Flat 1-D array of predictions with the trend restored.
    """
    # The scaler is given a 2-D column; flat vectors are reshaped first.
    scaled_col = y_pred_scaled.reshape(-1, 1) if y_pred_scaled.ndim == 1 else y_pred_scaled
    detrended_col = y_scaler_obj.inverse_transform(scaled_col)

    # Undo the detrending by adding the stored trend as a matching column.
    trend_col = df_ref['yield_trend'].values.reshape(-1, 1)
    return (detrended_col + trend_col).flatten()

# --- Evaluation ---
# Score every trained model on the test set in original yield units and
# keep the reconstructed predictions for the reporting cells that follow.
results = []
y_preds_original = {}

print("\n--- Final Performance (Test Set) ---")

for name, model in models.items():
    if name in ('LR', 'RF', 'XGB'):
        # Tabular models predict from the ML test matrix; test_df supplies
        # the trend and the ground truth for those rows.
        pred_orig = reconstruct_yield(model.predict(X_test_ml), test_df, y_scaler)
        y_true_orig = test_df[TARGET].values
        y_preds_original[name] = pred_orig

    elif name in ('LSTM', 'CNN'):
        # DL models predict from the test tensors with gradients disabled;
        # test_df_dl_seq_ref supplies the trend and ground truth for them.
        model.eval()
        with torch.no_grad():
            dl_inputs = [x.to(device) for x in X_test_dl]
            preds_scaled = model(*dl_inputs).cpu().numpy()
        pred_orig = reconstruct_yield(preds_scaled, test_df_dl_seq_ref, y_scaler)
        y_true_orig = test_df_dl_seq_ref[TARGET].values
        y_preds_original[name] = pred_orig

    # One metrics row per model, all computed on the original yield scale.
    results.append({
        'Model': name,
        'MAE': mean_absolute_error(y_true_orig, pred_orig),
        'RMSE': np.sqrt(mean_squared_error(y_true_orig, pred_orig)),
        'MAPE (%)': mape(y_true_orig, pred_orig),
        'RMSPE (%)': rmspe(y_true_orig, pred_orig),
        'R²': r2_score(y_true_orig, pred_orig),
    })

# Lowest RMSE first, so the first index entry is the best model.
results_df = pd.DataFrame(results)
results_df = results_df.set_index('Model')
results_df = results_df.sort_values('RMSE')
print(results_df.round(2))
results_df.to_csv("final_model_performance.csv")

## 11. Plot Model Performances

In [None]:
# Side-by-side bar charts, one panel per metric, comparing all evaluated models.
fig, axs = plt.subplots(1, 4, figsize=(24, 6))
metrics_table = results_df.reset_index()  # expose 'Model' as a regular column

metric_panels = [
    ('RMSE', 'RMSE Comparison'),
    ('MAE', 'MAE Comparison'),
    ('MAPE (%)', 'MAPE Comparison'),
    ('R²', 'R² Comparison'),
]
for ax, (metric_col, panel_title) in zip(axs, metric_panels):
    sns.barplot(data=metrics_table, x='Model', y=metric_col, ax=ax)
    ax.set_title(panel_title)

plt.tight_layout()
plt.savefig("model_performance_comparison.png")
plt.show()

## 12. Per-Crop Reporting (Best Model)

In [None]:
# results_df is sorted by RMSE, so its first index entry is the best model.
best_model_name = results_df.index[0]
print(f"Per-crop report for best model: {best_model_name}")

# Use the reference frame whose rows line up with the best model's predictions:
# test_df for the tabular models, test_df_dl_seq_ref for the DL models.
reporting_df = test_df if best_model_name in ('LR', 'RF', 'XGB') else test_df_dl_seq_ref
y_true_original = reporting_df[TARGET].values
items = reporting_df['Item'].values
best_preds = y_preds_original[best_model_name]

crop_results = []
for crop in np.unique(items):
    mask = items == crop
    true, pred = y_true_original[mask], best_preds[mask]
    # Skip labels that select no rows.
    if len(true) == 0:
        continue
    crop_results.append({
        'Crop': crop,
        'RMSPE (%)': rmspe(true, pred),
        'MAPE (%)': mape(true, pred),
        'RMSE': np.sqrt(mean_squared_error(true, pred)),
        'R²': r2_score(true, pred)
    })

# Lowest RMSPE (best-predicted crops) first.
crop_df = pd.DataFrame(crop_results)
crop_df = crop_df.sort_values('RMSPE (%)')
print(crop_df.round(2))
crop_df.to_csv('per_crop_performance.csv', index=False)

## 13. SHAP Analysis (If Tree Model)

In [None]:
# Explain the best model with SHAP, but only when it is one of the tree
# ensembles that shap.TreeExplainer is used for here (RF or XGB).
best_model_name = results_df.index[0]
if best_model_name in models and best_model_name in ['RF', 'XGB']:
    best_model = models[best_model_name]
    print(f"Running SHAP on {best_model_name}")
    # For SHAP, we need to use the correctly aligned test features
    X_test_shap = X_test_ml
    explainer = shap.TreeExplainer(best_model)
    shap_values = explainer.shap_values(X_test_shap)
    # summary_plot's beeswarm-style plot type is "dot"; "beeswarm" is not one
    # of its documented plot_type values ("dot", "bar", "violin", "compact_dot").
    shap.summary_plot(shap_values, X_test_shap, plot_type="dot", show=False)
    plt.title(f"SHAP Beeswarm ({best_model_name})", fontsize=16)
    plt.savefig("shap_beeswarm.png", bbox_inches='tight')
    plt.show()
    # Second figure: the bar-type summary plot of the same SHAP values.
    shap.summary_plot(shap_values, X_test_shap, plot_type="bar", show=False)
    plt.title(f"Feature Importance ({best_model_name})", fontsize=16)
    plt.savefig("shap_importance.png", bbox_inches='tight')
    plt.show()
else:
    print("SHAP skipped for non-tree model.")

## 14. Export Predictions

In [None]:
# Start from the full ML test frame; per the original note, the ML models cover
# more test rows than the DL models do.
final_predictions_df = test_df.copy()
final_predictions_df['true_yield_original'] = final_predictions_df[TARGET]

# Attach one 'predicted_<model>' column per model.
for name, preds in y_preds_original.items():
    pred_col = f'predicted_{name}'
    if name not in ('LR', 'RF', 'XGB'):
        # DL predictions are joined on the index of their reference frame, so
        # test rows without a DL prediction end up as NaN in that column.
        dl_column = pd.Series(preds, index=test_df_dl_seq_ref.index, name=pred_col)
        final_predictions_df = final_predictions_df.join(dl_column)
        continue
    # Tabular predictions are assigned directly as a new column of the test frame.
    final_predictions_df[pred_col] = preds

prediction_cols = [f'predicted_{name}' for name in models.keys()]
export_cols = ['Year', 'Area', 'Item', 'true_yield_original'] + prediction_cols
final_predictions_df[export_cols].to_csv("final_test_predictions.csv", index=False)
print("Exported predictions.")
print("\n--- Complete ---")