In [1]:
!pip install transformers 
!pip install protobuf==3.20.3

Collecting protobuf==3.20.3
  Downloading protobuf-3.20.3-py2.py3-none-any.whl.metadata (720 bytes)
Downloading protobuf-3.20.3-py2.py3-none-any.whl (162 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m162.1/162.1 kB[0m [31m6.5 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: protobuf
  Attempting uninstall: protobuf
    Found existing installation: protobuf 6.33.0
    Uninstalling protobuf-6.33.0:
      Successfully uninstalled protobuf-6.33.0
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
bigframes 2.12.0 requires google-cloud-bigquery-storage<3.0.0,>=2.30.0, which is not installed.
opentelemetry-proto 1.37.0 requires protobuf<7.0,>=5.0, but you have protobuf 3.20.3 which is incompatible.
onnx 1.18.0 requires protobuf>=4.25.1, but you have protobuf 3.20.3 which is incompatible.
a2a-sdk 0.3.10 requires p

In [2]:
import pandas as pd 
import numpy as np 
import os 
import glob 
import torch
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
from torch.optim import AdamW
from torch.optim.lr_scheduler import CosineAnnealingLR
import timm # For pre-trained models
from tqdm import tqdm # For progress bars
from PIL import Image



In [3]:
import torch
from transformers import AutoImageProcessor, AutoModel

# Directory of the attached Kaggle model files (DINOv2 "giant" variant).
# Adjust if a different DINOv2 dataset/version is attached to the notebook.
MODEL_PATH = '/kaggle/input/dinov2/pytorch/giant/1'

# Both the image pre-processor and the backbone weights are read from local files.
processor = AutoImageProcessor.from_pretrained(MODEL_PATH)
model = AutoModel.from_pretrained(MODEL_PATH)

# --- Setup ---
# NOTE(review): `device` is only reported here; nothing in this cell moves
# `model` onto it, so the model stays wherever `from_pretrained` placed it.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(f"Using device: {device}")

# Inference mode: disables dropout and similar training-only behaviour.
model.eval()
print("DINOv2 model loaded successfully from local files.")

2025-11-19 09:41:29.109514: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1763545289.365319      19 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1763545289.433386      19 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.52, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.


Using device: cuda
DINOv2 model loaded successfully from local files.


In [4]:
# Root folder of the competition data; image paths in train.csv are joined to it later.
BASE_PATH = '/kaggle/input/csiro-biomass'
_train_csv_path = os.path.join(BASE_PATH, 'train.csv')
train_meta = pd.read_csv(_train_csv_path)

In [5]:
# 1. Pivot the target variables
# Long -> wide: one row per 'image_path', one column per 'target_name',
# cell values taken from 'target'.
targets_df = pd.pivot(
    train_meta,
    index='image_path',
    columns='target_name',
    values='target',
)

In [6]:
# 2. Get the unique metadata for each image
# The metadata columns repeat on every row of the same image, so the first
# occurrence per 'image_path' is kept and used as the index.
meta_df = (
    train_meta[['image_path', 'Sampling_Date', 'State', 'Species',
                'Pre_GSHH_NDVI', 'Height_Ave_cm']]
    .drop_duplicates(subset='image_path')
    .set_index('image_path')
)





In [7]:
# 3. Join them together
# Index-aligned left join on 'image_path' (metadata on the left, pivoted
# targets on the right), then 'image_path' is restored as a regular column.
_joined = meta_df.join(targets_df, how='left')
train_df = _joined.reset_index()

In [8]:
# 4. Define our 5 output targets
# The pivoted target columns are the last five columns of train_df.
TARGET_COLS = list(train_df.columns[-5:])

# Per-target weights, keyed by column name.
# The weights used to be a bare positional list written in the order
# [Dry_Total_g, Dry_Green_g, Dry_Dead_g, Dry_Clover_g, ...], but TARGET_COLS is
# in pivot (alphabetical) order, so the 0.5 weight landed on the first column
# (Dry_Clover_g) instead of Dry_Total_g. Looking weights up by name keeps them
# aligned with TARGET_COLS regardless of column order.
# NOTE(review): the original comment listed a fifth target 'Dry_Grass_g', which
# is not a column of train_df; the remaining column is 'GDM_g', so the last
# weight is assigned to it. Confirm the 0.2 / 0.1 assignment of Dry_Green_g vs
# GDM_g against the competition's evaluation page.
_TARGET_WEIGHTS = {
    'Dry_Total_g': 0.5,
    'Dry_Green_g': 0.2,
    'Dry_Dead_g': 0.1,
    'Dry_Clover_g': 0.1,
    'GDM_g': 0.1,
}
_unweighted = [col for col in TARGET_COLS if col not in _TARGET_WEIGHTS]
if _unweighted:
    raise KeyError(f"No weight defined for target column(s): {_unweighted}")

# Weight tensor in exactly the same order as TARGET_COLS.
COMP_WEIGHTS = torch.tensor([_TARGET_WEIGHTS[col] for col in TARGET_COLS]).to(device)

# Now train_df is ready!
print(train_df.head())

               image_path Sampling_Date State            Species  \
0  train/ID1011485656.jpg      2015/9/4   Tas    Ryegrass_Clover   
1  train/ID1012260530.jpg      2015/4/1   NSW            Lucerne   
2  train/ID1025234388.jpg      2015/9/1    WA  SubcloverDalkeith   
3  train/ID1028611175.jpg     2015/5/18   Tas           Ryegrass   
4  train/ID1035947949.jpg     2015/9/11   Tas           Ryegrass   

   Pre_GSHH_NDVI  Height_Ave_cm  Dry_Clover_g  Dry_Dead_g  Dry_Green_g  \
0           0.62         4.6667        0.0000     31.9984      16.2751   
1           0.55        16.0000        0.0000      0.0000       7.6000   
2           0.38         1.0000        6.0500      0.0000       0.0000   
3           0.66         5.0000        0.0000     30.9703      24.2376   
4           0.54         3.5000        0.4343     23.2239      10.5261   

   Dry_Total_g    GDM_g  
0      48.2735  16.2750  
1       7.6000   7.6000  
2       6.0500   6.0500  
3      55.2079  24.2376  
4      34.1844  

In [9]:
# Positional (unshuffled) 80/20 split: the first 80% of rows go to training,
# the remaining rows to validation.
_split_at = round(len(train_df) * 0.8)
train_df_sample, val_df_sample = train_df.iloc[:_split_at], train_df.iloc[_split_at:]

In [10]:
# Sanity check: (rows, columns) of the validation split.
val_df_sample.shape

(71, 11)

In [11]:
# Requires the `albumentations` package (install it first if the import below fails).

import albumentations as A
from albumentations.pytorch import ToTensorV2
import cv2 # Albumentations uses OpenCV


# --- Standard ImageNet Normalization Values (Common for DINO/ViT models) ---
IMAGENET_MEAN = [0.485, 0.456, 0.406]
IMAGENET_STD = [0.229, 0.224, 0.225]
TARGET_SIZE = 256  # Side length (pixels) images are resized to by both pipelines

# --- 1. Define your transforms ---

# Training pipeline: resize, random augmentations, then normalization and
# conversion to a tensor.
train_transforms = A.Compose([
    # Resize to model input size
    A.Resize(TARGET_SIZE, TARGET_SIZE),

    # Geometric augmentations
    A.HorizontalFlip(p=0.5),      # 50% chance to flip horizontally
    A.VerticalFlip(p=0.5),        # 50% chance to flip vertically
    A.RandomRotate90(p=0.5),      # 50% chance to rotate by a multiple of 90 degrees

    # Noise/distortion augmentations
    A.GaussNoise(p=0.2),
    # The legacy max_holes / max_height / max_width / fill_value arguments
    # trigger a warning in the installed albumentations, so the range-based
    # arguments are used instead. (8, 8) / (32, 32) mirror the legacy
    # behaviour where the minimum defaulted to the maximum; the fill value is
    # left at its default of 0.
    # NOTE(review): confirm these argument names against the installed
    # albumentations version.
    A.CoarseDropout(
        num_holes_range=(8, 8),
        hole_height_range=(32, 32),
        hole_width_range=(32, 32),
        p=0.5,
    ),

    # Essential: Normalization and Tensor Conversion
    A.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD),
    ToTensorV2()
])

# Validation/test pipeline: deterministic only (resize, normalize, to tensor).
# NO random flips, noise, or distortion should be applied here.
val_transforms = A.Compose([
    A.Resize(TARGET_SIZE, TARGET_SIZE),  # Match the training input size
    A.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD),
    ToTensorV2()
])

print("Image transforms defined: train_transforms (augmented) and val_transforms (deterministic/normalized).")

# --- 2. Update the Custom Dataset Class ---
class BiomassDataset(Dataset):
    """Dataset yielding an image, its regression targets and backbone features.

    For every row of ``df`` the image is read from disk, passed through
    ``processor`` and ``model`` to obtain ``pooler_output`` features, and then
    (optionally) transformed with an albumentations pipeline. The features are
    computed from the image as loaded, i.e. *before* ``transforms`` is applied.

    Args:
        df: DataFrame with an ``'image_path'`` column and the target columns.
        target_cols: Names of the target columns in ``df``.
        base_path: Directory that the relative ``image_path`` values are joined to.
        processor: Callable invoked as ``processor(image, return_tensors='pt')``
            whose result is unpacked into ``model(**inputs)``.
        model: Feature extractor whose output exposes ``pooler_output``.
        transforms: Optional albumentations-style callable invoked as
            ``transforms(image=image)`` and returning a dict with key ``'image'``.
    """

    def __init__(self, df, target_cols, base_path, processor, model, transforms=None):
        self.df = df
        self.image_paths = [os.path.join(base_path, p) for p in df['image_path']]
        # Targets are stored once as a float32 matrix, one row per image.
        self.labels = df[target_cols].values.astype(np.float32)
        self.transforms = transforms

        # Kept for reference only: neither the model nor its inputs are moved
        # to this device inside the dataset.
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.model = model
        self.processor = processor

    def __len__(self):
        """Return the number of images in the dataset."""
        return len(self.image_paths)

    def __getitem__(self, idx):
        """Return ``(image, labels, features)`` for the sample at ``idx``.

        Returns:
            image: RGB image as loaded (H x W x 3 array) when ``transforms`` is
                ``None``, otherwise the ``'image'`` entry produced by ``transforms``.
            labels: ``torch.Tensor`` of float32 targets for this sample.
            features: ``numpy.ndarray`` holding ``pooler_output`` with one extra
                leading axis (kept so the batched shape stays the same as before).

        Raises:
            FileNotFoundError: If the image cannot be read from disk.
        """
        image_path = self.image_paths[idx]

        # cv2.imread does not raise on a missing/unreadable file; it returns
        # None, which would otherwise fail later with a cryptic cvtColor error.
        image = cv2.imread(image_path)
        if image is None:
            raise FileNotFoundError(f"Could not read image at {image_path}")
        # OpenCV loads BGR; the processor and albumentations expect RGB.
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        # Feature extraction on the untransformed image, without autograd.
        model_inputs = self.processor(image, return_tensors='pt')
        with torch.no_grad():
            model_out = self.model(**model_inputs)
        # Same layout as the former np.array([pooler_output]) round-trip.
        features = model_out.pooler_output.detach().cpu().numpy()[np.newaxis]

        # Apply transforms (if given); albumentations returns a dictionary.
        if self.transforms is not None:
            image = self.transforms(image=image)['image']

        labels = torch.tensor(self.labels[idx])

        return image, labels, features


Image transforms defined: train_transforms (augmented) and val_transforms (deterministic/normalized).


  A.CoarseDropout(max_holes=8, max_height=32, max_width=32, p=0.5, fill_value=0),


In [12]:
# import albumentations as A
# from albumentations.pytorch import ToTensorV2
# import cv2 
# import os
# import numpy as np
# import torch
# from torch.utils.data import Dataset # Import the base Dataset class

# # --- Standard ImageNet Normalization Values (Common for DINO/ViT models) ---
# IMAGENET_MEAN = [0.485, 0.456, 0.406]
# IMAGENET_STD = [0.229, 0.224, 0.225]
# TARGET_SIZE = 256 # Assuming a standard input size for ViT features

# # --- 1. Define your transforms ---

# # These are applied ONLY to the training data.
# train_transforms = A.Compose([
#     # Resize to model input size
#     A.Resize(TARGET_SIZE, TARGET_SIZE), 
    
#     # Geometric Augmentations (User-provided)
#     A.HorizontalFlip(p=0.5),      
#     A.VerticalFlip(p=0.5),        
#     A.RandomRotate90(p=0.5),      
    
#     # Noise/Distortion Augmentations (User-provided)
#     A.GaussNoise(p=0.2),
#     A.CoarseDropout(max_holes=8, max_height=32, max_width=32, p=0.5, fill_value=0),
    
#     # Essential: Normalization and Tensor Conversion
#     A.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD),
#     ToTensorV2()
# ])

# # These are for validation/testing. Deterministic transformations only.
# val_transforms = A.Compose([
#     A.Resize(TARGET_SIZE, TARGET_SIZE), 
#     A.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD), 
#     ToTensorV2() 
# ])

# print("Image transforms defined: train_transforms (augmented) and val_transforms (deterministic/normalized).")

# # --- 2. Corrected Custom Dataset Class ---
# class BiomassDataset(Dataset):
#     """
#     Dataset class for loading images, applying augmentation, extracting features,
#     and returning the Image Tensor, Labels, and Feature Vector.
#     """
#     def __init__(self, df, target_cols, base_path, processor, model, transforms=None):
#         self.df = df
#         self.image_paths = [os.path.join(base_path, p) for p in df['image_path']]
#         self.labels = df[target_cols].values.astype(np.float32)
#         self.transforms = transforms 
#         self.processor = processor 

#         # --- GPU SETUP ---
#         self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
#         self.model = model
        
#         if self.model:
#             # Move model to GPU upon initialization
#             self.model.to(self.device)
#             self.model.eval()

#     def __len__(self):
#         return len(self.image_paths)

#     def __getitem__(self, idx):
#         image_path = self.image_paths[idx]
        
#         # 1. Load image (HWC format)
#         image = cv2.imread(image_path)
#         if image is None:
#             # Handle broken images gracefully
#             # Returning zeros/NaNs might be better than raising an error, depending on batch size
#             raise FileNotFoundError(f"Image not found at {image_path}")
            
#         # cv2 loads as BGR, convert to RGB (necessary for Albumentations/Pytorch)
#         image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        
#         # 2. Apply transforms (augmentation, resizing, normalization, ToTensorV2)
#         if self.transforms:
#             transformed = self.transforms(image=image)
#             image_tensor = transformed['image'] # C, H, W tensor
#         else:
#             transformed = val_transforms(image=image) 
#             image_tensor = transformed['image']

#         labels = self.labels[idx]

#         # 3. Extract Features (Single Pass)
#         with torch.no_grad():
#             # Add batch dimension (1, C, H, W)
#             model_input = image_tensor.unsqueeze(0) 
            
#             # --- GPU USAGE ---
#             # Move the input tensor to the same device as the model
#             model_input = model_input.to(self.device)
            
#             # Extract features (e.g., ViT pooler output)
#             model_out = self.model(model_input)
            
#             # Get the feature vector
#             if hasattr(model_out, 'pooler_output'):
#                 feature_vector = model_out.pooler_output.squeeze(0).cpu() 
#             else:
#                 feature_vector = model_out.last_hidden_state[:, 0, :].squeeze(0).cpu() 

#         # 4. Return all three required outputs
#         # image_tensor.cpu() is safe to pass out of a worker process.
#         return image_tensor.cpu(), torch.tensor(labels), feature_vector.detach()

In [13]:

# --- 3. Create Datasets ---
# Both datasets share everything except the DataFrame they read from.
# transforms=None: no albumentations pipeline is applied to the returned image.
_common_dataset_kwargs = dict(
    target_cols=TARGET_COLS,
    base_path=BASE_PATH,
    transforms=None,
    processor=processor,
    model=model,
)
train_dataset = BiomassDataset(df=train_df_sample, **_common_dataset_kwargs)
val_dataset = BiomassDataset(df=val_df_sample, **_common_dataset_kwargs)

# Training batches are shuffled; validation batches keep the DataFrame order.
train_loader = DataLoader(dataset=train_dataset, batch_size=32, shuffle=True, num_workers=2)
val_loader = DataLoader(dataset=val_dataset, batch_size=64, shuffle=False, num_workers=2)



In [14]:
# Smoke test: pull the first validation batch and report the shape of each part.
_val_batches = iter(val_loader)
images, labels, encodings = next(_val_batches)

for _caption, _batch in (("Image", images), ("Labels", labels), ("Feature", encodings)):
    print(f"{_caption} batch shape: {_batch.shape}")


Image batch shape: torch.Size([64, 1000, 2000, 3])
Labels batch shape: torch.Size([64, 5])
Feature batch shape: torch.Size([64, 1, 1, 1536])


In [15]:
import numpy as np
import torch
from tqdm import tqdm

# Per-batch feature/label arrays for the training split, stacked at the end.
all_features, all_labels = [], []

print("Extracting features from existing pipeline...")

# The image part of each batch is not needed here; only features and labels are kept.
for _images, _batch_targets, _batch_encodings in tqdm(train_loader):
    # Collapse every axis after the batch axis into one feature axis.
    all_features.append(_batch_encodings.flatten(start_dim=1).cpu().numpy())
    all_labels.append(_batch_targets.cpu().numpy())

# One row per image: X_train holds the features, y_train the targets.
X_train = np.vstack(all_features)
y_train = np.vstack(all_labels)


Extracting features from existing pipeline...


100%|██████████| 9/9 [15:57<00:00, 106.38s/it]


In [16]:
# Same collection as for the training split, run over the validation loader.
all_features_val = []
all_labels_val = []

for _, _targets_val, _encodings_val in tqdm(val_loader):
    _n_in_batch = _encodings_val.shape[0]
    # (batch, ...) -> (batch, n_features)
    _flat_val = _encodings_val.reshape(_n_in_batch, -1)
    all_features_val.append(_flat_val.cpu().numpy())
    all_labels_val.append(_targets_val.cpu().numpy())

# Stack the per-batch arrays into single matrices (one row per image).
X_val = np.vstack(all_features_val)
y_val = np.vstack(all_labels_val)


100%|██████████| 2/2 [06:34<00:00, 197.33s/it]


In [17]:

# Report matrix shapes: validation split first, then training split.
# X is (n_images, n_features) -- 1536 features in the recorded run -- and
# y is (n_images, 5).
for _X_split, _y_split in ((X_val, y_val), (X_train, y_train)):
    print(f"Final Feature Matrix X: {_X_split.shape}")
    print(f"Final Label Matrix y:   {_y_split.shape}")


Final Feature Matrix X: (71, 1536)
Final Label Matrix y:   (71, 5)
Final Feature Matrix X: (286, 1536)
Final Label Matrix y:   (286, 5)


In [18]:
import numpy as np
import pandas as pd
import xgboost as xgb
import lightgbm as lgb
from sklearn.multioutput import MultiOutputRegressor
from sklearn.model_selection import train_test_split, KFold
# Import R-squared and MAE for final evaluation
from sklearn.metrics import mean_absolute_error, r2_score
from sklearn.datasets import make_regression
from sklearn.linear_model import LinearRegression 
import optuna # NEW: Import Optuna


# --- 1. DATA SETUP ---

# 1.1 Stack the extracted train and validation matrices row-wise into one dataset.
X_full = np.vstack((X_train, X_val))
y_full = np.vstack((y_train, y_val))

print(f"Combined X_full shape: {X_full.shape}")
print(f"Combined y_full shape: {y_full.shape}")
print(f"Number of Target Variables: {y_full.shape[1]}")

# 1.2 Hold out 20% of the rows as a test set; the remaining rows are used for
# K-Fold tuning. The split is seeded so it is reproducible.
_split_parts = train_test_split(X_full, y_full, test_size=0.2, random_state=42)
X_cv_tuning, X_test, y_cv_tuning, y_test = _split_parts

print(f"\nFinal CV Tuning Samples (Used for K-Fold): {len(X_cv_tuning)}")
print(f"Final Test Samples (Holdout for Evaluation): {len(X_test)}")


# --- 2. SETUP AND UTILITIES ---

# Shuffled, seeded 3-fold splitter shared by both Optuna objectives.
KF = KFold(n_splits=3, shuffle=True, random_state=42)

# Module-level aliases read by the objective functions.
X_TUNING, Y_TUNING = X_cv_tuning, y_cv_tuning


# --- 3. OPTUNA OBJECTIVE FUNCTIONS (New for Optuna Tuning) ---
# We define objective functions to be maximized (R2 score).

def objective_xgb(trial: optuna.Trial):
    """Optuna objective for XGBoost: mean K-Fold R2 on the tuning data.

    Samples one hyperparameter configuration from ``trial``, fits a
    ``MultiOutputRegressor(XGBRegressor)`` on each training fold of ``KF`` over
    ``X_TUNING`` / ``Y_TUNING`` and scores it on the held-out fold.

    Args:
        trial: Optuna trial used to sample the hyperparameters.

    Returns:
        Mean of the per-fold ``r2_score`` values (to be maximized).
    """
    # Search space. `suggest_float(..., log=True)` replaces the deprecated
    # `suggest_loguniform`; the sampled parameter names are unchanged.
    param = {
        'n_estimators': trial.suggest_int('n_estimators', 500, 2000, step=500),
        'learning_rate': trial.suggest_float('learning_rate', 1e-3, 0.1, log=True),
        'max_depth': trial.suggest_int('max_depth', 4, 10),
        'subsample': trial.suggest_float('subsample', 0.6, 0.95),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.6, 0.95),
        'reg_alpha': trial.suggest_float('reg_alpha', 1e-4, 10.0, log=True),
        'random_state': 42,
        'n_jobs': -1,
        # GPU training: `tree_method='gpu_hist'` is deprecated in favour of
        # `tree_method='hist'` together with `device='cuda'`.
        'tree_method': 'hist',
        'device': 'cuda',
    }

    # K-Fold cross-validation; local names avoid shadowing the notebook-level
    # `X_val` / `y_val` / `model` variables.
    r2_scores = []
    for train_idx, val_idx in KF.split(X_TUNING, Y_TUNING):
        X_tr, X_va = X_TUNING[train_idx], X_TUNING[val_idx]
        y_tr, y_va = Y_TUNING[train_idx], Y_TUNING[val_idx]

        fold_model = MultiOutputRegressor(xgb.XGBRegressor(**param))
        fold_model.fit(X_tr, y_tr)

        # r2_score on 2-D targets averages the per-target scores uniformly.
        r2_scores.append(r2_score(y_va, fold_model.predict(X_va)))

    # Optuna maximizes the average score
    return float(np.mean(r2_scores))


def objective_lgb(trial: optuna.Trial):
    """Optuna objective for LightGBM: mean K-Fold R2 on the tuning data.

    Samples one hyperparameter configuration from ``trial``, fits a
    ``MultiOutputRegressor(LGBMRegressor)`` on each training fold of ``KF`` over
    ``X_TUNING`` / ``Y_TUNING`` and scores it on the held-out fold.

    Args:
        trial: Optuna trial used to sample the hyperparameters.

    Returns:
        Mean of the per-fold ``r2_score`` values (to be maximized).
    """
    # Search space. `suggest_float(..., log=True)` replaces the deprecated
    # `suggest_loguniform`; the sampled parameter names are unchanged.
    param = {
        'n_estimators': trial.suggest_int('n_estimators', 500, 2000, step=500),
        'learning_rate': trial.suggest_float('learning_rate', 1e-3, 0.1, log=True),
        'num_leaves': trial.suggest_int('num_leaves', 20, 100),
        'max_depth': trial.suggest_int('max_depth', 5, 15),
        # NOTE(review): LightGBM appears to apply row subsampling only when
        # `subsample_freq` is non-zero; it is not set here, so this parameter
        # may have no effect -- confirm against the LightGBM docs.
        'subsample': trial.suggest_float('subsample', 0.6, 0.95),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.6, 0.95),
        'min_child_samples': trial.suggest_int('min_child_samples', 10, 60),
        'random_state': 42,
        'n_jobs': -1,
        'verbose': -1,
        # Train on the GPU
        'device': 'gpu',
    }

    # K-Fold cross-validation; local names avoid shadowing the notebook-level
    # `X_val` / `y_val` / `model` variables.
    r2_scores = []
    for train_idx, val_idx in KF.split(X_TUNING, Y_TUNING):
        X_tr, X_va = X_TUNING[train_idx], X_TUNING[val_idx]
        y_tr, y_va = Y_TUNING[train_idx], Y_TUNING[val_idx]

        fold_model = MultiOutputRegressor(lgb.LGBMRegressor(**param))
        fold_model.fit(X_tr, y_tr)

        # r2_score on 2-D targets averages the per-target scores uniformly.
        r2_scores.append(r2_score(y_va, fold_model.predict(X_va)))

    # Optuna maximizes the average score
    return float(np.mean(r2_scores))


# --- 4. OPTUNA EXECUTION ---
print("\n--- Starting XGBoost Hyperparameter Tuning with Optuna ---")
xgb_study = optuna.create_study(direction="maximize", study_name="XGBoost_R2_Tuning")
xgb_study.optimize(objective_xgb, n_trials=10, show_progress_bar=True)

print("\nFinished XGBoost Tuning.")
print(f"Best Optuna R2 Score: {xgb_study.best_value:.4f}")
print(f"Best XGBoost Parameters: {xgb_study.best_params}")

# Refit XGBoost on all tuning data with the best sampled parameters.
# `best_params` only contains the values sampled via `trial.suggest_*`, so the
# fixed settings are passed again explicitly.
best_xgb_params = xgb_study.best_params
best_model_xgb = MultiOutputRegressor(xgb.XGBRegressor(
    **best_xgb_params,
    random_state=42,
    n_jobs=-1,
    # GPU training: `tree_method='gpu_hist'` is deprecated in favour of
    # `tree_method='hist'` together with `device='cuda'`.
    tree_method='hist',
    device='cuda',
))
best_model_xgb.fit(X_TUNING, Y_TUNING)


print("\n--- Starting LightGBM Hyperparameter Tuning with Optuna ---")
lgb_study = optuna.create_study(direction="maximize", study_name="LGBM_R2_Tuning")
lgb_study.optimize(objective_lgb, n_trials=10, show_progress_bar=True)

print("\nFinished LightGBM Tuning.")
print(f"Best Optuna R2 Score: {lgb_study.best_value:.4f}")
print(f"Best LightGBM Parameters: {lgb_study.best_params}")

# Refit LightGBM on all tuning data with the best sampled parameters plus the
# same fixed settings the objective used.
best_lgb_params = lgb_study.best_params
best_model_lgb = MultiOutputRegressor(lgb.LGBMRegressor(
    **best_lgb_params,
    random_state=42,
    n_jobs=-1,
    verbose=-1,
    device='gpu'  # Ensure GPU is used in the final model
))
best_model_lgb.fit(X_TUNING, Y_TUNING)


# --- 5. WEIGHT TUNING VIA OUT-OF-FOLD (OOF) PREDICTIONS ---
# Only the section banner is printed here; no OOF generation code follows in this cell.

print("\n--- Generating OOF Predictions for Weight Tuning ---")



[I 2025-11-19 10:11:12,536] A new study created in memory with name: XGBoost_R2_Tuning


Combined X_full shape: (357, 1536)
Combined y_full shape: (357, 5)
Number of Target Variables: 5

Final CV Tuning Samples (Used for K-Fold): 285
Final Test Samples (Holdout for Evaluation): 72

--- Starting XGBoost Hyperparameter Tuning with Optuna ---


  0%|          | 0/10 [00:00<?, ?it/s]

  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-3, 0.1),
  'reg_alpha': trial.suggest_loguniform('reg_alpha', 1e-4, 10.0),

    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"

Potential solutions:
- Use a data structure that matches the device ordinal in the booster.
- Set the device for booster before call to inplace_predict.



    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_metho

[I 2025-11-19 10:12:41,796] Trial 0 finished with value: 0.5163502572717954 and parameters: {'n_estimators': 1500, 'learning_rate': 0.024949721128593054, 'max_depth': 10, 'subsample': 0.8997653092515834, 'colsample_bytree': 0.6394486962788082, 'reg_alpha': 0.000720980346625717}. Best is trial 0 with value: 0.5163502572717954.



    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"

  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-3, 0.

[I 2025-11-19 10:14:12,537] Trial 1 finished with value: 0.5170768970255887 and parameters: {'n_estimators': 1000, 'learning_rate': 0.017423347879703035, 'max_depth': 10, 'subsample': 0.8923344534219382, 'colsample_bytree': 0.6175847313566134, 'reg_alpha': 0.00016173348426008467}. Best is trial 1 with value: 0.5170768970255887.



    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"

  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-3, 0.1),
  'reg_alpha': trial.suggest_loguniform('reg_

[I 2025-11-19 10:14:53,133] Trial 2 finished with value: 0.5395268607700987 and parameters: {'n_estimators': 1500, 'learning_rate': 0.06055219310200618, 'max_depth': 6, 'subsample': 0.6184876956992409, 'colsample_bytree': 0.8187939548444243, 'reg_alpha': 9.193814679793102}. Best is trial 2 with value: 0.5395268607700987.



    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"

  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-3, 0.1),
  'reg_alpha': trial.suggest_loguniform('reg_alpha', 1e-4, 10.0),

    E.g. tree_method = "his

[I 2025-11-19 10:16:06,940] Trial 3 finished with value: 0.5104925883768439 and parameters: {'n_estimators': 1500, 'learning_rate': 0.03438956073739088, 'max_depth': 8, 'subsample': 0.8322652840191797, 'colsample_bytree': 0.74681038318077, 'reg_alpha': 0.16597364696007733}. Best is trial 2 with value: 0.5395268607700987.



    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"

  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-3, 0.1),
  'reg_alpha': trial.suggest_loguniform('reg_alpha', 1e-4, 10.0),

    E.g. tree_method = "his

[I 2025-11-19 10:16:40,979] Trial 4 finished with value: 0.5171223130985475 and parameters: {'n_estimators': 1000, 'learning_rate': 0.007140220157431501, 'max_depth': 4, 'subsample': 0.9024200875214361, 'colsample_bytree': 0.920484683824366, 'reg_alpha': 0.0056531152656817615}. Best is trial 2 with value: 0.5395268607700987.



    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"

  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-3, 0.1),
  'reg_alpha': trial.suggest_loguniform('reg_

[I 2025-11-19 10:17:47,316] Trial 5 finished with value: 0.4960752576021301 and parameters: {'n_estimators': 1000, 'learning_rate': 0.002034314174014324, 'max_depth': 6, 'subsample': 0.6869802593622558, 'colsample_bytree': 0.869098060003196, 'reg_alpha': 0.004006927345583879}. Best is trial 2 with value: 0.5395268607700987.



    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"

  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-3, 0.1),
  'reg_alpha': trial.suggest_loguniform('reg_alpha', 1e-4, 10.0),


[I 2025-11-19 10:19:24,318] Trial 6 finished with value: 0.5115395160623947 and parameters: {'n_estimators': 2000, 'learning_rate': 0.034386927962670434, 'max_depth': 9, 'subsample': 0.6814090461352644, 'colsample_bytree': 0.923428765515977, 'reg_alpha': 0.612003373551684}. Best is trial 2 with value: 0.5395268607700987.



    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"

  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-3, 0.1),
  'reg_alpha': trial.suggest_loguniform('reg_

[I 2025-11-19 10:20:01,598] Trial 7 finished with value: 0.5130145213382297 and parameters: {'n_estimators': 1000, 'learning_rate': 0.05177298448352979, 'max_depth': 6, 'subsample': 0.743570670523065, 'colsample_bytree': 0.8203361343127212, 'reg_alpha': 0.0006550508131083998}. Best is trial 2 with value: 0.5395268607700987.



    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"

  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-3, 0.1),
  'reg_alpha': trial.suggest_loguniform('reg_alpha', 1e-4, 10.0),


[I 2025-11-19 10:21:42,481] Trial 8 finished with value: 0.4957841559150243 and parameters: {'n_estimators': 2000, 'learning_rate': 0.03550653344593175, 'max_depth': 10, 'subsample': 0.7689093962407032, 'colsample_bytree': 0.9493556877329512, 'reg_alpha': 0.038065740401158836}. Best is trial 2 with value: 0.5395268607700987.



    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"



[I 2025-11-19 10:23:12,020] Trial 9 finished with value: 0.5278843152785241 and parameters: {'n_estimators': 1500, 'learning_rate': 0.021620564434189776, 'max_depth': 8, 'subsample': 0.8807405554800418, 'colsample_bytree': 0.6301466167823653, 'reg_alpha': 0.014166394074030992}. Best is trial 2 with value: 0.5395268607700987.

Finished XGBoost Tuning.
Best Optuna R2 Score: 0.5395
Best XGBoost Parameters: {'n_estimators': 1500, 'learning_rate': 0.06055219310200618, 'max_depth': 6, 'subsample': 0.6184876956992409, 'colsample_bytree': 0.8187939548444243, 'reg_alpha': 9.193814679793102}



    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"

[I 2025-11-19 10:23:30,014] A new study created in memory with name: LGBM_R2_Tuning



--- Starting LightGBM Hyperparameter Tuning with Optuna ---


  0%|          | 0/10 [00:00<?, ?it/s]

  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-3, 0.1),


[I 2025-11-19 10:24:17,092] Trial 0 finished with value: 0.33843778934800794 and parameters: {'n_estimators': 1000, 'learning_rate': 0.0012952015608560996, 'num_leaves': 25, 'max_depth': 10, 'subsample': 0.6145690523764018, 'colsample_bytree': 0.6093207297959806, 'min_child_samples': 37}. Best is trial 0 with value: 0.33843778934800794.


  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-3, 0.1),


[I 2025-11-19 10:24:46,549] Trial 1 finished with value: 0.38735013403198454 and parameters: {'n_estimators': 500, 'learning_rate': 0.0033583203470977396, 'num_leaves': 98, 'max_depth': 7, 'subsample': 0.8697244634890574, 'colsample_bytree': 0.9076500068037598, 'min_child_samples': 35}. Best is trial 1 with value: 0.38735013403198454.


  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-3, 0.1),


[I 2025-11-19 10:25:37,547] Trial 2 finished with value: 0.5070950650193587 and parameters: {'n_estimators': 1500, 'learning_rate': 0.0050774673316514325, 'num_leaves': 33, 'max_depth': 13, 'subsample': 0.8619532734871667, 'colsample_bytree': 0.7498378293868397, 'min_child_samples': 46}. Best is trial 2 with value: 0.5070950650193587.


  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-3, 0.1),


[I 2025-11-19 10:26:42,417] Trial 3 finished with value: 0.3345465868912479 and parameters: {'n_estimators': 500, 'learning_rate': 0.0016371003920572203, 'num_leaves': 55, 'max_depth': 11, 'subsample': 0.9367360919638443, 'colsample_bytree': 0.7186358046696854, 'min_child_samples': 13}. Best is trial 2 with value: 0.5070950650193587.


  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-3, 0.1),


[I 2025-11-19 10:27:15,769] Trial 4 finished with value: 0.5014087157005684 and parameters: {'n_estimators': 500, 'learning_rate': 0.013415617511603929, 'num_leaves': 82, 'max_depth': 14, 'subsample': 0.8907658010385647, 'colsample_bytree': 0.6336646927850399, 'min_child_samples': 27}. Best is trial 2 with value: 0.5070950650193587.


  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-3, 0.1),


[I 2025-11-19 10:27:47,947] Trial 5 finished with value: 0.26359333936129226 and parameters: {'n_estimators': 1000, 'learning_rate': 0.0011539484999293222, 'num_leaves': 27, 'max_depth': 10, 'subsample': 0.6754881615987093, 'colsample_bytree': 0.8868642929705313, 'min_child_samples': 57}. Best is trial 2 with value: 0.5070950650193587.


  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-3, 0.1),


[I 2025-11-19 10:28:14,764] Trial 6 finished with value: 0.52037154940834 and parameters: {'n_estimators': 500, 'learning_rate': 0.02357235552208104, 'num_leaves': 57, 'max_depth': 14, 'subsample': 0.8791716756192286, 'colsample_bytree': 0.6704042888079431, 'min_child_samples': 34}. Best is trial 6 with value: 0.52037154940834.


  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-3, 0.1),


[I 2025-11-19 10:32:48,541] Trial 7 finished with value: 0.47533005887942825 and parameters: {'n_estimators': 2000, 'learning_rate': 0.019156911280219308, 'num_leaves': 93, 'max_depth': 15, 'subsample': 0.796902209348559, 'colsample_bytree': 0.9356779024816784, 'min_child_samples': 16}. Best is trial 6 with value: 0.52037154940834.


  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-3, 0.1),


[I 2025-11-19 10:33:20,711] Trial 8 finished with value: 0.48726358088091154 and parameters: {'n_estimators': 1000, 'learning_rate': 0.006486181372592724, 'num_leaves': 27, 'max_depth': 6, 'subsample': 0.6617102628058283, 'colsample_bytree': 0.8046509543799092, 'min_child_samples': 52}. Best is trial 6 with value: 0.52037154940834.


  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-3, 0.1),


[I 2025-11-19 10:35:14,794] Trial 9 finished with value: 0.47919443746942764 and parameters: {'n_estimators': 1000, 'learning_rate': 0.0663846234781217, 'num_leaves': 39, 'max_depth': 14, 'subsample': 0.7208341785046763, 'colsample_bytree': 0.6830292356809667, 'min_child_samples': 16}. Best is trial 6 with value: 0.52037154940834.

Finished LightGBM Tuning.
Best Optuna R2 Score: 0.5204
Best LightGBM Parameters: {'n_estimators': 500, 'learning_rate': 0.02357235552208104, 'num_leaves': 57, 'max_depth': 14, 'subsample': 0.8791716756192286, 'colsample_bytree': 0.6704042888079431, 'min_child_samples': 34}

--- Generating OOF Predictions for Weight Tuning ---


In [19]:
# lgb_study.best_params

In [20]:
# # The generate_oof function remains valid for use with the final Optuna-tuned models
# def generate_oof(model_param, X_train, y_train, kf_splitter):
#     """Generates out-of-fold predictions for the model."""
#     oof_preds = np.zeros_like(y_train)
    
#     # Iterate through the K-Fold splits
#     for fold, (train_idx, val_idx) in enumerate(kf_splitter.split(X_train, y_train)):
#         X_tr, X_val = X_train[train_idx], X_train[val_idx]
#         y_tr, y_val = y_train[train_idx], y_train[val_idx]
        
#         # We clone the model to ensure a fresh training for each fold
#         fold_model = MultiOutputRegressor(model_param.estimator) 
#         fold_model.fit(X_tr, y_tr)
        
#         # Predict on the validation subset (the "out-of-fold" data)
#         oof_preds[val_idx] = fold_model.predict(X_val)
        
#     return oof_preds

# # 5.1 Generate OOF predictions using the best hyperparameters found above
# oof_preds_xgb = generate_oof(best_model_xgb, X_TUNING, Y_TUNING, KF)
# oof_preds_lgb = generate_oof(best_model_lgb, X_TUNING, Y_TUNING, KF)

# # 5.2 Train a Meta-Learner (Linear Regression) to find the optimal blending coefficients (weights)

# print("\n--- Training Linear Meta-Learner for Optimal Blending Weights ---")
# n_targets = Y_TUNING.shape[1]
# meta_learners = []
# optimal_weights = []

# for i in range(n_targets):
#     # Prepare OOF features: [XGB_Pred_Target_i, LGBM_Pred_Target_i]
#     X_meta_i = np.column_stack([oof_preds_xgb[:, i], oof_preds_lgb[:, i]])
#     y_meta_i = Y_TUNING[:, i]

#     # Train a meta-model for the current output target
#     meta_model = LinearRegression()
#     meta_model.fit(X_meta_i, y_meta_i)
#     meta_learners.append(meta_model)

#     # Store the weights (coefficients)
#     weights = meta_model.coef_
#     optimal_weights.append(weights)
    
#     # Print the learned weights for visibility
#     print(f"Target {i+1} Optimal Weights: XGB={weights[0]:.3f}, LGBM={weights[1]:.3f}")


# # --- 6. FINAL ENSEMBLE EVALUATION WITH OPTIMAL BLENDING ---

# print("\n--- Final Ensemble Prediction on Holdout Test Set ---")

# # 6.1 Predict on the unseen X_test set using the best base models
# preds_xgb_tuned = best_model_xgb.predict(X_test)
# preds_lgb_tuned = best_model_lgb.predict(X_test)

# # 6.2 Combine predictions using the learned Meta-Learners
# ensemble_preds_optimal = np.zeros_like(y_test)

# for i in range(n_targets):
#     # Prepare test features for the meta-model
#     X_test_meta_i = np.column_stack([preds_xgb_tuned[:, i], preds_lgb_tuned[:, i]])
    
#     # Predict using the trained meta-learner for the current output target
#     ensemble_preds_optimal[:, i] = meta_learners[i].predict(X_test_meta_i)

# # POST-PROCESSING: Biomass cannot be negative
# ensemble_preds_optimal = np.maximum(ensemble_preds_optimal, 0)

# # Calculate Scores (both R2 and MAE)
# test_r2_optimal = r2_score(y_test, ensemble_preds_optimal)
# test_mae_optimal = mean_absolute_error(y_test, ensemble_preds_optimal)

# print(f"\n✅ Final Ensemble Test R2 Score (Optimal Blending): {test_r2_optimal:.4f}")
# print(f"✅ Final Ensemble Test MAE (Optimal Blending): {test_mae_optimal:.4f}")

# # For comparison, re-calculate 50/50 simple average score
# simple_ensemble_preds = (0.5 * preds_xgb_tuned) + (0.5 * preds_lgb_tuned)
# simple_ensemble_preds = np.maximum(simple_ensemble_preds, 0)
# test_r2_simple = r2_score(y_test, simple_ensemble_preds)
# test_mae_simple = mean_absolute_error(y_test, simple_ensemble_preds)

# print(f"\n   For comparison: Simple Average (50/50) Test R2: {test_r2_simple:.4f}")
# print(f"   For comparison: Simple Average (50/50) Test MAE: {test_mae_simple:.4f}")

In [21]:
# Expose the tuned hyperparameter dicts under constant-style names.
# These are plain aliases (no copy is made), so mutating either name
# also mutates the underlying best_*_params object.
LGB_PARAMS = best_lgb_params
XGB_PARAMS = best_xgb_params


In [22]:
import numpy as np
import xgboost as xgb
import lightgbm as lgb
from sklearn.multioutput import MultiOutputRegressor
from sklearn.metrics import mean_absolute_error, r2_score
from sklearn.datasets import make_regression

# --- 0. HYPERPARAMETERS (XGB_PARAMS / LGB_PARAMS are assigned in the previous cell) ---






# --- 2. QUICK TRAINING ---
# Both base learners are wrapped in MultiOutputRegressor so that a single
# .fit() call handles every column of y_train.

# 2.1 XGBoost, configured from XGB_PARAMS
print("\n--- Training XGBoost with Tuned Parameters ---")
xgb_est = xgb.XGBRegressor(**XGB_PARAMS)
model_xgb = MultiOutputRegressor(estimator=xgb_est)
model_xgb.fit(X_train, y_train)
print("XGBoost training complete.")

# 2.2 LightGBM, configured from LGB_PARAMS
print("\n--- Training LightGBM with Tuned Parameters ---")
lgb_est = lgb.LGBMRegressor(**LGB_PARAMS)
model_lgb = MultiOutputRegressor(estimator=lgb_est)
model_lgb.fit(X_train, y_train)
print("LightGBM training complete.")

# --- 3. QUICK ENSEMBLE PREDICT & EVALUATION (50/50 Simple Average) ---

print("\n--- Calculating Ensemble Predictions on X_val (50/50 Blend) ---")
preds_xgb = model_xgb.predict(X_val)
preds_lgb = model_lgb.predict(X_val)

# Equal-weight blend of the two models, then floor the result at zero so
# the ensemble never emits a negative prediction.
ensemble_preds = np.clip(0.5 * preds_xgb + 0.5 * preds_lgb, 0, None)

# Score the clipped blend against the validation targets.
val_r2 = r2_score(y_true=y_val, y_pred=ensemble_preds)
val_mae = mean_absolute_error(y_true=y_val, y_pred=ensemble_preds)

print(f"\n✅ Ensemble Validation MAE (50/50 Blend): {val_mae:.4f}")
print(f"✅ Ensemble Validation R2 Score (50/50 Blend): {val_r2:.4f}")


--- Training XGBoost with Tuned Parameters ---
XGBoost training complete.

--- Training LightGBM with Tuned Parameters ---
LightGBM training complete.

--- Calculating Ensemble Predictions on X_val (50/50 Blend) ---

✅ Ensemble Validation MAE (50/50 Blend): 8.7428
✅ Ensemble Validation R2 Score (50/50 Blend): 0.5557


In [23]:
import joblib
import os

# Make sure the output directory exists; exist_ok keeps re-runs from failing.
os.makedirs("models", exist_ok=True)

print("Saving models...")

# Serialize each ensemble to its own pickle file: XGBoost first, then LightGBM.
# NOTE(review): the objects written here are best_model_xgb / best_model_lgb,
# not the model_xgb / model_lgb fitted in the 50/50-blend cell — confirm these
# are the artifacts that should be persisted.
for _ensemble, _target_path in (
    (best_model_xgb, "models/xgboost_ensemble.pkl"),
    (best_model_lgb, "models/lightgbm_ensemble.pkl"),
):
    joblib.dump(_ensemble, _target_path)

print("Models saved successfully in the 'models' folder!")

Saving models...
Models saved successfully in the 'models' folder!



    E.g. tree_method = "hist", device = "cuda"

