In [1]:
!pip install transformers 
!pip install protobuf==3.20.3

Collecting protobuf==3.20.3
  Downloading protobuf-3.20.3-py2.py3-none-any.whl.metadata (720 bytes)
Downloading protobuf-3.20.3-py2.py3-none-any.whl (162 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m162.1/162.1 kB[0m [31m3.2 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25hInstalling collected packages: protobuf
  Attempting uninstall: protobuf
    Found existing installation: protobuf 6.33.0
    Uninstalling protobuf-6.33.0:
      Successfully uninstalled protobuf-6.33.0
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
bigframes 2.12.0 requires google-cloud-bigquery-storage<3.0.0,>=2.30.0, which is not installed.
opentelemetry-proto 1.37.0 requires protobuf<7.0,>=5.0, but you have protobuf 3.20.3 which is incompatible.
onnx 1.18.0 requires protobuf>=4.25.1, but you have protobuf 3.20.3 which is incompatible.
a2a-sdk 0.3.10 requi

In [2]:
import pandas as pd 
import numpy as np 
import os 
import glob 
import torch
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
from torch.optim import AdamW
from torch.optim.lr_scheduler import CosineAnnealingLR
import timm # For pre-trained models
from tqdm import tqdm # For progress bars
from PIL import Image



In [3]:
import torch
from transformers import AutoImageProcessor, AutoModel

# This is the path to the dataset you added in Step 1
# The exact path may vary slightly based on which dataset you chose.
MODEL_PATH = '/kaggle/input/dinov2/pytorch/small/1' 

# Load the processor from the local files
processor = AutoImageProcessor.from_pretrained(MODEL_PATH)

# Load the model from the local files
model = AutoModel.from_pretrained(MODEL_PATH)

model.eval()
print("DINOv2 model loaded successfully from local files.")

2025-11-19 05:24:57.889428: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1763529898.090604      48 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1763529898.154304      48 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.52, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.


DINOv2 model loaded successfully from local files.


In [4]:
# --- Setup ---
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")



Using device: cuda


In [5]:
BASE_PATH='/kaggle/input/csiro-biomass'
train_meta=pd.read_csv(os.path.join(BASE_PATH,'train.csv'))

In [6]:
# 1. Pivot the target variables
# (Pivoting on 'image_id' is a bit cleaner than the full path)
targets_df = train_meta.pivot(index='image_path', 
                              columns='target_name', 
                              values='target')

In [7]:
# 2. Get the unique metadata for each image
# (State, Species, etc. are repeated, so we just grab the first instance)
meta_df = train_meta[[  'image_path', 'Sampling_Date', 'State', 'Species',
       'Pre_GSHH_NDVI', 'Height_Ave_cm']] \
                      .drop_duplicates(subset='image_path') \
                      .set_index('image_path')





In [8]:
# 3. Join them together
# This creates one clean row per image with all data
train_df = meta_df.join(targets_df).reset_index()

In [9]:
# 4. Define our 5 output targets
TARGET_COLS = list(train_df.columns[-5:])


# Weights: [Dry_Total_g, Dry_Green_g, Dry_Dead_g, Dry_Clover_g, Dry_Grass_g]
COMP_WEIGHTS = torch.tensor([0.5, 0.2, 0.1, 0.1,0.1]).to(device)

# Now train_df is ready!
print(train_df.head())

               image_path Sampling_Date State            Species  \
0  train/ID1011485656.jpg      2015/9/4   Tas    Ryegrass_Clover   
1  train/ID1012260530.jpg      2015/4/1   NSW            Lucerne   
2  train/ID1025234388.jpg      2015/9/1    WA  SubcloverDalkeith   
3  train/ID1028611175.jpg     2015/5/18   Tas           Ryegrass   
4  train/ID1035947949.jpg     2015/9/11   Tas           Ryegrass   

   Pre_GSHH_NDVI  Height_Ave_cm  Dry_Clover_g  Dry_Dead_g  Dry_Green_g  \
0           0.62         4.6667        0.0000     31.9984      16.2751   
1           0.55        16.0000        0.0000      0.0000       7.6000   
2           0.38         1.0000        6.0500      0.0000       0.0000   
3           0.66         5.0000        0.0000     30.9703      24.2376   
4           0.54         3.5000        0.4343     23.2239      10.5261   

   Dry_Total_g    GDM_g  
0      48.2735  16.2750  
1       7.6000   7.6000  
2       6.0500   6.0500  
3      55.2079  24.2376  
4      34.1844  

In [10]:
train_df_sample = train_df.iloc[:round(len(train_df)*0.8)]
val_df_sample = train_df.iloc[round(len(train_df)*0.8):]

In [11]:
val_df_sample.shape

(71, 11)

In [12]:
# First, you might need to install it:

import albumentations as A
from albumentations.pytorch import ToTensorV2
import cv2 # Albumentations uses OpenCV

# --- 1. Define your transforms ---
# These are applied ONLY to the training data
train_transforms = A.Compose([
    A.HorizontalFlip(p=0.5),     # 50% chance to flip horizontally
    A.VerticalFlip(p=0.5),       # 50% chance to flip vertically
    A.RandomRotate90(p=0.5),     # 50% chance to rotate 90 degrees
    
    # Color/Distortion
    A.GaussNoise(p=0.2),
    A.CoarseDropout(max_holes=8, max_height=32, max_width=32, p=0.5),
    
    # Normalize (use mean/std of ImageNet)
    
    # Convert to PyTorch Tensor
    ToTensorV2()
])

# These are for validation. We ONLY resize and normalize. No flipping/distortion.
val_transforms = A.Compose([
     A.HorizontalFlip(p=0.5),     # 50% chance to flip horizontally
    A.VerticalFlip(p=0.5),       # 50% chance to flip vertically
    A.RandomRotate90(p=0.5),     # 50% chance to rotate 90 degrees
    
    # Color/Distortion
    A.GaussNoise(p=0.2),
    A.CoarseDropout(max_holes=8, max_height=32, max_width=32, p=0.5),
    
    ToTensorV2()
])


# --- 2. Update the Custom Dataset Class ---
class BiomassDataset(Dataset):
    def __init__(self, df, target_cols, base_path, processor,model,transforms=None,):
        self.df = df
        self.image_paths = [os.path.join(base_path, p) for p in df['image_path']]
        self.labels = df[target_cols].values.astype(np.float32)
        self.transforms = transforms # <-- NEW
        
        self.model=model
        self.processor=processor 
    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, idx):
        image_path = self.image_paths[idx]
        feature_list=[]
        # Load image with OpenCV (used by albumentations)
        # cv2 loads as BGR, so we convert to RGB
        image = cv2.imread(image_path)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        
        labels = self.labels[idx]

        image_features=self.processor(image,return_tensors='pt')

        with torch.no_grad():
            model_out=self.model(**image_features)
            feature_list.append(model_out.pooler_output.cpu())
        
        # Apply transforms (if they exist)
        if self.transforms:
            # Albumentations returns a dictionary
            transformed = self.transforms(image=image)
            image = transformed['image']
        
        # Labels are already a numpy array, just convert to tensor
        labels = torch.tensor(labels)

        return image, labels,np.array(feature_list)


  A.CoarseDropout(max_holes=8, max_height=32, max_width=32, p=0.5),


In [13]:

# --- 3. Create Datasets with new transforms ---
train_dataset = BiomassDataset(df=train_df_sample, 
                               target_cols=TARGET_COLS, 
                               base_path=BASE_PATH,
                               transforms=train_transforms,processor=processor,model=model) # <-- Pass transforms

val_dataset = BiomassDataset(df=val_df_sample, 
                             target_cols=TARGET_COLS, 
                             base_path=BASE_PATH,
                             transforms=train_transforms,processor=processor,model=model) # <-- Pass transforms

# DataLoaders are created the same way
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True, num_workers=2)
val_loader = DataLoader(val_dataset, batch_size=64, shuffle=False, num_workers=2)



In [14]:
# Test it
images, labels,encodings = next(iter(train_loader))

print(f"Image batch shape: {images.shape}") # Should be [32, 3, 256, 256]
print(f"Labels batch shape: {labels.shape}") # Should be [32, 5]
print(f"Feature batch shape: {encodings.shape}")
# Image batch shape: torch.Size([32, 1000, 2000, 3])
# Labels batch shape: torch.Size([32, 5])
# Feature batch shape: torch.Size([32, 1, 1, 384])

Image batch shape: torch.Size([32, 1000, 2000, 3])
Labels batch shape: torch.Size([32, 5])
Feature batch shape: torch.Size([32, 1, 1, 384])


In [15]:
import numpy as np
import torch
from tqdm import tqdm

# Containers for the full dataset
all_features = []
all_labels = []

print("Extracting features from existing pipeline...")

for images, labels, encodings in tqdm(train_loader):
    
 
    # .view(batch_size, -1) flattens everything after the batch dimension
    batch_features = encodings.view(encodings.shape[0], -1).cpu().numpy()
    
    # 2. Convert labels to numpy
    batch_labels = labels.cpu().numpy()
    
    # 3. Collect
    all_features.append(batch_features)
    all_labels.append(batch_labels)

# Concatenate all batches into single numpy arrays
X_train = np.vstack(all_features)
y_train = np.vstack(all_labels)


Extracting features from existing pipeline...


100%|██████████| 9/9 [00:36<00:00,  4.05s/it]


In [16]:
# Containers for the full dataset
all_features_val = []
all_labels_val = []
for images, labels, encodings in tqdm(val_loader):
    
 
    # .view(batch_size, -1) flattens everything after the batch dimension
    batch_features_val = encodings.view(encodings.shape[0], -1).cpu().numpy()
    
    # 2. Convert labels to numpy
    batch_labels_val = labels.cpu().numpy()
    
    # 3. Collect
    all_features_val.append(batch_features_val)
    all_labels_val.append(batch_labels_val)

# Concatenate all batches into single numpy arrays
X_val = np.vstack(all_features_val)
y_val = np.vstack(all_labels_val)


100%|██████████| 2/2 [00:14<00:00,  7.30s/it]


In [18]:

print(f"Final Feature Matrix X: {X_val.shape}") # Should be (Total_Images, 384)
print(f"Final Label Matrix y:   {y_val.shape}") # Should be (Total_Images, 5)

print(f"Final Feature Matrix X: {X_train.shape}") # Should be (Total_Images, 384)
print(f"Final Label Matrix y:   {y_train.shape}") # Should be (Total_Images, 5)
# Final Feature Matrix X: (71, 384)
# Final Label Matrix y:   (71, 5)
# Final Feature Matrix X: (286, 384)
# Final Label Matrix y:   (286, 5)

Final Feature Matrix X: (71, 384)
Final Label Matrix y:   (71, 5)
Final Feature Matrix X: (286, 384)
Final Label Matrix y:   (286, 5)


In [24]:
import numpy as np
import pandas as pd
import xgboost as xgb
import lightgbm as lgb
from sklearn.multioutput import MultiOutputRegressor
from sklearn.model_selection import train_test_split, KFold
# Import R-squared and MAE for final evaluation
from sklearn.metrics import mean_absolute_error, r2_score
from sklearn.datasets import make_regression
from sklearn.linear_model import LinearRegression 
import optuna # NEW: Import Optuna


# --- 1. DATA SETUP: ---

# GENERATE SYNTHETIC DATA 


# 1.1. CONCATENATE: Merge the train and validation sets into one full dataset (X_full)
X_full = np.concatenate([X_train, X_val], axis=0)
y_full = np.concatenate([y_train, y_val], axis=0)

print(f"Combined X_full shape: {X_full.shape}") 
print(f"Combined y_full shape: {y_full.shape}")
print(f"Number of Target Variables: {y_full.shape[1]}")

# 1.2. SPLIT: Reserve the final, unseen TEST set (20% of the total data)
X_cv_tuning, X_test, y_cv_tuning, y_test = train_test_split(
    X_full, y_full, test_size=0.2, random_state=42
)

print(f"\nFinal CV Tuning Samples (Used for K-Fold): {X_cv_tuning.shape[0]}")
print(f"Final Test Samples (Holdout for Evaluation): {X_test.shape[0]}")


# --- 2. SETUP AND UTILITIES ---

# K-Fold strategy for cross-validation (n_splits=3 as requested)
KF = KFold(n_splits=3, shuffle=True, random_state=42)

# Global variables for tuning data
X_TUNING = X_cv_tuning
Y_TUNING = y_cv_tuning


# --- 3. OPTUNA OBJECTIVE FUNCTIONS (New for Optuna Tuning) ---
# We define objective functions to be maximized (R2 score).

def objective_xgb(trial: optuna.Trial):
    """Objective function for XGBoost tuning using K-Fold CV."""
    
    # 3.1 Define the search space using Optuna's suggestion methods
    param = {
        'n_estimators': trial.suggest_int('n_estimators', 500, 2000, step=500),
        'learning_rate': trial.suggest_loguniform('learning_rate', 1e-3, 0.1),
        'max_depth': trial.suggest_int('max_depth', 4, 10),
        'subsample': trial.suggest_float('subsample', 0.6, 0.95),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.6, 0.95),
        'reg_alpha': trial.suggest_loguniform('reg_alpha', 1e-4, 10.0),
        'random_state': 42,
        'n_jobs': -1,
        # GPU setting for P100
        'tree_method': 'gpu_hist', 
    }
    
    # 3.2 Perform K-Fold Cross-Validation
    r2_scores = []
    
    for train_idx, val_idx in KF.split(X_TUNING, Y_TUNING):
        X_tr, X_val = X_TUNING[train_idx], X_TUNING[val_idx]
        y_tr, y_val = Y_TUNING[train_idx], Y_TUNING[val_idx]

        model = MultiOutputRegressor(xgb.XGBRegressor(**param))
        model.fit(X_tr, y_tr)
        
        preds = model.predict(X_val)
        
        # Calculate the R2 score for this fold (across all targets)
        r2 = r2_score(y_val, preds)
        r2_scores.append(r2)
        
    # Optuna maximizes the average score
    return np.mean(r2_scores)


def objective_lgb(trial: optuna.Trial):
    """Objective function for LightGBM tuning using K-Fold CV."""
    
    # 3.1 Define the search space
    param = {
        'n_estimators': trial.suggest_int('n_estimators', 500, 2000, step=500),
        'learning_rate': trial.suggest_loguniform('learning_rate', 1e-3, 0.1),
        'num_leaves': trial.suggest_int('num_leaves', 20, 100),
        'max_depth': trial.suggest_int('max_depth', 5, 15),
        'subsample': trial.suggest_float('subsample', 0.6, 0.95),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.6, 0.95),
        'min_child_samples': trial.suggest_int('min_child_samples', 10, 60),
        'random_state': 42,
        'n_jobs': -1,
        'verbose': -1,
        # GPU setting for P100
        'device': 'gpu', 
    }
    
    # 3.2 Perform K-Fold Cross-Validation
    r2_scores = []
    
    for train_idx, val_idx in KF.split(X_TUNING, Y_TUNING):
        X_tr, X_val = X_TUNING[train_idx], X_TUNING[val_idx]
        y_tr, y_val = Y_TUNING[train_idx], Y_TUNING[val_idx]

        model = MultiOutputRegressor(lgb.LGBMRegressor(**param))
        model.fit(X_tr, y_tr)
        
        preds = model.predict(X_val)
        
        # Calculate the R2 score for this fold (across all targets)
        r2 = r2_score(y_val, preds)
        r2_scores.append(r2)
        
    # Optuna maximizes the average score
    return np.mean(r2_scores)


# --- 4. OPTUNA EXECUTION ---
print("\n--- Starting XGBoost Hyperparameter Tuning with Optuna ---")
# 
xgb_study = optuna.create_study(direction="maximize", study_name="XGBoost_R2_Tuning")
xgb_study.optimize(objective_xgb, n_trials=10, show_progress_bar=True) # Increased trials for better search

print("\nFinished XGBoost Tuning.")
print(f"Best Optuna R2 Score: {xgb_study.best_value:.4f}")
print(f"Best XGBoost Parameters: {xgb_study.best_params}")

# Train the final XGBoost model using the best parameters found
best_xgb_params = xgb_study.best_params
best_model_xgb = MultiOutputRegressor(xgb.XGBRegressor(
    **best_xgb_params,
    random_state=42,
    n_jobs=-1,
    tree_method='gpu_hist' # Ensure GPU is used in the final model
))
best_model_xgb.fit(X_TUNING, Y_TUNING)


print("\n--- Starting LightGBM Hyperparameter Tuning with Optuna ---")
lgb_study = optuna.create_study(direction="maximize", study_name="LGBM_R2_Tuning")
lgb_study.optimize(objective_lgb, n_trials=10, show_progress_bar=True)

print("\nFinished LightGBM Tuning.")
print(f"Best Optuna R2 Score: {lgb_study.best_value:.4f}")
print(f"Best LightGBM Parameters: {lgb_study.best_params}")

# Train the final LightGBM model using the best parameters found
best_lgb_params = lgb_study.best_params
best_model_lgb = MultiOutputRegressor(lgb.LGBMRegressor(
    **best_lgb_params,
    random_state=42,
    n_jobs=-1,
    verbose=-1,
    device='gpu' # Ensure GPU is used in the final model
))
best_model_lgb.fit(X_TUNING, Y_TUNING)


# --- 5. WEIGHT TUNING VIA OUT-OF-FOLD (OOF) PREDICTIONS ---
# The process for OOF generation and meta-learner training remains the same.

print("\n--- Generating OOF Predictions for Weight Tuning ---")



[I 2025-11-19 05:51:39,133] A new study created in memory with name: XGBoost_R2_Tuning


Combined X_full shape: (357, 384)
Combined y_full shape: (357, 5)
Number of Target Variables: 5

Final CV Tuning Samples (Used for K-Fold): 285
Final Test Samples (Holdout for Evaluation): 72

--- Starting XGBoost Hyperparameter Tuning with Optuna ---


  0%|          | 0/10 [00:00<?, ?it/s]

  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-3, 0.1),
  'reg_alpha': trial.suggest_loguniform('reg_alpha', 1e-4, 10.0),

    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "his

[I 2025-11-19 05:52:23,784] Trial 0 finished with value: 0.5178951482997624 and parameters: {'n_estimators': 1500, 'learning_rate': 0.0042614877333521295, 'max_depth': 6, 'subsample': 0.7144731741329484, 'colsample_bytree': 0.6306763902213242, 'reg_alpha': 0.008598098195090133}. Best is trial 0 with value: 0.5178951482997624.



    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"

  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-3, 0.1),
  'reg_alpha': trial.suggest_loguniform('reg_alpha', 1e-4, 10.0),

    E.g. tree_method = "his

[I 2025-11-19 05:52:46,573] Trial 1 finished with value: 0.5070766875619747 and parameters: {'n_estimators': 1500, 'learning_rate': 0.05419687608736156, 'max_depth': 6, 'subsample': 0.7007786271957113, 'colsample_bytree': 0.8216073038275788, 'reg_alpha': 7.872526227196881}. Best is trial 0 with value: 0.5178951482997624.



    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"

  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-3, 0.1),
  'reg_alpha': trial.suggest_loguniform('reg_alpha', 1e-4, 10.0),

    E.g. tree_method = "his

[I 2025-11-19 05:53:13,143] Trial 2 finished with value: 0.45605667175509707 and parameters: {'n_estimators': 500, 'learning_rate': 0.003498715936258222, 'max_depth': 10, 'subsample': 0.7933280306511601, 'colsample_bytree': 0.6094114170298421, 'reg_alpha': 0.0011813548011895382}. Best is trial 0 with value: 0.5178951482997624.



    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"

  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-3, 0.1),
  'reg_alpha': trial.suggest_loguniform('reg_alpha', 1e-4, 10.0),

    E.g. tree_method = "his

[I 2025-11-19 05:53:59,034] Trial 3 finished with value: 0.48145390195229304 and parameters: {'n_estimators': 1500, 'learning_rate': 0.031578084032634235, 'max_depth': 9, 'subsample': 0.6506728976588051, 'colsample_bytree': 0.7863706336119097, 'reg_alpha': 0.0035381310287971967}. Best is trial 0 with value: 0.5178951482997624.



    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"



[I 2025-11-19 05:55:11,308] Trial 4 finished with value: 0.4541461937598184 and parameters: {'n_estimators': 1500, 'learning_rate': 0.0012678215515453779, 'max_depth': 8, 'subsample': 0.9065074459311449, 'colsample_bytree': 0.6900514872792954, 'reg_alpha': 0.8894169612054732}. Best is trial 0 with value: 0.5178951482997624.


  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-3, 0.1),
  'reg_alpha': trial.suggest_loguniform('reg_alpha', 1e-4, 10.0),

    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "his

[I 2025-11-19 05:56:08,769] Trial 5 finished with value: 0.49199147296192525 and parameters: {'n_estimators': 1500, 'learning_rate': 0.01409717245126502, 'max_depth': 8, 'subsample': 0.62318570992849, 'colsample_bytree': 0.9077609674603228, 'reg_alpha': 0.00025283601985323883}. Best is trial 0 with value: 0.5178951482997624.


  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-3, 0.1),
  'reg_alpha': trial.suggest_loguniform('reg_alpha', 1e-4, 10.0),

    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "his

[I 2025-11-19 05:56:26,791] Trial 6 finished with value: 0.49608968891326227 and parameters: {'n_estimators': 1000, 'learning_rate': 0.07273053045313024, 'max_depth': 6, 'subsample': 0.7832775387925266, 'colsample_bytree': 0.9338287262222742, 'reg_alpha': 0.014822330592685565}. Best is trial 0 with value: 0.5178951482997624.



    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"

  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-3, 0.1),
  'reg_alpha': trial.suggest_loguniform('reg_alpha', 1e-4, 10.0),

    E.g. tree_method = "his

[I 2025-11-19 05:56:56,999] Trial 7 finished with value: 0.5108005084475166 and parameters: {'n_estimators': 1000, 'learning_rate': 0.007077538144414412, 'max_depth': 6, 'subsample': 0.6328550269183458, 'colsample_bytree': 0.8682351727008533, 'reg_alpha': 0.006352072338308333}. Best is trial 0 with value: 0.5178951482997624.



    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"

  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-3, 0.1),
  'reg_alpha': trial.suggest_loguniform('reg_alpha', 1e-4, 10.0),

    E.g. tree_method = "his

[I 2025-11-19 05:57:33,717] Trial 8 finished with value: 0.5033411580274536 and parameters: {'n_estimators': 1000, 'learning_rate': 0.027377317591330916, 'max_depth': 8, 'subsample': 0.6167554184829165, 'colsample_bytree': 0.7021584927551735, 'reg_alpha': 0.004184284321820539}. Best is trial 0 with value: 0.5178951482997624.



    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"



[I 2025-11-19 05:58:25,060] Trial 9 finished with value: 0.5039019327580735 and parameters: {'n_estimators': 2000, 'learning_rate': 0.0014596443402862067, 'max_depth': 5, 'subsample': 0.6246635700025478, 'colsample_bytree': 0.7982125409922242, 'reg_alpha': 0.031360298024176905}. Best is trial 0 with value: 0.5178951482997624.

Finished XGBoost Tuning.
Best Optuna R2 Score: 0.5179
Best XGBoost Parameters: {'n_estimators': 1500, 'learning_rate': 0.0042614877333521295, 'max_depth': 6, 'subsample': 0.7144731741329484, 'colsample_bytree': 0.6306763902213242, 'reg_alpha': 0.008598098195090133}



    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"

[I 2025-11-19 05:58:43,092] A new study created in memory with name: LGBM_R2_Tuning



--- Starting LightGBM Hyperparameter Tuning with Optuna ---


  0%|          | 0/10 [00:00<?, ?it/s]

  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-3, 0.1),


[I 2025-11-19 05:59:04,338] Trial 0 finished with value: 0.46126151575191116 and parameters: {'n_estimators': 1500, 'learning_rate': 0.005189609797830659, 'num_leaves': 62, 'max_depth': 13, 'subsample': 0.7573417830396347, 'colsample_bytree': 0.9243101806003199, 'min_child_samples': 55}. Best is trial 0 with value: 0.46126151575191116.


  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-3, 0.1),


[I 2025-11-19 05:59:24,325] Trial 1 finished with value: 0.4549968027308953 and parameters: {'n_estimators': 1500, 'learning_rate': 0.030667665869664563, 'num_leaves': 34, 'max_depth': 14, 'subsample': 0.7881254145621314, 'colsample_bytree': 0.7408931209749557, 'min_child_samples': 42}. Best is trial 0 with value: 0.46126151575191116.


  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-3, 0.1),


[I 2025-11-19 05:59:47,332] Trial 2 finished with value: 0.4614238100003023 and parameters: {'n_estimators': 1500, 'learning_rate': 0.050087519409197215, 'num_leaves': 32, 'max_depth': 9, 'subsample': 0.8062705375659429, 'colsample_bytree': 0.8902096062711022, 'min_child_samples': 40}. Best is trial 2 with value: 0.4614238100003023.


  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-3, 0.1),


[I 2025-11-19 05:59:53,652] Trial 3 finished with value: 0.43714051622900946 and parameters: {'n_estimators': 500, 'learning_rate': 0.009184575391376992, 'num_leaves': 37, 'max_depth': 13, 'subsample': 0.7521875182542062, 'colsample_bytree': 0.7221568146328383, 'min_child_samples': 58}. Best is trial 2 with value: 0.4614238100003023.


  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-3, 0.1),


[I 2025-11-19 06:00:34,942] Trial 4 finished with value: 0.4535622101953302 and parameters: {'n_estimators': 2000, 'learning_rate': 0.03256732914073932, 'num_leaves': 92, 'max_depth': 9, 'subsample': 0.7093462328573975, 'colsample_bytree': 0.8168299289418148, 'min_child_samples': 30}. Best is trial 2 with value: 0.4614238100003023.


  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-3, 0.1),


[I 2025-11-19 06:00:53,972] Trial 5 finished with value: 0.4577849664185112 and parameters: {'n_estimators': 1000, 'learning_rate': 0.003825889868105876, 'num_leaves': 83, 'max_depth': 14, 'subsample': 0.8255093294463499, 'colsample_bytree': 0.7850207428685527, 'min_child_samples': 31}. Best is trial 2 with value: 0.4614238100003023.


  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-3, 0.1),


[I 2025-11-19 06:01:10,250] Trial 6 finished with value: 0.443484697775195 and parameters: {'n_estimators': 1500, 'learning_rate': 0.06590594950616549, 'num_leaves': 33, 'max_depth': 12, 'subsample': 0.9189206300065661, 'colsample_bytree': 0.8062935578296611, 'min_child_samples': 51}. Best is trial 2 with value: 0.4614238100003023.


  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-3, 0.1),


[I 2025-11-19 06:02:30,429] Trial 7 finished with value: 0.4728692712448508 and parameters: {'n_estimators': 2000, 'learning_rate': 0.00576235388506513, 'num_leaves': 29, 'max_depth': 8, 'subsample': 0.7489070946327819, 'colsample_bytree': 0.6841693879601901, 'min_child_samples': 12}. Best is trial 7 with value: 0.4728692712448508.


  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-3, 0.1),


[I 2025-11-19 06:02:56,657] Trial 8 finished with value: 0.45884657206192125 and parameters: {'n_estimators': 2000, 'learning_rate': 0.019304126980988306, 'num_leaves': 39, 'max_depth': 6, 'subsample': 0.866416996939983, 'colsample_bytree': 0.7530514212674109, 'min_child_samples': 42}. Best is trial 7 with value: 0.4728692712448508.


  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-3, 0.1),


[I 2025-11-19 06:03:31,260] Trial 9 finished with value: 0.4629460942397725 and parameters: {'n_estimators': 2000, 'learning_rate': 0.032477709987925145, 'num_leaves': 79, 'max_depth': 14, 'subsample': 0.7353455843460054, 'colsample_bytree': 0.8074606237414603, 'min_child_samples': 35}. Best is trial 7 with value: 0.4728692712448508.

Finished LightGBM Tuning.
Best Optuna R2 Score: 0.4729
Best LightGBM Parameters: {'n_estimators': 2000, 'learning_rate': 0.00576235388506513, 'num_leaves': 29, 'max_depth': 8, 'subsample': 0.7489070946327819, 'colsample_bytree': 0.6841693879601901, 'min_child_samples': 12}

--- Generating OOF Predictions for Weight Tuning ---


AttributeError: 'MultiOutputRegressor' object has no attribute 'estimator_'

In [27]:
xgb_study.best_params

{'n_estimators': 1500,
 'learning_rate': 0.0042614877333521295,
 'max_depth': 6,
 'subsample': 0.7144731741329484,
 'colsample_bytree': 0.6306763902213242,
 'reg_alpha': 0.008598098195090133}

In [35]:
# The generate_oof function remains valid for use with the final Optuna-tuned models
def generate_oof(model_param, X_train, y_train, kf_splitter):
    """Generates out-of-fold predictions for the model."""
    oof_preds = np.zeros_like(y_train)
    
    # Iterate through the K-Fold splits
    for fold, (train_idx, val_idx) in enumerate(kf_splitter.split(X_train, y_train)):
        X_tr, X_val = X_train[train_idx], X_train[val_idx]
        y_tr, y_val = y_train[train_idx], y_train[val_idx]
        
        # We clone the model to ensure a fresh training for each fold
        fold_model = MultiOutputRegressor(model_param.estimator) 
        fold_model.fit(X_tr, y_tr)
        
        # Predict on the validation subset (the "out-of-fold" data)
        oof_preds[val_idx] = fold_model.predict(X_val)
        
    return oof_preds

# 5.1 Generate OOF predictions using the best hyperparameters found above
oof_preds_xgb = generate_oof(best_model_xgb, X_TUNING, Y_TUNING, KF)
oof_preds_lgb = generate_oof(best_model_lgb, X_TUNING, Y_TUNING, KF)

# 5.2 Train a Meta-Learner (Linear Regression) to find the optimal blending coefficients (weights)

print("\n--- Training Linear Meta-Learner for Optimal Blending Weights ---")
n_targets = Y_TUNING.shape[1]
meta_learners = []
optimal_weights = []

for i in range(n_targets):
    # Prepare OOF features: [XGB_Pred_Target_i, LGBM_Pred_Target_i]
    X_meta_i = np.column_stack([oof_preds_xgb[:, i], oof_preds_lgb[:, i]])
    y_meta_i = Y_TUNING[:, i]

    # Train a meta-model for the current output target
    meta_model = LinearRegression()
    meta_model.fit(X_meta_i, y_meta_i)
    meta_learners.append(meta_model)

    # Store the weights (coefficients)
    weights = meta_model.coef_
    optimal_weights.append(weights)
    
    # Print the learned weights for visibility
    print(f"Target {i+1} Optimal Weights: XGB={weights[0]:.3f}, LGBM={weights[1]:.3f}")


# --- 6. FINAL ENSEMBLE EVALUATION WITH OPTIMAL BLENDING ---

print("\n--- Final Ensemble Prediction on Holdout Test Set ---")

# 6.1 Predict on the unseen X_test set using the best base models
preds_xgb_tuned = best_model_xgb.predict(X_test)
preds_lgb_tuned = best_model_lgb.predict(X_test)

# 6.2 Combine predictions using the learned Meta-Learners
ensemble_preds_optimal = np.zeros_like(y_test)

for i in range(n_targets):
    # Prepare test features for the meta-model
    X_test_meta_i = np.column_stack([preds_xgb_tuned[:, i], preds_lgb_tuned[:, i]])
    
    # Predict using the trained meta-learner for the current output target
    ensemble_preds_optimal[:, i] = meta_learners[i].predict(X_test_meta_i)

# POST-PROCESSING: Biomass cannot be negative
ensemble_preds_optimal = np.maximum(ensemble_preds_optimal, 0)

# Calculate Scores (both R2 and MAE)
test_r2_optimal = r2_score(y_test, ensemble_preds_optimal)
test_mae_optimal = mean_absolute_error(y_test, ensemble_preds_optimal)

print(f"\n✅ Final Ensemble Test R2 Score (Optimal Blending): {test_r2_optimal:.4f}")
print(f"✅ Final Ensemble Test MAE (Optimal Blending): {test_mae_optimal:.4f}")

# For comparison, re-calculate 50/50 simple average score
simple_ensemble_preds = (0.5 * preds_xgb_tuned) + (0.5 * preds_lgb_tuned)
simple_ensemble_preds = np.maximum(simple_ensemble_preds, 0)
test_r2_simple = r2_score(y_test, simple_ensemble_preds)
test_mae_simple = mean_absolute_error(y_test, simple_ensemble_preds)

print(f"\n   For comparison: Simple Average (50/50) Test R2: {test_r2_simple:.4f}")
print(f"   For comparison: Simple Average (50/50) Test MAE: {test_mae_simple:.4f}")


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"




--- Training Linear Meta-Learner for Optimal Blending Weights ---
Target 1 Optimal Weights: XGB=1.267, LGBM=-0.217
Target 2 Optimal Weights: XGB=0.686, LGBM=0.262
Target 3 Optimal Weights: XGB=0.985, LGBM=0.211
Target 4 Optimal Weights: XGB=1.137, LGBM=0.003
Target 5 Optimal Weights: XGB=0.822, LGBM=0.372

--- Final Ensemble Prediction on Holdout Test Set ---

✅ Final Ensemble Test R2 Score (Optimal Blending): 0.5837
✅ Final Ensemble Test MAE (Optimal Blending): 8.1719

   For comparison: Simple Average (50/50) Test R2: 0.5676
   For comparison: Simple Average (50/50) Test MAE: 8.2994



    E.g. tree_method = "hist", device = "cuda"



In [37]:
best_model_xgb

In [38]:
import joblib
import os

# Create a folder for models if it doesn't exist
os.makedirs("models", exist_ok=True)

print("Saving models...")

# 1. Save the XGBoost Ensemble
joblib.dump(best_model_xgb, "models/xgboost_ensemble.pkl")

# 2. Save the LightGBM Ensemble
joblib.dump(best_model_lgb, "models/lightgbm_ensemble.pkl")

print("Models saved successfully in the 'models' folder!")

Saving models...
Models saved successfully in the 'models' folder!
