# 1. Introduction

This notebook shows the code that has been developed to create, configure, train, and cross-validate the deep learning model for the Kaggle competition titled [Beyond Visible Spectrum: AI for Agriculture 2025](https://www.kaggle.com/competitions/beyond-visible-spectrum-ai-for-agriculture-2025/leaderboard).

# 2. Importing Libraries

In [None]:
import os
import torch
import torch.backends.cudnn as cudnn
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import torch.nn.functional as F
import glob
from sklearn.model_selection import train_test_split
from torchvision.transforms import v2
from torchinfo import summary
from torch.optim.lr_scheduler import CosineAnnealingLR, CosineAnnealingWarmRestarts
from pathlib import Path
from tqdm.auto import tqdm
from sklearn.utils import resample

# Import custom libraries
from utils.classification_utils import set_seeds
from engines.regression import RegressionEngine
from models.vision_transformer import SpectralViT, ViT
from engines.schedulers import FixedLRSchedulerWrapper
from engines.common import Common
from dataloaders.hyperspectral_dataloaders import create_dataloaders, MeanStdNormalize, HyperspectralAugmentation

# Dataset
from datasets import load_dataset

import warnings
# Set the TORCH_USE_CUDA_DSA environment variable for this process.
# NOTE(review): this is assigned after `import torch`; confirm the flag is still honored at this point.
os.environ['TORCH_USE_CUDA_DSA'] = "1"
# Silence UserWarning messages attributed to the torch.autograd.graph module and
# FutureWarning messages attributed to the onnxscript.converter module.
warnings.filterwarnings("ignore", category=UserWarning, module="torch.autograd.graph")
warnings.filterwarnings("ignore", category=FutureWarning, module="onnxscript.converter")

# 3. Importing Dataset

In [None]:
# Define target data directory (plain string: no interpolation is needed)
TARGET_DIR_NAME = "data/ai_agriculture"

# Setup training, validation and test directories.
# All three point at the same folder; the subsets are told apart by the
# label CSV files defined right below.
TARGET_DIR = Path(TARGET_DIR_NAME)
TRAIN_DIR = TARGET_DIR / "ot" / "ot"
VAL_DIR = TARGET_DIR / "ot" / "ot"
TEST_DIR = TARGET_DIR / "ot" / "ot"

# Label files: original labels, the train/validation split files and the test ids
ORIG_LABELS = TARGET_DIR / "train2.csv"
TRAIN_LABELS = TARGET_DIR / "train_split.csv"
VAL_LABELS = TARGET_DIR / "val_split.csv"
TEST_LABELS = TARGET_DIR / "test.csv"
TARGET_DIR.mkdir(parents=True, exist_ok=True)

# Create target model directory (checkpoints are written here)
MODEL_DIR = Path("outputs")
MODEL_DIR.mkdir(parents=True, exist_ok=True)

# Set seeds
SEED = 42
set_seeds(SEED)

In [None]:
def resample_data(df, target_count):
    """Oversample under-represented labels up to ``target_count`` rows each.

    Args:
        df: DataFrame containing a 'label' column.
        target_count: Number of rows every label should have at least.

    Returns:
        A shuffled DataFrame with a fresh 0..n-1 index. Labels with fewer than
        ``target_count`` rows are drawn with replacement until they reach it;
        labels that already have enough rows are kept unchanged.
    """

    def _balance(label):
        # Rows of a single label, oversampled only when the label is too rare
        rows = df[df['label'] == label]
        if len(rows) >= target_count:
            return rows
        return resample(rows, replace=True, n_samples=target_count, random_state=42)

    # Visit labels in sorted order so the concatenation order is deterministic
    ordered_labels = sorted(df['label'].value_counts().index)
    balanced_parts = [_balance(label) for label in ordered_labels]

    # Merge, shuffle (fixed seed) and rebuild the index
    merged = pd.concat(balanced_parts)
    shuffled = merged.sample(frac=1, random_state=42)
    return shuffled.reset_index(drop=True)


In [None]:
# Define validation size as a fraction of the total number of samples
VAL_SIZE = 0.2

# Load the original labels (the CSV file referenced by ORIG_LABELS)
df = pd.read_csv(ORIG_LABELS)
#df = resample_data(df, 25)

# Stratified split on the 'label' column, seeded with the notebook-wide SEED.
# NOTE(review): train_df / val_df are not written to TRAIN_LABELS / VAL_LABELS in this
# notebook, although the dataloaders read those files - confirm they are produced elsewhere.
train_df, val_df = train_test_split(df, test_size=VAL_SIZE, random_state=SEED, stratify=df['label'])

In [None]:
# Plot training data distribution (explicit figure/axes interface)
fig, ax = plt.subplots()
sns.histplot(train_df['label'], bins=101, kde=False, ax=ax)
ax.set_title('Histogram of train_df["label"]')
ax.set_xlabel('Label')
ax.set_ylabel('Frequency')
plt.show()

# 4. Specifying the Target Device

In [None]:
# Activate cuda benchmark
# NOTE(review): cudnn.benchmark presumably trades run-to-run determinism for speed -
# confirm this is acceptable alongside set_seeds().
cudnn.benchmark = True

# Set device: use the GPU when CUDA is available, otherwise fall back to the CPU
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Device: {device}")

# Show the GPU status through the IPython shell escape (notebook-only syntax)
if device == "cuda":
    !nvidia-smi

# 5. Preparing Dataloaders and Preprocessing

In [None]:
def compute_dataset_stats(dataloader):
    """Compute the global min, max, mean and standard deviation of a dataloader.

    Every value of every batch is pooled together (no per-channel statistics).

    Args:
        dataloader: Iterable yielding ``(images, target)`` pairs, where ``images``
            is a tensor.

    Returns:
        dict with float entries 'min', 'max', 'mean' and 'std' (population
        standard deviation).

    Raises:
        ValueError: If the dataloader yields no values.
    """
    min_val = float('inf')
    max_val = float('-inf')
    sum_vals = 0.0
    sum_squared = 0.0
    n_pixels = 0

    for (images, _) in tqdm(dataloader, desc="Computing stats"):
        # Reduce in float64: summing tens of millions of (squared) raw values in
        # float32 loses precision, and integer inputs could overflow when squared.
        # Reducing over the whole tensor also avoids .view(), which fails on
        # non-contiguous batches.
        values = images.double()

        min_val = min(min_val, values.min().item())
        max_val = max(max_val, values.max().item())
        sum_vals += values.sum().item()
        sum_squared += values.pow(2).sum().item()
        n_pixels += values.numel()

    if n_pixels == 0:
        raise ValueError("compute_dataset_stats: the dataloader yielded no values")

    mean = sum_vals / n_pixels
    # E[x^2] - E[x]^2 can come out slightly negative through rounding; a negative
    # float raised to 0.5 would silently produce a complex number.
    variance = max(sum_squared / n_pixels - mean ** 2, 0.0)
    std = variance ** 0.5

    return {
        "min": min_val,
        "max": max_val,
        "mean": mean,
        "std": std
    }

def compute_scaled_stats_2(dataloader):
    """Compute the global mean and population standard deviation of a dataloader.

    Args:
        dataloader: Iterable yielding ``(imgs, target)`` pairs, where ``imgs`` is
            a tensor (the original comment expects values already scaled to [0, 1]).

    Returns:
        Tuple ``(mu, sigma)`` of Python floats computed over every value.

    Raises:
        ValueError: If the dataloader yields no values.
    """
    s1, s2, n = 0.0, 0.0, 0
    for imgs, _ in dataloader:
        # Reduce in float64 so large sums keep their precision
        values = imgs.double()
        s1 += values.sum().item()
        s2 += values.pow(2).sum().item()
        n += values.numel()

    if n == 0:
        raise ValueError("compute_scaled_stats_2: the dataloader yielded no values")

    mu = s1 / n
    # Guard against a tiny negative variance caused by rounding, which would make
    # `** 0.5` return a complex number instead of a float.
    variance = max(s2 / n - mu * mu, 0.0)
    sigma = variance ** 0.5
    return mu, sigma

#mu_scaled, sigma_scaled = compute_scaled_stats(scaled_loader)
#print("After scaling to [0,1]: mean =", mu_scaled, " std =", sigma_scaled)

def compute_max_log(dataloader):
    """Return the largest value found in any batch of ``dataloader``.

    Args:
        dataloader: Iterable yielding ``(batch, target)`` pairs, where ``batch``
            is a tensor.

    Returns:
        The maximum as a Python float, or ``-inf`` when nothing is yielded.
    """
    running_max = float('-inf')
    for images, _target in dataloader:
        candidate = images.max().item()
        # Keep the larger of the running maximum and this batch's maximum
        if candidate > running_max:
            running_max = candidate
    return running_max

In [None]:
# Statistics of the raw values, stored as literals.
# NOTE(review): presumably produced by compute_dataset_stats on the training data - confirm.
stats = dict(
    min=-0.011533993296325207,
    max=28906.0,
    mean=1385.8839183006537,
    std=1117.384644793575,
)

# Mean / std used for normalization after the data has been divided by the raw maximum.
stats_scaled = dict(
    mean=0.05120476204006151,
    std=0.0377356291529548,
)

# Constants consumed by the preprocessing transforms
MAX_RAW_VALUE, MU_SCALED, SIGMA_SCALED = (
    stats['max'],
    stats_scaled['mean'],
    stats_scaled['std'],
)

In [None]:
# Training parameters
IMG_SIZE_2, BATCH_SIZE, AUG_LEVEL = 128, 32, 'high'


def _make_transforms(mode):
    """Build the preprocessing pipeline for one HyperspectralAugmentation mode.

    The pipeline applies the augmentation, divides by MAX_RAW_VALUE and then
    normalizes with MU_SCALED / SIGMA_SCALED.
    """
    return v2.Compose([
        HyperspectralAugmentation(level=AUG_LEVEL, mode=mode),
        v2.Lambda(lambda t: t.float() / MAX_RAW_VALUE),
        MeanStdNormalize(mean=MU_SCALED, std=SIGMA_SCALED),
    ])


# Augmentation transformations (training first, then validation)
transforms_train = _make_transforms('train')
transforms_val = _make_transforms('validation')

# Dataloaders: the "test" loader is fed with the validation directory and labels
train_dataloader, test_dataloader = create_dataloaders(
    train_dir=TRAIN_DIR, test_dir=VAL_DIR,
    train_labels=TRAIN_LABELS, test_labels=VAL_LABELS,
    train_transform=transforms_train, test_transform=transforms_val,
    batch_size=BATCH_SIZE,
    num_workers=0,
    output_type='reg',
)

# 6. Image Visualization

In [None]:
def visualize_hyperspectral_batch(dataloader, num_images=10, num_channels=5, batch_index=1):
    """Plot the first channels of the first images of one batch as a grid.

    Rows are images and columns are channels.

    Args:
        dataloader: Iterable yielding ``(images, labels)`` batches; ``images`` is
            indexed as ``images[image, channel]`` to obtain a 2-D map.
        num_images: Maximum number of images (rows) to show.
        num_channels: Maximum number of channels (columns) to show.
        batch_index: Zero-based index of the batch to display. The default of 1
            keeps the previous behavior (second batch). When the dataloader has
            fewer batches, the last available one is shown instead of failing.

    Raises:
        ValueError: If the dataloader yields no batch at all.
    """
    # Walk the dataloader up to the requested batch, remembering the latest one
    images = None
    for current_index, (batch_images, _labels) in enumerate(dataloader):
        images = batch_images
        if current_index >= batch_index:
            break
    if images is None:
        raise ValueError("visualize_hyperspectral_batch: the dataloader yielded no batches")

    # Make sure we don't exceed available images or channels
    num_images = min(num_images, images.shape[0])
    num_channels = min(num_channels, images.shape[1])

    # squeeze=False always returns a 2-D axes array, so axes[i, j] also works
    # when there is a single row and/or a single column.
    fig, axes = plt.subplots(
        num_images,
        num_channels,
        figsize=(num_channels * 4, num_images * 4),
        squeeze=False,
    )
    fig.suptitle("Hyperspectral Channels per Image", fontsize=16)

    for i in range(num_images):
        for j in range(num_channels):
            ax = axes[i, j]
            img = images[i, j].cpu().numpy()
            ax.imshow(img, cmap='terrain')
            ax.axis('off')

    #plt.savefig("augmentation.png", bbox_inches='tight')

    plt.show()

In [None]:
# Preview up to 5 channels of up to 2 images taken from the training dataloader
visualize_hyperspectral_batch(train_dataloader, num_images=2, num_channels=5)

# 7. Creating a Spectral Vision Transformer (ViT) Model

In [None]:
# Derive the model input/output parameters
NUM_METRICS = 1 #only one target metric
# Pull one training batch to read the size of its dimension 1 (used below as in_channels)
images, labels = next(iter(train_dataloader))
# NOTE(review): only KERNEL_SIZE[0] is read in the visible cells; the trailing (1, 1) looks unused - confirm
KERNEL_SIZE = (images.shape[1], 1, 1)

In [None]:
# Create custom Vision Transformer Tiny (ViT-Tiny) model
vit_hparams = dict(
    img_size=IMG_SIZE_2,
    in_channels=KERNEL_SIZE[0],
    patch_size=8,
    num_transformer_layers=4,
    emb_dim=256,
    mlp_size=512,
    num_heads=8,
    attn_dropout=0.1,
    mlp_dropout=0.1,
    emb_dropout=0.1,
    num_classes=NUM_METRICS,
)
model = ViT(**vit_hparams)

# Print summary for one batch-shaped input: (batch, channels, height, width)
summary_input_size = (BATCH_SIZE, KERNEL_SIZE[0], IMG_SIZE_2, IMG_SIZE_2)
summary(
    model,
    input_size=summary_input_size,
    col_names=["input_size", "output_size", "num_params", "trainable"],
    col_width=20,
    row_settings=["var_names"])

# 8. Training the Model

In [None]:
import torch
import torch.nn.functional as F

class MSEWithVarianceLoss(torch.nn.Module):
    """MSE loss with a small reward for spread in the predictions.

    The loss is ``mse - min(lambda_var * var(predictions), 0.99 * mse)``, so the
    variance term can never remove more than 99% of the MSE.

    Args:
        lambda_var: Weight of the variance term.
    """

    def __init__(self, lambda_var=3.67e-5):
        super().__init__()
        self.mse = torch.nn.MSELoss()
        self.lambda_var = lambda_var  # weight for variance term

    def forward(self, predictions, targets):
        """Return the scalar loss for ``predictions`` against ``targets``."""
        mse_loss = self.mse(predictions, targets)

        # torch.var over a single element is NaN (there is no spread to measure),
        # which would turn the whole loss into NaN, e.g. on a last batch of
        # size 1. Treat that case as zero variance.
        if predictions.numel() > 1:
            variance = torch.var(predictions)
        else:
            variance = predictions.new_zeros(())

        # We subtract variance because we want to maximize it (encourage spread)
        variance_penalty = torch.clamp(self.lambda_var * variance, max=0.99 * mse_loss)
        loss = mse_loss - variance_penalty
        return loss

In [None]:
# Train the model (initial training run)
EPOCHS = 310 
LR = 1e-6
# NOTE(review): the fine-tuning cell that follows loads MODEL_DIR/"model_vit-t_reg_high_310_loss_epoch203.pth";
# confirm the engine writes a file with exactly this name.
model_type="model_vit-t_reg_high_310_loss_epoch203"
model_name = model_type + ".pth"

# Create AdamW optimizer
optimizer = torch.optim.AdamW(
    params=model.parameters(),
    lr=LR,
    betas=(0.9, 0.999),
    weight_decay=0.01
)

# Create loss function (plain MSE) and the learning-rate scheduler:
# cosine annealing with warm restarts, T_0=10, T_mult=2, minimum LR of LR/100
loss_fn = torch.nn.MSELoss()
scheduler = CosineAnnealingWarmRestarts(optimizer, T_0=10, T_mult=2, eta_min=LR/100)

# Set seeds
set_seeds(SEED)

# And train...

# Instantiate the regression engine with the created model and the target device
engine = RegressionEngine(
    model=model,
    color_map={'train': 'blue', 'test': 'red'},
    log_verbose=True,
    device=device)

# Configure the training method
results = engine.train(
    target_dir=MODEL_DIR,                       # Directory where the model will be saved
    model_name=model_name,                      # Name of the model
    save_best_model=["loss", "last", "r2"],     # Save the best models based on different criteria
    keep_best_models_in_memory=False,           # Do not keep the models stored in memory for the sake of training time and memory efficiency
    train_dataloader=train_dataloader,          # Train dataloader
    test_dataloader=test_dataloader,            # Validation/test dataloader
    apply_validation=True,                      # Enable validation step
    optimizer=optimizer,                        # Optimizer
    loss_fn=loss_fn,                            # Loss function
    scheduler=scheduler,                        # Scheduler
    epochs=EPOCHS,                              # Total number of epochs
    amp=True,                                   # Enable Automatic Mixed Precision (AMP)
    enable_clipping=False,                      # Disable clipping on gradients, only useful if training becomes unstable
    debug_mode=False,                           # Disable debug mode
    accumulation_steps=1,                       # Accumulation steps (1 here): effective batch size = batch_size x accumulation steps
    )

The best-performing model was obtained at epoch 203.

In [None]:
# Fine-tuning 1: restart from the best checkpoint of the initial run with a lower LR

EPOCHS = 150
LR = 1e-7
model_type="model_vit-t_reg_high_310_from_203"
model_name = model_type + ".pth"

# Load the weights of the best model of the previous run.
# map_location keeps the load working when the checkpoint was saved from a
# different device (e.g. GPU checkpoint, CPU-only machine); weights_only avoids
# executing arbitrary pickled code from the file.
model.load_state_dict(
    torch.load(
        os.path.join(MODEL_DIR, "model_vit-t_reg_high_310_loss_epoch203.pth"),
        map_location=device,
        weights_only=True,
    )
)

# Create AdamW optimizer
optimizer = torch.optim.AdamW(
    params=model.parameters(),
    lr=LR,
    betas=(0.9, 0.999),
    weight_decay=0.01
)

# Create loss function
loss_fn = torch.nn.MSELoss()  # or L1Loss, SmoothL1Loss, HuberLoss, etc.

# And scheduler: a single cosine decay from LR down to LR/100 over EPOCHS steps
#scheduler = CosineAnnealingWarmRestarts(optimizer, T_0=10, T_mult=2, eta_min=LR/100)
scheduler = CosineAnnealingLR(optimizer, T_max=EPOCHS, eta_min=LR/100)

# Set seeds
set_seeds(SEED)

# And train...

# Instantiate the regression engine with the created model and the target device
engine = RegressionEngine(
    model=model,
    color_map={'train': 'blue', 'test': 'magenta'},
    log_verbose=True,
    device=device)

# Configure the training method
results = engine.train(
    target_dir=MODEL_DIR,                       # Directory where the model will be saved
    model_name=model_name,                      # Name of the model
    save_best_model=["loss", "last", "r2"],     # Save the best models based on different criteria
    keep_best_models_in_memory=False,           # Do not keep the models stored in memory for the sake of training time and memory efficiency
    train_dataloader=train_dataloader,          # Train dataloader
    test_dataloader=test_dataloader,            # Validation/test dataloader
    apply_validation=True,                      # Enable validation step
    optimizer=optimizer,                        # Optimizer
    loss_fn=loss_fn,                            # Loss function
    scheduler=scheduler,                        # Scheduler
    epochs=EPOCHS,                              # Total number of epochs
    amp=True,                                   # Enable Automatic Mixed Precision (AMP)
    enable_clipping=False,                      # Disable clipping on gradients, only useful if training becomes unstable
    debug_mode=False,                           # Disable debug mode
    accumulation_steps=1,                       # Accumulation steps (1 here): effective batch size = batch_size x accumulation steps
    )

In [None]:
# Fine tuning 2: continue from the end of fine-tuning 1, reusing the optimizer state

EPOCHS = 630
LR = 1e-7
model_type="model_vit-t_reg_high_1050"
model_name = model_type + ".pth"

# Snapshot the current model/optimizer state (this file is also loaded by fine tuning 3)
checkpoint_path = os.path.join(MODEL_DIR, 'checkpoint_epoch150.pth')
torch.save({
    'model_state_dict': model.state_dict(),
    'optimizer_state_dict': optimizer.state_dict(),
}, checkpoint_path)

# Reload the snapshot. map_location keeps the load device-independent and
# weights_only avoids executing arbitrary pickled code from the file.
checkpoint = torch.load(checkpoint_path, map_location=device, weights_only=True)
model.load_state_dict(checkpoint['model_state_dict'])
optimizer.load_state_dict(checkpoint['optimizer_state_dict'])

# Manually override LR in the optimizer.
# The previous scheduler left an 'initial_lr' entry in every param group, and a
# newly created scheduler takes its base LR from that entry when it exists, not
# from 'lr'. Both must be set or a changed LR would be silently ignored.
for param_group in optimizer.param_groups:
    param_group['lr'] = LR
    param_group['initial_lr'] = LR

scheduler = CosineAnnealingWarmRestarts(
    optimizer,
    T_0=10,
    T_mult=2,
    eta_min=LR/100
)

# Instantiate the regression engine with the created model and the target device
engine = RegressionEngine(
    model=model,
    color_map={'train': 'blue', 'test': 'orange'},
    log_verbose=True,
    device=device)

# Configure the training method
results = engine.train(
    target_dir=MODEL_DIR,                       # Directory where the model will be saved
    model_name=model_name,                      # Name of the model
    save_best_model=["loss", "last", "r2"],     # Save the best models based on different criteria
    keep_best_models_in_memory=False,           # Do not keep the models stored in memory for the sake of training time and memory efficiency
    train_dataloader=train_dataloader,          # Train dataloader
    test_dataloader=test_dataloader,            # Validation/test dataloader
    apply_validation=True,                      # Enable validation step
    optimizer=optimizer,                        # Optimizer
    loss_fn=loss_fn,                            # Loss function (defined in the previous training cell)
    scheduler=scheduler,                        # Scheduler
    epochs=EPOCHS,                              # Total number of epochs
    amp=True,                                   # Enable Automatic Mixed Precision (AMP)
    enable_clipping=False,                      # Disable clipping on gradients, only useful if training becomes unstable
    debug_mode=False,                           # Disable debug mode
    accumulation_steps=1,                       # Accumulation steps (1 here): effective batch size = batch_size x accumulation steps
    )

In [None]:
# Fine tuning 3: restart from the fine-tuning-1 snapshot with a fresh optimizer and a higher LR

EPOCHS = 630
LR = 5e-6
model_type="model_vit-t_reg_high_1050_2"
model_name = model_type + ".pth"

# Restore the model weights saved in 'checkpoint_epoch150.pth' (written by the
# fine tuning 2 cell). map_location keeps the load device-independent and
# weights_only avoids executing arbitrary pickled code from the file.
checkpoint = torch.load(
    os.path.join(MODEL_DIR, 'checkpoint_epoch150.pth'),
    map_location=device,
    weights_only=True,
)
model.load_state_dict(checkpoint['model_state_dict'])

# Create AdamW optimizer (the optimizer state stored in the checkpoint is not reused)
optimizer = torch.optim.AdamW(
    params=model.parameters(),
    lr=LR,
    betas=(0.9, 0.999),
    weight_decay=0.01
)

scheduler = CosineAnnealingWarmRestarts(
    optimizer,
    T_0=10,
    T_mult=2,
    eta_min=LR/100
)

# Instantiate the regression engine with the created model and the target device
engine = RegressionEngine(
    model=model,
    color_map={'train': 'blue', 'test': 'orange'},
    log_verbose=True,
    device=device)

# Configure the training method
results = engine.train(
    target_dir=MODEL_DIR,                       # Directory where the model will be saved
    model_name=model_name,                      # Name of the model
    save_best_model=["loss", "last", "r2"],     # Save the best models based on different criteria
    keep_best_models_in_memory=False,           # Do not keep the models stored in memory for the sake of training time and memory efficiency
    train_dataloader=train_dataloader,          # Train dataloader
    test_dataloader=test_dataloader,            # Validation/test dataloader
    apply_validation=True,                      # Enable validation step
    optimizer=optimizer,                        # Optimizer
    loss_fn=loss_fn,                            # Loss function (defined in an earlier training cell)
    scheduler=scheduler,                        # Scheduler
    epochs=EPOCHS,                              # Total number of epochs
    amp=True,                                   # Enable Automatic Mixed Precision (AMP)
    enable_clipping=False,                      # Disable clipping on gradients, only useful if training becomes unstable
    debug_mode=False,                           # Disable debug mode
    accumulation_steps=2,                       # Accumulation steps (2 here): effective batch size = batch_size x accumulation steps
    )

# 9. Making Predictions on the Test Set

In [None]:
# Find the model file with "model_loss_epoch" prefix and rename it
def rename_model(model_name, new_name: str):
    """Rename a model file on disk and report the change.

    Args:
        model_name: Either a single path (str or os.PathLike) or a sequence of
            candidate paths such as the list returned by ``glob.glob``; for a
            sequence, its first element is renamed.
        new_name: Destination path.

    Raises:
        IndexError: If ``model_name`` is an empty sequence (no file matched).
    """
    if isinstance(model_name, (str, os.PathLike)):
        # A single path: indexing it with [0] would pick its first character
        old_name = model_name
    else:
        candidates = list(model_name)
        if not candidates:
            raise IndexError("rename_model: no model file was found to rename")
        old_name = candidates[0]

    os.rename(old_name, new_name)
    print(f"Renamed {old_name} to {new_name}")

#model_name = glob.glob(str(MODEL_DIR / f"{model_type}_epoch*.pth"))
#new_model_name = str(MODEL_DIR / f"{model_type}_2.pth")
#rename_model(model_name, new_model_name)

# Checkpoint used for inference; the submission file name is derived from it so
# the two can never get out of sync.
CHECKPOINT_NAME = "model_vit-t_reg_high_1050_2_epoch13.pth"
SUBMISSION_NAME = f"submission_{Path(CHECKPOINT_NAME).stem}.csv"

# Constants that map the model output back to the label range.
# NOTE(review): assumes training labels were normalized as (label - 1) / 99 - confirm against the dataloader.
LABEL_SCALE = 99.0
LABEL_OFFSET = 1

# Transformations (same scaling/normalization as training, without augmentation)
transforms = v2.Compose([
    v2.Lambda(lambda t: t.float() / MAX_RAW_VALUE),
    MeanStdNormalize(mean=MU_SCALED, std=SIGMA_SCALED),
])

# Instantiate the model (must match the architecture used for training)
model = ViT(
    img_size=IMG_SIZE_2,
    in_channels=125, #images.shape[1],
    patch_size=8,
    num_transformer_layers=4, #12,
    emb_dim=256, #1024,
    mlp_size=512, #4096,
    num_heads=8, #16,
    attn_dropout=0.1, #0,
    mlp_dropout=0.1,
    emb_dropout=0.1,
    num_classes=NUM_METRICS
)

# Load the model checkpoint weights from the model directory and move the model to the device
model = Common().load_model(model, str(MODEL_DIR), CHECKPOINT_NAME).to(device)

# Prepare the model in evaluation mode
model.eval()

test_df = pd.read_csv(TEST_LABELS)
# NOTE(review): this rebinds val_df (previously the in-memory split) and is not used below - confirm it is needed.
val_df = pd.read_csv(VAL_LABELS)

ids    = []
values = []
submission_dict = []

with torch.no_grad():
    for sample_id in test_df['id']:
        # load the hyperspectral cube from .npy
        npy_path = os.path.join(TEST_DIR, str(sample_id))
        data = np.load(npy_path)                      # e.g. shape (128,128,125)

        image = data.astype(np.float32)

        # Convert to tensor and permute to (C, H, W)
        image = torch.from_numpy(image).permute(2, 0, 1)

        # Add a batch dimension and resize the spatial dimensions to the model input size
        image = F.interpolate(
            image.unsqueeze(0),
            size=(IMG_SIZE_2, IMG_SIZE_2),
            mode='bilinear',
            align_corners=False,
        )

        # Perform pre-processing
        image = transforms(image)

        # Make predictions
        out = model(image.to(device))
        out = out.squeeze(0).cpu().numpy()

        # Scale the outputs to the original range
        out = out * LABEL_SCALE + LABEL_OFFSET

        ids.append(sample_id)
        values.append(out.item())

# build submission DataFrame
submission_df = pd.DataFrame({
    'ID':    ids,
    'label': values
})

# save to CSV
submission_df.to_csv(SUBMISSION_NAME, index=False)