Hello fellow Kagglers,

This notebook is a first draft for this competition and is shared to get people started.

* V1 Shows the training process
* V2 Uses the precomputed DataFrames and pretrained model to make the inference
* V3 More plots and filtering data based on sample submission min/max
* V5 Excluding samples based on 0.1%< or >99.9% of train samples since sample submission is updated. Thanks to [Myrthe deen](https://www.kaggle.com/myrthedeen) for pointing this out.

# Imports

pip install numpy pandas matplotlib imageio albumentations torch torchmetrics scikit-learn torchvision timm tqdm


In [16]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [20]:
print(os.listdir('/content/drive/MyDrive/planttraits2024'))


['sample_submission.csv', 'target_name_meta.tsv', 'test.csv', 'test_images', 'train.csv', 'train_images']


In [None]:
pip install timm



In [None]:
pip install torchmetrics




In [17]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import imageio.v3 as  imageio
import albumentations as A

from albumentations.pytorch import ToTensorV2
from torch.utils.data import Dataset, DataLoader
from torch import nn
from tqdm.notebook import tqdm
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from torchvision import transforms

import torch
import timm
import glob
import torchmetrics
import time
import psutil
import os
import math
import warnings

tqdm.pandas()

# Config

In [18]:
class Config():
    IMAGE_SIZE0 = 512
    IMAGE_SIZE = 288
    TARGET_COLUMNS = ['X4_mean', 'X11_mean', 'X18_mean', 'X50_mean', 'X26_mean', 'X3112_mean']
    TARGET_COLUMNS_TEST = ['X4', 'X11', 'X18', 'X50', 'X26', 'X3112']
    N_TARGETS = len(TARGET_COLUMNS)
    # Dataset
    RECOMPUTE_DATAFRAMES = True
    BATCH_SIZE = 24
    BATCH_SIZE_VAL = 128
    N_VAL_SAMPLES0 = 4096
    # Training
    LR_MAX = 3e-4
    WEIGHT_DECAY = 0.01
    N_EPOCHS = 12
    TRAIN_MODEL = True
    # Others
    IS_INTERACTIVE = False
    SEED = 42
    EPS = 1e-6

CONFIG = Config()

# Train DataFrame

In [None]:
pip install --upgrade jupyter ipywidgets




In [22]:
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import os
train_data_path = '/content/drive/MyDrive/planttraits2024/train.csv'
test_data_path = '/content/drive/MyDrive/planttraits2024/test.csv'
train_images_path = '/content/drive/MyDrive/planttraits2024/train_images'
test_images_path = '/content/drive/MyDrive/planttraits2024/test_images'

In [23]:
if CONFIG.RECOMPUTE_DATAFRAMES:
    train0 = pd.read_csv(train_data_path)

    # Add File Path
    train0['file_path'] = train0['id'].apply(lambda s: f'/content/drive/MyDrive/planttraits2024train_images/{s}.jpeg')

    # Reaed Raw Image JPEG Bytes
    train0['jpeg_bytes'] = train0['file_path'].progress_apply(lambda fp: open(fp, 'rb').read())

    # Save for Future Use
    train0.to_pickle('/content/drive/MyDrive/planttraits2024/train.pkl')
else:
    train0 = pd.read_pickle('/content/drive/MyDrive/planttraits2024/train.pkl')

# Assign Medians
CONFIG.TARGET_MEDIANS = train0[CONFIG.TARGET_COLUMNS].median(axis=0).values

  0%|          | 0/55489 [00:00<?, ?it/s]

FileNotFoundError: [Errno 2] No such file or directory: '/content/drive/MyDrive/planttraits2024train_images/192027691.jpeg'

In [None]:
# Split train in train/val
train, val = train_test_split(train0, test_size=CONFIG.N_VAL_SAMPLES0, shuffle=True, random_state=CONFIG.SEED)

train = train.reset_index(drop=True)
val = val.reset_index(drop=True)

# Display DataFrame
display(train.head(30))
display(train.info())

# Test DataFrame

In [None]:
if CONFIG.RECOMPUTE_DATAFRAMES:
    test = pd.read_csv(test_data_path)

    # Add File Path
    test['file_path'] = test['id'].apply(lambda s: f'planttraits2024/test_images/{s}.jpeg')

    # Reaed Raw Image JPEG Bytes
    test['jpeg_bytes'] = test['file_path'].progress_apply(lambda fp: open(fp, 'rb').read())

    # Save for Future Use
    test.to_pickle('planttraits2024/test.pkl')
else:
    test = pd.read_pickle('/content/drive/MyDrive/test.pkl')

display(test.head())
display(test.info())

In [None]:
# Feature Columns
FEATURE_COLUMNS = test.columns.values[1:-2]
CONFIG.N_FEATURES = len(FEATURE_COLUMNS)
print(f'N_FEATURES: {CONFIG.N_FEATURES}')

# Sample Submission

In [None]:
train[CONFIG.TARGET_COLUMNS].quantile(0.001)

In [None]:
sample_submission = pd.read_csv('/content/drive/MyDrive/sample_submission.csv')

# Minimum/Maximum Based On Train 0.1% and 99.9%
CONFIG.V_MIN = train[CONFIG.TARGET_COLUMNS].quantile(0.001)
CONFIG.V_MAX = train[CONFIG.TARGET_COLUMNS].quantile(0.999)

display(sample_submission.head())
display(sample_submission.info())

# Image Plots

In [None]:
def plot_example(nrows=6, ncols=4):
    fig, axes = plt.subplots(nrows, ncols, figsize=(ncols*5, nrows*5))
    for r in range(nrows):
        for c in range(ncols):
            idx = (r * ncols) + c
            img = imageio.imread(train['jpeg_bytes'][idx])
            image_id = train['id'][idx]
            axes[r,c].imshow(img)
            axes[r,c].set_title(f'{image_id} | shape: {img.shape}')

    plt.show()

plot_example()

# Labels

In [None]:
# Labels Meta Data
target_name_meta = pd.read_csv('/content/drive/MyDrive/target_name_meta.tsv', delimiter='\t')
target_name_meta['trait_ID'] = target_name_meta['trait_ID'] + '_mean'
target_name_meta = target_name_meta.set_index('trait_ID').squeeze().to_dict()

display(pd.Series(target_name_meta).to_frame())

In [None]:
# Percentiles of features to use
percentiles = [
    0.001, 0.01,0.05,0.10,0.25,
    0.50,
    0.75,0.90,0.95,0.99, 0.999,
]
labels_describe_df = pd.DataFrame()
for target in CONFIG.TARGET_COLUMNS:
    labels_describe_df = pd.concat((
        labels_describe_df,
        train[target].describe(percentiles=percentiles).round(3)
    ), axis=1)

# Transpose DataFrame
labels_describe_df = labels_describe_df.T

# Minimum/Maximum Values
labels_describe_df.insert(4, 'v_min', CONFIG.V_MIN)
labels_describe_df.insert(16, 'v_max', CONFIG.V_MAX)

display(labels_describe_df)

# Plot outliers

In [None]:
def plot_samples(target, df, n):
    rows = train0.sort_values(target, ascending=False).head(n)
    fig, axes = plt.subplots(1, n, figsize=(5*n, 5))
    plt.suptitle(f'{target} | {target_name_meta[target]}', size=24)
    for ax, (row_idx, row) in zip(axes, rows.iterrows()):
        ax.set_title(f'ID: {row.id}, {target}: {row[target]}', size=12)
        ax.imshow(imageio.imread(row['jpeg_bytes']))
        ax.axis('off')
    plt.show()

In [None]:
# As can be observer, whatever unit is used, the outliers are comically large
for target in CONFIG.TARGET_COLUMNS:
    plot_samples(target, train0, 5)

# Filter Outliers

In [None]:
# Mask to exclude values outside of 0.1% - 99.9% range
def get_mask(df):
    lower = []
    higher = []
    mask = np.empty(shape=df[CONFIG.TARGET_COLUMNS].shape, dtype=bool)
    # Fill mask based on minimum/maximum values of sample submission
    for idx, (t, v_min, v_max) in enumerate(zip(CONFIG.TARGET_COLUMNS, CONFIG.V_MIN, CONFIG.V_MAX)):
        labels = df[t].values
        mask[:,idx] = ((labels > v_min) & (labels < v_max))
    return mask.min(axis=1)

# Masks
CONFIG.MASK_TRAIN = get_mask(train)
CONFIG.MASK_VAL = get_mask(val)
# Masked DataFrames
train_mask = train[CONFIG.MASK_TRAIN].reset_index(drop=True)
val_mask = val[CONFIG.MASK_VAL].reset_index(drop=True)
# Add Number Of Steps
CONFIG.N_TRAIN_SAMPLES = len(train_mask)
CONFIG.N_VAL_SAMPLES = len(val_mask)
CONFIG.N_STEPS_PER_EPOCH = (CONFIG.N_TRAIN_SAMPLES // CONFIG.BATCH_SIZE)
CONFIG.N_VAL_STEPS_PER_EPOCH = math.ceil(CONFIG.N_VAL_SAMPLES / CONFIG.BATCH_SIZE_VAL)
CONFIG.N_STEPS = CONFIG.N_STEPS_PER_EPOCH * CONFIG.N_EPOCHS + 1

for m, subset in zip([CONFIG.MASK_TRAIN, CONFIG.MASK_VAL], ['train', 'val']):
    print(f'===== {subset} shape: {m.shape} =====')
    print(f'{subset} \t| # Masked Samples: {(1-m.mean())*CONFIG.N_TRAIN_SAMPLES:.0f}')
    print(f'{subset} \t| % Masked Samples: {100-m.mean()*100:.3f}%')

# Label Normalization

In [None]:
# Log Scale Features
LOG_FEATURES = ['X11_mean', 'X18_mean', 'X50_mean', 'X26_mean', 'X3112_mean']

In [None]:
# Fill labels using normalization tool
def fill_y(y, df, normalize=False):
    for target_idx, target in enumerate(CONFIG.TARGET_COLUMNS):
        v = df[target]
        if normalize:
            # Log10 Transform
            if target in LOG_FEATURES:
                v = np.log10(v)
            # Shift To Have Zero Median
            Y_SHIFT[target_idx] = np.mean(v)
            v = v - np.median(v)
            # Uniform Variance
            Y_STD[target_idx] = np.std(v)
            v = v / np.std(v)
        # Assign to y_train
        y[:,target_idx] = v

# Feature Scaler
Y_SHIFT = np.zeros(CONFIG.N_TARGETS)
Y_STD = np.zeros(CONFIG.N_TARGETS)
# Masked Labels
y_train_mask_raw = np.zeros_like(train_mask[CONFIG.TARGET_COLUMNS], dtype=np.float32)
y_train_mask = np.zeros_like(train_mask[CONFIG.TARGET_COLUMNS], dtype=np.float32)
y_val_mask = np.zeros_like(val_mask[CONFIG.TARGET_COLUMNS], dtype=np.float32)
# Fill Target Arrays
fill_y(y_train_mask_raw, train_mask, normalize=False)
fill_y(y_train_mask, train_mask, normalize=True)
fill_y(y_val_mask, val_mask, normalize=True)
# Values
display(pd.DataFrame({
    'y_shift': Y_SHIFT,
    'y_std': Y_STD
}, index=CONFIG.TARGET_COLUMNS))

In [None]:
def plot_target_distribution():
    fig, axes = plt.subplots(CONFIG.N_TARGETS, 3, figsize=(20, CONFIG.N_TARGETS*4))
    v_raw = train[CONFIG.TARGET_COLUMNS].values
    for (ax_raw, ax_mask, ax_norm), target, v_r, v_n in zip(axes, CONFIG.TARGET_COLUMNS, v_raw.T, y_train_mask.T):
        # Raw
        ax_raw.hist(v_r, bins=128)
        ax_raw.set_title(f'{target} Raw min: {v_r.min():.3f}, max: {v_r.max():.2e}, µ: {v_r.mean():.2e}, σ: {v_r.std():.2f}', size=10)
        # Masked
        v_m = v_r[CONFIG.MASK_TRAIN]
        ax_mask.hist(v_r, bins=128)
        ax_mask.set_title(f'{target} Masked min: {v_m.min():.3f}, max: {v_m.max():.2e}, µ: {v_m.mean():.2e}, σ: {v_m.std():.2f}', size=10)
        # Normalized
        ax_norm.hist(v_n, bins=128)
        ax_norm.set_title(f'{target} Norm min: {v_n.min():.3f}, max: {v_n.max():.2f}, µ: {v_n.mean():.2f}, σ: {v_n.std():.2f}', size=10)
    plt.subplots_adjust(hspace=0.25, wspace=0.30)
    plt.show()

plot_target_distribution()

 # Features

In [None]:
# Standard Scaler for Features
FEATURE_SCALER = StandardScaler()

# Fit and transform on training features
train_features_mask = FEATURE_SCALER.fit_transform(train_mask[FEATURE_COLUMNS].values.astype(np.float32))
# Transform val/test features using scaler fitted on train data
val_features_mask = FEATURE_SCALER.transform(val_mask[FEATURE_COLUMNS].values.astype(np.float32))
test_features = FEATURE_SCALER.transform(test[FEATURE_COLUMNS].values.astype(np.float32))
# Convert Features to Torch Tensors
train_features_mask = torch.tensor(train_features_mask)
val_features_mask = torch.tensor(val_features_mask)
test_features = torch.tensor(test_features)

# Transforms

In [None]:
MEAN = np.array([0.485, 0.456, 0.406])
STD = np.array([0.229, 0.224, 0.225])
# Training Augmentations
TRAIN_TRANSFORMS = A.Compose([
        A.RandomSizedCrop(
            [int(0.85*CONFIG.IMAGE_SIZE0), CONFIG.IMAGE_SIZE0],
            CONFIG.IMAGE_SIZE, CONFIG.IMAGE_SIZE, w2h_ratio=1.0, p=1.0
        ),
        A.HorizontalFlip(p=0.50),
        A.RandomBrightnessContrast(brightness_limit=0.10, contrast_limit=0.10, p=0.50),
        A.ImageCompression(quality_lower=75, quality_upper=100, p=0.5),
        ToTensorV2(),
    ])
# Test Augmentations
VAL_TEST_TRANSFORMS = A.Compose([
        A.Resize(CONFIG.IMAGE_SIZE,CONFIG.IMAGE_SIZE),
        ToTensorV2(),
    ])

# Dataloader

In [None]:
class MyDataset(Dataset):
    def __init__(self, X_jpeg_bytes, y, features, transforms=None):
        self.X_jpeg_bytes = X_jpeg_bytes
        self.y = y
        self.features = features
        self.transforms = transforms

    def __len__(self):
        return len(self.X_jpeg_bytes)

    def __getitem__(self, index):
        X_sample = {
            'image': self.transforms(
                    image=imageio.imread(self.X_jpeg_bytes[index]),
                )['image'],
            'feature': self.features[index],
        }
        y_sample = self.y[index]

        return X_sample, y_sample

In [None]:
import psutil
from torch.utils.data import DataLoader

# Assuming MyDataset is a custom dataset class you've defined elsewhere
# and CONFIG, TRAIN_TRANSFORMS, and VAL_TEST_TRANSFORMS are predefined

# Set a reasonable number of workers
num_workers = max(1, psutil.cpu_count() // 2)

# Train DataLoader
train_dataset = MyDataset(
    train_mask['jpeg_bytes'].values,
    y_train_mask,
    train_features_mask,
    TRAIN_TRANSFORMS,
)
train_dataloader = DataLoader(
    train_dataset,
    batch_size=CONFIG.BATCH_SIZE,
    drop_last=True,  # Ensures that all batches have the same size
    num_workers=num_workers,  # Adjusted number of workers
    pin_memory=True if torch.cuda.is_available() else False  # Pin memory for GPU acceleration
)
train_dataloader_iter = iter(train_dataloader)

# Validation DataLoader
val_dataset = MyDataset(
    val_mask['jpeg_bytes'].values,
    y_val_mask,
    val_features_mask,
    VAL_TEST_TRANSFORMS,
)
val_dataloader = DataLoader(
    val_dataset,
    batch_size=CONFIG.BATCH_SIZE_VAL,
    drop_last=False,  # It's okay to have the last batch smaller in validation
    num_workers=num_workers,  # Consistent number of workers
    pin_memory=True if torch.cuda.is_available() else False
)

# Test DataLoader
test_dataset = MyDataset(
    test['jpeg_bytes'].values,
    test['id'].values,
    test_features,
    VAL_TEST_TRANSFORMS,
)
test_dataloader = DataLoader(
    test_dataset,
    batch_size=CONFIG.BATCH_SIZE_VAL,  # Typically, the batch size could be the same as for validation
    drop_last=False,  # Similar to validation
    num_workers=num_workers,
    pin_memory=True if torch.cuda.is_available() else False
)


import time
from tqdm import tqdm

# Benchmark Dataset
N = 10
t_start = time.perf_counter_ns()

# Reset the iterator to ensure we are measuring the same workload each time
train_dataloader_iter = iter(train_dataloader)

try:
    for _ in tqdm(range(N), desc="Measuring DataLoader Throughput"):
        next(train_dataloader_iter)
except StopIteration:
    print("Reached the end of the dataset before expected.")

n_images_per_second = (N * CONFIG.BATCH_SIZE) / ((time.perf_counter_ns() - t_start) * 1e-9)  # corrected time unit conversion
print(f'# Images/Second: {n_images_per_second:.0f}')


In [None]:
# Example batch
X_batch, y_batch = next(train_dataloader_iter)
for k, v in X_batch.items():
    print(f'X_batch {k} shape: {v.shape}, dtype: {v.dtype}')
    print(f'X_batch {k} min: {v.min():.3f}, max: {v.max():.3f}')
    print(f'X_batch {k} µ: {v.float().mean():.3f}, σ: {v.float().std():.3f}')
# Label
print(f'y_batch shape: {y_batch.shape}, dtype: {y_batch.dtype}')
print(f'y_batch min: {y_batch.min():.3f}, max: {y_batch.max():.3f}')
print(f'y_batch µ: {y_batch.mean():.3f}, σ: {y_batch.std():.3f}')

In [None]:
def plot_batch(nrows=6, ncols=4):
    fig, axes = plt.subplots(nrows, ncols, figsize=(ncols*5, nrows*5))
    for r in range(nrows):
        for c in range(ncols):
            idx = (r * ncols) + c
            # Put image on CPU
            img = X_batch['image'][idx].swapaxes(0,2).detach().cpu().numpy()
            # Denormalize Image
            image_id = train['id'][idx]
            axes[r,c].imshow(img)
            axes[r,c].set_title(f'{image_id} | shape: {img.shape}')

    plt.show()

plot_batch()

# Model

In [None]:
def search_timm_model(query):
    search_result = [n for n in timm.list_models(pretrained=True) if query in n]
    for i, name in enumerate(search_result):
        print(f'{i:02d} | {name}')

search_timm_model('efficientvit')

In [None]:
# Count model parameters
def count_parameters(model):
    return sum([p.numel() for p in model.parameters()])

In [None]:
class Model(nn.Module):
    def __init__(self):
        super().__init__()
        # ImageNet Normalize Input
        self.normalize = transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])

        # Backbone
        self.backbone = timm.create_model(
                'efficientvit_b1.r288_in1k',
                pretrained=True,
                num_classes=0,
            )

        # Features
        self.features = nn.Sequential(
            nn.Linear(CONFIG.N_FEATURES,256),
            nn.GELU(),
            nn.Linear(256,256),
        )

        # Label
        self.label = nn.Sequential(
            nn.Linear(256,256),
            nn.GELU(),
            nn.Linear(256,CONFIG.N_TARGETS, bias=False),
        )

        # Initialize Weights
        self.initialize_weights()

    def initialize_weights(self):
        # Features
        nn.init.kaiming_uniform_(self.features[2].weight)
        # Label
        nn.init.zeros_(self.label[2].weight)

    def forward(self, inputs, debug=False):
        if debug:
            embedding = self.backbone(self.normalize(inputs['image'].float() / 255))
            features = self.features(inputs['feature'])
            label = self.label(embedding + features)
            return {
                'features': features,
                'embedding': embedding,
                'label': label,
            }
        else:
            return {
                'label': self.label(
                    self.backbone(self.normalize(inputs['image'].float() / 255)) + self.features(inputs['feature'])
                )
            }

In [None]:
# Clear torch cache
torch.cuda.empty_cache()

# Load Weights if model is not trained
if not CONFIG.TRAIN_MODEL:
    model = torch.load('/kaggle/input/planttraits2024-eda-dataset/model.pth')
else:
    # Create new Model
    model = Model()

# Model to GPU memory
model = model.to('cuda')

print(f'# Model Parameters: {count_parameters(model):,}')

with torch.no_grad():
    # Put inputs on GPU
    for k, v in X_batch.items():
        X_batch[k] = v.to('cuda')
    outputs = model(X_batch, debug=True)
    for k, v in outputs.items():
        print(f'outputs {k} shape: {v.shape}, min: {v.min():.3f}, max: {v.max():.3f}, µ: {v.mean():.3f}, σ: {v.std():.3f}')
    # Label Outputs
    for o in outputs['label'][:3,:]:
        print(o.detach().cpu().numpy().tolist())

# Learning Rate Schedule

In [None]:
# Get the learning rate scheduler
def get_lr_scheduler(optimizer):
    return torch.optim.lr_scheduler.OneCycleLR(
        optimizer=optimizer,
        max_lr=CONFIG.LR_MAX,
        total_steps=CONFIG.N_STEPS,
        pct_start=0.10,
        anneal_strategy='cos',
        div_factor=1e3,
        final_div_factor=1e4,
    )

In [None]:
# Plot Learning Rate Scheduler
def plot_lr_scheduler():
    lr_scheduler = get_lr_scheduler(torch.optim.Adam(model.parameters()))
    lrs  = []
    for step in range(CONFIG.N_STEPS):
        lrs.append(lr_scheduler.get_last_lr())
        lr_scheduler.step()
    # Plot Learning Rate
    plt.figure(figsize=(12,5))
    plt.title('Learning Rate Schedule')
    plt.xlim(0, CONFIG.N_STEPS)
    plt.ylim(0, CONFIG.LR_MAX*1.1)
    plt.xlabel('Step')
    plt.ylabel('Learning Rate')
    plt.plot(lrs)
    plt.grid()
    plt.show()
    # Reset Learning Rate Scheduler
    lr_scheduler._step_count = 0
    lr_scheduler.last_epoch = 0

plot_lr_scheduler()

# Metrics

In [None]:
# Average meter to keep track of metrics/loss during training
class AverageMeter(object):
    def __init__(self):
        self.reset()

    def reset(self):
        self.avg = 0
        self.sum = 0
        self.count = 0

    def update(self, val):
        self.sum += val.sum()
        self.count += val.numel()
        # Average is simply the sum divided by the count
        self.avg = self.sum / self.count

In [None]:
# Average meter to keep track of metrics/loss during training
class R2_METRIC(object):
    def __init__(self):
        self.reset()
        self.y_mean = torch.tensor(train0[CONFIG.TARGET_COLUMNS].median(axis=0).values).to('cuda')

    def reset(self):
        self.avg = torch.zeros(CONFIG.N_TARGETS).to('cuda')
        self.rss = torch.zeros(CONFIG.N_TARGETS).to('cuda')
        self.tss = torch.zeros(CONFIG.N_TARGETS).to('cuda')

    def update(self, y_pred, y_true, mean=False):
        self.rss += torch.sum((y_true - y_pred)**2, dim=0)
        self.tss += torch.sum((y_true - self.y_mean)**2, dim=0)
        self.avg = 1 - (self.rss / torch.maximum(self.tss, CONFIG.EPS_CUDA))

# Loss

In [None]:
# Y_SHIFT As Torch Tensor On GPU
Y_SHIFT_CUDA = torch.tensor(Y_SHIFT).to('cuda')
Y_STD_CUDA = torch.tensor(Y_STD).to('cuda')
# Is Log Feature Flag
IS_LOG_FEATURE = torch.tensor(np.isin(CONFIG.TARGET_COLUMNS, LOG_FEATURES)).to('cuda')

def denormalize(y_pred, y_true=None):
    # Scale Back
    y_pred = (y_pred * Y_STD_CUDA) + Y_SHIFT_CUDA
    # Log Scale
    y_pred = torch.where(IS_LOG_FEATURE, 10**y_pred, y_pred)
    # Optionally Denormalize y_true
    if y_true is not None:
        y_true = (y_true * Y_STD_CUDA) + Y_SHIFT_CUDA
        y_true = torch.where(IS_LOG_FEATURE, 10**y_true, y_true)
        return y_pred, y_true
    else:
        return y_pred

In [None]:
# Mean feature values used to compute R2 loss
Y_MEDIAN = torch.tensor(CONFIG.TARGET_MEDIANS).to('cuda')
# Total Variation
MEAN_VARIATION = torch.tensor(
        (CONFIG.TARGET_MEDIANS - y_train_mask_raw)
    ).abs().mean(dim=0).to('cuda')
# R2 Loss
def r2_loss_fn(y_pred, y_true):
    B = len(y_pred)
    # Compute column wise sum of residuals and totals
    ss_res = (y_true - y_pred)**2
    ss_total = (y_true - Y_MEDIAN)**2
    # r2 ranging from 0 to infinity
    loss = torch.sum(ss_res, dim=0) / torch.maximum(torch.sum(ss_total, dim=0), CONFIG.EPS_CUDA)
    # Return Mean Of Loss
    return torch.mean(loss)

r2_loss_fn(denormalize(outputs['label']), denormalize(y_batch.to('cuda')))

In [None]:
def validation_step():
    # Loss Function
    R2_LOSS_FN = r2_loss_fn
    # Put model in evaluation mode
    model.eval()
    # Metrics Trackers
    R2 = R2_METRIC()
    R2_LOSS = AverageMeter()
    # Iterave Over Validation Set
    for step, (X_sample, y_true) in enumerate(val_dataloader):
        y_true = y_true.to('cuda')
        # Put label on GPU
        with torch.no_grad():
            for k, v in X_sample.items():
                X_sample[k] = v.to('cuda')
            # Forward Pass
            y_pred = model(X_sample)['label']
        # Denormalize
        y_pred_raw, y_true_raw = denormalize(y_pred, y_true)
        # Loss
        r2_loss = R2_LOSS_FN(y_pred_raw, y_true_raw)
        # Update Loss Metrics
        R2_LOSS.update(r2_loss)
        # Update Metrics
        R2.update(y_pred_raw, y_true_raw)
        # Logs
        r2_str = ", ".join(
            [f"{f}: {v:+.3f}" for f, v in zip(CONFIG.TARGET_COLUMNS_TEST, R2.avg)
        ])
        if not CONFIG.IS_INTERACTIVE and (step + 1) == CONFIG.N_VAL_STEPS_PER_EPOCH:
            print(
                f'VAL | R2 loss: {R2_LOSS.avg:.4f}, R2: {R2.avg.mean():.3f}, {r2_str}' + (' ' * 10)
            )
        elif CONFIG.IS_INTERACTIVE:
            print(
                f'\rVAL {step+1:02d}/{CONFIG.N_VAL_STEPS_PER_EPOCH} | R2 loss: {R2_LOSS.avg:.4f}, ' +
                f'R2: {R2.avg.mean():.3f}, {r2_str}' + (' ' * 10),
                end='\n' if (step + 1) == CONFIG.N_VAL_STEPS_PER_EPOCH else '', flush=True,
            )

validation_step()

# Training

In [None]:
# Loss
R2_LOSS_FN = r2_loss_fn
# Optimizer
optimizer = torch.optim.AdamW(
    params=model.parameters(),
    lr=CONFIG.LR_MAX,
    weight_decay=CONFIG.WEIGHT_DECAY,
)
# Learning Rate Scheduler
LR_SCHEDULER = get_lr_scheduler(optimizer)
# Metrics Trackers
R2 = R2_METRIC()
R2_LOSS = AverageMeter()

In [None]:
if CONFIG.TRAIN_MODEL:
    for epoch in range(CONFIG.N_EPOCHS):
        # Reset Metrics
        R2.reset()
        R2_LOSS.reset()
        # Put model in training mode
        model.train()
        # Iterate Over Training Dataloader
        for step, (X_batch, y_true) in enumerate(train_dataloader):
            # Put batch on GPU
            for k, v in X_batch.items():
                X_batch[k] = v.to('cuda')
            y_true = y_true.to('cuda')
            # Step Time
            t_start = time.perf_counter_ns()
            # Forward Pass
            y_pred = model(X_batch)['label']
            # Denormalize
            y_pred_raw, y_true_raw = denormalize(y_pred, y_true)
            # Loss
            r2_loss = R2_LOSS_FN(y_pred_raw, y_true_raw)
            # Update Loss Metrics
            R2_LOSS.update(r2_loss)
            # Compute Gradients
            r2_loss.backward()
            # Backward Pass
            optimizer.step()
            # Zero Out Gradients
            optimizer.zero_grad()
            # Update Metrics
            R2.update(y_pred_raw, y_true_raw)
            # Compute R2 Metrics String
            r2_str = ", ".join([
                f"{f}: {v:+.3f}" for f, v in zip(CONFIG.TARGET_COLUMNS_TEST, R2.avg)
            ])
            # Logs
            if not CONFIG.IS_INTERACTIVE and (step + 1) == CONFIG.N_STEPS_PER_EPOCH:
                print(
                    f'EPOCH {epoch+1:02d} {step+1:04d}/{CONFIG.N_STEPS_PER_EPOCH} | ' +
                    f'R2 loss: {R2_LOSS.avg:.4f}, R2: {R2.avg.mean():+.3f}, {r2_str}, ' +
                    f'step: {(time.perf_counter_ns()-t_start)*1e-9:.3f}s, lr: {LR_SCHEDULER.get_last_lr()[0]:.2e}',
                )
            elif CONFIG.IS_INTERACTIVE:
                print(
                    f'\rEPOCH {epoch+1:02d} {step+1:04d}/{CONFIG.N_STEPS_PER_EPOCH} | ' +
                    f'R2 loss: {R2_LOSS.avg:.4f}, R2: {R2.avg.mean():+.3f}, {r2_str}, ' +
                    f'step: {(time.perf_counter_ns()-t_start)*1e-9:.3f}s, lr: {LR_SCHEDULER.get_last_lr()[0]:.2e}',
                    end='\n' if (step + 1) == CONFIG.N_STEPS_PER_EPOCH else '', flush=True,
                )
            # Learning Rate Scheduler Step
            LR_SCHEDULER.step()
        # Validation Step
        validation_step()

# Save entire model object
torch.save(model, 'model.pth')

In [None]:
# Save entire model object
torch.save(model, 'model.pth')

# Test

In [None]:
# Minimum And Maximum Values To Clip Predictions
TARGET_MIN = train0[CONFIG.TARGET_COLUMNS].values.min(axis=0)
TARGET_MAX = train0[CONFIG.TARGET_COLUMNS].values.max(axis=0)
# Submission Rows
SUBMISSION_ROWS = []
# Put Model in Evaluation Mode
model.eval()
for i, (X_sample_test, test_id) in enumerate(tqdm(test_dataset)):
    # Only 100 predictions in interactive mode
    if CONFIG.IS_INTERACTIVE and i == 100:
        break
    # Put sample on GPU and add batch dimension
    for k, v in X_sample_test.items():
        X_sample_test[k] = v.to('cuda').unsqueeze(0)
    # Prediction without gradients
    with torch.no_grad():
        y_pred = model(X_sample_test)['label']
    # Reverse Scaling
    y_pred, _ = denormalize(y_pred, y_pred)
    y_pred = y_pred.detach().cpu().numpy().squeeze()
    # Clip Values
    y_pred = np.clip(y_pred, TARGET_MIN, TARGET_MAX)
    # Add To Rows
    row = { 'id': test_id }
    # Add Predictions column by column
    for k, v in zip(CONFIG.TARGET_COLUMNS, y_pred):
        # Remove "_mean" part of target column
        row[k.replace('_mean', '')] = v
    # Add To Submission Rows
    SUBMISSION_ROWS.append(row)

In [None]:
# Make Submission CSV
submission_df = pd.DataFrame(SUBMISSION_ROWS)

display(submission_df.head(30))

# Make
submission_df.to_csv('submission.csv', index=False)