Goal: Understand the flow, clean addition of validation and hyperparameter tuning

Modified from HDJOJO's original notebook with SWIN Transformer

In [1]:
import numpy as np
import pandas as pd
import random
import copy
import matplotlib.pyplot as plt

# albumentations better than torchvision
import imageio.v3 as imageio
import albumentations as A
from albumentations.pytorch import ToTensorV2

import torch
from torch import nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torch.utils.data import sampler

from tqdm.notebook import tqdm
from sklearn.preprocessing import StandardScaler

import timm
import glob
import torchmetrics
import time
import psutil
import os

tqdm.pandas()

In [2]:
class Config():
    IMAGE_SIZE = 256
#     BACKBONE = 'swin_large_patch4_window12_384.ms_in22k_ft_in1k'
    BACKBONE = 'swinv2_small_window16_256'
    TARGET_COLUMNS = ['X4_mean', 'X11_mean', 'X18_mean', 'X50_mean', 'X26_mean', 'X3112_mean']
    N_TARGETS = len(TARGET_COLUMNS)
    BATCH_SIZE = 16 # Sample: 96
    LR_MAX = 1e-4
    WEIGHT_DECAY = 0.01
    N_EPOCHS = 6 # Sample: 12
    TRAIN_MODEL = True
    IS_INTERACTIVE = os.environ['KAGGLE_KERNEL_RUN_TYPE'] == 'Interactive'
    
    DEVICE = "cuda"
    NUM_WORKERS = psutil.cpu_count()
    
    # Added variables
    NUM_FOLDS = 5
    VALID_FOLD = 0  # Fold of validation data
        
CONFIG = Config()

In [3]:
# Read in training data
train_df = pd.read_csv('/kaggle/input/planttraits2024/train.csv')
train_df['file_path'] = train_df['id'].apply(lambda s: f'/kaggle/input/planttraits2024/train_images/{s}.jpeg')
train_df['jpeg_bytes'] = train_df['file_path'].progress_apply(lambda fp: open(fp, 'rb').read())
train_df.to_pickle('train.pkl') # serialize object into string form

  0%|          | 0/55489 [00:00<?, ?it/s]

### Data Filtering

In [4]:
# Sampled training set for faster training
train_df.head()

Unnamed: 0,id,WORLDCLIM_BIO1_annual_mean_temperature,WORLDCLIM_BIO12_annual_precipitation,WORLDCLIM_BIO13.BIO14_delta_precipitation_of_wettest_and_dryest_month,WORLDCLIM_BIO15_precipitation_seasonality,WORLDCLIM_BIO4_temperature_seasonality,WORLDCLIM_BIO7_temperature_annual_range,SOIL_bdod_0.5cm_mean_0.01_deg,SOIL_bdod_100.200cm_mean_0.01_deg,SOIL_bdod_15.30cm_mean_0.01_deg,...,X50_mean,X3112_mean,X4_sd,X11_sd,X18_sd,X26_sd,X50_sd,X3112_sd,file_path,jpeg_bytes
0,192027691,12.235703,374.466675,62.524445,72.256844,773.592041,33.277779,125,149,136,...,1.849375,50.216034,0.008921,1.601473,0.025441,0.153608,0.27961,15.045054,/kaggle/input/planttraits2024/train_images/192...,b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00...
1,195542235,17.270555,90.239998,10.351111,38.22094,859.193298,40.009777,124,144,138,...,1.353468,574.098472,0.003102,0.258078,0.000866,0.03463,0.010165,11.004477,/kaggle/input/planttraits2024/train_images/195...,b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00...
2,196639184,14.254504,902.071411,49.642857,17.873655,387.977753,22.807142,107,133,119,...,2.343153,1130.096731,,,,,,,/kaggle/input/planttraits2024/train_images/196...,b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00...
3,195728812,18.680834,1473.93335,163.100006,45.009758,381.053986,20.436666,120,131,125,...,1.155308,1042.686546,0.011692,2.818356,0.110673,0.011334,0.229224,141.857187,/kaggle/input/planttraits2024/train_images/195...,b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00...
4,195251545,0.673204,530.088867,50.857777,38.230709,1323.526855,45.891998,91,146,120,...,2.246226,2386.46718,0.006157,1.128,0.026996,0.553815,0.107092,87.146899,/kaggle/input/planttraits2024/train_images/195...,b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00...


In [5]:
from sklearn.model_selection import StratifiedKFold

skf = StratifiedKFold(n_splits=CONFIG.NUM_FOLDS, shuffle=True, random_state=42)

# Create separate bin for each traits
for i, trait in enumerate(CONFIG.TARGET_COLUMNS):
    # Determine the bin edges dynamically based on the distribution of traits
    bin_edges = np.percentile(train_df[trait], np.linspace(0, 100, CONFIG.NUM_FOLDS + 1))
    train_df[f"bin_{i}"] = np.digitize(train_df[trait], bin_edges)

# Concatenate the bins into a final bin
train_df["final_bin"] = (
    train_df[[f"bin_{i}" for i in range(CONFIG.N_TARGETS)]]
    .astype(str)
    .agg("".join, axis=1)
)

# Perform the stratified split using final bin
train_df = train_df.reset_index(drop=True)
for fold, (train_idx, valid_idx) in enumerate(skf.split(train_df, train_df["final_bin"])):
    train_df.loc[valid_idx, "fold"] = fold
    
train_df.head()



Unnamed: 0,id,WORLDCLIM_BIO1_annual_mean_temperature,WORLDCLIM_BIO12_annual_precipitation,WORLDCLIM_BIO13.BIO14_delta_precipitation_of_wettest_and_dryest_month,WORLDCLIM_BIO15_precipitation_seasonality,WORLDCLIM_BIO4_temperature_seasonality,WORLDCLIM_BIO7_temperature_annual_range,SOIL_bdod_0.5cm_mean_0.01_deg,SOIL_bdod_100.200cm_mean_0.01_deg,SOIL_bdod_15.30cm_mean_0.01_deg,...,file_path,jpeg_bytes,bin_0,bin_1,bin_2,bin_3,bin_4,bin_5,final_bin,fold
0,192027691,12.235703,374.466675,62.524445,72.256844,773.592041,33.277779,125,149,136,...,/kaggle/input/planttraits2024/train_images/192...,b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00...,2,2,1,4,2,1,221421,2.0
1,195542235,17.270555,90.239998,10.351111,38.22094,859.193298,40.009777,124,144,138,...,/kaggle/input/planttraits2024/train_images/195...,b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00...,3,3,2,2,2,3,332223,1.0
2,196639184,14.254504,902.071411,49.642857,17.873655,387.977753,22.807142,107,133,119,...,/kaggle/input/planttraits2024/train_images/196...,b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00...,5,1,5,5,2,3,515523,0.0
3,195728812,18.680834,1473.93335,163.100006,45.009758,381.053986,20.436666,120,131,125,...,/kaggle/input/planttraits2024/train_images/195...,b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00...,3,2,3,2,1,3,323213,4.0
4,195251545,0.673204,530.088867,50.857777,38.230709,1323.526855,45.891998,91,146,120,...,/kaggle/input/planttraits2024/train_images/195...,b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00...,2,3,3,5,4,4,233544,2.0


In [6]:
train = train_df[train_df["fold"] != CONFIG.VALID_FOLD]
valid = train_df[train_df["fold"] == CONFIG.VALID_FOLD] # Fold 0 is validation
train[CONFIG.TARGET_COLUMNS + ["fold"]].describe()

Unnamed: 0,X4_mean,X11_mean,X18_mean,X50_mean,X26_mean,X3112_mean,fold
count,44391.0,44391.0,44391.0,44391.0,44391.0,44391.0,44391.0
mean,0.522456,127.1709,24600.4,12.810444,3096.704,493829.3,2.499966
std,0.176001,12379.79,2582362.0,1313.424294,221043.6,102327000.0,1.118037
min,-2.431157,6.78e-05,2.33e-08,9.7e-05,5.5e-07,7.69e-08,1.0
25%,0.410739,10.6356,0.3099867,1.174045,0.5595144,255.2807,1.5
50%,0.509275,15.12003,0.7171231,1.48013,2.529542,725.8266,2.0
75%,0.622427,19.68705,3.574691,1.924787,14.98396,2158.052,3.0
max,4.475172,1504254.0,272049400.0,159759.8977,31065550.0,21559110000.0,4.0


In [7]:
class PlantDataPreProcess:
    lower_quantile = 0.005
    upper_quantile = 0.995
    log_transform = np.log10

In [8]:
# Filter data
print("Num samples before filtering:", len(train))

for trait in CONFIG.TARGET_COLUMNS:
    lower_bound = train[trait].quantile(PlantDataPreProcess.lower_quantile)
    upper_bound = train[trait].quantile(PlantDataPreProcess.upper_quantile)
    train = train[(train[trait] >= lower_bound) & (train[trait] <= upper_bound)]
    
print("Num samples After filtering:", len(train))
train[CONFIG.TARGET_COLUMNS].describe()

Num samples before filtering: 44391
Num samples After filtering: 41797


Unnamed: 0,X4_mean,X11_mean,X18_mean,X50_mean,X26_mean,X3112_mean
count,41797.0,41797.0,41797.0,41797.0,41797.0,41797.0
mean,0.521676,15.823053,3.207305,1.616637,42.244362,1858.297208
std,0.144273,7.598915,5.347872,0.638061,166.646792,3116.155242
min,0.176725,2.830246,0.032735,0.494166,0.006679,9.725925
25%,0.410757,10.792999,0.318085,1.186312,0.58651,267.23733
50%,0.509045,15.129038,0.714284,1.481727,2.534134,729.941079
75%,0.621267,19.51144,3.402814,1.909787,14.288664,2106.94038
max,0.957788,58.287012,32.388908,4.608223,2369.101479,29876.60141


In [9]:
# Log10 transformation for all traits except X4
LOG_FEATURES = ['X11_mean', 'X18_mean', 'X50_mean', 'X26_mean', 'X3112_mean']

# Normalize
from sklearn.preprocessing import StandardScaler
SCALER = StandardScaler()

# Transform and normalize data
def transform_norm_targets(data_df, is_train=True):
    
    y_df = data_df[CONFIG.TARGET_COLUMNS]
    
    for skewed_trait in LOG_FEATURES:
        y_df.loc[:, skewed_trait] = y_df[skewed_trait].apply(PlantDataPreProcess.log_transform)

    if is_train:
        y = SCALER.fit_transform(y_df)
    else:
        y = SCALER.transform(y_df)
        
    # See transformed data summary
    y_df = pd.DataFrame(y, columns=CONFIG.TARGET_COLUMNS)
    print(y_df.describe())
    print()
    
    return y    

In [10]:
y_train = transform_norm_targets(train, is_train=True)
y_valid = transform_norm_targets(valid, is_train=False)

            X4_mean      X11_mean      X18_mean      X50_mean      X26_mean  \
count  4.179700e+04  4.179700e+04  4.179700e+04  4.179700e+04  4.179700e+04   
mean  -2.728476e-16  3.497719e-16  1.249489e-17  1.274989e-18 -2.158981e-17   
std    1.000012e+00  1.000012e+00  1.000012e+00  1.000012e+00  1.000012e+00   
min   -2.390994e+00 -3.137852e+00 -2.227742e+00 -2.962396e+00 -2.539718e+00   
25%   -7.688273e-01 -5.158568e-01 -7.486282e-01 -6.334180e-01 -6.641329e-01   
50%   -8.755138e-02  1.456847e-01 -2.224117e-01 -4.206462e-02 -5.080546e-02   
75%    6.903019e-01  6.439883e-01  7.930455e-01  6.328630e-01  6.740871e-01   
max    3.022865e+00  2.787718e+00  2.258726e+00  2.975450e+00  2.816056e+00   

         X3112_mean  
count  4.179700e+04  
mean   1.108390e-16  
std    1.000012e+00  
min   -2.888924e+00  
25%   -6.604133e-01  
50%    1.541993e-02  
75%    7.283811e-01  
max    2.511973e+00  

            X4_mean      X11_mean      X18_mean      X50_mean      X26_mean  \
count  110

### SWIN Transformer Data Load

In [11]:
CONFIG.N_TRAIN_SAMPLES = len(train)
CONFIG.N_STEPS_PER_EPOCH = (CONFIG.N_TRAIN_SAMPLES // CONFIG.BATCH_SIZE)
CONFIG.N_STEPS = CONFIG.N_STEPS_PER_EPOCH * CONFIG.N_EPOCHS + 1

test = pd.read_csv('/kaggle/input/planttraits2024/test.csv')
test['file_path'] = test['id'].apply(lambda s: f'/kaggle/input/planttraits2024/test_images/{s}.jpeg')
test['jpeg_bytes'] = test['file_path'].progress_apply(lambda fp: open(fp, 'rb').read())
test.to_pickle('test.pkl')

print('N_TRAIN_SAMPLES:', len(train), '\nN_TEST_SAMPLES:', len(test))
test.head()

  0%|          | 0/6545 [00:00<?, ?it/s]

N_TRAIN_SAMPLES: 41797 
N_TEST_SAMPLES: 6545


Unnamed: 0,id,WORLDCLIM_BIO1_annual_mean_temperature,WORLDCLIM_BIO12_annual_precipitation,WORLDCLIM_BIO13.BIO14_delta_precipitation_of_wettest_and_dryest_month,WORLDCLIM_BIO15_precipitation_seasonality,WORLDCLIM_BIO4_temperature_seasonality,WORLDCLIM_BIO7_temperature_annual_range,SOIL_bdod_0.5cm_mean_0.01_deg,SOIL_bdod_100.200cm_mean_0.01_deg,SOIL_bdod_15.30cm_mean_0.01_deg,...,VOD_X_1997_2018_multiyear_mean_m05,VOD_X_1997_2018_multiyear_mean_m06,VOD_X_1997_2018_multiyear_mean_m07,VOD_X_1997_2018_multiyear_mean_m08,VOD_X_1997_2018_multiyear_mean_m09,VOD_X_1997_2018_multiyear_mean_m10,VOD_X_1997_2018_multiyear_mean_m11,VOD_X_1997_2018_multiyear_mean_m12,file_path,jpeg_bytes
0,201238668,8.086756,2246.5,127.321426,20.423418,353.381042,17.535713,80,109,90,...,0.387536,0.37491,0.363712,0.364623,0.379435,0.388294,0.398887,0.397853,/kaggle/input/planttraits2024/test_images/2012...,b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00...
1,202310319,10.844286,495.871429,28.023809,18.738306,786.554382,29.292856,130,155,142,...,0.332776,0.314386,0.291168,0.280947,0.277623,0.276503,0.271212,0.272672,/kaggle/input/planttraits2024/test_images/2023...,b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00...
2,202604412,8.105556,378.328583,39.92857,41.885647,722.071167,34.853809,133,134,139,...,0.437172,0.453239,0.44523,0.435441,0.432758,0.429839,0.41725,0.394216,/kaggle/input/planttraits2024/test_images/2026...,b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00...
3,201353439,7.077679,878.785706,70.428574,37.045235,669.389343,25.15,103,140,116,...,0.352311,0.401131,0.513455,0.519174,0.474007,0.469071,0.408466,0.366724,/kaggle/input/planttraits2024/test_images/2013...,b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00...
4,195351745,4.790555,2299.366699,150.199997,24.136568,462.887695,22.516666,85,114,98,...,0.462181,0.397052,0.358262,0.369279,0.41442,0.469002,0.519534,0.521837,/kaggle/input/planttraits2024/test_images/1953...,b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00...


In [12]:
# Where did values come from?
# ImageNet values for mean/std dev
MEAN = np.array([0.485, 0.456, 0.406])
STD = np.array([0.229, 0.224, 0.225])

TRAIN_TRANSFORMS = A.Compose([
        A.HorizontalFlip(p=0.5),
        # crop images randomly, data augmentation
        A.RandomSizedCrop(
            [448, 512], # range of sizes for cropped image
            CONFIG.IMAGE_SIZE, CONFIG.IMAGE_SIZE, w2h_ratio=1.0, p=0.75),
        A.Resize(CONFIG.IMAGE_SIZE, CONFIG.IMAGE_SIZE),
        A.RandomBrightnessContrast(brightness_limit=0.1, contrast_limit=0.1, p=0.25),
        A.ImageCompression(quality_lower=85, quality_upper=100, p=0.25),
        A.ToFloat(),
        A.Normalize(mean=MEAN, std=STD, max_pixel_value=1),
        # convert images from HWC format to Pytorch CHW format
        ToTensorV2(),
    ])

# validation should be deterministic
VALID_TRANSFORMS = A.Compose([
        A.Resize(CONFIG.IMAGE_SIZE, CONFIG.IMAGE_SIZE),
        A.ToFloat(),
        A.Normalize(mean=MEAN, std=STD, max_pixel_value=1),
        ToTensorV2(),
    ])

TEST_TRANSFORMS = A.Compose([
        A.Resize(CONFIG.IMAGE_SIZE, CONFIG.IMAGE_SIZE),
        A.ToFloat(),
        A.Normalize(mean=MEAN, std=STD, max_pixel_value=1),
        ToTensorV2(),
    ])

In [13]:
class Dataset(Dataset):
    def __init__(self, X_jpeg_bytes, y, transform=None):
        self.X_jpeg_bytes = X_jpeg_bytes
        self.y = y
        self.transform = transform

    # len(dataset) returns size of dataset
    def __len__(self):
        return len(self.X_jpeg_bytes)

    # dataset[i] gets ith sample
    def __getitem__(self, index):
        if self.transform is not None:
            # torch.Tensor of shape (3, 256, 256)
            X_sample = self.transform(
                image=imageio.imread(self.X_jpeg_bytes[index]),
            )['image']
        else:
            # np.ndarray of shape (512, 512, 3)
            X_sample = image=imageio.imread(self.X_jpeg_bytes[index])
        
        y_sample = self.y[index]
        
        return X_sample, y_sample

train_dataset = Dataset(
    train['jpeg_bytes'].values,
    y_train,
    TRAIN_TRANSFORMS,
)

train_dataloader = DataLoader(
    train_dataset,
    batch_size=CONFIG.BATCH_SIZE,
    shuffle=True,
    drop_last=True,
    num_workers=CONFIG.NUM_WORKERS,
)

# For validation evaluation during training
# y_valid is has log, normalization transformations applied
valid_dataset_true = Dataset(
    valid['jpeg_bytes'].values,
    y_valid,
    VALID_TRANSFORMS,
)

valid_dataloader = DataLoader(
    valid_dataset_true,
    batch_size=CONFIG.BATCH_SIZE,
    shuffle=False,
    drop_last=True,
    num_workers=CONFIG.NUM_WORKERS,
)

# For final evaluation based on 'id'
valid_dataset_id = Dataset(
    valid['jpeg_bytes'].values,
    valid['id'].values,
    VALID_TRANSFORMS,
)

test_dataset = Dataset(
    test['jpeg_bytes'].values,
    test['id'].values,
    TEST_TRANSFORMS,
)

In [14]:
# Visualize augmentations
def visualize_augmentations(dataset, idx=0, samples=10, cols=5):
    dataset = copy.deepcopy(dataset)
    dataset.transform = A.Compose([t for t in dataset.transform if not isinstance(t, (A.Normalize, ToTensorV2))])
    rows = samples // cols
    figure, ax = plt.subplots(nrows=rows, ncols=cols, figsize=(12, 6))
    for i in range(samples):
        image, _ = dataset[idx]
        ax.ravel()[i].imshow(image)
        ax.ravel()[i].set_axis_off()
    plt.tight_layout()
    plt.show()

In [15]:
# random.seed(42)
# visualize_augmentations(train_dataset)

In [16]:
class Model(nn.Module):
    def __init__(self):
        super().__init__()
        self.backbone = timm.create_model(
            CONFIG.BACKBONE,
            num_classes=CONFIG.N_TARGETS,
            pretrained=True)  # Use pretrained SWIN Transformer model
        
    def forward(self, inputs):
        return self.backbone(inputs)

model = Model()
model = model.to(CONFIG.DEVICE)
print(model)

model.safetensors:   0%|          | 0.00/204M [00:00<?, ?B/s]

Model(
  (backbone): SwinTransformerV2(
    (patch_embed): PatchEmbed(
      (proj): Conv2d(3, 96, kernel_size=(4, 4), stride=(4, 4))
      (norm): LayerNorm((96,), eps=1e-05, elementwise_affine=True)
    )
    (layers): Sequential(
      (0): SwinTransformerV2Stage(
        (downsample): Identity()
        (blocks): ModuleList(
          (0): SwinTransformerV2Block(
            (attn): WindowAttention(
              (cpb_mlp): Sequential(
                (0): Linear(in_features=2, out_features=512, bias=True)
                (1): ReLU(inplace=True)
                (2): Linear(in_features=512, out_features=3, bias=False)
              )
              (qkv): Linear(in_features=96, out_features=288, bias=False)
              (attn_drop): Dropout(p=0.0, inplace=False)
              (proj): Linear(in_features=96, out_features=96, bias=True)
              (proj_drop): Dropout(p=0.0, inplace=False)
              (softmax): Softmax(dim=-1)
            )
            (norm1): LayerNorm((96,), e

In [17]:
# def get_lr_scheduler(optimizer):
#     return torch.optim.lr_scheduler.OneCycleLR(
#         optimizer=optimizer,
#         max_lr=CONFIG.LR_MAX,
#         total_steps=CONFIG.N_STEPS,
#         pct_start=0.1,
#         anneal_strategy='cos',
#         div_factor=1e1,
#         final_div_factor=1e1,
#     )

class AverageMeter(object):
    def __init__(self):
        self.reset()
        
    def reset(self):
        self.avg = 0
        self.sum = 0
        self.count = 0

    def update(self, val):
        self.sum += val.sum()
        self.count += val.numel()
        self.avg = self.sum / self.count

# LR_SCHEDULER = get_lr_scheduler(optimizer)

In [18]:
MAE = torchmetrics.regression.MeanAbsoluteError().to(CONFIG.DEVICE)
R2 = torchmetrics.regression.R2Score(num_outputs=CONFIG.N_TARGETS, multioutput='uniform_average').to(CONFIG.DEVICE)
LOSS = AverageMeter()

Y_MEAN = torch.tensor(y_train).mean(dim=0).to(CONFIG.DEVICE)
EPS = torch.tensor([1e-6]).to(CONFIG.DEVICE)

def r2_loss(y_pred, y_true):
    ss_res = torch.sum((y_true - y_pred)**2, dim=0)
    ss_total = torch.sum((y_true - Y_MEAN)**2, dim=0)
    ss_total = torch.maximum(ss_total, EPS)
    r2 = torch.mean(ss_res / ss_total)
    return r2

# How is this R2 Loss?
LOSS_FN = nn.SmoothL1Loss() # r2_loss

optimizer = torch.optim.AdamW(
    params=model.parameters(),
    lr=CONFIG.LR_MAX,
    weight_decay=CONFIG.WEIGHT_DECAY,
)

# Validation metrics
MAE_valid = torchmetrics.regression.MeanAbsoluteError().to(CONFIG.DEVICE)
R2_valid = torchmetrics.regression.R2Score(num_outputs=CONFIG.N_TARGETS, multioutput='uniform_average').to(CONFIG.DEVICE)
LOSS_valid = AverageMeter()

In [19]:
def train_fn(train_loader, model, loss_fn, optimizer, epoch):
    MAE.reset()
    R2.reset()
    LOSS.reset()
    model.train()
    stream = tqdm(train_loader)
        
    for step, (X_batch, y_true) in enumerate(stream):
        X_batch = X_batch.to(CONFIG.DEVICE)
        y_true = y_true.to(CONFIG.DEVICE)
        t_start = time.perf_counter_ns()
        y_pred = model(X_batch)  # forward pass
        loss = loss_fn(y_pred, y_true)
        LOSS.update(loss)
        
        optimizer.zero_grad()  # zero all gradients before backward pass
        loss.backward()  # backward pass: compute gradient of loss wrt model params   
        optimizer.step()  # update parameters
        
#         LR_SCHEDULER.step()
        MAE.update(y_pred, y_true)
        R2.update(y_pred, y_true)
    
        stream.set_description(f"Epoch: {epoch}. Train.      loss: {LOSS.avg:.4f}")
        
        if not CONFIG.IS_INTERACTIVE and (step+1) == CONFIG.N_STEPS_PER_EPOCH:
            print(
                f'EPOCH {epoch+1:02d}, {step+1:04d}/{CONFIG.N_STEPS_PER_EPOCH} | ' + 
                f'loss: {LOSS.avg:.4f}, mae: {MAE.compute().item():.4f}, r2: {R2.compute().item():.4f}, ' +
                f'step: {(time.perf_counter_ns()-t_start)*1e-9:.3f}s',
            )
        elif CONFIG.IS_INTERACTIVE:
            print(
                f'\rEPOCH {epoch+1:02d}, {step+1:04d}/{CONFIG.N_STEPS_PER_EPOCH} | ' + 
                f'loss: {LOSS.avg:.4f}, mae: {MAE.compute().item():.4f}, r2: {R2.compute().item():.4f}, ' +
                f'step: {(time.perf_counter_ns()-t_start)*1e-9:.3f}s',
                end='\n' if (step + 1) == CONFIG.N_STEPS_PER_EPOCH else '', flush=True,
            )

In [20]:
def validate_fn(val_loader, model, loss_fn, epoch):
    MAE_valid.reset()
    R2_valid.reset()
    LOSS_valid.reset()
    
    model.eval()
    stream = tqdm(val_loader)
    
    with torch.no_grad():
        for step, (X_batch, y_true) in enumerate(stream):
            X_batch = X_batch.to(CONFIG.DEVICE)
            y_true = y_true.to(CONFIG.DEVICE)
            t_start = time.perf_counter_ns()
            y_pred = model(X_batch)  # forward pass
            loss = loss_fn(y_pred, y_true)
            LOSS_valid.update(loss)
            MAE_valid.update(y_pred, y_true)
            R2_valid.update(y_pred, y_true)
            
            stream.set_description(f"Epoch: {epoch}. Validation. loss: {LOSS.avg:.4f}")

            if not CONFIG.IS_INTERACTIVE and (step+1) == CONFIG.N_STEPS_PER_EPOCH:
                print(
                    f'EPOCH {epoch+1:02d}, {step+1:04d}/{CONFIG.N_STEPS_PER_EPOCH} | ' + 
                    f'loss: {LOSS_valid.avg:.4f}, mae: {MAE_valid.compute().item():.4f}, r2: {R2_valid.compute().item():.4f}, ' +
                    f'step: {(time.perf_counter_ns()-t_start)*1e-9:.3f}s',
                )
            elif CONFIG.IS_INTERACTIVE:
                print(
                    f'\rEPOCH {epoch+1:02d}, {step+1:04d}/{CONFIG.N_STEPS_PER_EPOCH} | ' + 
                    f'loss: {LOSS_valid.avg:.4f}, mae: {MAE_valid.compute().item():.4f}, r2: {R2_valid.compute().item():.4f}, ' +
                    f'step: {(time.perf_counter_ns()-t_start)*1e-9:.3f}s',
                    end='\n' if (step + 1) == CONFIG.N_STEPS_PER_EPOCH else '', flush=True,
                )

In [21]:
print("Start Training:")
for epoch in range(CONFIG.N_EPOCHS):
    train_fn(train_dataloader, model, LOSS_FN, optimizer, epoch)
    validate_fn(valid_dataloader, model, LOSS_FN, epoch)
    
torch.save(model, 'model.pth')

Start Training:


  0%|          | 0/2612 [00:00<?, ?it/s]

EPOCH 01, 2612/2612 | loss: 0.3071, mae: 0.6460, r2: 0.3028, step: 0.441s


  0%|          | 0/693 [00:00<?, ?it/s]

  0%|          | 0/2612 [00:00<?, ?it/s]

EPOCH 02, 2612/2612 | loss: 0.2749, mae: 0.6029, r2: 0.3831, step: 0.399s


  0%|          | 0/693 [00:00<?, ?it/s]

  0%|          | 0/2612 [00:00<?, ?it/s]

EPOCH 03, 2612/2612 | loss: 0.2527, mae: 0.5727, r2: 0.4377, step: 0.398s


  0%|          | 0/693 [00:00<?, ?it/s]

  0%|          | 0/2612 [00:00<?, ?it/s]

EPOCH 04, 2612/2612 | loss: 0.2322, mae: 0.5441, r2: 0.4875, step: 0.399s


  0%|          | 0/693 [00:00<?, ?it/s]

  0%|          | 0/2612 [00:00<?, ?it/s]

EPOCH 05, 2612/2612 | loss: 0.2127, mae: 0.5167, r2: 0.5345, step: 0.399s


  0%|          | 0/693 [00:00<?, ?it/s]

  0%|          | 0/2612 [00:00<?, ?it/s]

EPOCH 06, 2612/2612 | loss: 0.1932, mae: 0.4892, r2: 0.5811, step: 0.399s


  0%|          | 0/693 [00:00<?, ?it/s]

In [22]:
# Validate on validation set
VALID_ROWS = []
model.eval()

for X_sample_valid, valid_id in tqdm(valid_dataset_id):
    with torch.no_grad():
        y_pred = model(X_sample_valid.unsqueeze(0).to(CONFIG.DEVICE)).detach().cpu().numpy()
    
    y_pred = SCALER.inverse_transform(y_pred).squeeze()
    row = {'id': valid_id}
    
    for k, v in zip(CONFIG.TARGET_COLUMNS, y_pred):
        if k in LOG_FEATURES:
            row[k] = 10 ** v
        else:
            row[k] = v

    VALID_ROWS.append(row)
    
valid_predict_df = pd.DataFrame(VALID_ROWS)
print(valid_predict_df.head())

  0%|          | 0/11098 [00:00<?, ?it/s]

          id   X4_mean   X11_mean   X18_mean  X50_mean   X26_mean   X3112_mean
0  196639184  0.807106   5.239117  12.133066  2.349433   3.045297  1369.272444
1  194172253  0.633066   6.753029   2.882617  1.740260   4.271368   201.887200
2  195436433  0.648220   6.386102   2.295088  2.514164   8.921477   100.524051
3  195607084  0.540580  19.735208  10.100859  1.119755  90.809755  6004.646484
4  114310764  0.610605   5.077404   1.585163  1.880679  23.466263   786.254035


In [23]:
# valid_y_true
print(valid[['id'] + CONFIG.TARGET_COLUMNS].head())
valid_y_true = torch.tensor(valid[CONFIG.TARGET_COLUMNS].to_numpy()).to(CONFIG.DEVICE)

           id   X4_mean   X11_mean  X18_mean  X50_mean   X26_mean   X3112_mean
2   196639184  0.796917   5.291251  8.552908  2.343153   0.395241  1130.096731
27  194172253  0.805731   5.046315  3.618886  1.515897  15.384261   958.310735
32  195436433  0.778597   6.673336  4.851825  3.511029  17.426103   235.341087
37  195607084  0.508277  18.149699  8.744276  1.036462   0.118370  1166.539111
40  114310764  0.564976   2.977790  1.379527  3.541963   1.087715  1173.074608


In [24]:
# Evaluate valid scores
valid_y_pred = torch.tensor(valid_predict_df[CONFIG.TARGET_COLUMNS].to_numpy()).to(CONFIG.DEVICE)

with torch.no_grad():
    # Calculate R2 Loss
    print("Validation R2 Loss (using r2_loss):", r2_loss(valid_y_pred, valid_y_true))

    # Loss function (smooth L1 loss)
    valid_loss = LOSS_FN(valid_y_pred, valid_y_true)
    print("Validation loss (Smooth L1 loss): ", valid_loss)

Validation R2 Loss (using r2_loss): tensor(0.8454, device='cuda:0', dtype=torch.float64)
Validation loss (Smooth L1 loss):  tensor(2828.5845, device='cuda:0', dtype=torch.float64)


In [25]:
# Scratch code to test R2 loss: random produced around R2 score = -92
# v_len = len(valid_y_true)
# train_y_true = torch.tensor(train[0:v_len][CONFIG.TARGET_COLUMNS].to_numpy())
# print("Train and valid R2 score:", 1 - r2_loss_valid(valid_y_true, train_y_true))

MAE_valid.reset()
R2_valid.reset()

print("Torch R2 valid:", R2_valid(valid_y_pred, valid_y_true))
print("Torch MAE valid:", MAE_valid(valid_y_pred, valid_y_true))

Torch R2 valid: tensor(0.0397, device='cuda:0')
Torch MAE valid: tensor(2828.9268, device='cuda:0')


In [26]:
# Predict on test set
SUBMISSION_ROWS = []
model.eval()

for X_sample_test, test_id in tqdm(test_dataset):
    with torch.no_grad():
        y_pred = model(X_sample_test.unsqueeze(0).to(CONFIG.DEVICE)).detach().cpu().numpy()
    
    y_pred = SCALER.inverse_transform(y_pred).squeeze()
    row = {'id': test_id}
    
    for k, v in zip(CONFIG.TARGET_COLUMNS, y_pred):
        if k in LOG_FEATURES:
            row[k.replace('_mean', '')] = 10 ** v
        else:
            row[k.replace('_mean', '')] = v

    SUBMISSION_ROWS.append(row)
    
submission_df = pd.DataFrame(SUBMISSION_ROWS)
print(submission_df.head())
submission_df.to_csv('submission.csv', index=False)
print("Submit!")

  0%|          | 0/6545 [00:00<?, ?it/s]

          id        X4        X11       X18       X50       X26        X3112
0  201238668  0.572182  10.221128  0.947040  1.649065  2.737708   294.136078
1  202310319  0.495761  17.337815  0.352199  1.077703  0.119835   744.844787
2  202604412  0.692888   8.781568  1.374655  2.116764  7.760987   183.386019
3  201353439  0.592930  21.161131  0.197484  0.910024  0.341743  1715.287225
4  195351745  0.465533  15.038305  0.161827  1.484952  0.598778   380.275539
Submit!
