In [None]:
# import libraries
import random
import pandas as pd
import numpy as np
from scipy import stats

from sklearn.preprocessing import StandardScaler

from torch.utils.data import Dataset, DataLoader
import imageio.v3 as imageio
import albumentations as A
import imgaug
from albumentations.pytorch import ToTensorV2

from torch import nn
import torch
import timm

import torch.optim as optim
import torchmetrics

from tqdm.notebook import tqdm
tqdm.pandas()

In [None]:
def set_seed(seed):
    """Seed every random number generator this notebook relies on.

    Seeds Python's ``random``, NumPy, PyTorch and imgaug, additionally seeds
    the CUDA generators when CUDA is available, and finally switches cuDNN to
    deterministic mode with benchmarking turned off.

    Args:
        seed: Seed value handed unchanged to each library.
    """
    # Same call order as before: stdlib, NumPy, torch, imgaug.
    for seed_fn in (random.seed, np.random.seed, torch.manual_seed, imgaug.random.seed):
        seed_fn(seed)

    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)

    # These flags are set whether or not a GPU is present.
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

# single seed shared by every library seeded in set_seed
SEED = 42
set_seed(SEED)

In [None]:
# load data
# NOTE: read_pickle executes arbitrary pickled code; only use it on trusted files.
train = pd.read_pickle('/kaggle/input/cs480data/cs480train.pkl')
test = pd.read_pickle('/kaggle/input/cs480data/cs480test.pkl')

labels = ['X4_mean', 'X11_mean', 'X18_mean', 'X26_mean', 'X50_mean', 'X3112_mean']

# every remaining column (not a label, id, path or image payload) is a tabular feature
non_feature_columns = set(labels) | {'id', 'file_path', 'jpeg_bytes'}
features = [column for column in train.columns if column not in non_feature_columns]

# per-label outlier bounds, taken from the unfiltered training frame
lower = [train[label].quantile(0.005) for label in labels]
higher = [train[label].quantile(0.985) for label in labels]

# keep only rows whose every label lies inside its [lower, higher] band (inclusive)
for label, low, high in zip(labels, lower, higher):
    train = train[train[label].between(low, high)]

In [None]:
# transform 1: log-and-scale y-labels
# log10 each label column, stack them in `labels` order, store as float32
y = np.column_stack(
    [np.log10(train[label].values) for label in labels]
).astype(np.float32)

# standardise the log-labels; y_scaler is kept so predictions can be inverted later
y_scaler = StandardScaler().fit(y)
y = y_scaler.transform(y)


# transform 2: log-and-scale x-features
# Features whose training skewness exceeds 1 are log10-transformed.
# Some of those columns contain negative values, so a column whose training
# minimum is negative is first shifted by that minimum; 1 is always added
# before taking the log. The per-column shift is remembered in
# LOG_TRAIN_FEATURES_MIN_RECOVERY so that other data can reuse it.

skewness = stats.skew(train[features])
log_features = [feature for feature, skew in zip(features, skewness) if skew > 1]

LOG_TRAIN_FEATURES_MIN_RECOVERY = [0.00] * len(features)
x_tab_train = np.zeros_like(train[features], dtype=np.float32)
for col, feature in enumerate(features):
    column = train[feature].values
    if feature in log_features:
        column_min = np.min(column)
        # only a negative minimum is used as a shift; otherwise shift by 0
        shift = column_min if column_min < 0 else 0
        LOG_TRAIN_FEATURES_MIN_RECOVERY[col] = shift
        column = np.log10(column - shift + 1)
    x_tab_train[:, col] = column

# standardise every feature column; tab_scaler holds the training statistics
tab_scaler = StandardScaler()
x_tab_train = tab_scaler.fit_transform(x_tab_train)

# transform 3: log-and-scale x-features for test set
# The test set must go through exactly the pipeline fitted on the training
# set: the same per-column shift (+1) before log10, and the scaler statistics
# learned from the training features.
x_tab_test = np.zeros((len(test), len(features)), dtype=np.float32)
for idx, feature in enumerate(features):
    v = test[feature].values
    if feature in log_features:
        # apply the training shift, exactly as done for the training column
        shifted = v - LOG_TRAIN_FEATURES_MIN_RECOVERY[idx] + 1
        # a test value below the training minimum can make `shifted`
        # non-positive; clip so log10 never sees zero or a negative number
        v = np.log10(np.clip(shifted, a_min=1e-10, a_max=None))
    x_tab_test[:, idx] = v

# transform only: re-fitting here would standardise the test set with its own
# mean/std instead of the statistics the model was trained with
x_tab_test = tab_scaler.transform(x_tab_test)

In [None]:
# training schedule
epochs = 6
batch_size = 10
# number of full batches per epoch times the epoch count, plus one extra step
total_steps = epochs * (len(train) // batch_size) + 1

In [None]:
# create datasets (mostly borrowed from HdJoJo)
# image transforms: flip, crop, brightness, compression
# per-channel statistics passed to A.Normalize
MEAN = [0.485, 0.456, 0.406]
STD = [0.229, 0.224, 0.225]


def _float_normalize_tensor_steps():
    """Return fresh instances of the final steps shared by both pipelines."""
    return [
        A.ToFloat(),
        A.Normalize(mean=MEAN, std=STD, max_pixel_value=1),
        ToTensorV2(),
    ]


# training: random flip / crop / brightness-contrast / JPEG compression
TRAIN_IMAGE_TRANSFORMS = A.Compose([
    A.HorizontalFlip(p=0.5),
    A.RandomSizedCrop([112, 128], 128, 128, w2h_ratio=1.0, p=0.8),
    A.Resize(384, 384),
    A.RandomBrightnessContrast(brightness_limit=0.1, contrast_limit=0.1, p=0.3),
    A.ImageCompression(quality_lower=85, quality_upper=100, p=0.3),
    *_float_normalize_tensor_steps(),
])

# test: deterministic resize only, followed by the same shared final steps
TEST_IMAGE_TRANSFORMS = A.Compose([
    A.Resize(384, 384),
    *_float_normalize_tensor_steps(),
])

class Dataset(torch.utils.data.Dataset):
    """Map-style dataset yielding ``(image, tabular_row, y)`` triples.

    The base class is spelled out as ``torch.utils.data.Dataset`` rather than
    the bare name ``Dataset``: this class rebinds that name, so re-running the
    cell would otherwise make the class inherit from its own previous
    definition.

    Args:
        X_jpeg_bytes: Indexable collection of encoded images, one per sample;
            each entry is passed to ``imageio.imread``.
        X_tab_data: Indexable collection of tabular feature rows.
        y: Indexable collection returned as the third element of each sample
            (regression targets for training, ids for the test split).
        image_transforms: Optional callable invoked as
            ``image_transforms(image=array)`` whose result is indexed with
            ``['image']``. When ``None`` the decoded image is returned as-is.
    """

    def __init__(self, X_jpeg_bytes, X_tab_data, y, image_transforms=None):
        self.X_jpeg_bytes = X_jpeg_bytes
        self.tab_data = X_tab_data
        self.y = y
        self.image_transforms = image_transforms

    def __len__(self):
        """Number of samples (one per encoded image)."""
        return len(self.X_jpeg_bytes)

    def __getitem__(self, index):
        """Decode, optionally transform, and return the sample at ``index``."""
        image = imageio.imread(self.X_jpeg_bytes[index])
        # the constructor default is None, so tolerate a missing pipeline
        if self.image_transforms is not None:
            image = self.image_transforms(image=image)['image']
        return image, self.tab_data[index], self.y[index]


# training split: augmented images, scaled tabular features, scaled log-labels
train_dataset = Dataset(
    X_jpeg_bytes=train['jpeg_bytes'].values,
    X_tab_data=x_tab_train,
    y=y,
    image_transforms=TRAIN_IMAGE_TRANSFORMS,
)
train_dataloader = DataLoader(
    dataset=train_dataset,
    batch_size=batch_size,
    drop_last=True,
    shuffle=True,
)

# test split: the `y` slot carries the sample ids instead of targets
test_dataset = Dataset(
    X_jpeg_bytes=test['jpeg_bytes'].values,
    X_tab_data=x_tab_test,
    y=test['id'].values,
    image_transforms=TEST_IMAGE_TRANSFORMS,
)
test_dataloader = DataLoader(
    dataset=test_dataset,
    batch_size=1,
    num_workers=0,
    drop_last=False,
    shuffle=False,
)

In [None]:
# use the GPU when one is visible, otherwise fall back to the CPU so the
# notebook still runs end-to-end on a machine without CUDA
device = 'cuda' if torch.cuda.is_available() else 'cpu'

In [None]:
# model change
# hidden width of the tabular branch and output widths of both branches
tab_extractor_layer = tab_feature_rep = image_feature_rep = 512

# widths of the fusion head; comb_l2 is only used when second_layer is True
comb_l1, comb_l2 = 512, 256
second_layer = True

In [None]:
# define model
class Model(nn.Module):
    """Two-branch regressor fusing image features with tabular features.

    * ``tab_extractor`` maps the ``len(features)`` tabular inputs to
      ``tab_feature_rep`` values.
    * ``pre_trained`` is a timm Swin backbone loaded from a fine-tuned
      checkpoint with its classification head replaced by ``nn.Identity``.
      Its parameters are frozen and it is kept in eval mode even while the
      rest of the model trains.
    * ``image_extractor`` pools the backbone output and projects it to
      ``image_feature_rep`` values.
    * ``concat_nn`` maps the concatenated features to ``len(labels)`` outputs.

    Layer widths and the depth of ``concat_nn`` come from the module-level
    hyperparameters (``tab_extractor_layer``, ``comb_l1``, ``second_layer``, ...).
    """

    def __init__(self):
        super().__init__()
        # tab extractor
        self.tab_extractor = nn.Sequential(
            nn.Linear(len(features), tab_extractor_layer),
            nn.BatchNorm1d(tab_extractor_layer),
            nn.ReLU(),
            nn.Linear(tab_extractor_layer, tab_feature_rep)
        )

        # image extractor
        self.pre_trained = timm.create_model('swin_large_patch4_window12_384.ms_in22k_ft_in1k', num_classes=len(labels), pretrained=False)
        # map_location='cpu' lets the checkpoint load on machines without the
        # device it was saved from; the model is moved to `device` afterwards
        checkpoint = torch.load(
            '/kaggle/input/swin-transformer-v1-planttraits2024-finetuned/pytorch/log3-noval-8epoch/1/model_08.pth',
            map_location='cpu',
        )
        self.pre_trained.load_state_dict(checkpoint)
        for p in self.pre_trained.parameters():
            p.requires_grad = False  # freeze
        self.pre_trained.eval()
        self.pre_trained.head = nn.Identity()
        self.image_extractor = nn.Sequential(nn.AdaptiveAvgPool2d(1), nn.Flatten(), nn.Linear(self.pre_trained.num_features, image_feature_rep))

        # combination layers: shared first stage, optional second hidden stage
        fusion_layers = [
            nn.Linear(tab_feature_rep + image_feature_rep, comb_l1),
            nn.BatchNorm1d(comb_l1),
            nn.ReLU(),
        ]
        if second_layer:
            fusion_layers += [
                nn.Linear(comb_l1, comb_l2),
                nn.BatchNorm1d(comb_l2),
                nn.ReLU(),
                nn.Linear(comb_l2, len(labels)),
            ]
        else:
            fusion_layers.append(nn.Linear(comb_l1, len(labels)))
        self.concat_nn = nn.Sequential(*fusion_layers)

    def train(self, mode=True):
        """Set the training mode, but always keep the frozen backbone in eval mode.

        The backbone's weights are never updated, so any train-only layers
        inside it (e.g. dropout or stochastic depth) should stay disabled;
        otherwise the features seen during training would differ from the
        ones produced at inference time.
        """
        super().train(mode)
        self.pre_trained.eval()
        return self

    def forward(self, image, tabular):
        """Return one prediction per label for a batch.

        Args:
            image: Image batch fed to the backbone.
            tabular: Tabular feature batch, ``len(features)`` values per row.

        Returns:
            Tensor with ``len(labels)`` outputs per sample.
        """
        # extract from Image: the backbone output is treated as channels-last
        # (B, H, W, C) and permuted to (B, C, H, W) for 2-D average pooling
        image_features = self.image_extractor(self.pre_trained(image).permute(0, 3, 1, 2))
        # extract from Tabular
        tabular_features = self.tab_extractor(tabular)
        # fusion
        combo_features = torch.cat([image_features, tabular_features], dim=1)
        # combination
        output = self.concat_nn(combo_features)
        return output

# build the network and place it on the selected device in one step
model = Model().to(device)

In [None]:
# optimiser settings: learning rate (also used as the scheduler's peak) and weight decay
lr, weight_decay = 1e-4, 1e-2

In [None]:
# define scheduler/metrics
class AverageMeter(object):
    """Running mean over every element passed to ``update``.

    Attributes are ``sum`` (total of all elements seen), ``count`` (number of
    elements seen) and ``avg`` (``sum / count``, or 0 before any update).
    """

    def __init__(self):
        self.reset()

    def reset(self):
        """Forget everything accumulated so far."""
        self.avg = 0
        self.sum = 0
        self.count = 0

    def update(self, val):
        """Fold ``val`` into the running statistics.

        Args:
            val: A tensor (every element is counted) or a plain number
                (counted as a single element).
        """
        if torch.is_tensor(val):
            # Detach first: accumulating a graph-attached loss would make
            # `sum` carry an ever-growing autograd history across steps.
            val = val.detach()
            self.sum += val.sum()
            self.count += val.numel()
        else:
            self.sum += val
            self.count += 1
        # guard against a division by zero when only empty tensors were seen
        self.avg = self.sum / self.count if self.count else 0

# metrics live on the same device as the model outputs they are updated with
regression_metrics = torchmetrics.regression
MAE = regression_metrics.MeanAbsoluteError().to(device)
R2 = regression_metrics.R2Score(
    multioutput='uniform_average',
    num_outputs=len(labels),
).to(device)

# running mean of the training loss
LOSS = AverageMeter()

# training objective
loss_fn = nn.SmoothL1Loss()

def get_lr_scheduler(optimizer, lr_max):
    """Build the one-cycle learning-rate schedule used for training.

    Args:
        optimizer: Optimizer whose learning rate will be scheduled.
        lr_max: Peak learning rate of the cycle.

    Returns:
        A ``OneCycleLR`` scheduler spanning the module-level ``total_steps``,
        with cosine annealing, ``pct_start=0.1`` and both ``div_factor`` and
        ``final_div_factor`` set to 10.
    """
    one_cycle_options = dict(
        max_lr=lr_max,
        total_steps=total_steps,
        pct_start=0.1,
        anneal_strategy='cos',
        div_factor=10.0,
        final_div_factor=10.0,
    )
    return torch.optim.lr_scheduler.OneCycleLR(optimizer, **one_cycle_options)

# AdamW over all model parameters; the scheduler peaks at the same `lr`
optimizer = optim.AdamW(model.parameters(), lr=lr, weight_decay=weight_decay)
scheduler = get_lr_scheduler(optimizer, lr_max=lr)

In [None]:
# training
# drop_last=True on the train loader, so every epoch has exactly this many steps
steps_per_epoch = len(train) // batch_size
# per-step training loss, one row per epoch
tracker = np.zeros((epochs, steps_per_epoch), dtype=float)

# move the model once, not on every step
model = model.to(device)

for epoch in range(epochs):
    model.train()
    MAE.reset()
    R2.reset()
    LOSS.reset()

    # `target` (not `y`) so the module-level label array `y` is not
    # overwritten by the last batch, which would break re-running earlier cells
    for step, (image, tab, target) in enumerate(train_dataloader):
        image = image.to(device)
        tab = tab.to(device)
        target = target.to(device)

        y_pred = model(image, tab)

        # conventional (input, target) argument order
        loss = loss_fn(y_pred, target)
        loss.backward()

        optimizer.step()
        optimizer.zero_grad()

        scheduler.step()

        # bookkeeping works on detached values so no autograd history is kept
        step_loss = loss.detach()
        step_pred = y_pred.detach()
        LOSS.update(step_loss)
        MAE.update(step_pred, target)
        R2.update(step_pred, target)

        tracker[epoch, step] = step_loss.item()

    # end-of-epoch summary: epoch number, mean loss, MAE, R2
    if steps_per_epoch:
        print(str(epoch + 1))
        print(str(float(LOSS.avg)))
        print(str(MAE.compute().item()))
        print(str(R2.compute().item()))

In [None]:
# save model
# move the model to the CPU first so the stored tensors are CPU tensors;
# note the model itself stays on the CPU after this cell
model.to('cpu')
torch.save(model.state_dict(), 'model.pth')

In [None]:
# evaluate model
submission_rows = []
model.to(device)
model.eval()

# submission columns are the label names without the '_mean' suffix
target_names = [label.replace('_mean', '') for label in labels]

with torch.no_grad():
    # `sample_ids` rather than `id`, which would shadow the builtin
    for image, tab, sample_ids in tqdm(test_dataloader):
        scaled_pred = model(image.to(device), tab.to(device)).cpu().numpy()

        # undo the label pipeline: inverse standardisation, then inverse log10
        batch_pred = 10 ** y_scaler.inverse_transform(scaled_pred)

        # iterate row-wise so this works for any test batch size, not only 1
        for sample_id, sample_pred in zip(sample_ids.tolist(), batch_pred):
            row = {'id': int(sample_id)}
            row.update(zip(target_names, sample_pred))
            submission_rows.append(row)

submission_df = pd.DataFrame(submission_rows)
submission_df.to_csv('20892920_huh.csv', index=False)
print('done')