In [18]:
import numpy as np
import pandas as pd
import imageio.v3 as imageio
import albumentations as A

# import torch_xla as xla
# import torch_xla.core.xla_model as xm
# import torch_xla.distributed.xla_multiprocessing as xmp
# import torch_xla.distributed.xla_backend

from albumentations.pytorch import ToTensorV2
from torch.utils.data import Dataset, DataLoader
from torch.utils.data.distributed import DistributedSampler
import torch.distributed as dist
from torch.nn.parallel import DistributedDataParallel as DDP
from torch import nn
from tqdm.notebook import tqdm
from sklearn.preprocessing import StandardScaler
from scipy.cluster import hierarchy
from scipy.spatial.distance import squareform


import torch
import timm
import torchmetrics
import time
import psutil

In [19]:
torch.manual_seed(42)
np.random.seed(42)
device = 'cuda:1' #xla.device()

In [20]:
class Config():
    IMAGE_SIZE = 384
    BACKBONE = 'swin_large_patch4_window12_384.ms_in22k_ft_in1k'
    TARGET_COLUMNS = ['X4_mean', 'X11_mean', 'X18_mean', 'X50_mean', 'X26_mean', 'X3112_mean']
    N_TARGETS = len(TARGET_COLUMNS)
    BATCH_SIZE = 4
    LR_MAX = 1e-4
    WEIGHT_DECAY = 0.01
    N_EPOCHS = 5
    TRAIN_MODEL = True
    IS_INTERACTIVE = True  # os.environ['KAGGLE_KERNEL_RUN_TYPE'] == 'Interactive'
    tpu_ids = range(8)
    Lower_Quantile = 0.005
    Upper_Quantile = 0.985
    SHRINK_SAMPLES = False

CONFIG = Config()

In [21]:
class TrainDataset(Dataset):
    def __init__(self, X_jpeg_bytes, X_tabular, y, transforms=None):
        self.X_jpeg_bytes = X_jpeg_bytes
        self.X_tabular = X_tabular
        self.y = y
        self.transforms = transforms

    def __len__(self):
        return len(self.y)

    def __getitem__(self, index):
        X_sample = self.transforms(
            image=imageio.imread(self.X_jpeg_bytes[index]),
        )['image']
        X_tabular_sample = self.X_tabular[index]
        y_sample = self.y[index]

        return X_sample, X_tabular_sample, y_sample
class TabularBackbone(nn.Module):
    def __init__(self, n_features, out_features):
        super().__init__()
        self.out_features = out_features
        self.fc = nn.Sequential(
            nn.Linear(n_features, 512),
            nn.BatchNorm1d(512),
            nn.GELU(),
            # nn.Dropout(0.1),
            nn.Linear(512, out_features),
        )

    def forward(self, x):
        return self.fc(x)
    
class ImageBackbone(nn.Module):
    def __init__(self, backbone_name, weight_path, out_features, fixed_feature_extractor=False):
        super().__init__()
        self.out_features = out_features
        self.backbone = timm.create_model(backbone_name, pretrained=False, num_classes=CONFIG.N_TARGETS)
        #self.backbone.load_state_dict(torch.load(weight_path))
        if fixed_feature_extractor:
            for param in self.backbone.parameters():
                param.requires_grad = False
        in_features = self.backbone.num_features
        
        self.backbone.head = nn.Identity()
        self.head = nn.Sequential(
            nn.AdaptiveAvgPool2d(1),
            nn.Flatten(),
            nn.Linear(in_features, out_features),
        )

    def forward(self, x):
        x = self.backbone(x)
        x = x.permute(0, 3, 1, 2)
        return self.head(x)

class Model(nn.Module):
    def __init__(self, img_backbone, tab_backbone, out_features:int):
        super().__init__()
        self.img_backbone = img_backbone
        self.tab_backbone = tab_backbone
        self.fc = nn.Sequential(
            nn.Linear(self.tab_backbone.out_features + self.img_backbone.out_features, 1024),
            nn.BatchNorm1d(1024),
            nn.GELU(),
            # nn.Dropout(0.1),
            nn.Linear(1024, 256),
            nn.BatchNorm1d(256),
            nn.GELU(),
            # nn.Dropout(0.1),
            nn.Linear(256, out_features),
        )

    def forward(self, img, tab):
        img_features = self.img_backbone(img)
        tab_features = self.tab_backbone(tab)
        features = torch.cat([img_features, tab_features], dim=1)
        return self.fc(features)
class AverageMeter(object):
    def __init__(self):
        self.reset()

    def reset(self):
        self.avg = 0
        self.sum = 0
        self.count = 0

    def update(self, val):
        self.sum += val.sum()
        self.count += val.numel()
        self.avg = self.sum / self.count
def get_lr_scheduler(optimizer):
    return torch.optim.lr_scheduler.OneCycleLR(
    optimizer=optimizer,
    max_lr=CONFIG.LR_MAX,
    total_steps=CONFIG.N_STEPS,
    pct_start=0.1,
    anneal_strategy='cos',
    div_factor=1e1,
    final_div_factor=1e1,
    )
        

In [22]:
import os
BASE_DIR = os.path.join(os.getcwd() , 'data')
train_df = pd.read_csv(BASE_DIR  +  '/train.csv')

train = pd.read_csv(BASE_DIR  +  '/train.csv')
test =  pd.read_csv(BASE_DIR  +  '/test.csv')

for column in CONFIG.TARGET_COLUMNS:
    lower_quantile = train[column].quantile(CONFIG.Lower_Quantile)
    upper_quantile = train[column].quantile(CONFIG.Upper_Quantile)
    train = train[(train[column] >= lower_quantile) & (train[column] <= upper_quantile)]
    
tabular = train.drop(columns = ['id'] + CONFIG.TARGET_COLUMNS)
test_tabular = test.drop(columns = ['id'])

In [23]:
tabular

Unnamed: 0,WORLDCLIM_BIO1_annual_mean_temperature,WORLDCLIM_BIO12_annual_precipitation,WORLDCLIM_BIO13.BIO14_delta_precipitation_of_wettest_and_dryest_month,WORLDCLIM_BIO15_precipitation_seasonality,WORLDCLIM_BIO4_temperature_seasonality,WORLDCLIM_BIO7_temperature_annual_range,SOIL_bdod_0.5cm_mean_0.01_deg,SOIL_bdod_100.200cm_mean_0.01_deg,SOIL_bdod_15.30cm_mean_0.01_deg,SOIL_bdod_30.60cm_mean_0.01_deg,...,VOD_X_1997_2018_multiyear_mean_m03,VOD_X_1997_2018_multiyear_mean_m04,VOD_X_1997_2018_multiyear_mean_m05,VOD_X_1997_2018_multiyear_mean_m06,VOD_X_1997_2018_multiyear_mean_m07,VOD_X_1997_2018_multiyear_mean_m08,VOD_X_1997_2018_multiyear_mean_m09,VOD_X_1997_2018_multiyear_mean_m10,VOD_X_1997_2018_multiyear_mean_m11,VOD_X_1997_2018_multiyear_mean_m12
0,21.478968,772.404785,110.047623,56.210766,161.457764,13.886666,129,141,134,137,...,0.452674,0.469246,0.479971,0.488434,0.495728,0.482645,0.448959,0.419139,0.404626,0.403707
1,26.927639,1456.733276,329.366669,109.906487,178.745422,19.846668,139,140,140,137,...,0.448251,0.470133,0.448403,0.405665,0.382672,0.364023,0.362919,0.368997,0.391109,0.407680
3,25.558649,2246.017822,329.342224,56.563957,211.065521,16.768000,116,132,122,129,...,0.548350,0.551841,0.515061,0.555757,0.525224,0.571438,0.572420,0.566320,0.556564,0.512105
4,25.204723,2309.776123,284.576202,39.409706,36.499138,10.257143,100,113,105,111,...,0.645712,0.641703,0.638654,0.647840,0.647654,0.639092,0.634200,0.628594,0.644814,0.654979
5,17.526487,408.637756,101.841835,110.660789,591.169128,25.035715,141,152,146,149,...,0.295938,0.289349,0.275169,0.268171,0.264103,0.265927,0.265918,0.267588,0.263034,0.265842
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
43358,25.551382,3294.408203,353.204071,47.970898,75.369301,12.087244,105,114,108,110,...,0.509698,0.516254,0.509183,0.504985,0.502834,0.504981,0.510419,0.502823,0.497064,0.487178
43359,24.163185,908.924438,238.119995,110.597702,120.009247,14.226222,132,141,137,139,...,0.513163,0.544969,0.541757,0.519996,0.488227,0.452186,0.428144,0.417601,0.417884,0.424877
43360,23.165426,57.146667,10.146667,52.789906,473.979675,26.604889,138,143,138,140,...,0.183251,0.195377,0.205470,0.212486,0.213125,0.212327,0.200607,0.187665,0.178380,0.170875
43361,15.147365,804.086731,163.000000,92.718536,182.917358,22.998470,122,128,129,126,...,0.335833,0.335149,0.336188,0.353980,0.388565,0.408230,0.417128,0.424777,0.395074,0.371460


In [24]:
if CONFIG.SHRINK_SAMPLES: #this one's destructive
    # construct correlation matrix
    corr_matrix = tabular.corr(method='spearman')
    
    # hierarchical cluster based on the correlations
    dissimilarity = 1 - abs(corr_matrix.values)
    
    linkage_matrix = hierarchy.linkage(squareform(dissimilarity), method='complete')
    
    # get the cluster labels
    threshold = 0.15
    cluster_labels = hierarchy.fcluster(linkage_matrix, threshold, criterion='distance')
    n_clusters = np.unique(cluster_labels).shape[0]
    print(f'Number of clusters: {n_clusters}\n')
    
    
    cluster_features = {}
    # show features within each cluster
    for label in range(1, n_clusters + 1):
        leaves_in_cluster = cluster_labels == label
        cluster_features[label] = corr_matrix.columns[leaves_in_cluster].tolist()
        
    final_features = []
    for cols_in_cluster in cluster_features.values():
        final_features.append(cols_in_cluster[0])
        
    tabular = tabular[final_features]
    test_tabular = test_tabular[final_features]
    # normalize tabular inputs

LOG_FEATURES = ['X4_mean', 'X11_mean', 'X18_mean', 'X50_mean', 'X26_mean', 'X3112_mean']

y_train = np.zeros_like(train[CONFIG.TARGET_COLUMNS], dtype=np.float32)
for target_idx, target in enumerate(CONFIG.TARGET_COLUMNS):
    v = train[target].values
    if target in LOG_FEATURES:
        v = np.log10(v)
    y_train[:, target_idx] = v




X_SCALER = StandardScaler()
tabular_scaled = X_SCALER.fit_transform(tabular).astype(np.float32)
test_tabular_scaled = X_SCALER.transform(test_tabular).astype(np.float32)

Y_SCALER = StandardScaler()
y_train_scaled = Y_SCALER.fit_transform(y_train).astype(np.float32)


print('JPEG Files Processing:')
train['file_path'] = train['id'].apply(lambda s: f'{BASE_DIR}/train_images/{s}.jpeg')
train['jpeg_bytes'] = train['file_path'].apply(lambda fp: open(fp, 'rb').read())

test['file_path'] = test['id'].apply(lambda s: f'{BASE_DIR}/test_images/{s}.jpeg')
test['jpeg_bytes'] = test['file_path'].apply(lambda fp: open(fp, 'rb').read())
print('JPEG Files Processing End')




JPEG Files Processing:
JPEG Files Processing End


In [25]:
CONFIG.N_TRAIN_SAMPLES = len(tabular_scaled)
CONFIG.N_STEPS_PER_EPOCH = (CONFIG.N_TRAIN_SAMPLES // CONFIG.BATCH_SIZE)
CONFIG.N_STEPS = CONFIG.N_STEPS_PER_EPOCH * CONFIG.N_EPOCHS + 1
MEAN = [0.485, 0.456, 0.406]
STD = [0.229, 0.224, 0.225]

TRAIN_TRANSFORMS = A.Compose([
    A.HorizontalFlip(p=0.5),
    A.RandomSizedCrop(
        [50,50],
        CONFIG.IMAGE_SIZE, CONFIG.IMAGE_SIZE, w2h_ratio=1.0, p=0.75),
    A.Resize(CONFIG.IMAGE_SIZE, CONFIG.IMAGE_SIZE),
    A.RandomBrightnessContrast(brightness_limit=0.1, contrast_limit=0.1, p=0.25),
    A.ImageCompression(quality_lower=85, quality_upper=100, p=0.25),
    A.ToFloat(),
    A.Normalize(mean=MEAN, std=STD, max_pixel_value=1),
    ToTensorV2(),
])

TEST_TRANSFORMS = A.Compose([
    A.Resize(CONFIG.IMAGE_SIZE, CONFIG.IMAGE_SIZE),
    A.ToFloat(),
    A.Normalize(mean=MEAN, std=STD, max_pixel_value=1),
    ToTensorV2(),
])
# train / test split
train_idx = np.random.choice(len(train), int(1 * len(train)), replace=False)
test_idx = np.setdiff1d(np.arange(len(train)), train_idx)

train_images = train['jpeg_bytes'].values[train_idx]
train_tabular = tabular_scaled[train_idx]
train_y = y_train_scaled[train_idx]

val_images = train['jpeg_bytes'].values[test_idx]
val_tabular = tabular_scaled[test_idx]
val_y = y_train_scaled[test_idx]

test_images = test['jpeg_bytes'].values
test_tabular = test_tabular_scaled

train_dataset = TrainDataset(
    train_images,
    train_tabular,
    train_y,
    TRAIN_TRANSFORMS
)

validation_dataset = TrainDataset(
    val_images,
    val_tabular,
    val_y,
    TEST_TRANSFORMS
)

train_dataloader = DataLoader(
    train_dataset,
    batch_size=CONFIG.BATCH_SIZE,
    shuffle=True,
    drop_last=True,
    num_workers=0#psutil.cpu_count(),
)

validation_dataloader = DataLoader(
    validation_dataset,
    batch_size=CONFIG.BATCH_SIZE,
    shuffle=False,
    drop_last=False,
    num_workers=0#psutil.cpu_count(),
)

test_dataset = TrainDataset(
    test['jpeg_bytes'].values,
    test_tabular,
    test['id'].values,
    TEST_TRANSFORMS,
)

test_dataloader = DataLoader(
    test_dataset,
    batch_size=2,
    shuffle=False,
    drop_last=False,
    num_workers=0#psutil.cpu_count(),
)

In [26]:
img_backbone = ImageBackbone('swin_large_patch4_window12_384.ms_in22k_ft_in1k', f'{BASE_DIR}/model_08_ensemble.pth', 384, fixed_feature_extractor=True)
tab_backbone = TabularBackbone(n_features=tabular_scaled.shape[1], out_features=128)

model = Model(img_backbone, tab_backbone, CONFIG.N_TARGETS)
model = model.to(device)
#model.load_state_dict(torch.load( f'{BASE_DIR}/model_08_ensemble.pth'))

In [27]:
# # # load model
# # model.to(device)

# # SUBMISSION_ROWS = []
# # model.eval()

# for X_image, X_tabular, test_id in tqdm(test_dataloader):
#     with torch.no_grad():
#         y_pred = model(X_image.to(device), X_tabular.to(device)).detach().cpu().numpy()
#     print(y_pred.shape)
#     y_pred = Y_SCALER.inverse_transform(y_pred).squeeze()
#     row = {'id': int(test_id)}


#     for k, v in zip(CONFIG.TARGET_COLUMNS, y_pred):
#         if k in LOG_FEATURES:
#             row[k.replace('_mean', '')] = 10 ** v
#         else:
#             row[k.replace('_mean', '')] = v

#     SUBMISSION_ROWS.append(row)


# TARGET_ORDER  = ['id','X4', 'X11', 'X18','X26', 'X50', 'X3112']    
# submission_df = pd.DataFrame(SUBMISSION_ROWS)[TARGET_ORDER]
# #[TARGET_ORDER]
# submission_df.to_csv('efficient_net_submission.csv', index=False)
# print("Submit!")


validation_dataloader

# Training 

In [28]:
MAE = torchmetrics.regression.MeanAbsoluteError().to(device)
R2 = torchmetrics.regression.R2Score(num_outputs=CONFIG.N_TARGETS, multioutput='uniform_average').to(device)
LOSS = AverageMeter()

Y_MEAN = torch.tensor(y_train).mean(dim=0).to(device)
EPS = torch.tensor([1e-6]).to(device)

LOSS_FN = nn.SmoothL1Loss()  # r2_loss

optimizer = torch.optim.AdamW(
    params=model.parameters(),
    lr=CONFIG.LR_MAX,
    weight_decay=CONFIG.WEIGHT_DECAY,
)

LR_SCHEDULER = get_lr_scheduler(optimizer)


print("Start Training:")
for epoch in range(CONFIG.N_EPOCHS):
    MAE.reset()
    R2.reset()
    LOSS.reset()
    model.train()

    for step, (X_image, X_tabular, y_true) in enumerate(train_dataloader):
        X_image = X_image.to(device)
        X_tabular = X_tabular.to(device)
        y_true = y_true.to(device)
        model = model.to(device)
        t_start = time.perf_counter_ns()
        y_pred = model(X_image, X_tabular)
        loss = LOSS_FN(y_pred, y_true)
        LOSS.update(loss)
        loss.backward()
        optimizer.step()
        # xm.optimizer_step(optimizer, barrier=True)
        optimizer.zero_grad()
        LR_SCHEDULER.step()
        print(y_pred.shape , y_true.shape)
        MAE.update(y_pred, y_true)
        R2.update(y_pred, y_true)

        if not CONFIG.IS_INTERACTIVE and (step + 1) == CONFIG.N_STEPS_PER_EPOCH:
            print(
                f'EPOCH {epoch + 1:02d}, {step + 1:04d}/{CONFIG.N_STEPS_PER_EPOCH} | ' +
                f'loss: {LOSS.avg:.4f}, mae: {MAE.compute().item():.4f}, r2: {R2.compute().item():.4f}, ' +
                f'step: {(time.perf_counter_ns() - t_start) * 1e-9:.3f}s, lr: {LR_SCHEDULER.get_last_lr()[0]:.2e}',
            )
        elif CONFIG.IS_INTERACTIVE:
            print(
                f'\rEPOCH {epoch + 1:02d}, {step + 1:04d}/{CONFIG.N_STEPS_PER_EPOCH} | ' +
                f'loss: {LOSS.avg:.4f}, mae: {MAE.compute().item():.4f}, r2: {R2.compute().item():.4f}, ' +
                f'step: {(time.perf_counter_ns() - t_start) * 1e-9:.3f}s, lr: {LR_SCHEDULER.get_last_lr()[0]:.2e}',
                end='\n' if (step + 1) == CONFIG.N_STEPS_PER_EPOCH else '', flush=True,
            )
    model = model.to(device)
    model.eval()
    MAE.reset()
    R2.reset()
    LOSS.reset()

    print('IN  Validation:')
    with torch.no_grad():
        for X_image, X_tabular, y_true in (validation_dataloader):
            X_image = X_image.to(device)
            y_true = y_true.to(device)
            y_pred = model(X_image, X_tabular)
            loss = LOSS_FN(y_pred, y_true)
            LOSS.update(loss)
            MAE.update(y_pred, y_true)
            R2.update(y_pred, y_true)

            if not CONFIG.IS_INTERACTIVE:
                print(
                    f'EPOCH {epoch + 1:02d}, VALIDATION | ' +
                    f'loss: {LOSS.avg:.4f}, mae: {MAE.compute().item():.4f}, r2: {R2.compute().item():.4f}',
                )
            elif CONFIG.IS_INTERACTIVE:
                print(
                    f'\rEPOCH {epoch + 1:02d}, VALIDATION | ' +
                    f'loss: {LOSS.avg:.4f}, mae: {MAE.compute().item():.4f}, r2: {R2.compute().item():.4f}',
                    end='\n',
                )

Start Training:
torch.Size([4, 6]) torch.Size([4, 6])
EPOCH 01, 0001/9605 | loss: 0.2809, mae: 0.6548, r2: -0.5478, step: 0.102s, lr: 1.00e-05torch.Size([4, 6]) torch.Size([4, 6])
EPOCH 01, 0002/9605 | loss: 0.3327, mae: 0.6930, r2: -0.4403, step: 0.097s, lr: 1.00e-05torch.Size([4, 6]) torch.Size([4, 6])
EPOCH 01, 0003/9605 | loss: 0.4126, mae: 0.7825, r2: -0.2295, step: 0.098s, lr: 1.00e-05torch.Size([4, 6]) torch.Size([4, 6])
EPOCH 01, 0004/9605 | loss: 0.4582, mae: 0.8463, r2: -0.3824, step: 0.098s, lr: 1.00e-05torch.Size([4, 6]) torch.Size([4, 6])
EPOCH 01, 0005/9605 | loss: 0.4203, mae: 0.8024, r2: -0.4467, step: 0.098s, lr: 1.00e-05torch.Size([4, 6]) torch.Size([4, 6])
EPOCH 01, 0006/9605 | loss: 0.3848, mae: 0.7554, r2: -0.4643, step: 0.098s, lr: 1.00e-05torch.Size([4, 6]) torch.Size([4, 6])
EPOCH 01, 0007/9605 | loss: 0.4325, mae: 0.8100, r2: -0.3533, step: 0.098s, lr: 1.00e-05torch.Size([4, 6]) torch.Size([4, 6])
EPOCH 01, 0008/9605 | loss: 0.4585, mae: 0.8424, r2: -0.4318, st

KeyboardInterrupt: 

In [None]:
R2 = torchmetrics.regression.R2Score(num_outputs=CONFIG.N_TARGETS, multioutput='uniform_average').to(device)

In [31]:
CONFIG.N_TARGETS

6

: 

In [30]:
R2sc = torchmetrics.regression.R2Score(num_outputs= CONFIG.N_TARGETS , multioutput="uniform_average").to(device)

t1 = torch.ones((2,6)).to(device)
t2 = torch.ones((2,6)).to(device)
R2sc.update(t1 , t2 )
R2sc.compute().item()

1.0