In [None]:
import sys
sys.path.append("../input/tez-lib/")
sys.path.append("../input/timmmaster/")

In [None]:
import gc
import math

import albumentations
import cv2
import numpy as np
import pandas as pd
import tez
import timm
import torch
import torch.nn as nn
from sklearn import metrics
from tez.callbacks import EarlyStopping
from tqdm import tqdm

In [None]:
!nvidia-smi
!mkdir paw-models

In [None]:
class args:
    """Run configuration, read as plain class attributes (never instantiated here)."""

    # Square side length (pixels) that the augmentation pipelines resize images to.
    image_size = 384
    # Training batch size; the training cell validates with twice this value.
    batch_size = 4
    # Maximum number of epochs passed to `fit` for each fold.
    epochs = 10
    # Fold index used when selecting validation rows by `kfold`.
    # NOTE(review): the training loop iterates folds via range(10), i.e. 0-9, so
    # a value of 10 may match no rows if `kfold` is 0-based — confirm.
    fold = 10
    
def sigmoid(x):
    """Return the logistic function 1 / (1 + e^-x) of a scalar.

    The computation is split by sign so that `math.exp` is only ever called
    with a non-positive argument. The naive form `1 / (1 + math.exp(-x))`
    raises OverflowError for large negative `x` (for example -1000).

    Args:
        x: A real scalar.

    Returns:
        A float in the closed interval [0, 1].
    """
    if x >= 0:
        return 1 / (1 + math.exp(-x))
    # For negative x, exp(x) is in (0, 1) and can underflow to 0 but never overflow.
    z = math.exp(x)
    return z / (1 + z)

In [None]:
class PawpularDataset:
    """Index-based dataset pairing each image with its tabular features and target.

    Args:
        image_paths: Sequence of image file paths, read with `cv2.imread`.
        dense_features: Array indexed as `dense_features[item, :]` to obtain
            the feature row of one sample.
        targets: Sequence indexed by sample position to obtain its target.
        augmentations: Optional callable invoked as `augmentations(image=...)`
            whose result is indexed with the key "image"; `None` disables it.
    """

    def __init__(self, image_paths, dense_features, targets, augmentations):
        self.image_paths = image_paths
        self.dense_features = dense_features
        self.targets = targets
        self.augmentations = augmentations

    def __len__(self):
        """Return the number of samples (one per image path)."""
        return len(self.image_paths)

    def __getitem__(self, item):
        """Load, augment and tensorise the sample at position `item`.

        Returns:
            A dict with float tensors under the keys "image" (channels moved
            to the first axis), "features" and "targets".

        Raises:
            FileNotFoundError: If the image cannot be read from disk.
        """
        path = self.image_paths[item]
        image = cv2.imread(path)
        if image is None:
            # cv2.imread reports a missing or undecodable file by returning
            # None instead of raising; fail here with the offending path rather
            # than with an opaque error inside cvtColor.
            raise FileNotFoundError(f"cv2.imread could not read image: {path}")
        # OpenCV loads channels as BGR; convert to RGB.
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        if self.augmentations is not None:
            augmented = self.augmentations(image=image)
            image = augmented["image"]

        # Height-width-channel -> channel-height-width.
        image = np.transpose(image, (2, 0, 1)).astype(np.float32)

        features = self.dense_features[item, :]
        targets = self.targets[item]

        return {
            "image": torch.tensor(image, dtype=torch.float),
            "features": torch.tensor(features, dtype=torch.float),
            "targets": torch.tensor(targets, dtype=torch.float),
        }

In [None]:
class PawpularModel(tez.Model):
    """Two-backbone model: Swin and EfficientNet embeddings plus tabular features.

    Each backbone's classification layer is replaced by a 128-unit linear
    layer. The two 128-d embeddings are concatenated with the 12 tabular
    features and passed through a small MLP that emits a single logit.
    Training uses BCE-with-logits against `targets / 100`; the reported metric
    is RMSE on the original target scale (sigmoid(logit) * 100).
    """

    def __init__(self):
        super().__init__()

        # Transformer
        self.model1 = timm.create_model(
            "swin_large_patch4_window12_384", pretrained=True, in_chans=3
        )
        self.model1.head = nn.Linear(self.model1.head.in_features, 128)

        # CNN
        # The stem is left exactly as loaded: the backbone is already created
        # with in_chans=3, so swapping in a freshly initialised 3-channel
        # Conv2d would only throw away the pretrained first-layer weights.
        self.model2 = timm.create_model(
            "efficientnet_b4", pretrained=True, in_chans=3
        )
        self.model2.classifier = nn.Linear(self.model2.classifier.in_features, 128)

        # NN head: 128 (transformer) + 128 (CNN) + 12 (tabular features) -> 1 logit
        self.fc = nn.Sequential(
            nn.Linear(128 + 128 + 12, 64),
            nn.ReLU(),
            nn.Linear(64, 1)
        )
        self.dropout = nn.Dropout(0.2)

        # Presumably read by tez to decide when to step the scheduler — TODO confirm.
        self.step_scheduler_after = "epoch"

    def monitor_metrics(self, outputs, targets):
        """Compute RMSE between sigmoid(outputs) * 100 and targets.

        Args:
            outputs: Tensor of raw logits.
            targets: Tensor of targets on the original (un-scaled) scale.

        Returns:
            A dict with the single key "rmse".
        """
        # Flatten both sides so an (N, 1) logit tensor and an (N,) target tensor
        # subtract element-wise instead of broadcasting to (N, N). Float64 keeps
        # the metric accurate even when the logits arrive in reduced precision.
        logits = outputs.detach().cpu().numpy().astype(np.float64).reshape(-1)
        truth = targets.detach().cpu().numpy().astype(np.float64).reshape(-1)
        preds = 100.0 / (1.0 + np.exp(-logits))
        # Explicit RMSE: avoids sklearn's `squared=False` keyword, which newer
        # scikit-learn releases deprecated and then removed.
        rmse = np.sqrt(np.mean((truth - preds) ** 2))
        return {"rmse": rmse}

    def fetch_scheduler(self):
        """Return a cosine-annealing-with-warm-restarts scheduler for the optimizer."""
        sch = torch.optim.lr_scheduler.CosineAnnealingWarmRestarts(
            self.optimizer, T_0=10, T_mult=1, eta_min=1e-6, last_epoch=-1
        )
        return sch

    def fetch_optimizer(self):
        """Return an Adam optimizer over all model parameters (lr=1e-4)."""
        opt = torch.optim.Adam(self.parameters(), lr=1e-4)
        return opt

    def forward(self, image, features, targets=None):
        """Run both backbones and the head.

        Args:
            image: Image batch fed to both backbones.
            features: Tabular feature batch, concatenated along dim 1.
            targets: Optional targets; when given, loss and metrics are computed.

        Returns:
            With targets: `(logits, loss, metrics_dict)`.
            Without targets: `(concat, 0, {})`, where `concat` joins the logit,
            both embeddings and the tabular features along dim 1.
        """
        transformer_embeddings = self.model1(image)
        conv_embeddings = self.model2(image)
        x = torch.cat([transformer_embeddings, conv_embeddings, features], dim=1)
        x = self.dropout(x)
        x = self.fc(x)

        # train mode
        if targets is not None:
            # Targets are divided by 100 to serve as soft labels for BCE
            # (assumes they lie in 0-100 — TODO confirm).
            loss = nn.BCEWithLogitsLoss()(x, targets.view(-1, 1) / 100)
            batch_metrics = self.monitor_metrics(x, targets)
            return x, loss, batch_metrics

        # test mode, return embeddings and features
        x = torch.cat([x, transformer_embeddings, conv_embeddings, features], dim=1)
        return x, 0, {}

In [None]:
def _make_resize():
    """Build a fresh Resize transform targeting the configured square size."""
    return albumentations.Resize(args.image_size, args.image_size, p=1)


def _make_normalize():
    """Build a fresh Normalize transform with the shared channel statistics."""
    return albumentations.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225],
        max_pixel_value=255.0,
        p=1.0,
    )


# Training pipeline: resize, two colour jitters (each applied with p=0.5),
# then normalisation.
# NOTE(review): the HSV shift limits of 0.2 look very small compared with the
# library's usual integer-scale limits — confirm they are intended.
train_aug = albumentations.Compose(
    [
        _make_resize(),
        albumentations.HueSaturationValue(
            hue_shift_limit=0.2, sat_shift_limit=0.2, val_shift_limit=0.2, p=0.5
        ),
        albumentations.RandomBrightnessContrast(
            brightness_limit=(-0.1, 0.1), contrast_limit=(-0.1, 0.1), p=0.5
        ),
        _make_normalize(),
    ],
    p=1.0,
)

# Validation pipeline: deterministic resize and normalisation only.
valid_aug = albumentations.Compose([_make_resize(), _make_normalize()], p=1.0)

In [None]:
# Training table; the `kfold` column is used below to split rows by fold.
df = pd.read_csv("../input/same-old-creating-folds/train_10folds.csv")

# Rows whose fold id equals the configured fold, re-indexed from zero.
# NOTE(review): the per-fold training loop rebuilds `df_valid` for every fold,
# so this initial value looks unused — confirm before relying on it.
df_valid = df.loc[df["kfold"] == args.fold].reset_index(drop=True)

In [None]:
# train 10 folds, 1 model for each fold

# Loop-invariant settings, hoisted so they are built once instead of per fold.
dense_features = [
    'Subject Focus', 'Eyes', 'Face', 'Near', 'Action', 'Accessory',
    'Group', 'Collage', 'Human', 'Occlusion', 'Info', 'Blur'
]
image_dir = "../input/petfinder-pawpularity-score/train"

for fold_ in range(10):
    print('#'*25)
    print('### FOLD',fold_+1)
    print('#'*25)

    # Drop the previous fold's model and callback before building the next one,
    # so two large models are never held at once. gc.collect() breaks any
    # reference cycles, then the cached CUDA blocks are returned to the driver.
    model = es = None
    gc.collect()
    torch.cuda.empty_cache()

    # dataset setting: train on every fold except `fold_`, validate on `fold_`
    df_train = df[df.kfold != fold_].reset_index(drop=True)
    train_img_paths = [f"{image_dir}/{x}.jpg" for x in df_train["Id"].values]

    df_valid = df[df.kfold == fold_].reset_index(drop=True)
    valid_img_paths = [f"{image_dir}/{x}.jpg" for x in df_valid["Id"].values]

    train_dataset = PawpularDataset(
        image_paths=train_img_paths,
        dense_features=df_train[dense_features].values,
        targets=df_train.Pawpularity.values,
        augmentations=train_aug,
    )

    valid_dataset = PawpularDataset(
        image_paths=valid_img_paths,
        dense_features=df_valid[dense_features].values,
        targets=df_valid.Pawpularity.values,
        augmentations=valid_aug,
    )

    # model setting
    model = PawpularModel()

    es = EarlyStopping(
        monitor="valid_rmse",
        # Keyed by the loop's fold index: a constant name here would make every
        # fold overwrite the same early-stopping checkpoint file.
        model_path=f"model_f{fold_}.bin",
        patience=3,
        mode="min",
        save_weights_only=True,
    )

    # train
    model.fit(
        train_dataset,
        valid_dataset=valid_dataset,
        train_bs=args.batch_size,
        valid_bs=2*args.batch_size,
        device="cuda",
        epochs=args.epochs,
        callbacks=[es],
        fp16=True,
    )

    # NOTE(review): this saves the model as it stands when `fit` returns, which
    # is presumably not the best-validation checkpoint; that one is written by
    # the EarlyStopping callback to its own `model_path` — confirm which file
    # inference should load.
    model.save(f"./paw-models/model_f{fold_}.bin")