- cancel semi-supervised learning
- add TTA
- n_epochs 200
- cancel color jitter params for data augmentation
- cancel classification types

In [1]:
!pip install timm

Collecting timm
  Downloading timm-0.4.12-py3-none-any.whl (376 kB)
[K     |████████████████████████████████| 376 kB 868 kB/s 
Installing collected packages: timm
Successfully installed timm-0.4.12


In [2]:
!pip install ttach

Collecting ttach
  Downloading ttach-0.0.3-py3-none-any.whl (9.8 kB)
Installing collected packages: ttach
Successfully installed ttach-0.0.3


In [3]:
!pip install lightly

Collecting lightly
  Downloading lightly-1.1.15-py3-none-any.whl (240 kB)
[K     |████████████████████████████████| 240 kB 879 kB/s 
[?25hCollecting hydra-core>=1.0.0
  Downloading hydra_core-1.1.0-py3-none-any.whl (144 kB)
[K     |████████████████████████████████| 144 kB 3.7 MB/s 
Collecting lightly-utils==0.0.1
  Downloading lightly_utils-0.0.1-py3-none-any.whl (6.3 kB)
Collecting antlr4-python3-runtime==4.8
  Downloading antlr4-python3-runtime-4.8.tar.gz (112 kB)
[K     |████████████████████████████████| 112 kB 3.9 MB/s 
[?25hCollecting omegaconf==2.1.*
  Downloading omegaconf-2.1.0-py3-none-any.whl (74 kB)
[K     |████████████████████████████████| 74 kB 1.5 MB/s 
[?25hCollecting importlib-resources
  Downloading importlib_resources-5.2.0-py3-none-any.whl (27 kB)
Building wheels for collected packages: antlr4-python3-runtime
  Building wheel for antlr4-python3-runtime (setup.py) ... [?25l- \ | done
[?25h  Created wheel for antlr4-python3-runtime: fil

In [4]:
import os
import random
import numpy as np
import pandas as pd
from PIL import Image
from tabulate import tabulate
from sklearn.metrics import mean_squared_error

import torch
from torch import nn
import tensorflow as tf
from torch.optim import Adam
from torch.utils import data
import torch.nn.functional as F
from torch.optim.optimizer import Optimizer
from torch.optim import lr_scheduler
from collections import defaultdict, Counter

# various models can be selected: https://pytorch.org/vision/stable/models.html
from torchvision.models import resnet34, resnet18
from torchvision import transforms as T
import torchvision

import ttach as tta
import lightly
import math

import timm

In [5]:
n_folds = 5
photo_dir = "../input/atma11-dataset/photos/"
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# read data

In [6]:
train_df = pd.read_csv("../input/atma11-dataset/train.csv")
test_df = pd.read_csv("../input/atma11-dataset/test.csv")

# functions

In [7]:
def to_img_path(object_id):
    return os.path.join(photo_dir, f'{object_id}.jpg')

def read_image(object_id):
    return Image.open(to_img_path(object_id))

def calculate_metrics(y_true, y_pred) -> dict:
    return {'rmse': mean_squared_error(y_true, y_pred) ** .5}

def create_metadata(input_df):
    out_df = input_df[['object_id']].copy()
    out_df['object_path'] = input_df['object_id'].map(to_img_path)

    if "target" in input_df:
        out_df["target"] = input_df["target"] 

    return out_df

In [8]:
def stratified_group_k_fold(X, y, groups, k, seed=None):
    labels_num = np.max(y) + 1
    y_counts_per_group = defaultdict(lambda: np.zeros(labels_num))
    y_distr = Counter()
    for label, g in zip(y, groups):
        y_counts_per_group[g][label] += 1
        y_distr[label] += 1

    y_counts_per_fold = defaultdict(lambda: np.zeros(labels_num))
    groups_per_fold = defaultdict(set)

    def eval_y_counts_per_fold(y_counts, fold):
        y_counts_per_fold[fold] += y_counts
        std_per_label = []
        for label in range(labels_num):
            label_std = np.std([y_counts_per_fold[i][label] / y_distr[label] for i in range(k)])
            std_per_label.append(label_std)
        y_counts_per_fold[fold] -= y_counts
        return np.mean(std_per_label)
    
    groups_and_y_counts = list(y_counts_per_group.items())
    random.Random(seed).shuffle(groups_and_y_counts)

    for g, y_counts in sorted(groups_and_y_counts, key=lambda x: -np.std(x[1])):
        best_fold = None
        min_eval = None
        for i in range(k):
            fold_eval = eval_y_counts_per_fold(y_counts, i)
            if min_eval is None or fold_eval < min_eval:
                min_eval = fold_eval
                best_fold = i
        y_counts_per_fold[best_fold] += y_counts
        groups_per_fold[best_fold].add(g)

    all_groups = set(groups)
    for i in range(k):
        train_groups = all_groups - groups_per_fold[i]
        test_groups = groups_per_fold[i]

        train_indices = [i for i, g in enumerate(groups) if g in train_groups]
        test_indices = [i for i, g in enumerate(groups) if g in test_groups]

        yield train_indices, test_indices

In [9]:
IMG_MEAN = [0.485, 0.456, 0.406]
IMG_STD = [0.229, 0.224, 0.225]

class AtmaDataset(data.Dataset):
    """atmaCup用にデータ読み込み等を行なうデータ・セット"""
    object_path_key = "object_path"
    label_key = "target"

    @property
    def meta_keys(self):
        retval = [self.object_path_key]

        if self.is_train:
            retval += [self.label_key]

        return retval

    def __init__(self, meta_df: pd.DataFrame, is_train=True):
        """
        args:
            meta_df: 
                画像へのパスと label 情報が含まれている dataframe
                必ず object_path に画像へのパス, target に正解ラベルが入っている必要があります

            is_train:
                True のとき学習用のデータ拡張を適用します.
                False の時は単に size にリサイズを行います
        """

        self.is_train = is_train
        for k in self.meta_keys:
            if k not in meta_df:
                raise ValueError("meta df must have {}".format(k))

        self.meta_df = meta_df.reset_index(drop=True)
        self.index_to_data = self.meta_df.to_dict(orient="index")

        size = (224, 224)

        additional_items = (
            [T.Resize(size)]
            if not is_train
            else [
                T.RandomGrayscale(p=0.2),
                T.RandomVerticalFlip(),
                T.RandomHorizontalFlip(),
                #T.RandomRotation(90),
                #T.ColorJitter(
                #    brightness=0.1,
                #    contrast=0.1,
                #),
                T.RandomResizedCrop(size),
            ]
        )

        self.transformer = T.Compose(
            [*additional_items, T.ToTensor(), T.Normalize(mean=IMG_MEAN, std=IMG_STD)]
        )

    def __getitem__(self, index):
        data = self.index_to_data[index]

        obj_path, label = data.get(self.object_path_key), data.get(self.label_key, -1)
        img = Image.open(obj_path)
        img = self.transformer(img)
        return img, label

    def __len__(self):
        return len(self.meta_df)

In [10]:
def train(
    model: nn.Module,
    optimizer: Optimizer,
    scheduler: lr_scheduler,
    train_loader: data.DataLoader
)-> pd.Series:
    model.train()
    
    criterion = nn.MSELoss() 
    
    metrics = defaultdict(float)
    n_iters = len(train_loader)
    
    for i, (x_i, y_i) in enumerate(train_loader):
        x_i = x_i.to(device)
        y_i = y_i.to(device).reshape(-1,1).float()
        
        output = model(x_i)
        loss = criterion(output, y_i)
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        scheduler.step()
        
        metric_i = {
            "loss": loss.item()
        }
        for k, v in metric_i.items():
            metrics[k] /= n_iters
            
    return pd.Series(metrics).add_prefix("train_")

In [11]:
def valid_predict(model: nn.Module, loader: data.DataLoader) -> np.ndarray:
    model.eval()
    predicts = []
    
    for x_i, y_i in loader:
        with torch.no_grad():
            output = model(x_i.to(device))
        
        predicts.extend(output.data.cpu().numpy())
        
    pred = np.array(predicts).reshape(-1)
        
    return pred

transforms = tta.Compose(
    [
        tta.HorizontalFlip(),
        tta.VerticalFlip(),
    ]
)

# prediction with test time augmentation
def predict(model: nn.Module, loader: data.DataLoader) -> np.ndarray:
    model.eval()
    predicts = []
    tta_model = tta.ClassificationTTAWrapper(model, transforms)
    
    for x_i, y_i in loader:
        with torch.no_grad():
            output = tta_model(x_i.to(device))
        
        predicts.extend(output.data.cpu().numpy())
        
    pred = np.array(predicts).reshape(-1)
    return pred

In [12]:
def valid(
    model: nn.Module,
    y_valid: np.ndarray,
    valid_loader: data.DataLoader
) -> pd.Series:
    pred = valid_predict(model, valid_loader)
    score = calculate_metrics(y_valid, pred)
    
    valid_score = pd.Series(score)
    
    return valid_score.add_prefix("valid_"), pred

In [13]:
def run_fold(
    model: nn.Module,
    train_df: pd.DataFrame,
    valid_df: pd.DataFrame,
    y_valid: np.ndarray,
    n_epochs = 30,
    n_fold = 1
   ) -> np.ndarray:
    optimizer = Adam(model.parameters(), lr=1e-3)
    
    train_dataset = AtmaDataset(meta_df = train_df)
    valid_dataset = AtmaDataset(meta_df = valid_df, is_train=False)
    
    train_loader = data.DataLoader(
        train_dataset, batch_size=64, shuffle=True, drop_last=True, num_workers=4
    )
    valid_loader = data.DataLoader(valid_dataset, batch_size=256, num_workers=4)

    scheduler = lr_scheduler.OneCycleLR(optimizer=optimizer, pct_start=0.1, div_factor=1e3, 
                                              max_lr=1e-2, epochs=n_epochs, steps_per_epoch=len(train_loader))
    
    score_df = pd.DataFrame()
    valid_score = np.inf
    valid_score_key = "valid_rmse"
    valid_best_pred = None
    
    for epoch in range(1, n_epochs + 1):
        print(f"start {epoch}")
        score_train = train(model, optimizer, scheduler, train_loader)
        score_valid, y_valid_pred = valid(model=model, valid_loader=valid_loader, y_valid = y_valid)
        
        # --- 学習のロスと検証スコアの値をデータフレームに追加
        row = pd.concat([score_train, score_valid])
        row["epoch"] = epoch
        row = pd.DataFrame([row])
        score_df = pd.concat([score_df, row], ignore_index=True)
        # ---
        
        current_score = score_valid[valid_score_key]
        if current_score < valid_score:
            print(tabulate(row, headers=row.columns))
            print(f'validation score is improved!! {valid_score:.4f} -> {current_score:.4f}')
            torch.save(model.state_dict(), "model_best"+str(n_fold)+".pth")
            valid_score = current_score
            valid_best_pred = y_valid_pred
            
    score_df.to_csv('score_'+str(n_fold)+'.csv', index=False)
    return valid_best_pred

In [14]:
def seed_everything(seed=42): 
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    tf.random.set_seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    
seed_everything()

# simsiam

In [15]:
num_workers = 2
batch_size = 128
seed = 1
epochs = 10
input_size = 224

# dimension of the embeddings
num_ftrs = 512
# dimension of the output of the prediction and projection heads
out_dim = proj_hidden_dim = 512
# the prediction head uses a bottleneck architecture
pred_hidden_dim = 128
# use 2 layers in the projection head
num_mlp_layers = 2

In [16]:
# define the augmentations for self-supervised learning
collate_fn = lightly.data.ImageCollateFunction(
    input_size=input_size,
    # require invariance to flips and rotations
    hf_prob=0.5,
    vf_prob=0.5,
    rr_prob=0.5,
    # satellite images are all taken from the same height
    # so we use only slight random cropping
    min_scale=0.5,
    # use a weak color jitter for invariance w.r.t small color changes
    # 元々の学習でいらないaugmentationなので、simsiamにおいても不要？
    #cj_prob=0.2,
    #cj_bright=0.1,
    #cj_contrast=0.1,
    #cj_hue=0.1,
    #cj_sat=0.1,
    # addtional items
    gaussian_blur=0.2,
    random_gray_scale=0.2
)

# create a lightly dataset for training, since the augmentations are handled
# by the collate function, there is no need to apply additional ones here
dataset_train_simsiam = lightly.data.LightlyDataset(
    input_dir="../input/atma11-dataset/photos/"
)

# create a dataloader for training
dataloader_train_simsiam = torch.utils.data.DataLoader(
    dataset_train_simsiam,
    batch_size=batch_size,
    shuffle=True,
    collate_fn=collate_fn,
    drop_last=True,
    num_workers=num_workers
)

# create a torchvision transformation for embedding the dataset after training
# here, we resize the images to match the input size during training and apply
# a normalization of the color channel based on statistics from imagenet
test_transforms = torchvision.transforms.Compose([
    torchvision.transforms.Resize((input_size, input_size)),
    torchvision.transforms.ToTensor(),
    torchvision.transforms.Normalize(
        mean=lightly.data.collate.imagenet_normalize['mean'],
        std=lightly.data.collate.imagenet_normalize['std'],
    )
])

# create a lightly dataset for embedding
dataset_test = lightly.data.LightlyDataset(
    input_dir="../input/atma11-dataset/photos/",
    transform=test_transforms
)

# create a dataloader for embedding
dataloader_test = torch.utils.data.DataLoader(
    dataset_test,
    batch_size=batch_size,
    shuffle=False,
    drop_last=False,
    num_workers=num_workers
)

In [17]:
# we use a pretrained resnet for this tutorial to speed
# up training time but you can also train one from scratch
# Do not use pretrained Model
resnet = torchvision.models.resnet18(pretrained=False)
backbone = nn.Sequential(*list(resnet.children())[:-1])

#vit = timm.create_model('vit_tiny_r_s16_p8_224', pretrained=False)
#backbone = nn.Sequential(*list(vit.children())[:-1])

# create the SimSiam model using the backbone from above
model = lightly.models.SimSiam(
    backbone,
    num_ftrs=num_ftrs,
    proj_hidden_dim=pred_hidden_dim,
    pred_hidden_dim=pred_hidden_dim,
    out_dim=out_dim,
    num_mlp_layers=num_mlp_layers
)

In [18]:
# SimSiam uses a symmetric negative cosine similarity loss
criterion = lightly.loss.SymNegCosineSimilarityLoss()

# scale the learning rate
lr = 0.05 * batch_size / 256
# use SGD with momentum and weight decay
optimizer = torch.optim.SGD(
    model.parameters(),
    lr=lr,
    momentum=0.9,
    weight_decay=5e-4
)

In [19]:
# model.to(device)

# avg_loss = 0.
# avg_output_std = 0.
# for e in range(epochs):

#     for (x0, x1), _, _ in dataloader_train_simsiam:

#         # move images to the gpu
#         x0 = x0.to(device)
#         x1 = x1.to(device)

#         # run the model on both transforms of the images
#         # the output of the simsiam model is a y containing the predictions
#         # and projections for each input x
#         y0, y1 = model(x0, x1)

#         # backpropagation
#         loss = criterion(y0, y1)
#         loss.backward()

#         optimizer.step()
#         optimizer.zero_grad()

#         # calculate the per-dimension standard deviation of the outputs
#         # we can use this later to check whether the embeddings are collapsing
#         output, _ = y0
#         output = output.detach()
#         output = torch.nn.functional.normalize(output, dim=1)

#         output_std = torch.std(output, 0)
#         output_std = output_std.mean()

#         # want to minimize
#         # use moving averages to track the loss and standard deviation
#         w = 0.9
#         avg_loss = w * avg_loss + (1 - w) * loss.item()
#         avg_output_std = w * avg_output_std + (1 - w) * output_std.item()

#     # the level of collapse is large if the standard deviation of the l2
#     # normalized output is much smaller than 1 / sqrt(dim)
#     collapse_level = max(0., 1 - math.sqrt(out_dim) * avg_output_std)
#     # print intermediate results
#     print(f'[Epoch {e:3d}] '
#         f'Loss = {avg_loss:.2f} | '
#         f'Collapse Level: {collapse_level:.2f} / 1.00')

In [20]:
# torch.save(model.backbone.state_dict(), "ssl.pth")

# main training and validation

In [21]:
def make_model():
    return resnet18(pretrained=False)
model = make_model()

In [22]:
print(model)

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [23]:
train_meta_df = train_df[['target', 'object_id']].copy()
train_meta_df['object_path'] = train_meta_df['object_id'].map(to_img_path)

dataset = AtmaDataset(meta_df=train_meta_df)
loader = data.DataLoader(dataset=dataset, batch_size=54, num_workers=4)

groups = train_df["art_series_id"].values

In [24]:
fold = stratified_group_k_fold(train_df, train_df['target'], groups, k=n_folds, seed=12)
oof = np.zeros((len(train_df), ), dtype=np.float32)

for i, (idx_tr, idx_valid) in enumerate(fold):
    print(f"fold{i}")
    model = make_model()
    #nn.Sequential(*list(model.children())[:-1]).load_state_dict(torch.load("ssl.pth"))
    
    model.fc = nn.Linear(in_features=512, out_features=1, bias=True)
    
    model.to(device)

    oof_i = run_fold(
        model=model, 
        train_df=train_meta_df.iloc[idx_tr], 
        valid_df=train_meta_df.iloc[idx_valid], 
        y_valid=train_meta_df['target'].values[idx_valid],
        n_epochs=200,
        n_fold = i
    )

    oof[idx_valid] = oof_i

fold0
start 1
      train_loss    valid_rmse    epoch
--  ------------  ------------  -------
 0             0       1.02513        1
validation score is improved!! inf -> 1.0251
start 2
start 3
start 4
start 5
      train_loss    valid_rmse    epoch
--  ------------  ------------  -------
 0             0       1.00099        5
validation score is improved!! 1.0251 -> 1.0010
start 6
      train_loss    valid_rmse    epoch
--  ------------  ------------  -------
 0             0       1.00033        6
validation score is improved!! 1.0010 -> 1.0003
start 7
start 8
start 9
start 10
      train_loss    valid_rmse    epoch
--  ------------  ------------  -------
 0             0      0.946789       10
validation score is improved!! 1.0003 -> 0.9468
start 11
start 12
start 13
      train_loss    valid_rmse    epoch
--  ------------  ------------  -------
 0             0      0.917269       13
validation score is improved!! 0.9468 -> 0.9173
start 14
start 15
start 16
start 17
start 18
star

In [25]:
mean_squared_error(train_df['target'], oof)

0.6106440034798106

# prediction

In [26]:
# train と似たようなことをするので、次回から楽したいとおもって `create_metadata` という関数を作りました
test_meta_df = create_metadata(test_df)

# 学習時のデータ拡張はオフにしたいので is_train=False としている
test_dataset = AtmaDataset(meta_df=test_meta_df, is_train=False)
test_loader = data.DataLoader(dataset=test_dataset, batch_size=128, drop_last=False, num_workers=4)

test_predictions = []

for i in range(n_folds):
    model = make_model()
    
    model.fc = nn.Linear(in_features=512, out_features=1, bias=True)

    # 最も良かった重みを読みだす
    # https://pytorch.org/tutorials/beginner/saving_loading_models.html
    model.load_state_dict(torch.load('model_best'+str(i)+'.pth'))

    # GPU環境で予測するため `to` で変換
    model.to(device)

    y_pred_i = predict(model, loader=test_loader)

    test_predictions.append(y_pred_i)
    
pred_mean = np.array(test_predictions).mean(axis=0)

# submission

In [27]:
pd.DataFrame({"target": pred_mean}).to_csv("submission"+str(oof)+".csv", index=False)