In [45]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import os
import matplotlib.pyplot as plt
import cv2
import os
import torch
from tqdm import tqdm
from torch.utils import data
import pickle
import sys
from argparse import ArgumentParser, Namespace
import joblib
import torch.nn as nn
import torch.optim as optim
import torchvision.models as models
from torch.nn import functional as fnn
from torch.utils.data import DataLoader
from torchvision import transforms
from scipy import interpolate
import timm
from scipy.optimize import minimize

In [57]:
# Side length in pixels; default target of ScaleMinSideToSize and CropCenter.
CROP_SIZE = 256
# Landmark points per image; model heads emit 2 * NUM_PTS values, reshaped to (batch, NUM_PTS, 2).
NUM_PTS = 971
# Fraction of landmark-file rows that go to the "train" split; the rest form "val".
TRAIN_SIZE = 0.8

In [58]:
class ScaleMinSideToSize(object):
    """Uniformly rescale an image so that its shorter side matches the target size.

    The transform works on a sample dict. It replaces ``sample[elem_name]`` with the
    resized image, stores the scale factor under ``"scale_coef"`` and, when the sample
    has a ``"landmarks"`` entry, multiplies the landmark coordinates by the same factor.

    Args:
        size: pair of target sizes; ``size[0]`` is used when the image is taller than
            wide, ``size[1]`` otherwise.
        elem_name: key of the image inside the sample dict.
    """

    def __init__(self, size=(CROP_SIZE, CROP_SIZE), elem_name='image'):
        # `np.float` was removed in NumPy 1.24; the builtin `float` is the same dtype.
        self.size = np.asarray(size, dtype=float)
        self.elem_name = elem_name

    def __call__(self, sample):
        """Resize the image in ``sample`` and rescale its landmarks; returns ``sample``."""
        # Only the first two dimensions matter, so 2-D (single-channel) images also work.
        h, w = sample[self.elem_name].shape[:2]
        if h > w:
            f = self.size[0] / w
        else:
            f = self.size[1] / h

        sample[self.elem_name] = cv2.resize(sample[self.elem_name], None, fx=f, fy=f, interpolation=cv2.INTER_AREA)
        # Kept so predictions can later be mapped back to original image coordinates.
        sample["scale_coef"] = f

        if 'landmarks' in sample:
            # `.float()` requires a torch tensor here (as produced by the dataset).
            landmarks = sample['landmarks'].reshape(-1, 2).float()
            landmarks = landmarks * f
            sample['landmarks'] = landmarks.reshape(-1)

        return sample


class CropCenter(object):
    """Cut a centered ``size`` x ``size`` square out of the image in a sample dict.

    The crop margins are stored under ``"crop_margin_x"`` / ``"crop_margin_y"`` and,
    when the sample has a ``"landmarks"`` entry, the landmarks are shifted by the same
    margins so they stay aligned with the cropped image.

    Args:
        size: side length of the square crop, in pixels.
        elem_name: key of the image inside the sample dict.
    """

    def __init__(self, size=CROP_SIZE, elem_name='image'):
        self.size = size
        self.elem_name = elem_name

    def __call__(self, sample):
        """Crop the image in ``sample`` and shift its landmarks; returns ``sample``."""
        img = sample[self.elem_name]
        # Only the first two dimensions matter, so 2-D (single-channel) images also work.
        h, w = img.shape[:2]
        margin_h = (h - self.size) // 2
        margin_w = (w - self.size) // 2
        sample[self.elem_name] = img[margin_h:margin_h + self.size, margin_w:margin_w + self.size]
        # Kept so predictions can later be mapped back to original image coordinates.
        sample["crop_margin_x"] = margin_w
        sample["crop_margin_y"] = margin_h

        if 'landmarks' in sample:
            landmarks = sample['landmarks'].reshape(-1, 2)
            # Subtract out of place: `reshape` may return a view, and an in-place `-=`
            # would then silently modify the tensor owned by the caller (e.g. the
            # dataset's stored landmarks) on every access.
            shift = torch.tensor((margin_w, margin_h), dtype=landmarks.dtype)[None, :]
            landmarks = landmarks - shift
            sample['landmarks'] = landmarks.reshape(-1)

        return sample


class TransformByKeys(object):
    """Apply a single transform to a chosen set of entries in a sample dict.

    Args:
        transform: callable applied to each selected value.
        names: iterable of keys to transform; stored as a set.
    """

    def __init__(self, transform, names):
        self.names = set(names)
        self.transform = transform

    def __call__(self, sample):
        """Transform the selected entries of ``sample`` in place and return it."""
        # Keys listed in `names` but missing from the sample are simply skipped.
        for key in (candidate for candidate in self.names if candidate in sample):
            sample[key] = self.transform(sample[key])
        return sample



class ThousandLandmarksDataset(data.Dataset):
    """Dataset of images with landmark annotations read from a tab-separated file.

    The annotation file has a header row; in every other row the first field is an
    image file name (relative to ``<root>/images``) and, for the train/val splits,
    the remaining fields are landmark coordinates that are reshaped into
    ``(num_points, 2)`` pairs.

    Args:
        root: directory holding the annotation file and the ``images`` folder.
        transforms: callable applied to each sample dict, or ``None``.
        split: ``"train"`` (rows before the ``TRAIN_SIZE`` boundary), ``"val"`` (rows
            from the boundary onward) or ``"test"`` (all rows of ``test_points.csv``,
            without landmarks).
    """

    def __init__(self, root, transforms, split="train"):
        super(ThousandLandmarksDataset, self).__init__()
        self.root = root
        # Resolve the annotation file from `root` instead of a hard-coded path, so the
        # class agrees with `images_root` and works for any data location.
        landmark_file_name = os.path.join(root, "landmarks.csv") if split != "test" \
            else os.path.join(root, "test_points.csv")
        images_root = os.path.join(root, "images")

        self.image_names = []
        self.landmarks = []

        # First pass only counts rows so the train/val boundary can be computed.
        with open(landmark_file_name, "rt") as fp:
            num_lines = sum(1 for line in fp)
        num_lines -= 1  # header

        # Row index (1-based, header is row 0) at which the validation part begins.
        split_row = int(TRAIN_SIZE * num_lines)

        with open(landmark_file_name, "rt") as fp:
            for i, line in tqdm(enumerate(fp), total=num_lines + 1):
                if i == 0:
                    continue  # skip header
                if split == "train" and i == split_row:
                    break  # reached end of train part of data
                elif split == "val" and i < split_row:
                    continue  # has not reached start of val part of data
                elements = line.strip().split('\t')
                image_name = os.path.join(images_root, elements[0])
                self.image_names.append(image_name)

                if split in ("train", "val"):
                    # `np.float` / `np.int` were removed in NumPy 1.24; the builtins
                    # are the exact types those aliases pointed to.
                    landmarks = list(map(float, elements[1:]))
                    landmarks = np.array(landmarks, dtype=int).reshape((len(landmarks) // 2, 2))
                    self.landmarks.append(landmarks)

        if split in ("train", "val"):
            # Stack into one ndarray first: converting a list of arrays directly is
            # very slow in torch.
            self.landmarks = torch.as_tensor(np.array(self.landmarks))
        else:
            self.landmarks = None

        self.transforms = transforms

    def __getitem__(self, idx):
        """Return the sample dict for ``idx``: ``"image"`` (RGB) and, if available, ``"landmarks"``."""
        sample = {}
        if self.landmarks is not None:
            landmarks = self.landmarks[idx]
            sample["landmarks"] = landmarks

        image = cv2.imread(self.image_names[idx])
        if image is None:
            # cv2.imread signals failure by returning None; fail with a clear message
            # instead of an opaque error inside cvtColor.
            raise FileNotFoundError("Could not read image: {}".format(self.image_names[idx]))
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        sample["image"] = image

        if self.transforms is not None:
            sample = self.transforms(sample)
        return sample

    def __len__(self):
        """Number of images in the selected split."""
        return len(self.image_names)


def restore_landmarks(landmarks, f, margins):
    """Undo crop and scale for one set of landmarks.

    Adds the crop margins back to the two coordinate columns and divides by the
    scale factor. ``landmarks`` is modified in place and also returned.

    Args:
        landmarks: 2-D array whose columns 0 and 1 are shifted by ``margins``.
        f: scale factor to divide by.
        margins: pair ``(dx, dy)`` added to column 0 and column 1 respectively.
    """
    offset_x, offset_y = margins
    # Column slices are views, so the in-place additions write into `landmarks`.
    first_col = landmarks[:, 0]
    second_col = landmarks[:, 1]
    first_col += offset_x
    second_col += offset_y
    landmarks /= f
    return landmarks


def restore_landmarks_batch(landmarks, fs, margins_x, margins_y):
    """Undo crop and scale for a batch of landmarks.

    For every batch item, adds its crop margins back to the two coordinates and
    divides by its scale factor. ``landmarks`` is modified in place and also returned.

    Args:
        landmarks: array indexed as ``[batch, point, coordinate]``.
        fs: per-item scale factors, one per batch entry.
        margins_x: per-item margins added to coordinate 0.
        margins_y: per-item margins added to coordinate 1.
    """
    # Coordinate slices are views, so the in-place additions write into `landmarks`.
    first_coord = landmarks[:, :, 0]
    second_coord = landmarks[:, :, 1]
    first_coord += margins_x[:, None]
    second_coord += margins_y[:, None]
    landmarks /= fs[:, None, None]
    return landmarks


def create_submission(path_to_data, test_predictions, path_to_submission_file):
    """Write a submission CSV with the requested landmark points for each test image.

    Reads ``<path_to_data>/test/test_points.csv`` (tab-separated). For each row it takes
    the file name from the first column and a list of point indices from the second,
    selects those points from ``test_predictions[i]`` and writes them as integers.

    Args:
        path_to_data: data root containing the ``test`` directory.
        test_predictions: indexable by row number; each item is an array of points.
        path_to_submission_file: path of the CSV file to create.

    NOTE(review): ``SUBMISSION_HEADER`` is not defined in the visible part of this
    notebook; it must exist at module level before this function is called.
    """
    test_dir = os.path.join(path_to_data, "test")
    mapping_path = os.path.join(test_dir, 'test_points.csv')
    mapping = pd.read_csv(mapping_path, delimiter='\t')

    # The context manager guarantees the file is flushed and closed, even on error.
    with open(path_to_submission_file, 'w') as wf:
        wf.write(SUBMISSION_HEADER)

        for i, row in mapping.iterrows():
            # Positional access via .iloc: plain `row[0]` on a label-indexed Series is
            # deprecated in recent pandas.
            file_name = row.iloc[0]
            # NOTE(review): eval() executes arbitrary code from the CSV; only safe for
            # trusted files. Consider ast.literal_eval if the cell is a plain literal.
            point_index_list = np.array(eval(row.iloc[1]))
            points_for_image = test_predictions[i]
            # `np.int` was removed in NumPy 1.24; builtin `int` is the same dtype.
            needed_points = points_for_image[point_index_list].astype(int)
            wf.write(file_name + ',' + ','.join(map(str, needed_points.reshape(2 * len(point_index_list)))) + '\n')

In [59]:
def parse_arguments():
    """Return the fixed run configuration as an ``argparse.Namespace``.

    Nothing is parsed from the command line; the values are hard-coded for
    notebook use.
    """
    settings = {
        'name': 'first_try',
        'data': './contest01_data/contest01_data',
        'batch_size': 128,
        'epochs': 35,
        'learning_rate': 2e-3,
        'gpu': True,
    }
    return Namespace(**settings)


def train(model, loader, loss_fn, optimizer, device, scaler=None):
    """Run one training epoch with automatic mixed precision and return the mean loss.

    Args:
        model: network to train; called as ``model(images)``.
        loader: iterable of batches with ``"image"`` and ``"landmarks"`` tensors.
        loss_fn: called as ``loss_fn(pred, target, reduction="mean")``.
        optimizer: optimizer stepping the model parameters.
        device: device the batch tensors are moved to.
        scaler: optional ``torch.cuda.amp.GradScaler``. Pass one shared instance to keep
            the loss-scale state across epochs; when omitted a fresh one is created for
            this call (disabled when ``device`` is not CUDA).

    Returns:
        Mean of the per-batch loss values.
    """
    # Mixed precision is only meaningful on CUDA; elsewhere both helpers become no-ops.
    use_amp = torch.device(device).type == "cuda"
    if scaler is None:
        # Previously an undefined global `scaler` was used, which raised NameError on a
        # fresh kernel.
        scaler = torch.cuda.amp.GradScaler(enabled=use_amp)

    model.train()
    train_loss = []
    for batch in tqdm(loader, total=len(loader), desc="training..."):
        images = batch["image"].to(device)  # B x 3 x CROP_SIZE x CROP_SIZE
        landmarks = batch["landmarks"].to(device)  # B x (2 * NUM_PTS)

        # Clear gradients before the backward pass so stale gradients left over from
        # earlier code can never leak into the first step.
        optimizer.zero_grad()
        with torch.cuda.amp.autocast(enabled=use_amp):
            pred_landmarks = model(images)  # B x (2 * NUM_PTS)
            loss = loss_fn(pred_landmarks, landmarks, reduction="mean")

        # Scale the loss to avoid fp16 gradient underflow; step() unscales before
        # applying the update and update() adapts the scale factor.
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()

        train_loss.append(loss.item())

    return np.mean(train_loss)


def validate(model, loader, loss_fn, device):
    """Evaluate the model on a loader and return two mean losses.

    Returns:
        ``(val_loss, real_val_loss)``: the mean of ``loss_fn`` over batches in the
        transformed (scaled and cropped) coordinates, and the mean per-batch MSE after
        both predictions and targets are mapped back to original image coordinates.
    """
    model.eval()
    crop_space_losses = []
    original_space_losses = []
    for batch in tqdm(loader, total=len(loader), desc="validation..."):
        images = batch["image"].to(device)
        targets = batch["landmarks"]

        with torch.no_grad():
            outputs = model(images).cpu()
        crop_space_losses.append(loss_fn(outputs, targets, reduction="mean").item())

        # Compute the "proper" loss, i.e. in original image coordinates.
        scale = batch["scale_coef"].numpy()
        # Pull out the crop margins recorded by the transforms.
        offset_x = batch["crop_margin_x"].numpy()
        offset_y = batch["crop_margin_y"].numpy()
        batch_shape = (len(outputs), NUM_PTS, 2)
        # Map the model predictions back to original coordinates.
        restored_pred = restore_landmarks_batch(outputs.numpy().reshape(batch_shape), scale, offset_x, offset_y)
        # Map the ground-truth landmarks back to original coordinates.
        restored_true = restore_landmarks_batch(targets.numpy().reshape(batch_shape), scale, offset_x, offset_y)
        # Append this batch's MSE to the original-coordinate loss list.
        squared_error = (restored_pred.reshape(-1) - restored_true.reshape(-1)) ** 2
        original_space_losses.append(np.mean(squared_error))

    return np.mean(crop_space_losses), np.mean(original_space_losses)


def predict(model, loader, device):
    """Predict landmarks for every item of ``loader`` in original image coordinates.

    Args:
        model: network called as ``model(images)``; the caller is responsible for
            placing it on ``device``.
        loader: iterable of batches providing ``"image"``, ``"scale_coef"``,
            ``"crop_margin_x"`` and ``"crop_margin_y"``; must expose ``.dataset``.
        device: device the image tensors are moved to.

    Returns:
        Array of shape ``(len(loader.dataset), NUM_PTS, 2)``; rows are filled in the
        order the loader yields its batches.
    """
    model.eval()
    predictions = np.zeros((len(loader.dataset), NUM_PTS, 2))
    # Track the write position explicitly rather than deriving it from
    # `loader.batch_size`, which is None for batch_sampler loaders and wrong whenever
    # batches differ in size.
    offset = 0
    for batch in tqdm(loader, total=len(loader), desc="test prediction..."):
        images = batch["image"].to(device)

        with torch.no_grad():
            pred_landmarks = model(images).cpu()
        pred_landmarks = pred_landmarks.numpy().reshape((len(pred_landmarks), NUM_PTS, 2))  # B x NUM_PTS x 2

        fs = batch["scale_coef"].numpy()  # B
        margins_x = batch["crop_margin_x"].numpy()  # B
        margins_y = batch["crop_margin_y"].numpy()  # B
        prediction = restore_landmarks_batch(pred_landmarks, fs, margins_x, margins_y)  # B x NUM_PTS x 2

        batch_len = len(prediction)
        predictions[offset:offset + batch_len] = prediction
        offset += batch_len
    return predictions

In [60]:
# Run configuration (data path, batch size, ...) read by the cells below.
args = parse_arguments()

In [61]:
# Preprocessing pipeline: two geometric steps on the whole sample (image and
# landmarks), followed by image-only conversions and normalization.
test_transforms = transforms.Compose(
    [
        ScaleMinSideToSize((CROP_SIZE, CROP_SIZE)),
        CropCenter(CROP_SIZE),
    ]
    + [
        TransformByKeys(image_op, ("image",))
        for image_op in (
            transforms.ToPILImage(),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.25, 0.25, 0.25]),
        )
    ]
)

In [62]:
# Both splits read the same annotation file under <data>/train; the generator builds
# the train split first and the val split second.
train_dataset, val_dataset = (
    ThousandLandmarksDataset(os.path.join(args.data, "train"), test_transforms, split=split_name)
    for split_name in ("train", "val")
)

# Training batches are shuffled and the incomplete final batch is dropped.
train_dataloader = DataLoader(
    train_dataset,
    batch_size=args.batch_size,
    shuffle=True,
    drop_last=True,
    num_workers=4,
    pin_memory=True,
)
# Validation keeps dataset order and every sample.
val_dataloader = DataLoader(
    val_dataset,
    batch_size=args.batch_size,
    shuffle=False,
    drop_last=False,
    num_workers=4,
    pin_memory=True,
)

 80%|███████▉  | 50882/63701 [01:05<00:05, 2523.03it/s]
  0%|          | 0/63701 [00:00<?, ?it/s][A
 16%|█▌        | 10240/63701 [00:00<00:00, 102394.97it/s][A
 30%|██▉       | 19040/63701 [00:00<00:00, 97604.45it/s] [A
 43%|████▎     | 27323/63701 [00:00<00:00, 92646.25it/s][A
 55%|█████▌    | 35036/63701 [00:00<00:00, 87369.32it/s][A
 69%|██████▉   | 44131/63701 [00:00<00:00, 88412.61it/s][A
 81%|████████  | 51340/63701 [00:00<00:00, 54693.85it/s][A
 90%|████████▉ | 57255/63701 [00:03<00:00, 7154.56it/s] [A
100%|██████████| 63701/63701 [00:05<00:00, 10806.07it/s][A


In [38]:
# Build the ensemble: for each entry, swap the final linear layer for one with
# 2 * NUM_PTS outputs, load the saved weights on CPU, and store the ready model
# under its name. timm models expose the head as `classifier`, torchvision as `fc`.
models_to_blend = {}
for name_model, model_dict in (
    ('eff_net_crop256', {
        'model': timm.create_model('efficientnet_b3a'),
        'filepath': 'models_to_blend/eff_net_crop256.pth',
        'is_timm': True,
    }),
    ('wide_resnet_crop128', {
        'model': models.wide_resnet101_2(),
        'filepath': 'models_to_blend/wide_resnet_crop128.pth',
        'is_timm': False,
    }),
    ('resnet152_crop128', {
        'model': models.resnet152(),
        'filepath': 'models_to_blend/resnet152_crop128.pth',
        'is_timm': False,
    }),
):
    tmp_model = model_dict['model']
    head_attr = 'classifier' if model_dict['is_timm'] else 'fc'
    head_inputs = getattr(tmp_model, head_attr).in_features
    setattr(tmp_model, head_attr, nn.Linear(head_inputs, 2 * NUM_PTS, bias=True))

    with open(model_dict['filepath'], "rb") as fp:
        best_state_dict = torch.load(fp, map_location="cpu")
    tmp_model.load_state_dict(best_state_dict)

    models_to_blend[name_model] = tmp_model

In [43]:
def get_loss_with_weights(weights, preds_by_model, true_landmarks, loss_fn):
    """Loss of the weighted-average blend of several models' predictions.

    Args:
        weights: one blending weight per model, in the iteration order of
            ``preds_by_model``. They are normalized by their sum, so only their
            ratios matter; a zero sum yields a non-finite result.
        preds_by_model: mapping of model name to a prediction array; all arrays must
            share one shape.
        true_landmarks: ground-truth array of the same shape as each prediction.
        loss_fn: called as ``loss_fn(pred, target, reduction="mean")`` on tensors.

    Returns:
        The loss as a Python float.

    Raises:
        ValueError: if the number of weights differs from the number of models.
    """
    weights = np.asarray(weights, dtype=np.float64)
    model_preds = list(preds_by_model.values())
    if len(weights) != len(model_preds):
        raise ValueError(
            "expected {} weights, got {}".format(len(model_preds), len(weights))
        )

    # Builtin sum: np.sum over a generator is deprecated. (The original also referred
    # to an undefined name here instead of `preds_by_model`.)
    weighted_total = sum(w * np.asarray(pred) for w, pred in zip(weights, model_preds))
    blend_pred_landmarks = weighted_total / weights.sum()

    # Use a common dtype on both sides so the loss never sees mixed precisions.
    blend_tensor = torch.as_tensor(blend_pred_landmarks, dtype=torch.float64)
    true_tensor = torch.as_tensor(true_landmarks, dtype=torch.float64)
    loss = loss_fn(blend_tensor, true_tensor, reduction="mean")
    return loss.item()

In [68]:
def get_predictions_by_model(models_dict, loader, device):
    """Run ``predict`` for every model and collect the results by model name.

    Args:
        models_dict: mapping of model name to the object passed to ``predict``.
        loader: data loader forwarded to ``predict``.
        device: device forwarded to ``predict``.

    Returns:
        Dict mapping each model name to that model's predictions.
    """
    return {
        model_name: predict(model, loader, device)
        for model_name, model in models_dict.items()
    }

In [None]:
# Run every model in the blend over the validation loader on CPU; the result maps
# model name -> predictions restored to original image coordinates.
preds_by_model = get_predictions_by_model(models_to_blend, val_dataloader, torch.device('cpu'))


test prediction...:   0%|          | 0/100 [00:00<?, ?it/s][A
test prediction...:   1%|          | 1/100 [00:35<58:43, 35.59s/it][A
test prediction...:   2%|▏         | 2/100 [01:12<58:53, 36.05s/it][A
test prediction...:   3%|▎         | 3/100 [01:46<57:14, 35.41s/it][A
test prediction...:   4%|▍         | 4/100 [02:19<55:19, 34.58s/it][A
test prediction...:   5%|▌         | 5/100 [02:47<51:55, 32.79s/it][A
test prediction...:   6%|▌         | 6/100 [03:16<49:26, 31.56s/it][A
test prediction...:   7%|▋         | 7/100 [04:03<56:10, 36.24s/it][A
test prediction...:   8%|▊         | 8/100 [1:02:26<27:30:02, 1076.11s/it][A
test prediction...:   9%|▉         | 9/100 [3:06:14<75:22:23, 2981.80s/it][A

In [None]:
# Fit the blending weights on the validation split.
# scipy's signature is minimize(fun, x0, args=...): x0 must be the weight vector, and
# the remaining arguments of get_loss_with_weights go in `args`. The original call
# passed the models dict as x0 and the dataloader as args, which cannot work.
# Predictions are already in original image coordinates, so they are compared with
# the untransformed landmarks stored on the dataset.
# NOTE(review): assumes val_dataset.landmarks has shape (N, NUM_PTS, 2) in the same
# row order as the predictions (the val loader does not shuffle) - confirm.
true_val_landmarks = val_dataset.landmarks.numpy().astype(np.float64)
initial_weights = np.full(len(preds_by_model), 1.0 / len(preds_by_model))
# Nelder-Mead needs no gradients; the objective depends only on weight ratios.
blend_result = minimize(
    get_loss_with_weights,
    initial_weights,
    args=(preds_by_model, true_val_landmarks, fnn.mse_loss),
    method="Nelder-Mead",
)
blend_result

In [67]:
# Display the module tree of the loaded EfficientNet entry for inspection.
models_to_blend['eff_net_crop256']

EfficientNet(
  (conv_stem): Conv2d(3, 40, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
  (bn1): BatchNorm2d(40, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (act1): SwishMe()
  (blocks): Sequential(
    (0): Sequential(
      (0): DepthwiseSeparableConv(
        (conv_dw): Conv2d(40, 40, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=40, bias=False)
        (bn1): BatchNorm2d(40, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act1): SwishMe()
        (se): SqueezeExcite(
          (conv_reduce): Conv2d(40, 10, kernel_size=(1, 1), stride=(1, 1))
          (act1): SwishMe()
          (conv_expand): Conv2d(10, 40, kernel_size=(1, 1), stride=(1, 1))
        )
        (conv_pw): Conv2d(40, 24, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn2): BatchNorm2d(24, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act2): Identity()
      )
      (1): DepthwiseSeparableConv(
        (conv