# Модель - Unet (heatmap regression)

In [1]:
# Experiment identifier; reused below for the TensorBoard log directory and for
# the checkpoint / prediction / submission file names.
MODEL_NAME = 'unet-heatmap-regression'

# Импорты

In [2]:
import copy
import os
import pickle
import sys
from datetime import datetime

sys.path.append(os.path.abspath('../'))

from tqdm.notebook import tqdm_notebook as tqdm
import numpy as np
import pandas as pd
pd.set_option('display.max_columns', None)
pd.set_option('display.max_colwidth', None)
pd.set_option('display.max_rows', None)

import torch, torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import transforms
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter
import torchvision.models as models

import matplotlib.pyplot as plt
%matplotlib inline

In [3]:
from dataset import ThousandLandmarksDataset
from augmentations import (
    ScaleMinSideToSize,
    CropCenter,
    TransformByKeys,
    HorisontalFlip,
    RandomRotation,
    Grayscale,
    RandomConvertScaleAbs,
    RandomGammaCorrection,
    RandomBorderCutout,
    RandomRectCutout,
    OneOf,
    Heatmap
)

from routines import train, validate, predict, create_submission, predict_unet, predict_unet2

from models.unet import UNet

In [4]:
# Seed the NumPy and PyTorch random number generators with a fixed value so runs are repeatable.
np.random.seed(1234)
torch.manual_seed(1234)

<torch._C.Generator at 0x7f442d9c55f0>

In [5]:
# Enable IPython's autoreload extension in mode 2 (re-import modules before each cell runs),
# so edits to the local helper modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [6]:
# Reproducibility settings for cuDNN: request deterministic algorithms and
# turn off the benchmark-based algorithm auto-selection.
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

In [7]:
# Use the first CUDA device when one is available, otherwise fall back to the CPU.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

In [8]:
# Silence every Python warning for the rest of the session.
# NOTE(review): this also hides deprecation and numerical warnings — consider narrowing the filter.
import warnings
warnings.filterwarnings('ignore')

# Справочная информация

## Расположение точек

- 0:273 - овал лица
- 273:337 - левая бровь
- 337:401 - правая бровь
- 401:527 - нос
- 527:587 - вертикальная линия от губ до лба по носу
- 587:714 - левый глаз (контур и яблоко)
- 714:841 - правый глаз (контур и яблоко)
- 841:906 - верхняя губа
- 906:969 - нижняя губа
- 969:970 - центр левого глаза
- 970:971 - центр правого глаза

- 650:651 - точка левого глаза слева
- 682:683 - точка левого глаза справа
- 777:778 - точка правого глаза справа
- 808:809 - точка правого глаза слева

## Константы

In [9]:
# Crop size (passed to ScaleMinSideToSize and CropCenter in the transform pipelines)
CROP_SIZE = 128

# Number of landmark points to predict
NUM_PTS = 971

# Fraction of the data assigned to the training split (passed to the dataset as TRAIN_SIZE)
TRAIN_SIZE = 0.8

# Batch size (used by both the training and the validation DataLoader)
TRAIN_BATCH_SIZE = 16

# Чтение данных

In [10]:
# Root directory of the training data.
# The default is the original author's local path; set the LANDMARKS_TRAIN_DATA_PATH
# environment variable to run the notebook on another machine without editing this cell.
TRAIN_DATA_PATH = os.environ.get(
    'LANDMARKS_TRAIN_DATA_PATH',
    '/home/kovalexal/Spaces/learning/made/made_cv/competitions/facial_points/data/train/',
)

In [11]:
def _base_pipeline():
    """Return a fresh list of the preprocessing steps shared by training and validation.

    A new list with newly constructed transform objects is returned on every call,
    so the two pipelines never share transform instances.
    """
    return [
        # Basic geometric preprocessing
        ScaleMinSideToSize((CROP_SIZE, CROP_SIZE)),
        CropCenter(CROP_SIZE),

        # Convert the key points into heatmaps
        Heatmap(sigma=(3, 3)),

        # Prepare the image for the network: PIL image -> tensor -> per-channel normalisation
        TransformByKeys(transforms.ToPILImage(), ('image',)),
        TransformByKeys(transforms.ToTensor(), ('image',)),
        TransformByKeys(transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]), ('image',)),
    ]


# Pipeline applied to training samples
train_transforms = transforms.Compose(_base_pipeline())

# Pipeline applied to validation samples (currently the same steps as training)
val_transforms = transforms.Compose(_base_pipeline())

# Training split; TRAIN_SIZE is forwarded to the dataset together with the split name
train_dataset = ThousandLandmarksDataset(
    TRAIN_DATA_PATH,
    train_transforms,
    split='train',
    TRAIN_SIZE=TRAIN_SIZE,
)

len(train_dataset)

# Validation split, read from the same directory with the validation pipeline
val_dataset = ThousandLandmarksDataset(
    TRAIN_DATA_PATH,
    val_transforms,
    split='val',
    TRAIN_SIZE=TRAIN_SIZE,
)

len(val_dataset)

# Обучение и валидация

# Training batches: reshuffled each epoch, the trailing incomplete batch is dropped
train_dataloader = DataLoader(
    train_dataset,
    batch_size=TRAIN_BATCH_SIZE,
    shuffle=True,
    drop_last=True,
    num_workers=8,
    pin_memory=True,
)

# Validation batches: fixed order, every sample is kept
val_dataloader = DataLoader(
    val_dataset,
    batch_size=TRAIN_BATCH_SIZE,
    shuffle=False,
    drop_last=False,
    num_workers=8,
    pin_memory=True,
)

# Learning rate handed to the optimizer
LEARNING_RATE = 1e-3

# Number of epochs
N_EPOCHS = 20

# TensorBoard writer; events go to a directory named after the model.
# NOTE(review): per the SummaryWriter docs `comment` has no effect once `log_dir` is given.
writer = SummaryWriter(
    log_dir='./{}'.format(MODEL_NAME),
    comment=MODEL_NAME,
)

# Build the model (3 input channels; the second argument is presumably the number
# of output heatmaps, one per landmark — TODO confirm against models.unet) and move it to the device
model = UNet(3, NUM_PTS)
model.to(device)
# writer.add_graph(model, next(iter(val_dataloader))['image'].to(device))

# Optimisation setup: mean-squared-error loss, Adam with the AMSGrad variant enabled
criterion = F.mse_loss
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE, amsgrad=True)

# Running record used to pick the best result during training
best_val_loss = np.inf
best_model_state_dict = {}

from torchsummary import summary

# Print a layer-by-layer summary for one input sample; shape[1:] strips the
# leading (batch) dimension of the first validation batch.
with torch.no_grad():
    sample_shape = next(iter(val_dataloader))['image'].shape[1:]
    print(summary(model, sample_shape))

# Epoch to start from. It is advanced after every completed epoch, presumably so the
# loop can be re-run to continue after an interruption.
CURRENT_EPOCH = 0

for epoch in range(CURRENT_EPOCH, N_EPOCHS):
    # One pass over the training data, then log the epoch-level loss
    train_loss = train(epoch, model, train_dataloader, criterion, optimizer, device=device, writer=writer, log_every=100)
    writer.add_scalar('EpochLoss/train', train_loss, epoch)

    # Evaluate on the validation split
    val_loss = validate(epoch, model, val_dataloader, criterion, device=device, writer=writer, log_every=20)
    writer.add_scalar('EpochLoss/val', val_loss, epoch)

    print('Epoch #{:2}:\ttrain loss: {:5.5}\tval loss: {:5.5}'.format(epoch, train_loss, val_loss))

    # Keep the weights with the lowest validation loss seen so far
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        # state_dict() returns references to the live parameter tensors, which later
        # optimizer steps modify in place; deep-copy to keep a real snapshot.
        best_model_state_dict = copy.deepcopy(model.state_dict())
        with open('{}_best.pth'.format(MODEL_NAME), 'wb') as fp:
            torch.save(best_model_state_dict, fp)

    CURRENT_EPOCH += 1

In [12]:
# Create a fresh, untrained network for inference and move it to the selected device;
# its weights are loaded from a checkpoint in the next cell.
model = UNet(3, NUM_PTS)
model.to(device)

UNet(
  (inc): DoubleConv(
    (double_conv): Sequential(
      (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU(inplace=True)
      (3): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (4): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (5): ReLU(inplace=True)
    )
  )
  (down1): Down(
    (maxpool_conv): Sequential(
      (0): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
      (1): DoubleConv(
        (double_conv): Sequential(
          (0): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
          (1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (2): ReLU(inplace=True)
          (3): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
          (4): BatchNorm2d(128, eps=1e-05, moment

In [13]:
# Restore weights from a saved checkpoint; tensors are mapped to the CPU on load
# and then copied into the model by load_state_dict.
# NOTE(review): no visible cell writes '<MODEL_NAME>_train_10.pth' (the training loop
# saves '<MODEL_NAME>_best.pth') — confirm where this file comes from.
checkpoint_path = '{}_train_10.pth'.format(MODEL_NAME)
with open(checkpoint_path, 'rb') as fp:
    best_state_dict = torch.load(fp, map_location="cpu")
    model.load_state_dict(best_state_dict)

# Предсказание и сохранение результата

In [14]:
# Root directory of the test data. The commented-out line is an alternative location
# under the project's data directory.
# TEST_DATA_PATH = '/home/kovalexal/Spaces/learning/made/made_cv/competitions/facial_points/data/test/'
TEST_DATA_PATH = '/tmp/landmarks_test'

In [15]:
# Test data goes through the evaluation pipeline, not the training one: the two are
# identical today, but any augmentation later added to train_transforms must not
# leak into inference.
test_dataset = ThousandLandmarksDataset(TEST_DATA_PATH, val_transforms, split='test')

In [16]:
# Batch size used at inference time
TEST_BATCH_SIZE = 32

# Test batches: fixed order, no samples dropped
test_dataloader = DataLoader(
    test_dataset,
    batch_size=TEST_BATCH_SIZE,
    shuffle=False,
    drop_last=False,
    num_workers=4,
    pin_memory=True,
)

# with open('{}_best.pth'.format(MODEL_NAME), 'rb') as fp:
#     best_state_dict = torch.load(fp, map_location="cpu")
#     model.load_state_dict(best_state_dict)

# test_predictions = predict(model, test_dataloader, device)
# with open('{}_test_predictions.pkl'.format(MODEL_NAME), 'wb') as fp:
#     pickle.dump({'image_names': test_dataset.image_names, 'landmarks': test_predictions}, fp)

# create_submission(TEST_DATA_PATH, test_predictions, '{}_submit.csv'.format(MODEL_NAME))

In [17]:
# Release cached, currently unused GPU memory held by PyTorch's allocator before running inference.
torch.cuda.empty_cache()

In [18]:
# Run inference over the test DataLoader with the predict_unet2 helper imported from routines.
# NOTE(review): predict_unet2 is defined outside this notebook — confirm it switches the
# model to eval mode and disables gradient tracking.
test_predictions = predict_unet2(model, test_dataloader, device)

HBox(children=(FloatProgress(value=0.0, description='test prediction...', max=3120.0, style=ProgressStyle(desc…




In [19]:
# Persist the raw predictions together with the matching image names.
# NOTE(review): the destination is a hard-coded absolute path on the author's machine.
predictions_path = '/run/media/kovalexal/DATA/{}_test_predictions_10_final.pkl'.format(MODEL_NAME)
with open(predictions_path, 'wb') as fp:
    payload = {'image_names': test_dataset.image_names, 'landmarks': test_predictions}
    pickle.dump(payload, fp)

In [20]:
# Write the submission CSV from the test predictions.
# NOTE(review): the destination is a hard-coded absolute path on the author's machine.
submission_path = '/run/media/kovalexal/DATA/{}_submit_10_final.csv'.format(MODEL_NAME)
create_submission(TEST_DATA_PATH, test_predictions, submission_path)