In [1]:
# Run identifier: reused below for the TensorBoard log directory and for the
# checkpoint, prediction-pickle and submission file names.
MODEL_NAME = "model"

# Imports

In [2]:
import os
import sys
import pickle
sys.path.append(os.path.abspath('../'))
from datetime import datetime

from tqdm.notebook import tqdm_notebook as tqdm
import numpy as np
import pandas as pd
pd.set_option('display.max_columns', None)
pd.set_option('display.max_colwidth', None)
pd.set_option('display.max_rows', None)

import torch, torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import transforms
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter
import torchvision.models as models

In [3]:
from dataset import ThousandLandmarksDataset
from augmentations import ScaleMinSideToSize, CropCenter, TransformByKeys
from routines import train, validate, predict, create_submission

In [4]:
# Seed the NumPy and PyTorch global random generators with the same value
# so that repeated runs start from the same random state.
SEED = 1234
np.random.seed(SEED)
torch.manual_seed(SEED)

<torch._C.Generator at 0x292bebec290>

In [5]:
# Enable IPython's autoreload extension in mode 2, so edits to imported
# modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [6]:
# Favour reproducibility over speed: turn off cuDNN's benchmark-based
# algorithm auto-tuning and request deterministic cuDNN algorithms.
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True

In [7]:
# Run on the first CUDA GPU when one is available, otherwise on the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda:0')
else:
    device = torch.device('cpu')

In [8]:
# import warnings
# warnings.filterwarnings('ignore')

## Constants

In [9]:
# Side length passed to the ScaleMinSideToSize and CropCenter transforms.
CROP_SIZE = 128
# Number of landmark points; the network head emits 2 * NUM_PTS values.
NUM_PTS = 971
# Value forwarded to the dataset as its TRAIN_SIZE argument for the
# train / validation split.
TRAIN_SIZE = 0.8
# Batch size used by both the training and the validation data loaders.
TRAIN_BATCH_SIZE = 512

# Reading data

In [10]:
# Location of the labelled training data, relative to the notebook.
# Built with os.path.join so the separator matches the host OS: on Windows
# this is still '.\data\train', elsewhere it becomes './data/train'.
TRAIN_DATA_PATH = os.path.join('.', 'data', 'train')

In [11]:
# Preprocessing pipeline. Despite its name it is reused unchanged for the
# validation and test datasets further down.
IMAGE_KEYS = ('image',)  # the tensor-conversion steps are applied to this sample key only
_preprocessing_steps = [
    ScaleMinSideToSize((CROP_SIZE, CROP_SIZE)),
    CropCenter(CROP_SIZE),
    TransformByKeys(transforms.ToPILImage(), IMAGE_KEYS),
    TransformByKeys(transforms.ToTensor(), IMAGE_KEYS),
    # mean = std = 0.5 per channel maps ToTensor's [0, 1] range onto [-1, 1].
    TransformByKeys(
        transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
        IMAGE_KEYS,
    ),
]
train_transforms = transforms.Compose(_preprocessing_steps)

In [12]:
# Training split of the labelled data.
train_dataset = ThousandLandmarksDataset(
    TRAIN_DATA_PATH,
    train_transforms,
    split='train',
    TRAIN_SIZE=TRAIN_SIZE,
)

In [13]:
# Sanity check: number of samples in the training split.
len(train_dataset)

315144

In [14]:
# Validation split, read from the same directory and passed through the same
# transforms as the training split.
val_dataset = ThousandLandmarksDataset(
    TRAIN_DATA_PATH,
    train_transforms,
    split='val',
    TRAIN_SIZE=TRAIN_SIZE,
)

In [15]:
# Sanity check: number of samples in the validation split.
len(val_dataset)

78786

# Train and validation

In [16]:
# Training batches: reshuffled every epoch; the trailing incomplete batch is
# dropped so every step sees exactly TRAIN_BATCH_SIZE samples.
train_dataloader = DataLoader(
    train_dataset,
    batch_size=TRAIN_BATCH_SIZE,
    shuffle=True,
    drop_last=True,
    num_workers=0,  # load data in the main process
    pin_memory=True,
)

In [17]:
# Validation batches: fixed order, and the final partial batch is kept so
# every validation sample is evaluated.
val_dataloader = DataLoader(
    val_dataset,
    batch_size=TRAIN_BATCH_SIZE,
    shuffle=False,
    drop_last=False,
    num_workers=0,  # load data in the main process
    pin_memory=True,
)

In [22]:
# Shell escape: print the current GPU status (memory use, utilisation).
!nvidia-smi

Sun May 10 23:15:41 2020       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 431.87       Driver Version: 431.87       CUDA Version: 10.1     |
|-------------------------------+----------------------+----------------------+
| GPU  Name            TCC/WDDM | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|   0  GeForce GTX 166... WDDM  | 00000000:01:00.0 Off |                  N/A |
| N/A   58C    P0    29W /  N/A |   4420MiB /  6144MiB |      0%      Default |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Processes:                                                       GPU Memory |
|  GPU       PID   Type   Process name                             Usage      |
|    0  

In [21]:
# Step size for the optimizer.
LEARNING_RATE = 1e-3

# Upper bound on the number of training epochs.
N_EPOCHS = 50

# TensorBoard writer for this run; event files go to ./<MODEL_NAME>.
writer = SummaryWriter(log_dir='./{}'.format(MODEL_NAME), comment=MODEL_NAME)

# Model: pretrained ResNet-18 whose classification layer is replaced by a
# linear layer with 2 * NUM_PTS outputs (presumably one (x, y) pair per
# landmark - confirm against the dataset/loss code).
model = models.resnet18(pretrained=True)
model.fc = nn.Linear(in_features=model.fc.in_features, out_features=2 * NUM_PTS, bias=True)
model.to(device)

# Log the network graph to TensorBoard using the first validation batch as
# the example input.
sample_images = next(iter(val_dataloader))['image']
writer.add_graph(model, sample_images.to(device))

# Optimization setup: Adam (AMSGrad variant) minimising mean squared error.
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE, amsgrad=True)
criterion = F.mse_loss

# Bookkeeping used by the training loop to remember the best result so far.
best_val_loss = np.inf
best_model_state_dict = {}

In [23]:
# Index of the next epoch to run. Kept in its own cell so that re-running the
# training cell after an interruption resumes instead of starting over.
CURRENT_EPOCH = 0

In [24]:
# Train / validate for the remaining epochs, logging per-epoch losses to
# TensorBoard and checkpointing the weights with the lowest validation loss.
# The loop starts at CURRENT_EPOCH, so re-running this cell after an
# interruption continues from the first unfinished epoch.
for epoch in range(CURRENT_EPOCH, N_EPOCHS):
    train_loss = train(epoch, model, train_dataloader, criterion, optimizer, device=device, writer=writer, log_every=100)
    writer.add_scalar('EpochLoss/train', train_loss, epoch)

    val_loss = validate(epoch, model, val_dataloader, criterion, device=device, writer=writer, log_every=20)
    writer.add_scalar('EpochLoss/val', val_loss, epoch)

    print('Epoch #{:2}:\ttrain loss: {:5.5}\tval loss: {:5.5}'.format(epoch, train_loss, val_loss))

    if val_loss < best_val_loss:
        best_val_loss = val_loss
        # model.state_dict() returns references to the live parameter tensors,
        # which keep changing as training continues. Clone them (onto the CPU,
        # to avoid holding a second copy in GPU memory) so this really is a
        # frozen snapshot of the best weights.
        best_model_state_dict = {
            name: tensor.detach().cpu().clone()
            for name, tensor in model.state_dict().items()
        }
        # Persist the best weights; the prediction cell reloads this file.
        with open('{}_best.pth'.format(MODEL_NAME), 'wb') as fp:
            torch.save(model.state_dict(), fp)

    # This epoch is complete: a re-run of the cell resumes at the next one.
    CURRENT_EPOCH = epoch + 1

HBox(children=(FloatProgress(value=0.0, description='training...', max=615.0, style=ProgressStyle(description_…




HBox(children=(FloatProgress(value=0.0, description='validation...', max=154.0, style=ProgressStyle(descriptio…


Epoch # 0:	train loss: 305.4	val loss: 20.58


HBox(children=(FloatProgress(value=0.0, description='training...', max=615.0, style=ProgressStyle(description_…




HBox(children=(FloatProgress(value=0.0, description='validation...', max=154.0, style=ProgressStyle(descriptio…


Epoch # 1:	train loss: 11.296	val loss: 5.6919


HBox(children=(FloatProgress(value=0.0, description='training...', max=615.0, style=ProgressStyle(description_…




HBox(children=(FloatProgress(value=0.0, description='validation...', max=154.0, style=ProgressStyle(descriptio…


Epoch # 2:	train loss: 4.5741	val loss: 3.8722


HBox(children=(FloatProgress(value=0.0, description='training...', max=615.0, style=ProgressStyle(description_…




HBox(children=(FloatProgress(value=0.0, description='validation...', max=154.0, style=ProgressStyle(descriptio…


Epoch # 3:	train loss: 3.5165	val loss: 3.3044


HBox(children=(FloatProgress(value=0.0, description='training...', max=615.0, style=ProgressStyle(description_…




HBox(children=(FloatProgress(value=0.0, description='validation...', max=154.0, style=ProgressStyle(descriptio…


Epoch # 4:	train loss: 3.0379	val loss: 2.8184


HBox(children=(FloatProgress(value=0.0, description='training...', max=615.0, style=ProgressStyle(description_…




HBox(children=(FloatProgress(value=0.0, description='validation...', max=154.0, style=ProgressStyle(descriptio…


Epoch # 5:	train loss: 2.6995	val loss: 2.5856


HBox(children=(FloatProgress(value=0.0, description='training...', max=615.0, style=ProgressStyle(description_…




HBox(children=(FloatProgress(value=0.0, description='validation...', max=154.0, style=ProgressStyle(descriptio…


Epoch # 6:	train loss: 2.4499	val loss: 2.3951


HBox(children=(FloatProgress(value=0.0, description='training...', max=615.0, style=ProgressStyle(description_…




HBox(children=(FloatProgress(value=0.0, description='validation...', max=154.0, style=ProgressStyle(descriptio…


Epoch # 7:	train loss: 2.3029	val loss: 2.2937


HBox(children=(FloatProgress(value=0.0, description='training...', max=615.0, style=ProgressStyle(description_…




HBox(children=(FloatProgress(value=0.0, description='validation...', max=154.0, style=ProgressStyle(descriptio…


Epoch # 8:	train loss: 2.196	val loss: 2.2415


HBox(children=(FloatProgress(value=0.0, description='training...', max=615.0, style=ProgressStyle(description_…




HBox(children=(FloatProgress(value=0.0, description='validation...', max=154.0, style=ProgressStyle(descriptio…


Epoch # 9:	train loss: 2.1081	val loss: 2.3706


HBox(children=(FloatProgress(value=0.0, description='training...', max=615.0, style=ProgressStyle(description_…




HBox(children=(FloatProgress(value=0.0, description='validation...', max=154.0, style=ProgressStyle(descriptio…


Epoch #10:	train loss: 2.0188	val loss: 2.1658


HBox(children=(FloatProgress(value=0.0, description='training...', max=615.0, style=ProgressStyle(description_…




HBox(children=(FloatProgress(value=0.0, description='validation...', max=154.0, style=ProgressStyle(descriptio…


Epoch #11:	train loss: 1.9681	val loss: 2.4836


HBox(children=(FloatProgress(value=0.0, description='training...', max=615.0, style=ProgressStyle(description_…




HBox(children=(FloatProgress(value=0.0, description='validation...', max=154.0, style=ProgressStyle(descriptio…


Epoch #12:	train loss: 1.885	val loss: 2.1099


HBox(children=(FloatProgress(value=0.0, description='training...', max=615.0, style=ProgressStyle(description_…




HBox(children=(FloatProgress(value=0.0, description='validation...', max=154.0, style=ProgressStyle(descriptio…


Epoch #13:	train loss: 1.8248	val loss: 2.0764


HBox(children=(FloatProgress(value=0.0, description='training...', max=615.0, style=ProgressStyle(description_…




HBox(children=(FloatProgress(value=0.0, description='validation...', max=154.0, style=ProgressStyle(descriptio…


Epoch #14:	train loss: 1.7661	val loss: 2.1086


HBox(children=(FloatProgress(value=0.0, description='training...', max=615.0, style=ProgressStyle(description_…




HBox(children=(FloatProgress(value=0.0, description='validation...', max=154.0, style=ProgressStyle(descriptio…


Epoch #15:	train loss: 1.6932	val loss: 2.0757


HBox(children=(FloatProgress(value=0.0, description='training...', max=615.0, style=ProgressStyle(description_…




HBox(children=(FloatProgress(value=0.0, description='validation...', max=154.0, style=ProgressStyle(descriptio…


Epoch #16:	train loss: 1.6462	val loss: 2.0497


HBox(children=(FloatProgress(value=0.0, description='training...', max=615.0, style=ProgressStyle(description_…




HBox(children=(FloatProgress(value=0.0, description='validation...', max=154.0, style=ProgressStyle(descriptio…


Epoch #17:	train loss: 1.5843	val loss: 1.9516


HBox(children=(FloatProgress(value=0.0, description='training...', max=615.0, style=ProgressStyle(description_…




HBox(children=(FloatProgress(value=0.0, description='validation...', max=154.0, style=ProgressStyle(descriptio…


Epoch #18:	train loss: 1.5448	val loss: 1.9909


HBox(children=(FloatProgress(value=0.0, description='training...', max=615.0, style=ProgressStyle(description_…




HBox(children=(FloatProgress(value=0.0, description='validation...', max=154.0, style=ProgressStyle(descriptio…


Epoch #19:	train loss:   1.5	val loss: 1.9769


HBox(children=(FloatProgress(value=0.0, description='training...', max=615.0, style=ProgressStyle(description_…




HBox(children=(FloatProgress(value=0.0, description='validation...', max=154.0, style=ProgressStyle(descriptio…


Epoch #20:	train loss: 1.4416	val loss: 1.9211


HBox(children=(FloatProgress(value=0.0, description='training...', max=615.0, style=ProgressStyle(description_…




HBox(children=(FloatProgress(value=0.0, description='validation...', max=154.0, style=ProgressStyle(descriptio…


Epoch #21:	train loss: 1.404	val loss: 1.8653


HBox(children=(FloatProgress(value=0.0, description='training...', max=615.0, style=ProgressStyle(description_…




HBox(children=(FloatProgress(value=0.0, description='validation...', max=154.0, style=ProgressStyle(descriptio…


Epoch #22:	train loss: 1.3618	val loss: 1.8885


HBox(children=(FloatProgress(value=0.0, description='training...', max=615.0, style=ProgressStyle(description_…




HBox(children=(FloatProgress(value=0.0, description='validation...', max=154.0, style=ProgressStyle(descriptio…


Epoch #23:	train loss: 1.3297	val loss: 1.867


HBox(children=(FloatProgress(value=0.0, description='training...', max=615.0, style=ProgressStyle(description_…




HBox(children=(FloatProgress(value=0.0, description='validation...', max=154.0, style=ProgressStyle(descriptio…


Epoch #24:	train loss: 1.303	val loss: 1.847


HBox(children=(FloatProgress(value=0.0, description='training...', max=615.0, style=ProgressStyle(description_…




HBox(children=(FloatProgress(value=0.0, description='validation...', max=154.0, style=ProgressStyle(descriptio…


Epoch #25:	train loss: 1.2696	val loss: 1.8548


HBox(children=(FloatProgress(value=0.0, description='training...', max=615.0, style=ProgressStyle(description_…




HBox(children=(FloatProgress(value=0.0, description='validation...', max=154.0, style=ProgressStyle(descriptio…


Epoch #26:	train loss: 1.2395	val loss: 1.8776


HBox(children=(FloatProgress(value=0.0, description='training...', max=615.0, style=ProgressStyle(description_…




HBox(children=(FloatProgress(value=0.0, description='validation...', max=154.0, style=ProgressStyle(descriptio…


Epoch #27:	train loss: 1.2196	val loss: 1.8529


HBox(children=(FloatProgress(value=0.0, description='training...', max=615.0, style=ProgressStyle(description_…




HBox(children=(FloatProgress(value=0.0, description='validation...', max=154.0, style=ProgressStyle(descriptio…


Epoch #28:	train loss: 1.1985	val loss: 1.8697


HBox(children=(FloatProgress(value=0.0, description='training...', max=615.0, style=ProgressStyle(description_…




HBox(children=(FloatProgress(value=0.0, description='validation...', max=154.0, style=ProgressStyle(descriptio…


Epoch #29:	train loss: 1.1713	val loss: 1.8086


HBox(children=(FloatProgress(value=0.0, description='training...', max=615.0, style=ProgressStyle(description_…




HBox(children=(FloatProgress(value=0.0, description='validation...', max=154.0, style=ProgressStyle(descriptio…


Epoch #30:	train loss: 1.1531	val loss: 1.863


HBox(children=(FloatProgress(value=0.0, description='training...', max=615.0, style=ProgressStyle(description_…




HBox(children=(FloatProgress(value=0.0, description='validation...', max=154.0, style=ProgressStyle(descriptio…


Epoch #31:	train loss: 1.1302	val loss: 1.8381


HBox(children=(FloatProgress(value=0.0, description='training...', max=615.0, style=ProgressStyle(description_…




HBox(children=(FloatProgress(value=0.0, description='validation...', max=154.0, style=ProgressStyle(descriptio…


Epoch #32:	train loss: 1.1166	val loss: 1.8444


HBox(children=(FloatProgress(value=0.0, description='training...', max=615.0, style=ProgressStyle(description_…




HBox(children=(FloatProgress(value=0.0, description='validation...', max=154.0, style=ProgressStyle(descriptio…


Epoch #33:	train loss: 1.0934	val loss: 1.8479


HBox(children=(FloatProgress(value=0.0, description='training...', max=615.0, style=ProgressStyle(description_…




HBox(children=(FloatProgress(value=0.0, description='validation...', max=154.0, style=ProgressStyle(descriptio…


Epoch #34:	train loss: 1.0829	val loss: 1.8235


HBox(children=(FloatProgress(value=0.0, description='training...', max=615.0, style=ProgressStyle(description_…




HBox(children=(FloatProgress(value=0.0, description='validation...', max=154.0, style=ProgressStyle(descriptio…


Epoch #35:	train loss: 1.0568	val loss: 1.8077


HBox(children=(FloatProgress(value=0.0, description='training...', max=615.0, style=ProgressStyle(description_…




HBox(children=(FloatProgress(value=0.0, description='validation...', max=154.0, style=ProgressStyle(descriptio…


Epoch #36:	train loss: 1.0403	val loss: 1.8673


HBox(children=(FloatProgress(value=0.0, description='training...', max=615.0, style=ProgressStyle(description_…




HBox(children=(FloatProgress(value=0.0, description='validation...', max=154.0, style=ProgressStyle(descriptio…


Epoch #37:	train loss: 1.0208	val loss: 1.8585


HBox(children=(FloatProgress(value=0.0, description='training...', max=615.0, style=ProgressStyle(description_…




HBox(children=(FloatProgress(value=0.0, description='validation...', max=154.0, style=ProgressStyle(descriptio…


Epoch #38:	train loss: 1.0034	val loss: 1.8093


HBox(children=(FloatProgress(value=0.0, description='training...', max=615.0, style=ProgressStyle(description_…

KeyboardInterrupt: 

# Prediction and result

In [35]:
# Location of the unlabelled test data, relative to the notebook.
# Built with os.path.join so the separator matches the host OS: on Windows
# this is still '.\data\test', elsewhere it becomes './data/test'.
TEST_DATA_PATH = os.path.join('.', 'data', 'test')

In [36]:
# Test data, passed through the same preprocessing pipeline as training.
test_dataset = ThousandLandmarksDataset(
    TEST_DATA_PATH,
    train_transforms,
    split='test',
)

In [37]:

# Batch size used for inference.
TEST_BATCH_SIZE = 512

# Test batches in fixed order, keeping the final partial batch.
test_dataloader = DataLoader(
    test_dataset,
    batch_size=TEST_BATCH_SIZE,
    shuffle=False,
    drop_last=False,
    num_workers=8,
    pin_memory=True,
)

# Restore the best checkpoint written by the training loop.
best_checkpoint_path = '{}_best.pth'.format(MODEL_NAME)
with open(best_checkpoint_path, 'rb') as fp:
    best_state_dict = torch.load(fp, map_location="cpu")
model.load_state_dict(best_state_dict)

# Run inference and keep the raw predictions next to the image names.
test_predictions = predict(model, test_dataloader, device)
predictions_path = '{}_test_predictions.pkl'.format(MODEL_NAME)
with open(predictions_path, 'wb') as fp:
    payload = {'image_names': test_dataset.image_names, 'landmarks': test_predictions}
    pickle.dump(payload, fp)

# Write the submission CSV for the test set.
submission_path = '{}_submit.csv'.format(MODEL_NAME)
create_submission(TEST_DATA_PATH, test_predictions, submission_path)

HBox(children=(FloatProgress(value=0.0, description='test prediction...', max=195.0, style=ProgressStyle(descr…


