## Импорт библиотек

In [1]:
import pretrainedmodels
import os

import torch

import pandas as pd
import numpy as np
import cv2
import gc
import time
import random

from torch.utils import data
from torchvision import transforms
import torch.nn as nn
import torch.optim as optim
import torchvision.models as models
from torch.optim import lr_scheduler
import tqdm
from torch.nn import functional as fnn
from torch.utils import data
from torchvision import transforms

import pickle

from utils import restore_landmarks_batch, restore_landmarks


In [2]:
from models import *

## Параметры

In [3]:
# Global experiment configuration.
SEED = 42

TRAIN_SIZE = 0.7   # fraction of landmarks.csv rows that go to the train split
NUM_PTS = 194      # landmarks per face; the model emits 2 * NUM_PTS values
CROP_SIZE = 220    # side of the square crop fed to the network, in pixels
BATCH_SIZE = 4


# Seed every RNG in play.  torch has to be seeded as well: DataLoader
# shuffling draws from torch's generator, so without this the batch order
# differs between runs even though SEED is fixed.
random.seed(SEED)
# NOTE: setting PYTHONHASHSEED here only affects child processes; the hash
# seed of the already running interpreter cannot be changed.
os.environ["PYTHONHASHSEED"] = str(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)

In [4]:
# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

In [5]:
# Dataset locations.  TEST_PATH and TRAIN_PATH are the roots handed to
# LandmarksDataset (which appends "images" and the CSV name itself).
TEST_PATH = "./data/test/" 
TRAIN_PATH = "./data/train/"
# NOTE(review): SUBMISSION_PATH and LANDMARKS are not referenced anywhere in
# the visible code; SUBMISSION_PATH also points at the train folder, which
# looks unintended -- confirm before relying on either.
SUBMISSION_PATH = "./data/train/"
LANDMARKS = "./data/train/landmarks.csv"

In [6]:
# Header row of the submission CSV: the file name followed by an X and a Y
# column for each of the 30 submitted points, Point_M0 .. Point_M29.
SUBMISSION_HEADER = "file_name," + ",".join(
    f"Point_M{idx}_{axis}" for idx in range(30) for axis in ("X", "Y")
) + "\n"


In [7]:
# Environment report: PyTorch / CUDA / cuDNN versions and GPU visibility.
print(torch.__version__)
print(torch.version.cuda)
print(torch.backends.cudnn.version())
print(torch.cuda.is_available())
print(torch.cuda.device_count())
# Peak GPU memory allocated so far, queried on the 'cuda' device.
# NOTE(review): this call targets 'cuda' unconditionally -- confirm it is safe
# on CPU-only hosts for the torch version in use.
print(torch.cuda.max_memory_allocated(device='cuda'))
# empty_cache() is called for its side effect; it returns nothing, so this
# prints "None" (as in the recorded output).
print(torch.cuda.empty_cache())

1.1.0
10.0
7401
True
1
0
None


In [8]:
class ScaleMinSideToSize(object):
    """Resize an image uniformly so that its shorter side matches ``size``.

    The transform works on a sample dict.  It replaces
    ``sample[elem_name]`` with the resized image, stores the applied factor
    under ``"scale_coef"`` and, when ``"landmarks"`` is present, rescales the
    flat (x0, y0, x1, y1, ...) landmark tensor by the same factor.
    """

    def __init__(self, size=(CROP_SIZE, CROP_SIZE), elem_name='image'):
        self.elem_name = elem_name
        # Kept as a float tensor, so the scale factor below is a 0-dim tensor.
        self.size = torch.tensor(size, dtype=torch.float)

    def __call__(self, sample):
        image = sample[self.elem_name]
        height, width, _channels = image.shape

        # The shorter side decides the factor: size[0] is matched against the
        # width for portrait images, size[1] against the height otherwise.
        scale = self.size[0] / width if height > width else self.size[1] / height

        sample[self.elem_name] = cv2.resize(image, None, fx=scale, fy=scale, interpolation=cv2.INTER_AREA)
        sample["scale_coef"] = scale

        if 'landmarks' not in sample:
            return sample

        points = sample['landmarks'].reshape(-1, 2).float() * scale
        sample['landmarks'] = points.reshape(-1)
        return sample


class CropCenter(object):
    """Cut a centered ``size`` x ``size`` window out of an image.

    The transform works on a sample dict.  It replaces
    ``sample[elem_name]`` with the crop, records the removed left/top margins
    under ``"crop_margin_x"`` / ``"crop_margin_y"`` and, when ``"landmarks"``
    is present, shifts the flat (x0, y0, x1, y1, ...) landmark tensor into the
    crop's coordinate frame.

    The incoming landmark tensor is never modified: the shifted coordinates
    are written to a new tensor, so a tensor that is a view into shared
    storage (e.g. a dataset's landmark table) stays intact.
    """

    def __init__(self, size=128, elem_name='image'):
        self.size = size
        self.elem_name = elem_name

    def __call__(self, sample):
        img = sample[self.elem_name]
        h, w, _ = img.shape
        # NOTE: margins go negative when the image is smaller than ``size``;
        # callers are expected to rescale first.
        margin_h = (h - self.size) // 2
        margin_w = (w - self.size) // 2
        sample[self.elem_name] = img[margin_h:margin_h + self.size, margin_w:margin_w + self.size]
        sample["crop_margin_x"] = margin_w
        sample["crop_margin_y"] = margin_h

        if 'landmarks' in sample:
            landmarks = sample['landmarks'].reshape(-1, 2)
            offset = torch.tensor((margin_w, margin_h), dtype=landmarks.dtype)
            # Out-of-place subtraction: reshape() may return a view, so "-="
            # would silently rewrite the caller's tensor.
            sample['landmarks'] = (landmarks - offset[None, :]).reshape(-1)

        return sample


class TransformByKeys(object):
    """Apply ``transform`` to selected entries of a sample dict.

    Only keys listed in ``names`` that are actually present in the sample are
    touched; the sample is updated in place and returned.
    """

    def __init__(self, transform, names):
        self.names = set(names)
        self.transform = transform

    def __call__(self, sample):
        present_keys = (key for key in self.names if key in sample)
        for key in present_keys:
            sample[key] = self.transform(sample[key])
        return sample


In [9]:
class LandmarksDataset(data.Dataset):
    """Images plus (for train/val) their landmark coordinates.

    Args:
        root: dataset folder.  Images are read from ``<root>/images``; the
            index file is ``<root>/test_points.csv`` when ``split`` is
            ``"test"`` and ``<root>/landmarks.csv`` otherwise.
        transforms: callable applied to every sample dict, or ``None``.
        split: ``"train"`` keeps the data rows before row
            ``int(TRAIN_SIZE * n_rows)``, ``"val"`` keeps the rows from there
            on; any other value keeps every row and loads no landmarks.

    Each index-file row starts with the image file name; for train/val the
    remaining comma-separated fields are integer coordinates, stored as an
    int16 tensor of shape (n_points, 2) per image.

    Samples are dicts with ``"image"`` (RGB array) and, when landmarks were
    loaded, ``"landmarks"``.
    """

    def __init__(self, root, transforms, split="train"):
        super().__init__()
        self.root = root
        # Strings must be compared by value; "is not" only worked by accident
        # of interning and raises a SyntaxWarning on current Python.
        csv_name = "landmarks.csv" if split != "test" else "test_points.csv"
        landmark_file_name = os.path.join(root, csv_name)
        images_root = os.path.join(root, "images")
        has_landmarks = split in ("train", "val")

        self.image_names = []
        landmarks = []

        # One pass over the file is enough to know both the rows and their count.
        with open(landmark_file_name, "rt") as fp:
            lines = fp.readlines()
        num_lines = len(lines) - 1  # header
        split_row = int(TRAIN_SIZE * num_lines)

        for i, line in tqdm.tqdm(enumerate(lines)):
            if i == 0:
                continue  # skip header
            if split == "train" and i == split_row:
                break  # reached end of train part of data
            elif split == "val" and i < split_row:
                continue  # has not reached start of val part of data
            elements = line.strip().split(",")
            self.image_names.append(os.path.join(images_root, elements[0]))

            if has_landmarks:
                coords = np.array(list(map(np.int16, elements[1:])), dtype=np.int16)
                landmarks.append(coords.reshape(-1, 2))

        if has_landmarks:
            # Build one ndarray first: converting a list of ndarrays straight
            # to a tensor is very slow.
            self.landmarks = torch.as_tensor(np.array(landmarks, dtype=np.int16))
        else:
            self.landmarks = None

        self.transforms = transforms

    def __getitem__(self, idx):
        sample = {}
        if self.landmarks is not None:
            sample["landmarks"] = self.landmarks[idx]

        image_path = self.image_names[idx]
        image = cv2.imread(image_path)
        if image is None:
            # cv2.imread signals failure by returning None; fail with the
            # offending path instead of an opaque cvtColor assertion.
            raise OSError("Failed to read image: {}".format(image_path))
        sample["image"] = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        if self.transforms is not None:
            sample = self.transforms(sample)

        return sample

    def __len__(self):
        return len(self.image_names)


In [10]:

def train(model, loader, loss_fn, optimizer,  device):
    """Run one training epoch and return the mean per-batch loss.

    Each batch is a dict with an ``"image"`` tensor (moved to ``device``) and
    a ``"landmarks"`` tensor.  Predictions are brought back to the CPU, so the
    loss is evaluated there against the CPU-resident targets.  ``loss_fn`` is
    called as ``loss_fn(pred, target, reduction="mean")``.
    """
    model.train()
    batch_losses = []

    progress = tqdm.tqdm(loader, total=len(loader), desc="training...")
    for batch in progress:
        inputs = batch["image"].to(device)  # B x 3 x CROP_SIZE x CROP_SIZE
        targets = batch["landmarks"]  # B x (2 * NUM_PTS)

        outputs = model(inputs).cpu()  # B x (2 * NUM_PTS)
        loss = loss_fn(outputs, targets, reduction="mean")
        batch_losses.append(loss.item())

        # Standard update: clear stale gradients, backprop, step.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    return np.mean(batch_losses, dtype=np.float64)




def validate(model, loader, loss_fn, device):
    """Evaluate the model on ``loader`` and return the mean per-batch loss.

    The model is switched to eval mode and the forward pass runs without
    gradient tracking.  Batches are dicts with ``"image"`` and ``"landmarks"``;
    ``loss_fn`` is called as ``loss_fn(pred, target, reduction="mean")`` on
    CPU tensors.
    """
    model.eval()
    batch_losses = []

    progress = tqdm.tqdm(loader, total=len(loader), desc="validation...")
    for batch in progress:
        inputs = batch["image"].to(device)
        targets = batch["landmarks"]

        with torch.no_grad():
            outputs = model(inputs).cpu()

        batch_loss = loss_fn(outputs, targets, reduction="mean")
        batch_losses.append(batch_loss.item())

    return np.mean(batch_losses, dtype=np.float64)



def predict(model, loader, device):
    """Predict landmarks for every sample served by ``loader``.

    Batches are dicts with ``"image"``, ``"scale_coef"``, ``"crop_margin_x"``
    and ``"crop_margin_y"``.  The raw network output is reshaped to
    B x NUM_PTS x 2 and passed with the scale factors and crop margins to
    ``restore_landmarks_batch`` (presumably mapping the points back to the
    original image frame -- see utils).

    Returns:
        ``np.ndarray`` of shape (len(loader.dataset), NUM_PTS, 2), filled in
        the order the loader yields samples.
    """
    model.eval()
    predictions = np.zeros((len(loader.dataset), NUM_PTS, 2))

    # Track the write position explicitly rather than deriving it from
    # loader.batch_size, which is None when a batch_sampler is used and is
    # wrong whenever batches differ in size.
    offset = 0
    for batch in tqdm.tqdm(loader, total=len(loader), desc="test prediction..."):
        images = batch["image"].to(device)

        with torch.no_grad():
            pred_landmarks = model(images).cpu()
            pred_landmarks = pred_landmarks.numpy().reshape((len(pred_landmarks), NUM_PTS, 2))  # B x NUM_PTS x 2

        fs = batch["scale_coef"].numpy()  # B
        margins_x = batch["crop_margin_x"].numpy()  # B
        margins_y = batch["crop_margin_y"].numpy()  # B
        prediction = restore_landmarks_batch(pred_landmarks, fs, margins_x, margins_y)  # B x NUM_PTS x 2

        batch_len = len(pred_landmarks)
        predictions[offset:offset + batch_len] = prediction
        offset += batch_len

    return predictions

def create_submission(path_to_data, test_predictions, path_to_submission_file):
    """Write the submission CSV for the test predictions.

    Args:
        path_to_data: folder containing ``test_points.csv``.  Its first column
            is the image file name, its second a Python-literal list of the
            point indices to submit for that image.
        test_predictions: array indexed as ``[row, point, xy]``, with rows in
            the same order as ``test_points.csv``.
        path_to_submission_file: output CSV path (overwritten).

    Each output row holds the file name followed by the selected points'
    coordinates truncated to integers, flattened as x0,y0,x1,y1,...
    """
    mapping_path = os.path.join(path_to_data, 'test_points.csv')
    mapping = pd.read_csv(mapping_path, delimiter=',')

    # The context manager guarantees the file is flushed and closed; the
    # previous bare open() never closed it.
    with open(path_to_submission_file, 'w') as wf:
        wf.write(SUBMISSION_HEADER)

        # Positional column access and enumerate() keep this independent of
        # the CSV's column labels and of the DataFrame index.
        rows = zip(mapping.iloc[:, 0], mapping.iloc[:, 1])
        for i, (file_name, raw_indices) in enumerate(rows):
            # NOTE(security): eval() runs arbitrary code found in the CSV.
            # Acceptable only for a trusted file; consider
            # ast.literal_eval for anything else.
            point_index_list = np.array(eval(raw_indices))
            points_for_image = test_predictions[i]
            # np.int was removed from NumPy; the builtin int is the same dtype.
            needed_points = points_for_image[point_index_list].astype(int)
            wf.write(file_name + ',' + ','.join(map(str, needed_points.reshape(-1))) + '\n')


In [11]:
# Preprocessing pipelines.  Both splits currently use the same steps: scale the
# shorter side to CROP_SIZE, take the central CROP_SIZE square, then convert
# the image to a normalized tensor (mean/std values as given below).
train_transforms = transforms.Compose([
    ScaleMinSideToSize(size=(CROP_SIZE, CROP_SIZE)),
    CropCenter(size=CROP_SIZE),
    TransformByKeys(transform=transforms.ToPILImage(), names=("image",)),
    TransformByKeys(transform=transforms.ToTensor(), names=("image",)),
    TransformByKeys(
        transform=transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        names=("image",),
    ),
])


test_transforms = transforms.Compose([
    ScaleMinSideToSize(size=(CROP_SIZE, CROP_SIZE)),
    CropCenter(size=CROP_SIZE),
    TransformByKeys(transform=transforms.ToPILImage(), names=("image",)),
    TransformByKeys(transform=transforms.ToTensor(), names=("image",)),
    TransformByKeys(
        transform=transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        names=("image",),
    ),
])

In [12]:
# Train and validation splits are both carved out of the train folder;
# only the train loader shuffles and drops the incomplete last batch.
# (os.path.join with a single argument returns it unchanged, so the path is
# passed directly.)
train_dataset = LandmarksDataset(TRAIN_PATH, train_transforms, split="train")
train_dataloader = data.DataLoader(
    train_dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=0,
    pin_memory=True,
    drop_last=True,
)
val_dataset = LandmarksDataset(TRAIN_PATH, train_transforms, split="val")
val_dataloader = data.DataLoader(
    val_dataset,
    batch_size=BATCH_SIZE,
    shuffle=False,
    num_workers=0,
    pin_memory=True,
    drop_last=False,
)

968it [00:00, 3342.78it/s]
2001it [00:00, 14857.80it/s]


In [13]:
# Force a full garbage-collection pass; the notebook displays the call's
# return value (the count of unreachable objects found).
gc.collect()

69

In [14]:
import torchvision.models as models

In [15]:
# Build the network with one output per landmark coordinate (2 * NUM_PTS)
# and move it to the selected device.  TResnetM is expected to come from the
# star import of ``models``.
model_params = dict(num_classes=2 * NUM_PTS, remove_aa_jit=True)
model = TResnetM(model_params)

# model.head = nn.Linear(model.head.in_features, 2 * NUM_PTS, bias=True)
model.to(device)

TResNet(
  (body): Sequential(
    (SpaceToDepth): SpaceToDepthModule()
    (conv1): Sequential(
      (0): Conv2d(48, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (1): InPlaceABN(64, eps=1e-05, momentum=0.1, affine=True, activation=leaky_relu[0.01])
    )
    (layer1): Sequential(
      (0): BasicBlock(
        (conv1): Sequential(
          (0): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (1): InPlaceABN(64, eps=1e-05, momentum=0.1, affine=True, activation=leaky_relu[0.001])
        )
        (conv2): Sequential(
          (0): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (1): InPlaceABN(64, eps=1e-05, momentum=0.1, affine=True, activation=identity)
        )
        (relu): ReLU(inplace)
        (se): SEModule(
          (avg_pool): FastGlobalAvgPool2d()
          (fc1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1))
          (relu): ReLU(inplace)
          (fc2): C

In [16]:
# model = models.resnet50(pretrained=True)
# model.fc = nn.Linear(model.fc.in_features, 2 * NUM_PTS, bias=True)
# model.to(device)

In [20]:
# Optimisation schedule: number of passes over the train split and the
# initial Adam learning rate.
EPOCHS = 20
LR = 1e-6

In [17]:
# MSE on the flattened landmark vector, AMSGrad-flavoured Adam, and a step
# schedule that multiplies the learning rate by 0.1 every 3 epochs.
loss_fn = fnn.mse_loss
optimizer = optim.Adam(params=model.parameters(), lr=LR, amsgrad=True)
scheduler = lr_scheduler.StepLR(optimizer=optimizer, step_size=3, gamma=0.1)

In [18]:
# Warm-start from the previously saved checkpoint.  Tensors are mapped to the
# CPU while reading; load_state_dict copies them into the model's parameters.
with open("model_TResnetM.pth", "rb") as fp:
    best_state_dict = torch.load(fp, map_location="cpu")
model.load_state_dict(best_state_dict)

In [21]:
# Main loop: train one epoch, validate, advance the LR schedule, and keep the
# weights of the epoch with the lowest validation loss seen in this run.
print("Ready for training...")
best_val_loss = np.inf
for epoch in range(EPOCHS):
    train_loss = train(model, train_dataloader, loss_fn, optimizer, device=device)
    val_loss = validate(model, val_dataloader, loss_fn, device=device)
    scheduler.step()

    print("Epoch #{:2}:\ttrain loss: {:10.7}\tval loss: {:10.7}".format(epoch, train_loss, val_loss))

    if val_loss < best_val_loss:
        # New best: overwrite the checkpoint on disk.
        best_val_loss = val_loss
        with open("model_TResnetM.pth", "wb") as fp:
            torch.save(model.state_dict(), fp)
    



Ready for training...


training...: 100%|███████████████████████████████████████████████████████████████████| 349/349 [02:56<00:00,  1.98it/s]
validation...: 100%|█████████████████████████████████████████████████████████████████| 151/151 [00:34<00:00,  4.44it/s]


Epoch # 0:	train loss:   192.6374	val loss:   207.2069


training...: 100%|███████████████████████████████████████████████████████████████████| 349/349 [03:21<00:00,  1.73it/s]
validation...: 100%|█████████████████████████████████████████████████████████████████| 151/151 [00:40<00:00,  3.76it/s]


Epoch # 1:	train loss:    191.035	val loss:   208.6915


training...: 100%|███████████████████████████████████████████████████████████████████| 349/349 [03:11<00:00,  1.83it/s]
validation...: 100%|█████████████████████████████████████████████████████████████████| 151/151 [00:34<00:00,  4.43it/s]


Epoch # 2:	train loss:   199.9937	val loss:   210.9045


training...:   3%|█▉                                                                  | 10/349 [00:05<03:13,  1.75it/s]


KeyboardInterrupt: 

In [22]:

# Inference on the test split: build the loader (order preserved, no batch
# dropped), reload the saved checkpoint, then predict.
test_dataset = LandmarksDataset(TEST_PATH, test_transforms, split="test")
test_dataloader = data.DataLoader(
    test_dataset,
    batch_size=BATCH_SIZE,
    shuffle=False,
    num_workers=0,
    pin_memory=True,
    drop_last=False,
)

with open("model_TResnetM.pth", "rb") as fp:
    best_state_dict = torch.load(fp, map_location="cpu")
model.load_state_dict(best_state_dict)

test_predictions = predict(model, test_dataloader, device)


331it [00:00, 232548.51it/s]
test prediction...: 100%|██████████████████████████████████████████████████████████████| 83/83 [00:29<00:00,  2.80it/s]


In [23]:
# Bare expression: shows the array returned by predict() in the notebook.
test_predictions

array([[[ 317.33084106,  377.72793579],
        [ 318.58166504,  389.16964722],
        [ 320.07702637,  401.0145874 ],
        ...,
        [ 398.31900024,  300.82006836],
        [ 407.21337891,  302.56167603],
        [ 416.09152222,  303.44934082]],

       [[ 119.62272644,  550.20263672],
        [ 122.13540649,  572.86401367],
        [ 125.63465118,  596.73095703],
        ...,
        [ 278.41061401,  406.06585693],
        [ 297.35494995,  409.28894043],
        [ 316.40991211,  410.71246338]],

       [[ 675.46734619, 1614.37451172],
        [ 687.48651123, 1704.47644043],
        [ 703.05114746, 1798.50354004],
        ...,
        [1315.46862793, 1026.16235352],
        [1393.34069824, 1039.73693848],
        [1471.80749512, 1045.3236084 ]],

       ...,

       [[ 570.67840576,  662.34161377],
        [ 573.25708008,  688.47094727],
        [ 576.52111816,  715.85992432],
        ...,
        [ 752.57806396,  494.93725586],
        [ 773.01324463,  498.59661865],
        [

In [24]:
# Persist the raw predictions together with the image paths they belong to,
# then write the submission CSV from the same predictions.
with open("test_predictions.pkl", "wb") as fp:
    pickle.dump(
        {
            "image_names": test_dataset.image_names,
            "landmarks": test_predictions,
        },
        fp,
    )

create_submission(TEST_PATH, test_predictions, "submit_TResnetM.csv")