In [1]:
from PIL import Image, ImageDraw
import numpy as np
import pandas as pd
from time import time
import sys, os
import math

In [2]:
# Root directory of the training split; the annotation CSV is read from here.
DIR_TRAIN = './data/train'
# Directory with the training images; passed to LandmarksDataset as the image root.
DIR_TRAIN_IMAGES = "./data/train/images"

In [3]:
# face_boxes = pd.read_csv(os.path.join(DIR_TRAIN, 'clear_coord_train.csv'))
# landmarks = pd.read_csv(os.path.join(DIR_TRAIN, 'landmarks.csv'))
# df = pd.merge(landmarks, face_boxes[['filename', 'top_x', 'top_y', 'bottom_x', 'bottom_y']], left_on='filename', right_on='filename' )
# df.to_csv(os.path.join(DIR_TRAIN, 'landmarks_train.csv'), index=False)

In [4]:
# Load the merged annotation table (landmark coordinates plus face bounding boxes).
landmarks_csv_path = os.path.join(DIR_TRAIN, 'landmarks_train.csv')
df_landmarks = pd.read_csv(landmarks_csv_path)

In [5]:
# Preview the first rows to sanity-check the columns: filename, Point_M{i}_X / Point_M{i}_Y pairs and the bbox corners.
df_landmarks.head()

Unnamed: 0,filename,Point_M0_X,Point_M0_Y,Point_M1_X,Point_M1_Y,Point_M2_X,Point_M2_Y,Point_M3_X,Point_M3_Y,Point_M4_X,...,Point_M191_X,Point_M191_Y,Point_M192_X,Point_M192_Y,Point_M193_X,Point_M193_Y,top_x,top_y,bottom_x,bottom_y
0,100032540_1.jpg,565,758,564,781,563,805,566,829,569,...,724,586,741,592,760,595,562.98189,363.666817,1093.646972,1062.599962
1,1012675629_1.jpg,428,425,429,442,430,460,432,480,434,...,587,324,598,328,611,330,439.957011,186.500787,813.354643,714.042957
2,118736691_1.jpg,497,643,502,677,507,713,519,750,527,...,968,430,1007,434,1043,422,551.588949,73.517666,1309.302816,1116.513968
3,2365877276_1.jpg,26,328,30,346,35,366,41,384,47,...,183,213,202,210,219,204,-69.420426,37.650558,361.58844,607.614095
4,2366695522_1.jpg,87,778,89,811,92,844,100,879,110,...,341,537,372,542,403,537,-51.343222,140.292166,839.614193,1271.382138


In [6]:
import os

import torch

import pandas as pd
import numpy as np
import cv2
import gc
import time
import random

from torch.utils import data
from torchvision import transforms
import torch.nn as nn
import torch.optim as optim
import torchvision.models as models
from torch.optim import lr_scheduler
import tqdm
from torch.nn import functional as fnn
from torch.utils import data
from torchvision import transforms

In [7]:
# Global experiment configuration.
SEED = 42            # seed shared by every random number generator seeded below

TRAIN_SIZE = 0.7     # NOTE(review): not referenced in the visible cells; the split cell uses its own fraction
NUM_PTS = 194        # landmarks per face (columns Point_M0 .. Point_M193)
CROP_SIZE = 256      # side of the square face crop produced by the dataset, in pixels
BATCH_SIZE = 4       # samples per mini-batch for both data loaders


# Seed every RNG the notebook relies on. torch must be seeded too: random_split,
# DataLoader(shuffle=True) and weight initialisation all draw from torch's
# generator, so without this the train/validation split changes on every
# kernel restart. It is deliberately not the last statement of the cell, because
# torch.manual_seed returns a Generator object that would be echoed as output.
torch.manual_seed(SEED)
random.seed(SEED)
os.environ["PYTHONHASHSEED"] = str(SEED)
np.random.seed(SEED)

In [8]:
# Run on the GPU when torch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

In [9]:
# Environment report: library versions and GPU visibility.
print(torch.__version__)
print(torch.version.cuda)
print(torch.backends.cudnn.version())
print(torch.cuda.is_available())
print(torch.cuda.device_count())
# The memory calls below address the 'cuda' device explicitly, so they are only
# issued when a GPU is present. This keeps the cell runnable on CPU-only hosts,
# which the device selection cell explicitly allows.
if torch.cuda.is_available():
    print(torch.cuda.max_memory_allocated(device='cuda'))
    # empty_cache() returns None; call it for its effect instead of printing it.
    torch.cuda.empty_cache()

1.1.0
10.0
7401
True
1
0
None


In [10]:
class LandmarksDataset(data.Dataset):
    """Face-crop dataset for landmark regression.

    Every row of ``dataframe`` describes one image: ``filename``, the face box
    corners ``top_x`` / ``top_y`` / ``bottom_x`` / ``bottom_y`` and ``NUM_PTS``
    landmark column pairs ``Point_M{i}_X`` / ``Point_M{i}_Y``.

    The face box is enlarged by ``append_size`` of its width/height on every
    side. Each image is cropped to that box, resized so that its shorter side
    equals ``CROP_SIZE`` and then centre-cropped to ``CROP_SIZE x CROP_SIZE``.
    Landmarks are mapped into the coordinate frame of the final crop and keep
    their sub-pixel precision.

    Args:
        root: directory containing the image files.
        dataframe: annotation table; it is copied, the caller's frame is not modified.
        transforms: optional callable applied to the sample dict before it is returned.
        CROP_SIZE: side of the square output crop, in pixels.
        NUM_PTS: number of landmarks per face.

    Each sample is a dict with the keys ``file_name``, ``image``, ``landmarks``
    (flat float32 tensor ``x0, y0, x1, y1, ...``), ``crop_margin_x``,
    ``crop_margin_y``, ``scale_coef``, ``top_x`` and ``top_y``; the last five
    describe the geometric mapping from the original image to the crop.
    """

    def __init__(self, root, dataframe, transforms = None, CROP_SIZE = CROP_SIZE, NUM_PTS = NUM_PTS):
        super(LandmarksDataset, self).__init__()
        self.root = root
        # Re-index positionally so __getitem__(idx) also works for frames that
        # were filtered or otherwise do not carry a default 0..n-1 index.
        self.df = dataframe.copy().reset_index(drop=True)
        self.transforms = transforms
        self.CROP_SIZE = CROP_SIZE
        self.NUM_PTS = NUM_PTS
        self.append_size = 1/4

        # Enlarge the face box to keep some context around the face.
        self.df['width_bbox'] = abs(self.df['bottom_x'] - self.df['top_x'])
        self.df['height_bbox'] = abs(self.df['bottom_y'] - self.df['top_y'])
        self.df['top_x']  -= self.append_size * self.df['width_bbox']
        self.df['top_y']  -= self.append_size * self.df['height_bbox']
        self.df['bottom_x']  += self.append_size * self.df['width_bbox']
        self.df['bottom_y']  += self.append_size * self.df['height_bbox']
        self.df['width_bbox'] = abs(self.df['bottom_x'] - self.df['top_x'])
        self.df['height_bbox'] = abs(self.df['bottom_y'] - self.df['top_y'])

        # Express the landmarks relative to the top-left corner of the enlarged box.
        for id_point in range(self.NUM_PTS):
            self.df[f'Point_M{id_point}_X'] -= self.df['top_x']
            self.df[f'Point_M{id_point}_Y'] -= self.df['top_y']

        # Select the landmark columns by name, interleaved as x0, y0, x1, y1, ...
        # Dropping "everything else" would silently pick up any unrelated column
        # (e.g. a saved index) and break the (-1, 2) reshape in __getitem__.
        landmark_columns = [f'Point_M{id_point}_{axis}'
                            for id_point in range(self.NUM_PTS)
                            for axis in ('X', 'Y')]
        self.df_landmarks = self.df[landmark_columns]

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        idx = int(idx)  # also accept numpy integers and 0-dim tensors
        row_bbox = self.df.iloc[idx]
        row_landmarks = self.df_landmarks.iloc[idx].values.astype(np.float64)

        img_file = os.path.join(self.root, row_bbox['filename'])

        # Crop the (enlarged) face box; the context manager closes the file
        # handle as soon as the pixels have been read.
        bbox = [row_bbox['top_x'], row_bbox['top_y'], row_bbox['bottom_x'], row_bbox['bottom_y']]
        with Image.open(img_file) as full_image:
            img = full_image.crop(bbox)

        # Resize so that the shorter side equals CROP_SIZE, keeping the aspect
        # ratio. The shorter side is set explicitly: int(w * f) can come out one
        # pixel short because of floating-point error, which would pad the
        # centre crop below with a black border.
        w, h = img.size
        if h > w:
            f = self.CROP_SIZE / w
            new_size = (self.CROP_SIZE, max(self.CROP_SIZE, int(h * f)))
        else:
            f = self.CROP_SIZE / h
            new_size = (max(self.CROP_SIZE, int(w * f)), self.CROP_SIZE)
        img = img.resize(new_size)
        row_landmarks = row_landmarks * f

        # Centre crop to CROP_SIZE x CROP_SIZE and shift the landmarks accordingly.
        w, h = img.size
        margin_h = (h - self.CROP_SIZE) // 2
        margin_w = (w - self.CROP_SIZE) // 2
        img = img.crop([margin_w, margin_h, self.CROP_SIZE+margin_w, self.CROP_SIZE+margin_h])
        # Stay in floating point: truncating to integers here would throw away
        # sub-pixel precision of the regression targets for no benefit, since
        # they are returned as float32 anyway.
        row_landmarks = (row_landmarks.reshape(-1, 2) - (margin_w, margin_h)).reshape(-1)

        sample = {"file_name": row_bbox['filename'],
                  "image": img,
                  "landmarks": torch.from_numpy(row_landmarks.astype(np.float32)),
                  "crop_margin_x": margin_w,
                  "crop_margin_y": margin_h,
                  "scale_coef": f,
                  "top_x": row_bbox['top_x'],
                  "top_y": row_bbox['top_y']}
        if self.transforms is not None:
            sample = self.transforms(sample)

        return sample

In [11]:
class TransformByKeys(object):
    """Apply ``transform`` to selected entries of a sample, in place.

    Args:
        transform: callable taking one value and returning its replacement.
        names: iterable of keys whose values should be transformed; keys that
            are absent from a given sample are skipped.
    """

    def __init__(self, transform, names):
        self.transform = transform
        self.names = set(names)

    def __call__(self, sample):
        """Transform the selected entries of ``sample`` and return the same object."""
        for key in self.names:
            if key not in sample:
                continue
            sample[key] = self.transform(sample[key])
        return sample

In [12]:
def train(model, loader, loss_fn, optimizer, device):
    """Run one training epoch and return the mean batch loss.

    Args:
        model: network mapping an image batch to flattened landmark predictions.
        loader: sized iterable of batches; each batch is a mapping holding the
            tensors "image" and "landmarks".
        loss_fn: callable ``loss_fn(pred, target)`` returning a scalar tensor.
        optimizer: optimizer bound to the parameters of ``model``.
        device: device on which the forward and backward passes run.

    Returns:
        Mean of the per-batch loss values (``np.float64``).
    """
    model.train()
    train_loss = []

    for batch in tqdm.tqdm(loader, total=len(loader), desc="training..."):
        images = batch["image"].to(device)  # B x C x CROP_SIZE x CROP_SIZE
        # Move the small target tensor to the model's device instead of copying
        # the predictions back to the host: this avoids a device->host
        # synchronisation on every step and a backward pass through that copy.
        landmarks = batch["landmarks"].to(device)  # B x (2 * NUM_PTS)

        pred_landmarks = model(images)  # B x (2 * NUM_PTS)
        loss = loss_fn(pred_landmarks, landmarks)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # .item() is read after the update so the only host sync happens once per step.
        train_loss.append(loss.item())

    return np.mean(train_loss, dtype=np.float64)




def validate(model, loader, loss_fn, device):
    """Evaluate the model on ``loader`` and return the mean batch loss.

    Args:
        model: network mapping an image batch to flattened landmark predictions.
        loader: sized iterable of batches; each batch is a mapping holding the
            tensors "image" and "landmarks".
        loss_fn: callable ``loss_fn(pred, target)`` returning a scalar tensor.
        device: device on which the forward pass runs.

    Returns:
        Mean of the per-batch loss values (``np.float64``).
    """
    model.eval()
    val_loss = []
    # Nothing in validation needs gradients, so the whole pass (forward and
    # loss) runs under no_grad. The loss is computed on the model's device to
    # avoid copying the predictions back to the host for every batch.
    with torch.no_grad():
        for batch in tqdm.tqdm(loader, total=len(loader), desc="validation..."):
            images = batch["image"].to(device)
            landmarks = batch["landmarks"].to(device)

            pred_landmarks = model(images)
            loss = loss_fn(pred_landmarks, landmarks)
            val_loss.append(loss.item())

    return np.mean(val_loss, dtype=np.float64)

In [13]:
# train_transforms = transforms.Compose([
#         TransformByKeys(transforms.ToTensor(), ("image",)),
#         TransformByKeys(transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]), ("image",)),
#     ])


# test_transforms = transforms.Compose([
#         TransformByKeys(transforms.ToTensor(), ("image",)),
#         TransformByKeys(transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]), ("image",)),
#     ])

In [14]:
def _grayscale_pipeline():
    """Build the preprocessing shared by both splits: grayscale -> tensor -> normalise.

    Every step is wrapped in TransformByKeys so that it only touches the
    "image" entry of a sample dict.
    """
    image_only = ("image",)
    steps = [
        transforms.Grayscale(num_output_channels=1),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.5], std=[0.225]),
    ]
    return transforms.Compose([TransformByKeys(step, image_only) for step in steps])


train_transforms = _grayscale_pipeline()
test_transforms = _grayscale_pipeline()

In [15]:
# Wrap the annotation table in the cropping dataset; every sample goes through train_transforms.
# NOTE(review): the validation subset is split from this same object, so it shares these
# transforms; test_transforms is not used in the visible cells.
dataset = LandmarksDataset(DIR_TRAIN_IMAGES, df_landmarks, transforms = train_transforms)

In [16]:
# Fraction of rows assigned to the training subset; the remainder is used for validation.
# NOTE(review): TRAIN_SIZE from the config cell is not used here - confirm which value is intended.
p = 0.85
# len() instead of "row_count, _ = df_landmarks.shape": assigning to "_" in a
# notebook overrides IPython's last-output variable, which then stops updating.
row_count = len(df_landmarks)
len_train = int(row_count*p)
len_valid = row_count - len_train
print(len_train, len_valid)

1699 300


In [17]:
# Randomly partition the dataset into training and validation subsets of the sizes computed above.
train_set, valid_set = torch.utils.data.random_split(dataset, [len_train, len_valid])

In [18]:
# for sample in train_set:
#     break
# sample['landmarks']

In [19]:
# Options common to both loaders: batch size, in-process loading, pinned host memory.
loader_options = dict(batch_size=BATCH_SIZE, num_workers=0, pin_memory=True)

# Training batches are reshuffled every epoch and an incomplete last batch is dropped.
train_dataloader = data.DataLoader(train_set, shuffle=True, drop_last=True, **loader_options)
# Validation keeps a fixed order and uses every sample.
val_dataloader = data.DataLoader(valid_set, shuffle=False, drop_last=False, **loader_options)

In [20]:
# Force a garbage-collection pass before the model is built; the displayed value is the number of unreachable objects found.
gc.collect()

100

In [22]:
from models import *

In [23]:
# One network output per landmark coordinate (x and y for each of the NUM_PTS points).
# NOTE(review): resnet50 is presumably provided by the star import of the local `models`
# module - confirm there that it takes a single params dict.
model_params = {'num_classes': 2 * NUM_PTS}
model = resnet50(model_params)

# model.head = nn.Linear(model.head.in_features, 2 * NUM_PTS, bias=True)
# Move the parameters to the selected device; as the cell's last expression this also displays the module structure.
model.to(device)

ResNet(
  (conv1): Conv2d(1, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace)
  (mish): BetaMish()
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace)
      (mish): BetaMish()
      (downsample): Sequential(
        (0): Conv2d(64, 

In [24]:
# model_params = {'in_chans':1, 'num_classes': 2 * NUM_PTS, 'remove_aa_jit': True}
# model = TResnetM(model_params)

# # model.head = nn.Linear(model.head.in_features, 2 * NUM_PTS, bias=True)
# model.to(device)

In [25]:
LR = 1e-3  # learning rate handed to the optimizer
EPOCHS = 50  # number of train/validate rounds in the training loop

In [26]:
# Note: torch.log and math.log are both natural (base-e) logarithms.
class AdaptiveWingLoss(nn.Module):
    """Adaptive Wing loss (Wang et al., 2019), averaged over all elements.

    With ``d = |y - y_hat|`` the element-wise loss is::

        omega * ln(1 + (d / epsilon) ** (alpha - y))    if d <  theta
        A * d - C                                       otherwise

    ``A`` and ``C`` depend on ``y`` and are chosen so that the two pieces join
    continuously and with matching slope at ``d == theta``.

    Args:
        omega: overall scale of the loss.
        theta: threshold between the non-linear and the linear part.
        epsilon: scale of the error inside the power term.
        alpha: base exponent; the effective exponent is ``alpha - y``.
    """

    def __init__(self, omega=14, theta=0.5, epsilon=1, alpha=2.1):
        super(AdaptiveWingLoss, self).__init__()
        self.omega = omega
        self.theta = theta
        self.epsilon = epsilon
        self.alpha = alpha

    def forward(self, pred, target):
        '''
        :param pred: BxNxHxH
        :param target: BxNxHxH
        :return: scalar tensor with the mean loss over all elements
        '''
        y = target
        y_hat = pred
        delta_y = (y - y_hat).abs()
        if delta_y.numel() == 0:
            # Nothing to average; return a zero that is still attached to the graph
            # instead of failing with a division by zero below.
            return delta_y.sum()

        # Two explicit comparisons rather than a mask negation, which is not a
        # logical NOT on the byte masks produced by older torch releases.
        small = delta_y < self.theta
        large = delta_y >= self.theta
        delta_y1 = delta_y[small]
        delta_y2 = delta_y[large]
        y1 = y[small]
        y2 = y[large]

        # Non-linear part. The error is scaled by epsilon (not omega): this is the
        # published definition and the only choice for which the loss is
        # continuous at d == theta, given how A and C are built below.
        loss1 = self.omega * torch.log(1 + torch.pow(delta_y1 / self.epsilon, self.alpha - y1))

        # Linear part: A is the slope of the non-linear part at d == theta and
        # C the offset that makes both parts meet there.
        ratio = self.theta / self.epsilon
        A = self.omega * (1 / (1 + torch.pow(ratio, self.alpha - y2))) * (self.alpha - y2) * (
            torch.pow(ratio, self.alpha - y2 - 1)) * (1 / self.epsilon)
        C = self.theta * A - self.omega * torch.log(1 + torch.pow(ratio, self.alpha - y2))
        loss2 = A * delta_y2 - C

        return (loss1.sum() + loss2.sum()) / (len(loss1) + len(loss2))

In [27]:
class WingLoss(nn.Module):
    """Wing loss, averaged over all elements.

    With ``d = |target - pred|`` the element-wise loss is
    ``omega * ln(1 + d / epsilon)`` where ``d < omega`` and ``d - C`` elsewhere,
    with ``C = omega - omega * ln(1 + omega / epsilon)`` so that both pieces
    meet at ``d == omega``.

    Args:
        omega: error magnitude at which the loss switches from logarithmic to linear.
        epsilon: curvature of the logarithmic part.
    """

    def __init__(self, omega=10, epsilon=2):
        super(WingLoss, self).__init__()
        self.omega = omega
        self.epsilon = epsilon

    def forward(self, pred, target):
        """Return the mean wing loss between ``pred`` and ``target`` as a scalar tensor."""
        abs_err = (target - pred).abs()

        # Constant offset of the linear branch.
        offset = self.omega - self.omega * math.log(1 + self.omega / self.epsilon)

        inner = abs_err[abs_err < self.omega]
        outer = abs_err[abs_err >= self.omega]

        log_part = self.omega * torch.log(1 + inner / self.epsilon)
        linear_part = outer - offset

        n_terms = len(log_part) + len(linear_part)
        return (log_part.sum() + linear_part.sum()) / n_terms

In [28]:
# Adam with the AMSGrad variant enabled, over every model parameter.
optimizer = optim.Adam(model.parameters(), lr=LR, amsgrad=True)
# loss_fn = fnn.mse_loss
# Wing loss with its default omega/epsilon is the training criterion.
loss_fn = WingLoss()
# scheduler = lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.1)

In [29]:
# with open(f"model_resnet34_face256_wingloss.pth", "rb") as fp:
#     best_state_dict = torch.load(fp, map_location="cpu")
#     model.load_state_dict(best_state_dict)

In [30]:
# Train for EPOCHS rounds; the weights are written to disk every time the
# validation loss reaches a new minimum.
checkpoint_path = "model_resnet50_face256_wingloss.pth"
log_template = "Epoch #{:2}:\ttrain loss: {:10.7}\tval loss: {:10.7}"

print("Ready for training...")
best_val_loss = np.inf
for epoch in range(EPOCHS):
    train_loss = train(model, train_dataloader, loss_fn, optimizer, device=device)
    val_loss = validate(model, val_dataloader, loss_fn, device=device)
#     scheduler.step()

    print(log_template.format(epoch, train_loss, val_loss))

    improved = val_loss < best_val_loss
    if improved:
        best_val_loss = val_loss
        with open(checkpoint_path, "wb") as fp:
            torch.save(model.state_dict(), fp)

Ready for training...


training...:   0%|▏                                                                    | 1/424 [00:05<38:18,  5.43s/it]


RuntimeError: CUDA out of memory. Tried to allocate 20.00 MiB (GPU 0; 3.00 GiB total capacity; 1.64 GiB already allocated; 11.19 MiB free; 9.86 MiB cached)