In [1]:
import numpy as np
import pandas as pd
import os
import time
import copy

from PIL import Image
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils import data as data_utils
from torchvision import datasets, models, transforms

from sklearn.model_selection import KFold, train_test_split

import albumentations as A
from albumentations.pytorch import ToTensorV2
import cv2

# Connect your script to Neptune
import neptune
import neptune_config

In [2]:
# Root directory that every data/checkpoint/submission path below is built from
prefix_dir = '.'

# Directory holding the training images. Image file names come from the
# annotation CSV and are joined onto this path by the Dataset class.
train_dir = f'{prefix_dir}/data/cropped_train_imgs'

# Models to choose from [resnet, alexnet, vgg, squeezenet, densenet, inception]
# model_ver is appended to model_name in experiment, checkpoint and submission names.
model_name = 'resnet'
model_ver = '18'

# Size of the model's final layer. The notebook trains with MSELoss on keypoint
# coordinates, so this is the number of regressed values (interleaved x, y),
# not a count of classification classes.
num_classes = 48

# Batch size for training (change depending on how much memory you have)
batch_size = 64

# Upper bound on the number of training epochs
num_epochs = 200

# num_splits: number of K-fold splits; without K-fold, 1/num_splits of the data
#   is held out for validation.
# num_earlystop: training stops after this many consecutive epochs without a
#   validation-loss improvement.
num_splits = 10
num_earlystop = 10

# Width/height every image is resized to before entering the network
input_w = 150
input_h = 150

# Learning rate for the Adam optimizer
learning_rate = 0.01

# Flag for feature extracting. When False, we finetune the whole model,
#   when True we only update the reshaped layer params
feature_extract = False

# True: train one model per K-fold split; False: a single train/validation split
use_kfolds = False

'\ntrain\n049-1-1-03-Z17_C-0000005.jpg\n049-1-1-03-Z17_C-0000007.jpg\n049-1-1-03-Z17_C-0000019.jpg\n049-1-1-03-Z17_C-0000021.jpg\n049-1-1-03-Z17_C-0000023.jpg\n050-1-1-03-Z17_C-0000011.jpg\n050-1-1-03-Z17_C-0000015.jpg\n050-1-1-03-Z17_C-0000029.jpg\n561-1-3-27-Z134_C-0000009.jpg\n\ntest\n729-3-5-36-Z94_A-0000013.jpg\n730-3-5-36-Z94_A-0000011.jpg\n730-3-5-36-Z94_A-0000015.jpg\n730-3-5-36-Z94_C-0000015.jpg\n'

In [3]:
# Connect to the Neptune project; the API token is read from the local
# neptune_config module rather than being written into the notebook.
neptune.init(project_qualified_name='mybirth0407/dacon-motion',
             api_token=neptune_config.token)

# Bump the persistent run counter stored in counter.txt. The number is used in
# the experiment name and in checkpoint/submission file names.
with open(f'{prefix_dir}/counter.txt', 'r+') as f:
    content = f.read().strip()
    counter = int(content) + 1
    f.seek(0)
    f.write(f'{counter}')
    # Without truncating, bytes left over from a longer previous content
    # (padding, leading zeros, extra whitespace) would be glued onto the new
    # number and corrupt the counter on the next read.
    f.truncate()
    print(counter)

# Create experiment
neptune.create_experiment(f'{counter:2d} - {model_name}{model_ver}')

# Record the hyper-parameters of this run.
# NOTE: the early-stop key used to be misspelled 'num_ealrystop'; runs logged
# before this fix carry the old key.
neptune.log_metric('batch_size', batch_size)
neptune.log_metric('num_epochs', num_epochs)
neptune.log_metric('num_splits', num_splits)
neptune.log_metric('num_earlystop', num_earlystop)
neptune.log_metric('input_width', input_w)
neptune.log_metric('input_height', input_h)
neptune.log_metric('learning_rate', learning_rate)

46
https://ui.neptune.ai/mybirth0407/dacon-motion/e/DAC-37


In [4]:
# Load the annotation table for the cropped training images and preview it.
df = pd.read_csv(f'{prefix_dir}/data/cropped_train_df.csv')
df.head()

Unnamed: 0,image,nose_x,nose_y,left_eye_x,left_eye_y,right_eye_x,right_eye_y,left_ear_x,left_ear_y,right_ear_x,...,right_palm_x,right_palm_y,spine2(back)_x,spine2(back)_y,spine1(waist)_x,spine1(waist)_y,left_instep_x,left_instep_y,right_instep_x,right_instep_y
0,001-1-1-01-Z17_A-0000001.jpg,138.389631,44.757881,133.655294,29.820225,151.429507,34.48423,112.117796,38.890539,140.0,...,159.0,35.0,111.48423,155.0,118.51577,214.05473,90.578836,526.718013,155.204067,538.827465
1,001-1-1-01-Z17_A-0000003.jpg,144.850679,34.711494,133.608552,18.59369,150.242111,19.59369,116.422997,25.694815,140.593682,...,156.18738,17.0,121.953248,148.062706,133.766231,202.797029,77.265676,393.062706,141.376234,535.499445
2,001-1-1-01-Z17_A-0000005.jpg,150.475902,34.000008,144.717997,20.757889,161.648412,22.242119,127.039884,26.351571,152.461032,...,167.0,31.0,110.53896,139.05473,118.844144,192.890539,55.437847,505.757889,132.071417,538.749554
3,001-1-1-01-Z17_A-0000007.jpg,148.320047,49.452689,143.907194,32.117804,156.328382,41.913729,122.844144,28.913737,148.164191,...,163.406318,60.46104,88.937294,146.109462,96.375124,195.624866,107.305177,517.233767,265.516499,287.389997
4,001-1-1-01-Z17_A-0000009.jpg,146.046395,28.164191,134.717997,16.703163,146.13265,16.781079,119.258806,23.59369,137.81262,...,157.648429,19.109461,112.843791,138.687572,122.391088,195.843791,86.625231,490.218921,147.625956,524.765102


In [5]:
# First column: one image identifier per row (file names, judging by the preview above).
imgs = df.iloc[:, 0].to_numpy()
# Remaining columns: coordinate values, read below as consecutive (x, y) column pairs.
motions = df.iloc[:, 1:]
# Every other column name, with the '_x' / '_y' suffix stripped, gives one label per pair.
columns = motions.columns.to_list()[::2]
class_labels = [label.replace('_x', '').replace('_y', '') for label in columns]
# Regroup each row's flat values into a list of (x, y) tuples.
keypoints = []
for motion in motions.to_numpy():
    a_keypoints = []
    # NOTE(review): `i` stays defined at module level after this loop; any later
    # cell that reads a bare `i` silently picks up its final value. Keep that in
    # mind before vectorising this loop.
    for i in range(0, motion.shape[0], 2):
        a_keypoints.append((float(motion[i]), float(motion[i+1])))
    keypoints.append(a_keypoints)
# Stack into one array so rows can be selected with index arrays later.
keypoints = np.array(keypoints)

In [6]:
def train_model(model, dataloaders, criterion, optimizer, earlystop=0, num_epochs=25, is_inception=False):
    """Train ``model`` and restore the weights with the lowest validation loss.

    Every epoch runs a 'train' phase followed by a 'val' phase. Per-phase loss
    and "accuracy" are printed and sent to Neptune as ``{phase}_loss`` and
    ``{phase}_acc``.

    Args:
        model: torch module to optimise. Batches are moved to the device of its
            first parameter (CPU when it has no parameters).
        dataloaders: dict with 'train' and 'val' loaders yielding
            ``(inputs, labels)``; each loader must expose ``.dataset`` because
            metrics are averaged over ``len(loader.dataset)``.
        criterion: loss, called as ``criterion(outputs.float(), labels.float())``.
        optimizer: optimizer updating the model's parameters.
        earlystop: stop once this many consecutive epochs did not improve the
            validation loss; ``0`` disables early stopping.
        num_epochs: maximum number of epochs.
        is_inception: when True the model is expected to return
            ``(outputs, aux_outputs)`` in training mode and the auxiliary loss
            is added with weight 0.4.

    Returns:
        ``(model, history)`` where ``history`` is
        ``{'acc': [...], 'loss': [...]}`` holding one Python float per completed
        epoch for the validation phase.
    """
    since = time.time()

    # Follow the model's own device instead of depending on a notebook global.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device('cpu')

    val_acc_history = []
    val_loss_history = []
    earlystop_value = 0

    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0
    # inf (not a large finite sentinel) so that even a huge first loss is recorded
    best_loss = float('inf')

    for epoch in range(num_epochs):
        epoch_since = time.time()
        if earlystop and earlystop_value >= earlystop:
            break

        print('Epoch {}/{}'.format(epoch + 1, num_epochs))
        print('-' * 10)

        # Each epoch has a training and validation phase
        for phase in ['train', 'val']:
            if phase == 'train':
                model.train()  # Set model to training mode
            else:
                model.eval()   # Set model to evaluate mode

            running_loss = 0.0
            running_corrects = 0

            for inputs, labels in dataloaders[phase]:
                inputs = inputs.to(device)
                labels = labels.to(device)

                # zero the parameter gradients
                optimizer.zero_grad()

                # Track gradients only in the training phase
                with torch.set_grad_enabled(phase == 'train'):
                    if is_inception and phase == 'train':
                        # Inception returns an auxiliary output in train mode; its loss is
                        # added with weight 0.4. See
                        # https://discuss.pytorch.org/t/how-to-optimize-inception-model-with-auxiliary-classifiers/7958
                        outputs, aux_outputs = model(inputs)
                        loss1 = criterion(outputs.float(), labels.float())
                        loss2 = criterion(aux_outputs.float(), labels.float())
                        loss = loss1 + 0.4*loss2
                    else:
                        outputs = model(inputs)
                        loss = criterion(outputs.float(), labels.float())

                    # backward + optimize only if in training phase
                    if phase == 'train':
                        loss.backward()
                        optimizer.step()

                # The loss is a batch mean, so weight it by the batch size before summing.
                running_loss += loss.item() * inputs.size(0)
                # Counts output elements that equal the target exactly. For a regression
                # target this is almost always 0; it is kept so the logged metric name
                # and meaning stay unchanged. Accumulated as a plain int.
                running_corrects += torch.sum(outputs == labels).item()

            dataset_size = len(dataloaders[phase].dataset)
            epoch_loss = running_loss / dataset_size
            epoch_acc = running_corrects / dataset_size

            epoch_time_elapsed = time.time() - epoch_since
            print('{} ({}) Loss: {:.4f} Acc: {:.4f} Elapsed time: {:.0f}m {:.0f}s'.format(
                phase, len(dataloaders[phase].dataset), epoch_loss, epoch_acc, epoch_time_elapsed // 60, epoch_time_elapsed % 60))
            neptune.log_metric(f'{phase}_loss', epoch_loss)
            neptune.log_metric(f'{phase}_acc', epoch_acc)

            if phase == 'val':
                # Model selection and early stopping are driven by validation loss.
                if epoch_loss < best_loss:
                    best_loss = epoch_loss
                    best_acc = epoch_acc
                    best_model_wts = copy.deepcopy(model.state_dict())
                    earlystop_value = 0
                else:
                    earlystop_value += 1
                val_loss_history.append(epoch_loss)
                val_acc_history.append(epoch_acc)
        print()

    time_elapsed = time.time() - since
    print('Training and Validation complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
    print('Best validation Loss: {:4f}'.format(best_loss))
    print('Best validation Acc: {:4f}\n'.format(best_acc))

    # load best model weights
    model.load_state_dict(best_model_wts)
    return model, {'acc': val_acc_history, 'loss': val_loss_history}

In [7]:
def set_parameter_requires_grad(model, feature_extracting):
    """Freeze all parameters of ``model`` when ``feature_extracting`` is truthy.

    With a falsy flag the model is left untouched. Nothing is returned.
    """
    if not feature_extracting:
        return
    for weight in model.parameters():
        weight.requires_grad = False

In [8]:
def initialize_model(model_name, model_ver, num_classes, feature_extract, use_pretrained=True):
    """Build a torchvision backbone whose final layer emits ``num_classes`` values.

    Args:
        model_name: one of 'resnet', 'alexnet', 'vgg', 'squeezenet',
            'densenet', 'inception'.
        model_ver: depth suffix for the ResNet family (e.g. '18', '34', 50),
            resolved as ``models.resnet<model_ver>``. A falsy value selects
            '18'. The other families use a fixed variant and ignore it.
        num_classes: output size of the replaced head.
        feature_extract: when True, every pretrained parameter is frozen
            before the new head is attached, so only the head is trainable.
        use_pretrained: forwarded to torchvision as ``pretrained=``.

    Returns:
        The constructed model (on CPU, as returned by torchvision).

    Raises:
        ValueError: for an unknown ``model_name`` or an unknown ResNet version.
    """
    model_ft = None

    if model_name == "resnet":
        # ResNet family; the depth comes from model_ver (resnet18 by default)
        resnet_ver = str(model_ver) if model_ver else '18'
        builder = getattr(models, f'resnet{resnet_ver}', None)
        if not callable(builder):
            raise ValueError(f'Unknown resnet version: {model_ver!r}')
        model_ft = builder(pretrained=use_pretrained)
        set_parameter_requires_grad(model_ft, feature_extract)
        num_ftrs = model_ft.fc.in_features
        model_ft.fc = nn.Linear(num_ftrs, num_classes)

    elif model_name == "alexnet":
        # Alexnet
        model_ft = models.alexnet(pretrained=use_pretrained)
        set_parameter_requires_grad(model_ft, feature_extract)
        num_ftrs = model_ft.classifier[6].in_features
        model_ft.classifier[6] = nn.Linear(num_ftrs, num_classes)

    elif model_name == "vgg":
        # VGG13_bn
        model_ft = models.vgg13_bn(pretrained=use_pretrained)
        set_parameter_requires_grad(model_ft, feature_extract)
        num_ftrs = model_ft.classifier[6].in_features
        model_ft.classifier[6] = nn.Linear(num_ftrs, num_classes)

    elif model_name == "squeezenet":
        # Squeezenet: the head is a 1x1 convolution rather than a linear layer
        model_ft = models.squeezenet1_0(pretrained=use_pretrained)
        set_parameter_requires_grad(model_ft, feature_extract)
        model_ft.classifier[1] = nn.Conv2d(512, num_classes, kernel_size=(1,1), stride=(1,1))
        model_ft.num_classes = num_classes

    elif model_name == "densenet":
        # Densenet
        model_ft = models.densenet121(pretrained=use_pretrained)
        set_parameter_requires_grad(model_ft, feature_extract)
        num_ftrs = model_ft.classifier.in_features
        model_ft.classifier = nn.Linear(num_ftrs, num_classes)

    elif model_name == "inception":
        # Inception v3
        # Be careful, expects (299,299) sized images and has auxiliary output
        model_ft = models.inception_v3(pretrained=use_pretrained)
        set_parameter_requires_grad(model_ft, feature_extract)
        # Handle the auxiliary net
        num_ftrs = model_ft.AuxLogits.fc.in_features
        model_ft.AuxLogits.fc = nn.Linear(num_ftrs, num_classes)
        # Handle the primary net
        num_ftrs = model_ft.fc.in_features
        model_ft.fc = nn.Linear(num_ftrs, num_classes)

    else:
        # Raise instead of exit(): inside IPython exit() only schedules a kernel
        # shutdown, so execution would continue and None would be returned.
        raise ValueError(f'Invalid model name: {model_name!r}')

    return model_ft

# Use the first CUDA device when one is visible, otherwise stay on the CPU
device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")

# Build the network for this run and place it on the chosen device
model_ft = initialize_model(
    model_name, model_ver, num_classes, feature_extract, use_pretrained=True
).to(device)

# Show the architecture that was just instantiated
print(model_ft)

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [9]:
# # Data augmentation and normalization for training
# # Just resize and normalization for validation

# One albumentations pipeline per phase. 'train' adds random augmentation;
# 'val' and 'test' only resize and normalise. 'train' and 'val' transform the
# keypoints together with the image, 'test' has no keypoints.
#
# remove_invisible=False: the network regresses a fixed-length target vector, so
# a keypoint must never be dropped by the pipeline. With True, a keypoint that
# falls outside the image shortens the target and batching fails.
#
# NOTE(review): the flips/rotation move coordinates but, as far as this cell
# shows, nothing swaps left/right keypoint slots afterwards — confirm that is intended.
A_transforms = {
    'train':
        A.Compose([
            A.Resize(input_h, input_w, always_apply=True),
            A.RandomBrightnessContrast(p=0.3),
            A.HorizontalFlip(p=0.3),
            A.RandomRotate90(p=0.3),
            A.VerticalFlip(p=0.3),
            A.MotionBlur(p=0.3),
            A.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
            ToTensorV2()
        ], keypoint_params=A.KeypointParams(format='xy', label_fields=['class_labels'], remove_invisible=False, angle_in_degrees=True)),

    'val':
        A.Compose([
            A.Resize(input_h, input_w, always_apply=True),
            A.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
            ToTensorV2()
        ], keypoint_params=A.KeypointParams(format='xy', label_fields=['class_labels'], remove_invisible=False, angle_in_degrees=True)),

    'test':
        A.Compose([
            A.Resize(input_h, input_w, always_apply=True),
            A.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
            ToTensorV2()
        ])
}

In [10]:
class Dataset(data_utils.Dataset):
    """Image + keypoint dataset reading image files from ``data_dir``.

    Args:
        data_dir: directory the image file names are relative to.
        imgs: sequence of image file names.
        keypoints: per-image keypoints, indexable in step with ``imgs``.
        phase: key selecting the pipeline inside ``data_transforms``.
        class_labels: labels passed to the pipeline as ``class_labels``.
        data_transforms: mapping ``phase -> callable(image=, keypoints=,
            class_labels=)`` returning a dict with 'image' and 'keypoints';
            when falsy the raw image and keypoints are used.

    Each item is ``(image, keypoints)`` with ``keypoints`` flattened to 1-D.
    """
    def __init__(self, data_dir, imgs, keypoints, phase, class_labels=None, data_transforms=None):
        self.data_dir = data_dir
        self.imgs = imgs
        self.keypoints = keypoints
        self.phase = phase
        self.class_labels = class_labels
        self.data_transforms = data_transforms

    def __getitem__(self, idx):
        img_path = os.path.join(self.data_dir, self.imgs[idx])
        # cv2.imread reports a missing/unreadable file by returning None, which
        # would otherwise surface later as an obscure error inside the pipeline.
        img = cv2.imread(img_path)
        if img is None:
            raise FileNotFoundError(f'Could not read image: {img_path}')
        # NOTE(review): the image is used in the channel order cv2.imread returns
        # (no BGR->RGB conversion, same as TestDataset) — confirm this matches
        # what the normalisation statistics and pretrained weights expect.
        keypoints = self.keypoints[idx]

        if self.data_transforms:
            augmented = self.data_transforms[self.phase](image=img, keypoints=keypoints, class_labels=self.class_labels)
            img = augmented['image']
            keypoints = augmented['keypoints']
        # Flatten the (x, y) pairs into one target vector
        keypoints = np.array(keypoints).flatten()

        return img, keypoints

    def __len__(self):
        return len(self.imgs)

In [11]:
# # Gather the parameters to be optimized/updated in this run. If we are
# #  finetuning we will be updating all parameters. However, if we are
# #  doing feature extract method, we will only update the parameters
# #  that we have just initialized, i.e. the parameters with requires_grad
# #  is True.
# params_to_update = model_ft.parameters()
# print("Params to learn:")
# if feature_extract:
#     params_to_update = []
#     for name,param in model_ft.named_parameters():
#         if param.requires_grad == True:
#             params_to_update.append(param)
#             print("\t",name)
# else:
#     for name,param in model_ft.named_parameters():
#         if param.requires_grad == True:
#             print("\t",name)

# # Observe that all parameters are being optimized
# optimizer_ft = optim.Adam(params_to_update, lr=learning_rate)

In [None]:
# Setup the loss fxn
criterion = nn.MSELoss()

print(f'k-folds use: {use_kfolds}')


def _fit_split(model, X_train, X_val, y_train, y_val, split_no):
    """Train ``model`` on one train/validation split and checkpoint the result.

    Args:
        model: network to train; it must already be on ``device``.
        X_train, X_val: image file names of the two partitions.
        y_train, y_val: keypoint arrays matching ``X_train`` / ``X_val``.
        split_no: number used as the suffix of the checkpoint file name.

    Returns:
        ``(model, history)`` exactly as returned by ``train_model``.
    """
    since = time.time()
    train_data = Dataset(train_dir, X_train, y_train, data_transforms=A_transforms, class_labels=class_labels, phase='train')
    val_data = Dataset(train_dir, X_val, y_val, data_transforms=A_transforms, class_labels=class_labels, phase='val')
    dataloaders = {
        'train': data_utils.DataLoader(train_data, batch_size=batch_size, shuffle=True),
        'val': data_utils.DataLoader(val_data, batch_size=batch_size, shuffle=False),
    }

    # All parameters of the model are handed to the optimizer
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    # Train and evaluate
    model, history = train_model(
        model, dataloaders, criterion, optimizer,
        num_epochs=num_epochs, earlystop=num_earlystop, is_inception=(model_name=="inception"))
    torch.save(model.state_dict(), f'{prefix_dir}/local/baseline_{counter:2d}_{model_name}{model_ver}_{split_no}.pt')
    time_elapsed = time.time() - since
    print('Elapsed time: {:.0f}m {:.0f}s\n'.format(time_elapsed // 60, time_elapsed % 60))
    return model, history


full_since = time.time()

# Checkpoints are written below this directory; create it if it is missing
os.makedirs(f'{prefix_dir}/local', exist_ok=True)

if use_kfolds:
    kf = KFold(num_splits, random_state=42, shuffle=True)

    for i, (train_index, val_index) in enumerate(kf.split(imgs)):
        print(f'{i+1}/{num_splits} folds iteration')
        # Start every fold from a fresh model. Reusing the previous fold's weights
        # would let the model see this fold's validation images during training.
        model_ft = initialize_model(model_name, model_ver, num_classes, feature_extract, use_pretrained=True).to(device)
        model_ft, hists = _fit_split(
            model_ft, imgs[train_index], imgs[val_index],
            keypoints[train_index], keypoints[val_index], i + 1)
else:
    X_train, X_val, y_train, y_val = train_test_split(imgs, keypoints, test_size=1/num_splits, random_state=42)
    # The single hold-out split is saved as split 1. The old code formatted a
    # leftover global `i` from an unrelated loop into the file name.
    model_ft, hists = _fit_split(model_ft, X_train, X_val, y_train, y_val, 1)

fulltime_elapsed = time.time() - full_since
print('All process done!\nElapsed time: {:.0f}m {:.0f}s\n'.format(fulltime_elapsed // 60, fulltime_elapsed % 60))

k-folds use: False
Epoch 1/200
----------
train (3775) Loss: 1851.7512 Acc: 0.0000 Elapsed time: 0m 24s
val (420) Loss: 2170.4478 Acc: 0.0000 Elapsed time: 0m 27s

Epoch 2/200
----------
train (3775) Loss: 966.7560 Acc: 0.0000 Elapsed time: 0m 23s
val (420) Loss: 669.3168 Acc: 0.0000 Elapsed time: 0m 25s

Epoch 3/200
----------
train (3775) Loss: 639.6926 Acc: 0.0000 Elapsed time: 0m 23s
val (420) Loss: 514.5819 Acc: 0.0000 Elapsed time: 0m 25s

Epoch 4/200
----------
train (3775) Loss: 596.0945 Acc: 0.0000 Elapsed time: 0m 23s
val (420) Loss: 517.7041 Acc: 0.0000 Elapsed time: 0m 25s

Epoch 5/200
----------
train (3775) Loss: 563.7539 Acc: 0.0000 Elapsed time: 0m 23s
val (420) Loss: 654.5701 Acc: 0.0000 Elapsed time: 0m 25s

Epoch 6/200
----------
train (3775) Loss: 532.4304 Acc: 0.0000 Elapsed time: 0m 24s
val (420) Loss: 457.4572 Acc: 0.0000 Elapsed time: 0m 26s

Epoch 7/200
----------
train (3775) Loss: 488.1067 Acc: 0.0000 Elapsed time: 0m 23s
val (420) Loss: 400.4812 Acc: 0.0000 

In [None]:
# Persist the current weights of model_ft under the "_fv" suffix for this run.
torch.save(model_ft.state_dict(), f'{prefix_dir}/local/baseline_{counter:2d}_{model_name}{model_ver}_fv.pt')

In [None]:
# Reload the "_fv" checkpoint of this run. map_location remaps the stored
# tensors onto the active device, so weights saved on a GPU also load on a
# CPU-only machine.
model_ft.load_state_dict(
    torch.load(f'{prefix_dir}/local/baseline_{counter:2d}_{model_name}{model_ver}_fv.pt',
               map_location=device))

In [None]:
# Test images live under the same data root as every other path in this notebook
test_dir = f'{prefix_dir}/data/test_imgs'
# os.listdir returns entries in arbitrary order; sort so that the prediction and
# submission row order is reproducible between runs and machines.
test_imgs = sorted(os.listdir(test_dir))

In [None]:
class TestDataset(data_utils.Dataset):
    """Unlabelled image dataset used for inference.

    Args:
        data_dir: directory the image file names are relative to.
        imgs: sequence of image file names.
        phase: key selecting the pipeline inside ``data_transforms``.
        data_transforms: mapping ``phase -> callable(image=)`` returning a
            dict with 'image'; when falsy the raw image is returned.

    Each item is ``(filename, image)``.
    """
    def __init__(self, data_dir, imgs, phase, data_transforms=None):
        self.data_dir = data_dir
        self.imgs = imgs
        self.phase = phase
        self.data_transforms = data_transforms

    def __getitem__(self, idx):
        filename = self.imgs[idx]
        img_path = os.path.join(self.data_dir, filename)
        # cv2.imread reports a missing/unreadable file by returning None; fail
        # here with the offending path instead of deep inside the pipeline.
        img = cv2.imread(img_path)
        if img is None:
            raise FileNotFoundError(f'Could not read image: {img_path}')

        if self.data_transforms:
            augmented = self.data_transforms[self.phase](image=img)
            img = augmented['image']
        return filename, img

    def __len__(self):
        return len(self.imgs)
    
# Wrap the test images with the 'test' pipeline and iterate them in listing order,
# using a batch four times the training batch size.
test_data = TestDataset(test_dir, test_imgs, phase='test', data_transforms=A_transforms)
test_loader = data_utils.DataLoader(test_data, shuffle=False, batch_size=4 * batch_size)

In [None]:
all_predictions = []
files = []
# Switch to inference mode explicitly. Without this the cell only gives correct
# results when training happened earlier in the same kernel and left the model
# in eval mode; a freshly built model with loaded weights would still be in
# training mode.
model_ft.eval()
with torch.no_grad():
    for filenames, inputs in test_loader:
        predictions = model_ft(inputs.to(device)).cpu().numpy()
        files.extend(filenames)
        # One prediction row per image, kept in the same order as `files`
        all_predictions.extend(predictions)

In [None]:
all_predictions = np.array(all_predictions)
# Predictions are in resized-image coordinates (input_w x input_h). Scale them
# back in place: even slots are divided by input_w / 1920, odd slots by
# input_h / 1080. Strided slices update every row at once.
# NOTE(review): assumes every test image is 1920x1080 — TODO confirm.
# NOTE(review): this cell mutates all_predictions in place; run it once per
# inference pass, a second run would rescale the values again.
n_scaled = 2 * (num_classes // 2)
all_predictions[..., 0:n_scaled:2] /= input_w / 1920
all_predictions[..., 1:n_scaled:2] /= input_h / 1080

In [None]:
# Build the submission frame with the same columns as the sample submission.
df_sub = pd.read_csv(f'{prefix_dir}/data/sample_submission.csv')
# NOTE(review): this rebinds `df`, which earlier held the training annotations;
# earlier cells that read `df` will see the submission frame if re-run after this.
df = pd.DataFrame(columns=df_sub.columns)
df['image'] = files
# Every column after the first receives the predictions
# (presumably 'image' is the first column of the sample file — verify).
df.iloc[:, 1:] = all_predictions
df.head()

In [None]:
# Write the submission frame to CSV, named after the run counter and model, without the index column.
df.to_csv(f'{prefix_dir}/submission_{counter:2d}_{model_name}{model_ver}.csv', index=False)