In [1]:
import numpy as np
import pandas as pd
import os
import time
import copy

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils import data as data_utils
from torchvision import datasets, models, transforms

from sklearn.model_selection import KFold, train_test_split

import albumentations as A
import albumentations_experimental as AE
from albumentations.pytorch import ToTensorV2
import cv2
from tqdm import tqdm
import model

# Connect your script to Neptune
import neptune
import neptune_config

In [2]:
# ---- Paths -----------------------------------------------------------------
# Root directory that contains `data/` and the per-environment output folder.
prefix_dir = '.'

# Name of the sub-directory (under prefix_dir) where checkpoints are written.
env = 'lk3'

# When True, the YOLO-cropped images/labels (file prefix `cropped2_`) are used.
use_yolo = True
cropped = 'cropped2_' if use_yolo else ''

# Directory holding the training images referenced by the label CSV.
train_dir = f'{prefix_dir}/data/{cropped}train_imgs'

# ---- Model -----------------------------------------------------------------
# Architecture name forwarded to model.initialize_model.
model_name = 'resnext50_32x4d'

# Size of the regression output vector (x and y for every keypoint).
num_classes = 48

# ---- Training --------------------------------------------------------------
# Mini-batch size (lower it if the GPU runs out of memory).
batch_size = 64

# Maximum number of training epochs.
num_epochs = 200

# Number of folds used when use_kfolds is True.
num_splits = 10

# Early-stopping patience: a tenth of the epoch budget, but never below 10.
# Set to 0 to disable early stopping.
num_earlystop = max(10, num_epochs // 10)

# Side length (pixels) every image is resized to before entering the network.
input_size = 180

# Learning rate for the optimizer.
learning_rate = 0.01

# Train with K-fold cross-validation instead of a single train/val split.
use_kfolds = False

# Index of the CUDA device the model is placed on.
cuda_num = 2

In [3]:
# Load the label table: first column is the image file name, the remaining
# columns are read pairwise as (x, y) keypoint coordinates.
df = pd.read_csv(f'{prefix_dir}/data/{cropped}train_df.csv')

imgs = df.iloc[:, 0].to_numpy()
motions = df.iloc[:, 1:]

# One label per keypoint: take every second column name and strip the axis suffix.
columns = motions.columns.to_list()[::2]
class_labels = [label.replace('_x', '').replace('_y', '') for label in columns]

# Turn every row into a list of (x, y) float pairs, then stack into one array.
keypoints = np.array([
    [(float(row[pos]), float(row[pos + 1])) for pos in range(0, row.shape[0], 2)]
    for row in motions.to_numpy()
])

In [4]:
# Connect to the Neptune project using the token from the local config module.
ns = neptune.init(
    project_qualified_name='mybirth0407/dacon-motion',
    api_token=neptune_config.token,
)

# Create experiment (named after the model architecture)
neptune.create_experiment(f'{model_name}')

# Record the run configuration as metrics, in a fixed order.
# NOTE: the key 'num_ealrystop' is misspelled; it is kept as-is so the metric
# name stays the same as in earlier experiments.
run_settings = (
    ('batch_size', batch_size),
    ('num_epochs', num_epochs),
    ('num_splits', num_splits),
    ('num_ealrystop', num_earlystop),
    ('input_size', input_size),
    ('learning_rate', learning_rate),
    ('use_kfolds', use_kfolds),
    ('use_yolo', use_yolo),
)
for setting_name, setting_value in run_settings:
    neptune.log_metric(setting_name, setting_value)

psutil is not installed. You will not be able to abort this experiment from the UI.
psutil is not installed. Hardware metrics will not be collected.


https://ui.neptune.ai/mybirth0407/dacon-motion/e/DAC-231


In [5]:
# Use the Neptune experiment id as the name of this run's output directory.
# NOTE(review): this reads private attributes of the Neptune session object.
counter = ns._get_current_experiment()._id
# makedirs + exist_ok: also creates the missing `{env}` parent on a fresh
# checkout and does not fail when the cell is executed a second time.
os.makedirs(f'{prefix_dir}/{env}/{counter}', exist_ok=True)
print(counter)

DAC-231


In [6]:
def train_val_split(imgs, keypoints, random_state):
    """Split images into train/validation sets, holding out images per group.

    Files are grouped by their name with the last '-'-separated component
    removed (the remaining components are concatenated). For every group two
    positions are drawn with ``np.random.randint`` and the images at those
    positions go to the validation set; everything else goes to training.
    The two draws are made with replacement, so a group contributes one
    validation image when both draws coincide.

    Args:
        imgs: 1-D NumPy array of image file names.
        keypoints: NumPy array whose first axis is aligned with ``imgs``.
        random_state: seed passed to ``np.random.seed`` (this reseeds the
            global NumPy generator).

    Returns:
        ``(train_imgs, val_imgs, train_keypoints, val_keypoints)``.
    """
    # Remember row indices per group while scanning once, instead of searching
    # the whole array again for every file (which was quadratic and mapped
    # duplicated file names onto the first matching row only).
    groups = {}
    for row, file in enumerate(imgs):
        key = ''.join(file.split('-')[:-1])
        groups.setdefault(key, []).append(row)

    np.random.seed(random_state)
    trains = []
    validations = []
    for rows in groups.values():
        picked = set(np.random.randint(len(rows), size=2).tolist())
        for position, row in enumerate(rows):
            if position in picked:
                validations.append(row)
            else:
                trains.append(row)
    return imgs[trains], imgs[validations], keypoints[trains], keypoints[validations]

In [7]:
def train_model(model, dataloaders, criterion, optimizer, earlystop=0, num_epochs=25, monitor='val', allsave=False, phases=('train', 'val')):
    """Train ``model`` and keep the weights with the lowest monitored loss.

    Every epoch runs each phase in ``phases`` in order. The phase named
    'train' puts the model in training mode and updates the weights; any
    other phase runs in evaluation mode with gradients disabled. The mean
    per-sample loss of each phase is printed and logged to Neptune.

    This function reads the module-level names ``device``, ``prefix_dir``,
    ``env``, ``counter`` and ``model_name`` to place batches and to build
    checkpoint paths.

    Args:
        model: network to optimise; it is modified in place.
        dataloaders: mapping from phase name to a DataLoader yielding
            ``(inputs, labels)`` batches.
        criterion: loss callable applied to ``(outputs.float(), labels.float())``.
        optimizer: optimizer stepping the model parameters.
        earlystop: stop after this many consecutive epochs without improvement
            of the monitored loss; 0 disables early stopping.
        num_epochs: maximum number of epochs.
        monitor: phase whose loss selects the best weights; must be in ``phases``.
        allsave: also write a checkpoint for epochs that did not improve.
        phases: phase names to run each epoch, in order.

    Returns:
        ``(model, best_loss)`` with the best weights loaded into ``model``.

    Raises:
        ValueError: if ``monitor`` is not one of ``phases``. Without this
            check no epoch would ever count as "best" and the final reload
            would silently restore the untrained initial weights.
    """
    if monitor not in phases:
        raise ValueError(f'monitor={monitor!r} must be one of phases={list(phases)!r}')

    since = time.time()
    checkpoint_dir = f'{prefix_dir}/{env}/{counter}'

    epochs_without_improvement = 0
    best_model_wts = copy.deepcopy(model.state_dict())
    best_loss = float('inf')

    for epoch in range(num_epochs):
        epoch_since = time.time()
        if earlystop and epochs_without_improvement >= earlystop:
            break

        print('Epoch {}/{}'.format(epoch + 1, num_epochs))
        print('-' * 10)

        for phase in phases:
            is_train = phase == 'train'
            if is_train:
                model.train()  # Set model to training mode
            else:
                model.eval()   # Set model to evaluate mode

            running_loss = 0.0

            for inputs, labels in dataloaders[phase]:
                inputs = inputs.to(device)
                labels = labels.to(device)

                # zero the parameter gradients
                optimizer.zero_grad()

                # Track gradient history only while training.
                with torch.set_grad_enabled(is_train):
                    outputs = model(inputs)
                    loss = criterion(outputs.float(), labels.float())

                    if is_train:
                        loss.backward()
                        optimizer.step()

                # Weight the batch-mean loss by batch size so the epoch value
                # is a per-sample mean even when the last batch is smaller.
                running_loss += loss.item() * inputs.size(0)

            n_samples = len(dataloaders[phase].dataset)
            epoch_loss = running_loss / n_samples

            # Measured from the start of the epoch, so later phases include
            # the time spent in earlier ones.
            epoch_time_elapsed = time.time() - epoch_since
            print('{} ({}) Loss: {:.4f} Elapsed time: {:.0f}m {:.0f}s'.format(
                phase, n_samples, epoch_loss, epoch_time_elapsed // 60, epoch_time_elapsed % 60))
            neptune.log_metric(f'{phase}_loss', epoch_loss)

            if phase != monitor:
                continue

            if epoch_loss < best_loss:
                best_loss = epoch_loss
                neptune.log_metric(f'{phase}_best_loss', best_loss)
                best_model_wts = copy.deepcopy(model.state_dict())
                # Save the weights of the model being trained (the argument),
                # not whatever the global `model_ft` happens to refer to.
                torch.save(best_model_wts, f'{checkpoint_dir}/{model_name}.pt')
                print('copied model')
                epochs_without_improvement = 0
            else:
                epochs_without_improvement += 1
                if allsave:
                    torch.save(model.state_dict(), f'{checkpoint_dir}/{model_name}_{epoch_loss:.2f}_{epoch}.pt')
        print()

    time_elapsed = time.time() - since
    print('Training and Validation complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
    if monitor == 'val':
        print('Best Validation Loss: {:.4f}\n'.format(best_loss))
    elif monitor == 'train':
        print('Best Training Loss: {:.4f}\n'.format(best_loss))

    # load best model weights
    model.load_state_dict(best_model_wts)
    return model, best_loss

In [8]:
def set_parameter_requires_grad(model, feature_extracting):
    """Freeze all parameters of ``model`` when ``feature_extracting`` is truthy.

    When ``feature_extracting`` is falsy the model is left untouched.
    """
    if not feature_extracting:
        return
    for weight in model.parameters():
        weight.requires_grad = False

In [9]:
# Initialize the model for this run. initialize_model also returns the input
# size to use, which replaces the configured value from here on.
model_ft, input_size = model.initialize_model(model_name, input_size, num_classes, use_pretrained=True)
# set_parameter_requires_grad(model_ft, feature_extract)

# Detect if we have a GPU available
device = torch.device(f'cuda:{cuda_num}' if torch.cuda.is_available() else 'cpu')

# Send the model to GPU
model_ft = model_ft.to(device)

# Multi GPU
# os.environ["CUDA_VISIBLE_DEVICES"] = '2, 3'
# The wrapper gets its own name: binding it to `model` would overwrite the
# imported `model` module and make this cell fail when executed again.
# DataParallel expects the wrapped module to live on device_ids[0], so the
# ids are derived from cuda_num (the device model_ft was just moved to).
# NOTE(review): the training and inference cells use `model_ft` directly;
# this wrapper is not passed to train_model.
model_parallel = nn.DataParallel(model_ft, device_ids=[cuda_num, cuda_num + 1], output_device=cuda_num)

# Print the model we just instantiated
# print(model_ft)

Downloading: "https://download.pytorch.org/models/resnext50_32x4d-7cdf4587.pth" to /home/mybirth0407/.cache/torch/hub/checkpoints/resnext50_32x4d-7cdf4587.pth


  0%|          | 0.00/95.8M [00:00<?, ?B/s]

  return torch._C._cuda_getDeviceCount() > 0


In [10]:
# Albumentations pipelines, one per phase.
#   train: resize + random augmentation + normalization
#   val  : resize + normalization (keypoints are transformed as well)
#   test : resize + normalization (image only, no keypoints)

def _resize_step():
    """Fresh resize transform to the configured square input size."""
    return A.Resize(input_size, input_size, always_apply=True)


def _normalize_and_tensorize():
    """Fresh normalization + tensor-conversion steps shared by every phase."""
    return [
        A.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
        ToTensorV2(),
    ]


def _keypoint_params():
    """Fresh keypoint settings used by the phases that carry labels."""
    return A.KeypointParams(
        format='xy',
        label_fields=['class_labels'],
        remove_invisible=False,
        angle_in_degrees=True,
    )


# Index pairs swapped by the horizontal flip so left/right labels stay correct:
# 0 nose -> 0 nose
# 1 left_eye -> 2 right eye, 3 left ear -> 4 right ear, ... 22 left instep -> 23 right instep
_symmetric_keypoints = [
    [0, 0], [1, 2], [3, 4], [5, 6], [7, 8], [9, 10], [11, 12],
    [13, 14], [15, 16], [17, 17], [18, 19], [20, 20], [21, 21], [22, 23],
]

A_transforms = {
    'train': A.Compose(
        [
            _resize_step(),
            A.RandomBrightnessContrast(p=0.3),
            AE.HorizontalFlipSymmetricKeypoints(symmetric_keypoints=_symmetric_keypoints, p=0.3),
            # Geometric change: either a 90-degree rotation or a vertical flip.
            A.OneOf([A.RandomRotate90(p=1), A.VerticalFlip(p=1)], p=0.5),
            # Degradation: either motion blur or Gaussian noise.
            A.OneOf([A.MotionBlur(p=1), A.GaussNoise(p=1)], p=0.5),
            *_normalize_and_tensorize(),
        ],
        keypoint_params=_keypoint_params(),
    ),
    'val': A.Compose(
        [_resize_step(), *_normalize_and_tensorize()],
        keypoint_params=_keypoint_params(),
    ),
    'test': A.Compose([_resize_step(), *_normalize_and_tensorize()]),
}

In [11]:
class Dataset(data_utils.Dataset):
    """Image + keypoint dataset used for the training and validation phases.

    Args:
        data_dir: directory that contains the image files.
        imgs: sequence of image file names (relative to ``data_dir``).
        keypoints: per-image keypoints, indexable in the same order as ``imgs``.
        phase: key selecting which pipeline of ``data_transforms`` to apply.
        class_labels: labels passed to the transform as ``class_labels``.
        data_transforms: optional mapping from phase name to a callable taking
            ``image``, ``keypoints`` and ``class_labels`` keyword arguments and
            returning a dict with 'image' and 'keypoints'.
    """
    def __init__(self, data_dir, imgs, keypoints, phase, class_labels=None, data_transforms=None):
        self.data_dir = data_dir
        self.imgs = imgs
        self.keypoints = keypoints
        self.phase = phase
        self.class_labels = class_labels
        self.data_transforms = data_transforms

    def __getitem__(self, idx):
        """Return ``(image, flattened_keypoints)`` for item ``idx``.

        Raises:
            FileNotFoundError: if OpenCV cannot read the image file.
        """
        img_path = os.path.join(self.data_dir, self.imgs[idx])
        # cv2.imread signals failure by returning None instead of raising;
        # fail here with the offending path rather than deep inside a transform.
        img = cv2.imread(img_path)
        if img is None:
            raise FileNotFoundError(f'Could not read image: {img_path}')
        # NOTE(review): the image is kept in OpenCV's BGR channel order (the
        # BGR->RGB conversion is intentionally left disabled here, matching
        # the test-time loader) — confirm this is intended for the
        # normalization constants in use.
        keypoints = self.keypoints[idx]

        if self.data_transforms:
            augmented = self.data_transforms[self.phase](image=img, keypoints=keypoints, class_labels=self.class_labels)
            img = augmented['image']
            keypoints = augmented['keypoints']
        # Flatten the (x, y) pairs into one 1-D regression target.
        keypoints = np.array(keypoints).flatten()

        return img, keypoints

    def __len__(self):
        """Number of images in the dataset."""
        return len(self.imgs)

In [12]:
# Setup the loss fxn (mean squared error on the flattened keypoint coordinates)
criterion = nn.MSELoss()

print(f'k-folds use: {use_kfolds}')
print(f'yolo use: {use_yolo}')


def _build_dataloaders(X_train, y_train, X_val, y_val, num_workers=0):
    """Wrap a train/validation split into the dataloader dict train_model expects."""
    train_data = Dataset(train_dir, X_train, y_train, data_transforms=A_transforms, class_labels=class_labels, phase='train')
    val_data = Dataset(train_dir, X_val, y_val, data_transforms=A_transforms, class_labels=class_labels, phase='val')
    train_loader = data_utils.DataLoader(train_data, batch_size=batch_size, num_workers=num_workers, shuffle=True)
    val_loader = data_utils.DataLoader(val_data, batch_size=batch_size, num_workers=num_workers, shuffle=False)
    return {'train': train_loader, 'val': val_loader}


full_since = time.time()

if use_kfolds:
    kf = KFold(num_splits, random_state=42, shuffle=True)

    # Every fold must start from the same weights. Continuing from the
    # previous fold would mean the model has already been trained on images
    # that belong to the current fold's validation set, making the reported
    # validation loss optimistic.
    initial_weights = copy.deepcopy(model_ft.state_dict())

    for i, (train_index, val_index) in enumerate(kf.split(imgs)):
        print(f'{i+1}/{num_splits} folds iteration')
        since = time.time()
        model_ft.load_state_dict(initial_weights)

        X_train, X_val = imgs[train_index], imgs[val_index]
        y_train, y_val = keypoints[train_index], keypoints[val_index]
        dataloaders = _build_dataloaders(X_train, y_train, X_val, y_val)

        # Observe that all parameters are being optimized
        optimizer_ft = optim.Adam(model_ft.parameters(), lr=learning_rate)

        # Train and evaluate
        model_ft, best_loss = train_model(
            model_ft, dataloaders, criterion, optimizer_ft,
            num_epochs=num_epochs, earlystop=num_earlystop)
        torch.save(model_ft.state_dict(), f'{prefix_dir}/{env}/{counter}/{model_name}_{i+1}_{best_loss:.2f}.pt')
        time_elapsed = time.time() - since
        print('Elapsed time: {:.0f}m {:.0f}s\n'.format(time_elapsed // 60, time_elapsed % 60))
else:
    since = time.time()
    X_train, X_val, y_train, y_val = train_val_split(imgs, keypoints, random_state=42)
    dataloaders = _build_dataloaders(X_train, y_train, X_val, y_val, num_workers=8)

    # Observe that all parameters are being optimized
    optimizer_ft = optim.Adam(model_ft.parameters(), lr=learning_rate)

    # Train and evaluate
    model_ft, best_loss = train_model(
        model_ft, dataloaders, criterion, optimizer_ft,
        num_epochs=num_epochs, earlystop=num_earlystop)
    torch.save(model_ft.state_dict(), f'{prefix_dir}/{env}/{counter}/{model_name}_{best_loss:.2f}.pt')
    time_elapsed = time.time() - since
    print('Elapsed time: {:.0f}m {:.0f}s\n'.format(time_elapsed // 60, time_elapsed % 60))

fulltime_elapsed = time.time() - full_since
print('All process done!\nElapsed time: {:.0f}m {:.0f}s\n'.format(fulltime_elapsed // 60, fulltime_elapsed % 60))

k-folds use: False
yolo use: True
Epoch 1/200
----------


ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.



Traceback (most recent call last):
  File "/home/mybirth0407/miniconda3/envs/torch_101/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 3437, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-12-7a5f99507d74>", line 48, in <module>
    num_epochs=num_epochs, earlystop=num_earlystop)
  File "<ipython-input-7-760f89726209>", line 45, in train_model
    outputs = model(inputs)
  File "/home/mybirth0407/miniconda3/envs/torch_101/lib/python3.7/site-packages/torch/nn/modules/module.py", line 727, in _call_impl
    result = self.forward(*input, **kwargs)
  File "/home/mybirth0407/miniconda3/envs/torch_101/lib/python3.7/site-packages/torchvision/models/resnet.py", line 220, in forward
    return self._forward_impl(x)
  File "/home/mybirth0407/miniconda3/envs/torch_101/lib/python3.7/site-packages/torchvision/models/resnet.py", line 209, in _forward_impl
    x = self.layer2(x)
  File "/home/mybirth0407/miniconda3/envs/torch_101/lib/python

TypeError: object of type 'NoneType' has no len()

In [16]:
# model_ft.load_state_dict(torch.load(f'{prefix_dir}/{env}/{counter}/resnet18_18.95.pt'))

In [None]:
# Directory with the test images; built from prefix_dir like every other data
# path in this notebook instead of a hard-coded './data'.
test_dir = f'{prefix_dir}/data/{cropped}test_imgs'
# Sorted so the prediction order is deterministic across runs and machines.
test_imgs = sorted(os.listdir(test_dir))

In [None]:
class TestDataset(data_utils.Dataset):
    """Image-only dataset used at inference time.

    Args:
        data_dir: directory that contains the image files.
        imgs: sequence of image file names (relative to ``data_dir``).
        phase: key selecting which pipeline of ``data_transforms`` to apply.
        data_transforms: optional mapping from phase name to a callable taking
            an ``image`` keyword argument and returning a dict with 'image'.
    """
    def __init__(self, data_dir, imgs, phase, data_transforms=None):
        self.data_dir = data_dir
        self.imgs = imgs
        self.phase = phase
        self.data_transforms = data_transforms

    def __getitem__(self, idx):
        """Return ``(filename, image, (height, width))`` for item ``idx``.

        ``height`` and ``width`` describe the image as read from disk, before
        any transform is applied.

        Raises:
            FileNotFoundError: if OpenCV cannot read the image file.
        """
        filename = self.imgs[idx]
        img_path = os.path.join(self.data_dir, filename)
        # cv2.imread returns None on failure instead of raising; surface the
        # path here rather than failing on `None.shape` below.
        img = cv2.imread(img_path)
        if img is None:
            raise FileNotFoundError(f'Could not read image: {img_path}')
        h = img.shape[0]
        w = img.shape[1]
        if self.data_transforms:
            augmented = self.data_transforms[self.phase](image=img)
            img = augmented['image']
        return filename, img, (h, w)

    def __len__(self):
        """Number of images in the dataset."""
        return len(self.imgs)
    
# Test images go through the 'test' pipeline and keep their listing order.
test_data = TestDataset(
    test_dir,
    test_imgs,
    phase='test',
    data_transforms=A_transforms,
)
test_loader = data_utils.DataLoader(
    test_data,
    batch_size=batch_size,
    shuffle=False,
)

In [None]:
# Inference must run in evaluation mode: otherwise layers such as batch
# normalization / dropout behave as in training. This was previously only
# true by accident, when the last phase executed by train_model was not 'train'.
model_ft.eval()

all_predictions = []
files = []
heights = []
widths = []
with torch.no_grad():
    # The default collate turns the per-sample (h, w) tuple into two batched
    # tensors: one with all heights and one with all widths.
    for filenames, inputs, (batch_heights, batch_widths) in tqdm(test_loader):
        all_predictions.append(model_ft(inputs.to(device)).cpu().numpy())
        files.extend(filenames)
        heights.append(batch_heights.numpy())
        widths.append(batch_widths.numpy())

all_predictions = np.concatenate(all_predictions)
origin_shape_y = np.concatenate(heights)
origin_shape_x = np.concatenate(widths)

# Predictions are in resized-image coordinates (input_size x input_size);
# scale them back to each image's own width / height. Even columns are
# treated as x and odd columns as y, for the first num_classes // 2 pairs.
n_coords = 2 * (num_classes // 2)
all_predictions[:, 0:n_coords:2] /= (input_size / origin_shape_x)[:, np.newaxis]
all_predictions[:, 1:n_coords:2] /= (input_size / origin_shape_y)[:, np.newaxis]

In [None]:
# Per-image offsets; column 0 is skipped and the next two columns are added to
# the x and y coordinates respectively.
# NOTE(review): presumably these are the crop origins of the YOLO boxes and the
# rows are in the same order as the sorted test file list — confirm.
res_df = pd.read_csv(f'{prefix_dir}/data/res2_test_df.csv')
res = res_df.iloc[:, 1:].to_numpy()

# np.array copies, so the name is rebound to a new array here.
# NOTE: running this cell twice adds the offsets twice.
all_predictions = np.array(all_predictions)

# A row-count mismatch means offsets and predictions cannot be aligned;
# fail loudly instead of shifting predictions by the wrong image's offset.
if len(res) != len(all_predictions):
    raise ValueError(
        f'offset table has {len(res)} rows but there are {len(all_predictions)} predictions')

n_coords = 2 * (num_classes // 2)
all_predictions[:, 0:n_coords:2] += res[:, [0]]
all_predictions[:, 1:n_coords:2] += res[:, [1]]

In [None]:
# Build the submission table with the same columns as the sample submission.
df_sub = pd.read_csv(f'{prefix_dir}/data/sample_submission.csv')
# NOTE(review): this rebinds `df`, which earlier in the notebook holds the
# training label table; cells that need the training labels must be re-run.
df = pd.DataFrame(columns=df_sub.columns)
df['image'] = files
# Fill every column after the first with the predicted coordinates.
# Assumes 'image' is the first column of the sample submission and that the
# remaining columns match the prediction width — TODO confirm.
df.iloc[:, 1:] = all_predictions
df.head()

In [None]:
# Make sure the output directory exists: to_csv does not create it, and
# failing here would lose the result of a long training run.
os.makedirs(f'{prefix_dir}/submissions', exist_ok=True)
df.to_csv(f'{prefix_dir}/submissions/{counter}_{model_name}_{best_loss:.2f}.csv', index=False)

In [None]:
# tune_train_data = Dataset(train_dir, X_val, y_val, data_transforms=A_transforms, class_labels=class_labels, phase='train')
# tune_val_data = Dataset(train_dir, X_train, y_train, data_transforms=A_transforms, class_labels=class_labels, phase='val')
# tune_train_loader = data_utils.DataLoader(tune_train_data, batch_size=batch_size, shuffle=True)
# tune_val_loader = data_utils.DataLoader(tune_val_data, batch_size=batch_size, shuffle=False)
# tune_dataloaders = {'train': tune_train_loader, 'val': tune_val_loader}

# # Observe that all parameters are being optimized
# optimizer_ft = optim.Adam(model_ft.parameters(), lr=learning_rate)

# # Train and evaluate
# since = time.time()
# model_ft, best_loss = train_model(
#     model_ft, tune_dataloaders, criterion, optimizer_ft,
#     num_epochs=10, earlystop=0, allsave=True)
# torch.save(model_ft.state_dict(), f'{prefix_dir}/{env}/{counter}/{model_name}_tuned_{best_loss:.2f}.pt')
# time_elapsed = time.time() - since
# print('Elapsed time: {:.0f}m {:.0f}s\n'.format(time_elapsed // 60, time_elapsed % 60))

In [None]:
# tune_train_data = Dataset(train_dir, imgs, keypoints, data_transforms=A_transforms, class_labels=class_labels, phase='train')
# tune_train_loader = data_utils.DataLoader(tune_train_data, batch_size=batch_size, shuffle=True)
# tune_dataloaders = {'train': tune_train_loader}

# # Observe that all parameters are being optimized
# optimizer_ft = optim.Adam(model_ft.parameters(), lr=learning_rate)

# # Train and evaluate
# since = time.time()
# model_ft, best_loss = train_model(
#     model_ft, tune_dataloaders, criterion, optimizer_ft,
#     num_epochs=3, earlystop=0, allsave=True, monitor='train', phases=['train'])
# torch.save(model_ft.state_dict(), f'{prefix_dir}/{env}/{counter}/{model_name}_tuned_{best_loss:.2f}.pt')
# time_elapsed = time.time() - since
# print('Elapsed time: {:.0f}m {:.0f}s\n'.format(time_elapsed // 60, time_elapsed % 60))

In [None]:
# all_predictions = []
# files = []
# shapes = []
# with torch.no_grad():
#     for filenames, inputs, shape in tqdm(test_loader):
#         predictions = list(model_ft(inputs.to(device)).cpu().numpy())
#         files.extend(filenames)
        
#         shapes.extend(shape)
#         for prediction in predictions:
#             all_predictions.append(prediction)
            
# origin_shape_y = shapes[0].numpy()
# origin_shape_x = shapes[1].numpy()
# for i in range(1, len(shapes) // 2):
#     origin_shape_y = np.append(origin_shape_y, shapes[2*i].numpy())
#     origin_shape_x = np.append(origin_shape_x, shapes[2*i + 1].numpy())

In [None]:
# all_predictions = np.array(all_predictions)
# for i in range(all_predictions.shape[0]):
#     all_predictions[i, [2*j for j in range(num_classes//2)]] /= input_size / origin_shape_x[i]
#     all_predictions[i, [2*j + 1 for j in range(num_classes//2)]] /= input_size / origin_shape_y[i]
    
# res_df = pd.read_csv(f'{prefix_dir}/data/res2_test_df.csv')
# res = res_df.iloc[:, 1:].to_numpy()

# all_predictions = np.array(all_predictions)
# for i in range(all_predictions.shape[0]):
#     all_predictions[i, [2*j for j in range(num_classes//2)]] += res[i][0]
#     all_predictions[i, [2*j + 1 for j in range(num_classes//2)]] += res[i][1]

In [None]:
# df_sub = pd.read_csv(f'{prefix_dir}/data/sample_submission.csv')
# df = pd.DataFrame(columns=df_sub.columns)
# df['image'] = files
# df.iloc[:, 1:] = all_predictions
# df.head()

In [None]:
# df.to_csv(f'{prefix_dir}/submissions/{counter}_{model_name}_tuned_{best_loss:.2f}.csv', index=False)

In [None]:
# Finish the Neptune experiment created at the top of the notebook.
neptune.stop()
# Release GPU memory cached by PyTorch that is no longer in use.
torch.cuda.empty_cache()

In [None]:
# Show the experiment id again so the matching checkpoint and submission
# files (whose paths contain `counter`) can be located.
print(counter)