In [1]:
import os
from matplotlib import pyplot as plt
import pickle
from glob import glob
import numpy as np
import torch
import torch.nn as nn
import pandas as pd
import time
import torchvision
import torchvision.models as models
import torchvision.transforms as transforms
import PIL.Image

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# device = torch.device("cpu")

In [2]:
print(device)

cpu


# CNN

In [3]:
class ResNet34(nn.Module):
    def __init__(self, num_classes):
        super(ResNet34, self).__init__()
        self.resnet = models.resnet18(pretrained=False)
        self.resnet.fc = nn.Linear(512, num_classes)

    def forward(self, x):
        x = self.resnet(x)
        return x

# Dataset

In [4]:
class ArgoverseDataset(torch.utils.data.Dataset):
    """Dataset class for Argoverse"""
    
    def __init__(self, 
                 data_path,
                 target_path,
                 sample_indices):
        super(ArgoverseDataset, self).__init__()
        
        self.data_path = data_path
        self.target_path = target_path
        self.sample_indices = sample_indices
        self.img_list = glob(os.path.join(self.data_path, '*'))
        self.target_list = glob(os.path.join(self.target_path, '*'))
        
        self.transform = transforms.Compose([transforms.PILToTensor()])
        
    def __len__(self):
        return len(self.sample_indices)

    def __getitem__(self, idx):
        
        # Load one scene
        pkl_path = self.target_list[self.sample_indices[idx]]
        with open(pkl_path, 'rb') as f:
            target = pickle.load(f)
            
        img_path = self.img_list[self.sample_indices[idx]]
        rgba_image = PIL.Image.open(img_path)
        rgb_image = rgba_image.convert('RGB')
        # img = self.transform(rgb_image)
        img = torchvision.transforms.functional.to_tensor(rgb_image)
        
        # Convert to float torch tensor
        return img, target

In [5]:
# Try different ways of normalization
# Leverage other features. 

# Hyperparameter

In [6]:
# Grid/Random Search

In [7]:
batch_size = 32
num_classes = 60
in_dim = (400, 400)
learning_rate = 0.01
decay_rate = 0.95
num_epoch = 10

# Data Loader

In [8]:
img_path = "./rasterization_try2/rasterized_try2"
target_path = "./rasterization_out/rasterized_out"

# total number of scenes
# indices = np.arange(0, 205942)
indices = np.arange(0, 13299)

# train-valid split
np.random.shuffle(indices)
train_indices = indices[:9309]
valid_indices = indices[9309:]

# define datasets
train_set = ArgoverseDataset(img_path, target_path, train_indices)
valid_set = ArgoverseDataset(img_path, target_path, valid_indices)

# create dataloaders
train_loader = torch.utils.data.DataLoader(train_set, batch_size=batch_size, shuffle=True, num_workers=0)
valid_loader = torch.utils.data.DataLoader(valid_set, batch_size=batch_size, shuffle=False, num_workers=0)

# Model, Loss Function and Optimizer

In [9]:
# RNN, LSTM, 1dCNN, Transformer
model = ResNet34(num_classes=num_classes).to(device) # move model to gpu 

# Adaptive Moment Estimation computes adaptive learning rates for each parameter. 
# Compute the decaying averages of past and past squared gradients. 

optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=1, gamma=decay_rate)  # stepwise learning rate decay
loss_fun = nn.MSELoss()

# Training

In [10]:
def train_epoch(train_loader, model, optimizer, loss_function):
    train_mse = []
    for inp, tgt in train_loader:
        inp = inp.to(device)
        tgt = tgt.to(device)
        pred = model(inp)

        loss = loss_function(pred.float(), tgt.float())
        train_mse.append(loss.item()) 
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    train_mse = round(np.sqrt(np.mean(train_mse)),5)
    return train_mse

def eval_epoch(valid_loader, model, loss_function):
    valid_mse = []
    preds = []
    trues = []
    loss_arr = []
    with torch.no_grad():
        for inp, tgt in valid_loader:
            inp = inp.to(device)
            tgt = tgt.to(device)
            
            loss = 0
            pred = model(inp)
            loss = loss_function(pred.float(), tgt.float())
            loss_arr.append(loss)
            preds.append(pred)
            trues.append(tgt)
            valid_mse.append(loss.item())
        preds = np.concatenate(preds, axis = 0)  
        trues = np.concatenate(trues, axis = 0)  
        valid_mse = round(np.sqrt(np.mean(valid_mse)), 5)
    return valid_mse, preds, trues, loss_arr


In [11]:
# Learning Rate Decay
# Dropout
# L1/L2 Regulization

In [13]:
train_rmse = []
valid_rmse = []
loss_arr_val = []
min_rmse = 10e8

name = 'cnn_second_try'
for i in range(num_epoch):
    start = time.time()

    # model.train() # if you use dropout or batchnorm. 
    train_rmse.append(train_epoch(train_loader, model, optimizer, loss_fun))

    # model.eval()
    val_rmse, val_preds, val_trues, loss_arr = eval_epoch(valid_loader, model, loss_fun)
    valid_rmse.append(val_rmse)
    loss_arr_val.append(loss_arr)

    # save the best model
    if valid_rmse[-1] < min_rmse:
        min_rmse = valid_rmse[-1] 
        best_model = model
        torch.save([best_model, i, learning_rate], name + ".pth")

    end = time.time()
    
    # Early Stopping
    if (len(train_rmse) > 100 and np.mean(valid_rmse[-5:]) >= np.mean(valid_rmse[-10:-5])):
            break       

    # Learning Rate Decay        
    scheduler.step()
    with open('cnn_train_rmse.npy', 'wb') as f:
        np.save(f, np.array(train_rmse))
    with open('cnn_valid_rmse.npy', 'wb') as f:
        np.save(f, np.array(valid_rmse))
    print("Epoch {} | T: {:0.2f} | Train RMSE: {:0.5f} | Valid RMSE: {:0.5f}".format(i + 1, (end-start) / 60, train_rmse[-1], valid_rmse[-1]))

Epoch 1 | T: 46.13 | Train RMSE: 14.64716 | Valid RMSE: 14.72030
Epoch 2 | T: 52.06 | Train RMSE: 14.64616 | Valid RMSE: 14.72388
Epoch 3 | T: 48.12 | Train RMSE: 14.64610 | Valid RMSE: 14.71897
Epoch 4 | T: 45.51 | Train RMSE: 14.64513 | Valid RMSE: 14.73898
Epoch 5 | T: 45.32 | Train RMSE: 14.64351 | Valid RMSE: 14.72897
Epoch 6 | T: 45.42 | Train RMSE: 14.64594 | Valid RMSE: 14.72281
Epoch 7 | T: 44.87 | Train RMSE: 14.64202 | Valid RMSE: 14.72966
Epoch 8 | T: 44.54 | Train RMSE: 14.64240 | Valid RMSE: 14.72619
Epoch 9 | T: 39.55 | Train RMSE: 14.64077 | Valid RMSE: 14.73728
Epoch 10 | T: 38.33 | Train RMSE: 14.64316 | Valid RMSE: 14.72585


# Evaluation and Submission

In [None]:
test_path = "./rasterization_val/rasterized_val/"
test_img_list = glob(os.path.join(test_path, '*'))
test_img_list.sort()

with open('./tracked_9th_shift.p', 'rb') as f:
    tracked_9th_shift = pickle.load(f)

test_preds = []
for idx in range(len(test_img_list)):
    img_path = test_img_list[idx]
    rgba_image = PIL.Image.open(img_path)
    rgb_image = rgba_image.convert('RGB')
    inp = torchvision.transforms.functional.to_tensor(rgb_image)
    inp = torch.reshape(inp, (1, 3, 400, 400))
        
    preds = best_model(inp) 
    preds = np.reshape(preds.detach().numpy(), (30, 2)) + tracked_9th_shift[idx]
    test_preds.append(preds)

# Generate Submission File

In [None]:
# # Submission Files
sample_sub = pd.read_csv('sample_submission.csv')

In [None]:
# Convert to int
predictions = np.concatenate(test_preds).reshape(len(test_preds), -1).astype(int)
sub_df = pd.DataFrame(np.c_[sample_sub["ID"], predictions], columns=[np.r_[["ID"], ["v" + str(i) for i in range(1, 61)]]])
sub_df.to_csv('test_submission.csv', index=None)