In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import torchvision
import torchvision.transforms as transforms
import numpy as np
import os
import cv2
import pickle as pkl
import pandas as pd
import matplotlib.pyplot as plt

# Use the first CUDA GPU when PyTorch reports one is available; otherwise fall back to the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [2]:
# Show which device was selected for this run.
print(device)

cuda:0


In [3]:
# Number of passes over the training data; shared by the training loops in this notebook.
num_epochs = 1

# Dataset Construction

In [4]:
# lazy loading
class LazyLoadDataset(Dataset):
    """Dataset that reads each sample from disk only when it is indexed.

    Files read by this class, relative to ``path``::

        {train|test}/X/<sample>/rgb/0.png
        {train|test}/X/<sample>/depth.npy
        {train|test}/X/<sample>/field_id.pkl
        train/Y/<sample>.npy              (training split only)

    Args:
        path: Root directory of the dataset (a trailing separator is optional).
        train: If True read the ``train`` split and return targets as well;
            otherwise read the ``test`` split and return inputs only.
        transform: Optional callable applied to the RGB image (H x W x 3 array).

    Each item is ``((img0, depth, field_id), Y)`` for the training split and
    ``(img0, depth, field_id)`` for the test split.
    """

    def __init__(self, path, train = True, transform=None):
        self.transform = transform
        split_dir = os.path.join(path, 'train' if train else 'test')
        # The trailing '' keeps a separator at the end, so code that builds
        # paths with `self.pathX + name` keeps working.
        self.pathX = os.path.join(split_dir, 'X', '')
        self.pathY = os.path.join(split_dir, 'Y', '')
        self.is_train = train
        # os.listdir gives entries in arbitrary order; sorting makes the
        # index -> sample mapping reproducible across runs and machines.
        self.data = sorted(os.listdir(self.pathX))

    def _read_rgb(self, sample_dir, view=0):
        """Read ``rgb/<view>.png`` from ``sample_dir`` as an RGB array.

        Raises:
            FileNotFoundError: if OpenCV cannot read the file (cv2.imread
                signals failure by returning None instead of raising).
        """
        img_path = os.path.join(sample_dir, "rgb", "{}.png".format(view))
        img = cv2.imread(img_path)
        if img is None:
            raise FileNotFoundError("Could not read image: {}".format(img_path))
        # cv2.imread returns channels in BGR order; convert to RGB so that
        # per-channel normalisation and pretrained RGB models see the
        # channels in the order they expect.
        return cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

    def __getitem__(self, idx):
        f = self.data[idx]
        sample_dir = os.path.join(self.pathX, f)

        # Only view 0 is part of the returned sample, so the other views are
        # not read from disk.
        img0 = self._read_rgb(sample_dir, 0)
        if self.transform is not None:
            img0 = self.transform(img0)

        depth = np.load(os.path.join(sample_dir, "depth.npy"))
        # NOTE: pickle.load executes arbitrary code from the file; only use
        # this on dataset files you trust.
        with open(os.path.join(sample_dir, "field_id.pkl"), "rb") as fh:
            field_id = pkl.load(fh)

        if self.is_train:
            Y = np.load(os.path.join(self.pathY, f + ".npy"))
            return (img0, depth, field_id), Y
        return (img0, depth, field_id)

    def __len__(self):
        return len(self.data)

In [5]:
# Preprocessing applied to every image: convert to a float tensor, then
# normalise each channel. The mean/std values are the ones commonly used with
# ImageNet-pretrained torchvision models.
transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
    ]
)

# Alternative pipeline with light augmentation, kept disabled:
# transform = transforms.Compose([
#     transforms.ToTensor(),
#     transforms.RandomRotation(15),
#     transforms.RandomAdjustSharpness(0.2),
#     transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
#     ])

# Training split, served in shuffled mini-batches of 32 samples.
train_data = LazyLoadDataset('lazydata/', train=True, transform=transform)
train_loader = DataLoader(dataset=train_data, batch_size=32, shuffle=True)

In [6]:
# Fetch the first training sample and look at its pieces as a sanity check.
sample_inputs, Y = train_data[0]
img0, depth, field_id = sample_inputs

print(img0.shape, depth.shape)
print(Y.shape, Y, field_id, sep="\n")

torch.Size([3, 224, 224]) (3, 224, 224)
(12,)
[ 0.02236032  0.05468269  0.13108863  0.04905356 -0.00215862  0.12206387
  0.06858328 -0.03716908  0.03481495  0.05207223  0.0329286  -0.0572817 ]
1479


In [7]:
def train(epoch, model, optimizer, loader=None):
    """Run one training epoch, minimising the RMSE between predictions and targets.

    Only the first element of each input tuple (the image) is fed to the
    model; the remaining elements of the batch inputs are ignored.

    Args:
        epoch: Epoch index; used only in the progress message.
        model: Network mapping an image batch to a tensor shaped like the target.
        optimizer: Optimizer already constructed over ``model``'s parameters.
        loader: Source of ``(inputs, target)`` batches, where ``inputs[0]`` is
            the image batch. It must support ``len()`` and expose ``.dataset``
            (as ``torch.utils.data.DataLoader`` does). Defaults to the
            notebook-level ``train_loader``, so existing three-argument calls
            behave as before.

    Returns:
        The mean per-batch RMSE over the epoch as a float, or ``nan`` when the
        loader yields no batches.
    """
    if loader is None:
        loader = train_loader

    # Send batches to wherever the model's weights live instead of relying on
    # a global device variable.
    first_param = next(model.parameters(), None)
    run_device = first_param.device if first_param is not None else torch.device("cpu")

    criterion = nn.MSELoss()  # built once instead of once per batch
    model.train()

    loss_sum = 0.0
    n_batches = 0
    for batch_idx, (data, target) in enumerate(loader):
        img = data[0].to(run_device)  # use the top view
        # Targets may arrive as float64 (e.g. collated NumPy arrays); the model
        # output is float32, so cast before computing the loss.
        target = target.to(run_device).float()

        optimizer.zero_grad()
        output = model(img)
        loss = torch.sqrt(criterion(output, target))
        loss.backward()
        optimizer.step()

        # Accumulate on-device; converting to a Python float every step would
        # force a host/device synchronisation per batch.
        loss_sum = loss_sum + loss.detach()
        n_batches += 1

        if batch_idx % 20 == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(img), len(loader.dataset),
                100. * batch_idx / len(loader), loss.item()))

    if n_batches == 0:
        return float('nan')
    return float(loss_sum) / n_batches

In [8]:
def test(model, data_path='lazydata/', batch_size=32):
    """Predict on the test split and write a submission CSV.

    The file is named ``submission-<model name>.csv`` and has an integer
    ``ID`` column (the sample's ``field_id``) followed by ``FINGER_POS_1`` ..
    ``FINGER_POS_12``.

    Args:
        model: Trained network mapping an image batch to 12 values per sample.
            ``model.name`` is used in the file name; the class name is used
            when the attribute is missing.
        data_path: Dataset root handed to ``LazyLoadDataset``.
        batch_size: Number of test samples per forward pass. The model is in
            eval mode, so each row is predicted independently of the others
            in its batch.
    """
    model_name = getattr(model, 'name', type(model).__name__)
    outfile = 'submission-' + model_name + '.csv'
    titles = ['ID'] + ['FINGER_POS_{}'.format(i) for i in range(1, 13)]

    transform = transforms.Compose([transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
        ])
    test_data = LazyLoadDataset(path = data_path, train = False, transform = transform)
    test_loader = DataLoader(test_data, batch_size=batch_size, shuffle=False)

    # Run on the device that holds the model's weights rather than a
    # hard-coded 'cuda', so CPU-only runs work too.
    first_param = next(model.parameters(), None)
    run_device = first_param.device if first_param is not None else torch.device("cpu")

    ids = []
    preds = []
    model.eval()
    with torch.no_grad():  # inference only: skip building the autograd graph
        for data in test_loader:
            # Please remember to modify this loop, input and output based on your model/architecture
            img = data[0]
            field_ids = data[-1]
            out = model(img.to(run_device)).cpu().numpy()
            preds.append(out.reshape(len(out), -1))
            # IDs are kept as Python ints instead of being inserted into the
            # float prediction array, which would round large IDs.
            ids.extend(int(i) for i in field_ids)

    # Build the frame once; concatenating inside the loop is quadratic.
    if preds:
        df = pd.DataFrame(np.concatenate(preds, axis=0), columns=titles[1:])
    else:
        df = pd.DataFrame(columns=titles[1:])
    df.insert(0, 'ID', np.asarray(ids, dtype=np.int64))

    # to_csv opens and closes the file itself; no separate handle is needed.
    df.to_csv(outfile, index = False)
    print("Written to csv file {}".format(outfile))

# Simple CNN

In [9]:
class CNN(nn.Module):
    """Small two-convolution network followed by a three-layer regression head.

    Layout: conv(5x5) -> ReLU -> maxpool(2) -> conv(5x5, 16 channels) -> ReLU
    -> maxpool(2) -> flatten -> fc1 -> ReLU -> fc2 (64 units) -> ReLU -> fc3.

    Args:
        input_size: Number of channels of the input image.
        conv_feature: Number of output channels of the first convolution.
        fc_feature: Width of the first fully connected layer.
        output_size: Number of values predicted per sample.
        image_size: Spatial size of the input, either an int for square images
            or a ``(height, width)`` pair. The default of 224 gives the
            original ``16 * 53 * 53`` flattened size.
    """

    def __init__(self, input_size, conv_feature, fc_feature, output_size, image_size=224):
        super().__init__()
        self.name = 'CNN'
        self.conv1 = nn.Conv2d(input_size, conv_feature, kernel_size=5)
        self.conv2 = nn.Conv2d(conv_feature, 16, kernel_size=5)
        self.pool = nn.MaxPool2d(2, 2)
        self.fc1 = nn.Linear(16 * self._pooled_area(image_size), fc_feature)
        self.fc2 = nn.Linear(fc_feature, 64)
        self.fc3 = nn.Linear(64, output_size)

    @staticmethod
    def _pooled_area(image_size):
        """Return height * width of the feature map left after both conv/pool stages."""
        if isinstance(image_size, int):
            height = width = image_size
        else:
            height, width = image_size

        def shrink(n):
            # Each stage: unpadded 5x5 convolution (-4), then 2x2 max-pool (floor halving).
            for _ in range(2):
                n = (n - 4) // 2
            return n

        out_h, out_w = shrink(height), shrink(width)
        if out_h <= 0 or out_w <= 0:
            raise ValueError("image_size {} is too small for this network".format(image_size))
        return out_h * out_w

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        # Flatten per sample. Unlike view(-1, N), this cannot silently move
        # elements between samples when the input size is unexpected; fc1
        # raises a shape error instead.
        x = torch.flatten(x, 1)
        # Without activations, three stacked Linear layers are equivalent to a
        # single affine map, so the hidden layers would add no capacity.
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        return self.fc3(x)

In [10]:
input_size = 3  # passed to the model constructors below as the number of input image channels
output_size = 12  # number of values predicted per sample (each target Y has shape (12,))

In [11]:
# Training settings 
conv_features = 6
fc_features = 120

model_cnn = CNN(input_size, conv_features, fc_features, output_size)
model_cnn.to(device)
print("trainning on:", device)
optimizer = optim.Adam(model_cnn.parameters(), lr=0.001)

for epoch in range(0, num_epochs):
    train(epoch, model_cnn, optimizer)

trainning on: cuda:0


In [12]:
# Predict on the test split with the trained CNN and write its submission CSV.
test(model_cnn)

Written to csv file submission-CNN.csv


In [13]:
# Display the layer summary of the CNN.
model_cnn

CNN(
  (conv1): Conv2d(3, 6, kernel_size=(5, 5), stride=(1, 1))
  (conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (fc1): Linear(in_features=44944, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=64, bias=True)
  (fc3): Linear(in_features=64, out_features=12, bias=True)
)

# ResNet50

In [14]:
# Load torchvision's ResNet-50 initialised with the IMAGENET1K_V2 pretrained weights.
model = torchvision.models.resnet50(weights='ResNet50_Weights.IMAGENET1K_V2')

In [15]:
# Tag the model so test() can name its output file.
model.name = 'ResNet50'

# Swap the final fully connected layer for a small head that outputs 12 values
# from the 2048-dimensional pooled features.
model.fc = nn.Sequential(nn.Linear(2048, 120), nn.ReLU(), nn.Linear(120, 12))

model.to(device)

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

In [16]:
# Optimise every parameter of the ResNet (backbone and new head), then
# write the submission file.
optimizer = optim.Adam(params=model.parameters(), lr=0.001)

for epoch in range(num_epochs):
    train(epoch, model, optimizer)

test(model)

Written to csv file submission-ResNet50.csv


# Dropout layer

In [17]:
class CNNDropout(nn.Module):
    """Two-convolution network whose regression head uses dropout after fc1.

    Layout: conv(5x5) -> ReLU -> maxpool(2) -> conv(5x5, 16 channels) -> ReLU
    -> maxpool(2) -> flatten -> fc1 -> ReLU -> dropout(p=0.5) -> fc2 (240
    units) -> ReLU -> fc3.

    Args:
        input_size: Number of channels of the input image.
        conv_feature: Number of output channels of the first convolution.
        fc_feature: Width of the first fully connected layer.
        output_size: Number of values predicted per sample.
        image_size: Spatial size of the input, either an int for square images
            or a ``(height, width)`` pair. The default of 224 gives the
            original ``16 * 53 * 53`` flattened size.
    """

    def __init__(self, input_size, conv_feature, fc_feature, output_size, image_size=224):
        super().__init__()
        self.name = 'CNN-Dropout'
        # Honour the input_size argument rather than a hard-coded 3 channels.
        self.conv1 = nn.Conv2d(input_size, conv_feature, kernel_size=5)
        self.conv2 = nn.Conv2d(conv_feature, 16, kernel_size=5)
        self.pool = nn.MaxPool2d(2, 2)
        self.fc1 = nn.Linear(16 * self._pooled_area(image_size), fc_feature)
        self.dropout = nn.Dropout(p=0.5)
        self.fc2 = nn.Linear(fc_feature, 240)
        self.fc3 = nn.Linear(240, output_size)

    @staticmethod
    def _pooled_area(image_size):
        """Return height * width of the feature map left after both conv/pool stages."""
        if isinstance(image_size, int):
            height = width = image_size
        else:
            height, width = image_size

        def shrink(n):
            # Each stage: unpadded 5x5 convolution (-4), then 2x2 max-pool (floor halving).
            for _ in range(2):
                n = (n - 4) // 2
            return n

        out_h, out_w = shrink(height), shrink(width)
        if out_h <= 0 or out_w <= 0:
            raise ValueError("image_size {} is too small for this network".format(image_size))
        return out_h * out_w

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        # Flatten per sample. Unlike view(-1, N), this cannot silently move
        # elements between samples when the input size is unexpected; fc1
        # raises a shape error instead.
        x = torch.flatten(x, 1)
        # Activations keep the stacked Linear layers from collapsing into a
        # single affine map.
        x = F.relu(self.fc1(x))
        x = self.dropout(x)
        x = F.relu(self.fc2(x))
        return self.fc3(x)

In [18]:
# Build the dropout variant on the selected device, with a fresh Adam
# optimizer over its parameters.
model_cnn_dropout = CNNDropout(
    input_size, conv_features, fc_features, output_size
).to(device)
optimizer = optim.Adam(params=model_cnn_dropout.parameters(), lr=0.001)

In [19]:
# Train the dropout model for the configured number of epochs ...
for epoch in range(num_epochs):
    train(epoch, model_cnn_dropout, optimizer)

# ... then write its predictions for the test split.
test(model_cnn_dropout)

Written to csv file submission-CNN-Dropout.csv


In [20]:
# Print the selected device once more at the end of the run.
print(device)

cuda:0
