# CSCI-UA. 473 Intro to Machine Learning, Fall 2022
# Final Project Competition: Fingertips Position Estimation of a Robot Hand
# Kayan Shih (ks5250)

## Import Data from Kaggle

In [None]:
# Mount Google Drive into the Colab filesystem; a later cell copies the
# Kaggle API token (kaggle.json) from /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
! pip install kaggle

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [None]:
# -p keeps this cell re-runnable: plain mkdir errors out when the directory already exists.
! mkdir -p ~/.kaggle

In [None]:
# Copy the API token from Drive and restrict it to the current user so the
# credential is not left readable by others.
!cp /content/drive/MyDrive/kaggle.json ~/.kaggle/kaggle.json && chmod 600 ~/.kaggle/kaggle.json

In [None]:
# Download the competition archive into the current directory with the Kaggle CLI.
# Alternative spelling of the same command, kept for reference:
# kaggle competitions download -c csci-ua-473-intro-to-machine-learning-fall22
! kaggle competitions download csci-ua-473-intro-to-machine-learning-fall22

Downloading csci-ua-473-intro-to-machine-learning-fall22.zip to /content
100% 2.07G/2.08G [00:12<00:00, 129MB/s]
100% 2.08G/2.08G [00:12<00:00, 183MB/s]


In [None]:
# -o overwrites already-extracted files without prompting, so re-running the
# notebook does not stall on unzip's interactive "replace?" question.
! unzip -o csci-ua-473-intro-to-machine-learning-fall22.zip

Archive:  csci-ua-473-intro-to-machine-learning-fall22.zip
  inflating: test/test/testX.pt      
  inflating: train/train/trainX.pt   
  inflating: train/train/trainY.pt   


## Read Data

In [None]:
import random
import os
import numpy as np
import matplotlib.pyplot as plt

In [None]:
import torch
import torch.nn as nn

from torch.utils.data import Dataset
from torch.utils.data import DataLoader
from torch.utils.data import random_split

from torch.optim import Adam

from torchvision import transforms
from torchvision import models

In [None]:
# Run on the GPU when one is visible to PyTorch, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

In [None]:
class Train_dataest(Dataset):
    """Training dataset yielding one RGB-D image and its target per sample.

    ``data_file`` is loaded into ``self.data`` and indexed as ``data[0]`` (RGB
    images) and ``data[1]`` (depth images); ``target_file`` is loaded into
    ``self.target`` and indexed as ``target[0]``. For every sample only the
    image at position 1 (the middle one) is used.
    NOTE(review): assumes RGB is (N, views, 3, H, W) and depth is
    (N, views, H, W) -- confirm against the data files.

    Args:
        root_path: Directory containing ``data_file`` and ``target_file``.
        data_file: File name of the saved inputs.
        target_file: File name of the saved targets.
        preprocess_rgb: Optional callable applied to the RGB image. ``None``
            leaves the image untouched.
        preprocess_depth: Optional callable applied to the depth image after a
            leading channel axis has been added. ``None`` leaves it untouched.
    """

    def __init__(self, root_path, data_file='trainX.pt', target_file='trainY.pt',
                 preprocess_rgb=None , preprocess_depth = None):
        self.preprocess_rgb = preprocess_rgb
        self.preprocess_depth = preprocess_depth
        # NOTE: torch.load unpickles its input; only point this at trusted files.
        self.data = torch.load(os.path.join(root_path, data_file))
        self.target = torch.load(os.path.join(root_path, target_file))

    def __getitem__(self, index):
        """Return ``(rgbd, target)`` where ``rgbd`` stacks RGB and depth on dim 0."""
        # Use the middle image (index 1) of each sample; images 0 and 2 are ignored.
        rgb_ = self.data[0][index][1]
        depth = self.data[1][index][1].unsqueeze(0)

        # Both pipelines may start with the same *random* geometric transform.
        # Replaying the RNG state for the depth pipeline makes it draw the same
        # parameters as the RGB pipeline, so the two crops cover the same
        # region and the stacked channels stay pixel-aligned.
        rng_state = torch.get_rng_state()
        if self.preprocess_rgb is not None:
            rgb_ = self.preprocess_rgb(rgb_)
        if self.preprocess_depth is not None:
            # fork_rng restores the post-RGB RNG state on exit, so the global
            # random stream keeps advancing normally between samples.
            with torch.random.fork_rng(devices=[]):
                torch.set_rng_state(rng_state)
                depth = self.preprocess_depth(depth)

        depth = depth / 1000
        # Scale depth to a maximum of 1; skip when the map is all zeros to
        # avoid filling the channel with NaNs.
        peak = torch.max(depth)
        if peak > 0:
            depth = depth / peak

        # Combine RGB and depth into a single multi-channel image.
        rgbdimg = torch.cat([rgb_, depth], dim=0)
        target = self.target[0][index]

        return rgbdimg, target

    def __len__(self):
        """Number of samples, taken from the first dimension of the RGB data."""
        return self.data[0].shape[0]
    
def preprocess(index, data, preprocess_rgb, preprocess_depth):
    """Build a single-sample RGB-D batch from ``data`` for inference.

    Takes the middle image (position 1) of sample ``index`` from ``data[0]``
    (RGB) and ``data[1]`` (depth), applies the given transforms, scales depth
    to a maximum of 1 and stacks both along the channel axis.

    Args:
        index: Sample index into ``data[0]`` / ``data[1]``.
        data: Indexable whose first two entries hold the RGB and depth tensors.
        preprocess_rgb: Callable applied to the RGB image, or ``None``.
        preprocess_depth: Callable applied to the depth image (after a leading
            channel axis is added), or ``None``.

    Returns:
        Tensor with a leading batch axis of size 1: ``(1, C_rgb + 1, H, W)``.
    """
    rgb = data[0][index][1]
    # unsqueeze adds the channel axis without touching the pixel layout
    # (Tensor.resize is deprecated and merely reinterprets storage).
    depth = data[1][index][1].unsqueeze(0)

    # Replay the RNG state for the depth transform so that, if both transforms
    # start with the same random geometric step, they use the same parameters.
    rng_state = torch.get_rng_state()
    if preprocess_rgb is not None:
        rgb = preprocess_rgb(rgb)
    if preprocess_depth is not None:
        with torch.random.fork_rng(devices=[]):
            torch.set_rng_state(rng_state)
            depth = preprocess_depth(depth)

    depth = depth / 1000
    # Guard against an all-zero depth map, which would otherwise become NaN.
    peak = torch.max(depth)
    if peak > 0:
        depth = depth / peak

    rgbd = torch.cat([rgb, depth], dim=0)
    return rgbd.unsqueeze(0)

In [None]:
# Geometry shared by both modalities: a random crop rescaled to 224 x 224.
croper = transforms.RandomResizedCrop(size=[224, 224])

# Per-channel mean / std for the RGB planes, chosen to suit the pretrained
# ResNet-34 backbone used later in the notebook.
rgb_scaler = transforms.Normalize(
    mean=[0.485, 0.456, 0.406],
    std=[0.229, 0.224, 0.225],
)

# RGB pipeline: crop, then colour jitter, then normalisation.
preprocess_rgb = transforms.Compose([
    croper,
    transforms.ColorJitter(brightness=0.5, contrast=0.5, hue=0.5),
    rgb_scaler,
])

# Depth pipeline: only the crop.
preprocess_depth = transforms.Compose([croper])

# Load the training tensors and attach both pipelines.
dataset = Train_dataest(
    '/content/train/train',
    preprocess_rgb=preprocess_rgb,
    preprocess_depth=preprocess_depth,
)

In [None]:
# Split the labelled data 70 / 30 into a training set and a held-out set.
# The held-out size is derived from the training size: rounding both
# fractions independently can give sizes that do not add up to len(dataset)
# (e.g. 15 samples -> 10 + 4), which makes random_split raise.
train_size = round(0.7 * len(dataset))
test_size = len(dataset) - train_size

# A seeded generator makes the split identical on every Restart & Run All.
train_dataset, test_dataset = random_split(dataset,[train_size, test_size],
                                           generator=torch.Generator().manual_seed(0))

train_loader = DataLoader(dataset=train_dataset, batch_size=16,
                          num_workers=0,
                          pin_memory=True,shuffle=True)
# Evaluation does not depend on sample order, so the held-out loader is not shuffled.
test_loader = DataLoader(dataset=test_dataset, batch_size=16, 
                         num_workers=0,
                         pin_memory=True,shuffle=False)
# The subsets keep their own reference to the underlying dataset.
del dataset

## Pretrained Model ResNet34

In [None]:
net = models.resnet34(pretrained=True) # imagenet rgb

# Widen the stem from 3 to 4 input channels (RGB + depth). The pretrained RGB
# filters are copied into the new layer instead of being thrown away, and the
# extra depth channel starts from their per-filter mean.
pretrained_conv1 = net.conv1
net.conv1 = nn.Conv2d(4, 64, kernel_size=7, stride=2, padding=3,bias=False) # 3 -> 4_channels
with torch.no_grad():
    net.conv1.weight[:, :3].copy_(pretrained_conv1.weight)
    net.conv1.weight[:, 3:].copy_(pretrained_conv1.weight.mean(dim=1, keepdim=True))
del pretrained_conv1

# Replace the classification head with a 12-value regression head.
net.fc = nn.Linear(net.fc.in_features, 12) # 1000 -> 12 outputs

net = net.to(device)
# Summed (not averaged) squared error over every element of the batch.
criterion = nn.MSELoss(reduction='sum')

my_lr = 1e-3
epochs = 200
optimizer = Adam(net.parameters(), lr=my_lr)

# Both lists store the same quantity that is printed: the summed batch loss
# averaged over the number of batches, so the two curves are comparable.
train_loss_list = []
val_loss_list = []
for epoch in range(epochs):
    # Training mode: batch-norm layers use batch statistics.
    net.train()
    print("Epochs :{}".format(epoch+1))
    train_loss_value = 0
    for rgbdimg_train, target_train in train_loader:
        rgbdimg_train = rgbdimg_train.to(device).to(torch.float32)
        target_train = target_train.to(device).to(torch.float32)
        optimizer.zero_grad()
        outputs_train = net(rgbdimg_train)
        train_loss_object = criterion(outputs_train, target_train)
        train_loss_value += train_loss_object.item()
        train_loss_object.backward()
        optimizer.step()
    # max(..., 1) keeps an empty loader from raising instead of reporting 0.
    train_loss_mean = train_loss_value / max(len(train_loader), 1)
    print('train loss :{}'.format(train_loss_mean))
    train_loss_list.append(train_loss_mean)

    # Evaluate on the held-out split every 10th epoch (epochs 1, 11, 21, ...).
    if  epoch % 10 == 0:
        net.eval()
        val_loss_value = 0
        print('')
        # No gradients are needed for evaluation; skipping autograd saves
        # memory and time.
        with torch.no_grad():
            for rgbdimg_test, target_test in test_loader:
                rgbdimg_test = rgbdimg_test.to(device).to(torch.float32)
                target_test = target_test.to(device).to(torch.float32)
                outputs_test = net(rgbdimg_test)
                val_loss_object = criterion(outputs_test, target_test)
                val_loss_value += val_loss_object.item()

        val_loss_mean = val_loss_value / max(len(test_loader), 1)
        print('Val loss :{}'.format(val_loss_mean))
        val_loss_list.append(val_loss_mean)
del train_loader
del test_loader

## Submission File Generation

In [None]:
import pickle
import pandas as pd
# Predict the unseen data and output the result in CSV file
outfile = 'submission.csv'

# The CSV is written by DataFrame.to_csv(outfile) further down, so no file
# handle is opened here (the previous open(outfile, 'w') was never used or closed).

# Column order of the submission: the sample ID followed by the 12 predicted values.
titles = ['ID', 'FINGER_POS_1', 'FINGER_POS_2', 'FINGER_POS_3', 'FINGER_POS_4', 'FINGER_POS_5', 'FINGER_POS_6',
         'FINGER_POS_7', 'FINGER_POS_8', 'FINGER_POS_9', 'FINGER_POS_10', 'FINGER_POS_11', 'FINGER_POS_12']
preds = []

# NOTE: torch.load unpickles its input; only load files from a trusted source.
unlabeled_data = torch.load('/content/test/test/testX.pt')

In [None]:
net.eval()

# Inference must be deterministic: replace the random crop / colour jitter used
# for training with a plain resize, keeping the same RGB normalisation.
eval_preprocess_rgb = transforms.Compose([transforms.Resize([224, 224]), rgb_scaler])
eval_preprocess_depth = transforms.Resize([224, 224])

preds = []
# The last entry of the loaded test data holds the sample IDs.
file_ids = unlabeled_data[-1]
# No gradients are needed for prediction.
with torch.no_grad():
    for i in range(len(file_ids)):
        rgbd = preprocess(i, unlabeled_data, eval_preprocess_rgb, eval_preprocess_depth)
        # Use the notebook-wide device (works on CPU-only runtimes) and the
        # same float32 cast as the training loop.
        output = net(rgbd.to(device).to(torch.float32))
        preds.append(output[0].cpu().numpy())

# One row per test sample: ID column followed by the 12 predicted values.
df = pd.concat([pd.DataFrame(file_ids), pd.DataFrame.from_records(preds)], axis = 1)
df.columns = titles
df.to_csv(outfile, index = False)
print("Written to csv file {}".format(outfile))
del unlabeled_data

Written to csv file submission.csv


In [None]:
# Display the submission table that was just written to disk.
df

Unnamed: 0,ID,FINGER_POS_1,FINGER_POS_2,FINGER_POS_3,FINGER_POS_4,FINGER_POS_5,FINGER_POS_6,FINGER_POS_7,FINGER_POS_8,FINGER_POS_9,FINGER_POS_10,FINGER_POS_11,FINGER_POS_12
0,146,0.040850,0.053567,0.124052,0.041020,-0.000586,0.123866,0.065628,-0.048473,0.090918,0.050931,0.016365,-0.050495
1,1474,0.023339,0.055574,0.131468,0.030853,-0.000672,0.131411,0.036262,-0.053428,0.127186,0.050832,0.026466,-0.051282
2,190,0.031860,0.053128,0.127088,0.074796,0.000529,0.107552,0.041146,-0.039618,-0.019516,0.070046,0.028570,-0.057650
3,1544,0.030768,0.053339,0.129196,0.061142,-0.002598,0.107207,0.071075,-0.042038,0.049338,0.052334,0.024130,-0.056954
4,952,0.059204,0.050432,0.116065,0.069378,-0.000688,0.107157,0.064839,-0.041389,0.031615,0.054799,0.023573,-0.030704
...,...,...,...,...,...,...,...,...,...,...,...,...,...
844,2647,0.047880,0.052468,0.124654,0.086008,-0.003197,0.077573,0.062439,-0.039430,0.028258,0.054481,0.029515,-0.042459
845,3970,0.037197,0.053662,0.126774,0.067437,-0.002477,0.083801,0.058669,-0.041180,0.033022,0.053480,0.016634,-0.037808
846,1597,0.050042,0.052912,0.121138,0.059197,-0.000933,0.111316,0.059760,-0.052786,0.102429,0.047480,0.033701,-0.039864
847,421,0.033585,0.052180,0.127664,0.050057,-0.001267,0.115063,0.065529,-0.037873,0.008430,0.058089,0.012385,-0.064409
