# Final Project

&copy; 2022 Kaiwen Zhou

In [1]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import pickle as pkl
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision.utils import save_image
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
import cv2
import pickle as pkl

In [2]:
# Run on the first CUDA GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [3]:
def normalize_depth(dep):
    """
    Standardize every depth map to zero mean and unit variance.

    Each entry along the first axis of `dep` is normalized independently
    with its own mean and standard deviation.

    Args:
        dep (array-like): stack of depth maps indexed along the first axis.

    Returns:
        np.ndarray: floating-point copy of `dep` with every map standardized.
            The caller's array is left untouched.
    """
    # Work on a copy: normalizing in place silently modified the caller's array
    # and truncated the result whenever `dep` had an integer dtype.
    out = np.array(dep, copy=True)
    if not np.issubdtype(out.dtype, np.floating):
        out = out.astype(np.float64)

    for i in range(len(out)):
        mean = out[i].mean()
        std = out[i].std()
        # A constant map has std == 0; dividing by it would fill the map with
        # NaN/inf. Centering alone maps such a map to all zeros instead.
        out[i] = (out[i] - mean) / (std if std > 0 else 1.0)
    return out

In [4]:
class LazyLoadDataset(Dataset):
    """
    Dataset that reads one sample from disk per `__getitem__` call.

    Loading lazily keeps memory usage low: only the samples of the current
    batch are held in memory instead of the whole data set.

    Expected layout under `path` (which must end with a path separator):
        train/X/<sample>/rgb/{0,1,2}.png, train/X/<sample>/depth.npy,
        train/X/<sample>/field_id.pkl, train/Y/<sample>.npy
        test/X/<sample>/...   (no labels)
    """
    def __init__(self, path, train=True, transform=None):
        """
        Args:
            path (str): root directory of the data, ending with "/".
            train (bool): read the "train/" split when truthy, else "test/".
            transform (callable, optional): applied to each of the three RGB
                images after they are read.
        """
        self.transform = transform
        self.train = train

        # Paths to the inputs X and the labels Y of the selected split.
        path = path + ("train/" if train else "test/")
        self.pathX = path + "X/"
        self.pathY = path + "Y/"

        # Names of the sample folders in X (the label files in Y use the same
        # names). os.listdir returns entries in arbitrary order, so sort them
        # to make index -> sample stable across runs and machines.
        self.data = sorted(os.listdir(self.pathX))

    def _read_rgb(self, sample_dir, view):
        """Read rgb/<view>.png of one sample and apply the transform, if any."""
        file_name = sample_dir + "/rgb/" + str(view) + ".png"
        # NOTE(review): cv2.imread returns channels in BGR order, while the
        # ImageNet mean/std passed to Normalize in this notebook are listed in
        # RGB order. Left unchanged so existing results stay comparable.
        img = cv2.imread(file_name)
        if img is None:
            # cv2.imread signals an unreadable/missing file by returning None.
            raise FileNotFoundError("Could not read image: " + file_name)
        if self.transform is not None:
            img = self.transform(img)
        return img

    def __getitem__(self, idx):
        """
        Load one sample.

        Args:
            idx (int): position in `self.data`.

        Returns:
            tuple: ((img0, img1, img2, depth, field_id), Y) where Y holds the
                labels for the train split and 12 zeros for the test split.
        """
        f = self.data[idx]
        sample_dir = self.pathX + f

        # X: the three RGB views.
        img0 = self._read_rgb(sample_dir, 0)
        img1 = self._read_rgb(sample_dir, 1)
        img2 = self._read_rgb(sample_dir, 2)

        # X: depth maps, scaled by 1/1000 (presumably mm -> m; TODO confirm)
        # and then standardized per map.
        depth = np.load(sample_dir + "/depth.npy") / 1000
        depth = normalize_depth(depth)

        # X: sample identifier. The context manager closes the file handle.
        # NOTE: pickle.load can execute arbitrary code; only use trusted data.
        with open(sample_dir + "/field_id.pkl", "rb") as handle:
            field_id = pkl.load(handle)

        # Y: labels exist only for the train split. Truthiness is used here so
        # that the choice matches how the split path was selected in __init__.
        if self.train:
            Y = np.load(self.pathY + f + ".npy")
        else:
            Y = np.zeros(12)

        return (img0, img1, img2, depth, field_id), Y

    def __len__(self):
        """Return the number of samples in the selected split."""
        return len(self.data)

In [5]:
# Convert each image to a tensor and normalize it with the ImageNet channel
# means and standard deviations. The same transform is used for both splits.
image_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
])

# Training split. Shuffling makes the batches differ between epochs.
# One batch of the training loader contains:
#   img0, img1, img2: torch.Size([64, 3, 224, 224])
#   depth:            torch.Size([64, 3, 224, 224])
#   field_id:         64 identifiers
#   Y:                torch.Size([64, 12])
train_dataset = LazyLoadDataset("./lazydata/", train=True, transform=image_transform)
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=64, shuffle=True)

# Test split. Same layout with a batch size of 100; Y is a placeholder of zeros.
# Shuffling is disabled: it brings nothing at inference time and only makes the
# order of the predictions differ from run to run.
test_dataset = LazyLoadDataset("./lazydata/", train=False, transform=image_transform)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=100, shuffle=False)


In [6]:
# Number of (inputs, labels) pairs in the training split (3396 in the recorded run).
print(len(train_dataset))
# Number of pairs in the test split (849 in the recorded run).
print(len(test_dataset))

# Sanity check: print the shapes of the first training batch, then stop.
for (img0, img1, img2, depth, field_id), Y in train_loader:
    print(img0.shape)
    print(depth.shape)
    
    break

# Same check for the depth tensor of the first test batch.
for (img0, img1, img2, depth, field_id), Y  in test_loader:
    print(depth.shape)
    break

3396
849
torch.Size([64, 3, 224, 224])
torch.Size([64, 3, 224, 224])
torch.Size([100, 3, 224, 224])


In [7]:
# Training batch size. It must match the batch_size given to train_loader (64),
# which is hard-coded separately where the loader is created.
train_batch_size = 64

# Try different input datasets

In [8]:
def merge_img0_depth0(img, dep):
    """
    Append depth map 0 to every image as an extra channel (RGB -> RGBD).

    Args:
        img (torch.Tensor): batch of images, shape (N, C, H, W).
        dep (torch.Tensor): batch of depth maps, shape (N, D, H, W) with D >= 1.

    Returns:
        torch.Tensor: shape (N, C + 1, H, W), in the dtype of `img`.
    """
    # dep[:, 0:1] keeps the channel axis, so one concatenation over the whole
    # batch replaces the per-sample loop and works for any H x W, not only
    # 224 x 224. The depth is cast to the image dtype because depth maps loaded
    # from numpy are float64 while image tensors are float32.
    depth_channel = dep[:, 0:1].to(dtype=img.dtype)
    return torch.cat((img, depth_channel), dim=1)

def merge_img1_depth1(img, dep):
    """
    Append depth map 1 to every image as an extra channel (RGB -> RGBD).

    Args:
        img (torch.Tensor): batch of images, shape (N, C, H, W).
        dep (torch.Tensor): batch of depth maps, shape (N, D, H, W) with D >= 2.

    Returns:
        torch.Tensor: shape (N, C + 1, H, W), in the dtype of `img`.
    """
    # dep[:, 1:2] keeps the channel axis, so one concatenation over the whole
    # batch replaces the per-sample loop and works for any H x W, not only
    # 224 x 224. The depth is cast to the image dtype because depth maps loaded
    # from numpy are float64 while image tensors are float32.
    depth_channel = dep[:, 1:2].to(dtype=img.dtype)
    return torch.cat((img, depth_channel), dim=1)

def merge_img0_depth0_img1_depth1(img0,img1, dep):
    """
    Build RGBD inputs for views 0 and 1 and interleave them into one batch.

    Row 2*i of the result is view 0 of sample i (img0[i] + depth map 0) and
    row 2*i + 1 is view 1 of sample i (img1[i] + depth map 1). Labels for the
    merged batch therefore have to be repeated per sample, e.g.
    `target.repeat_interleave(2, dim=0)`, not concatenated end to end.

    Args:
        img0 (torch.Tensor): view-0 images, shape (N, C, H, W).
        img1 (torch.Tensor): view-1 images, shape (N, C, H, W).
        dep (torch.Tensor): depth maps, shape (N, D, H, W) with D >= 2.

    Returns:
        torch.Tensor: shape (2 * N, C + 1, H, W), in the dtype of the images.
    """
    # Depth is cast to the image dtype: depth maps loaded from numpy are
    # float64 while image tensors are float32.
    rgbd0 = torch.cat((img0, dep[:, 0:1].to(dtype=img0.dtype)), dim=1)
    rgbd1 = torch.cat((img1, dep[:, 1:2].to(dtype=img1.dtype)), dim=1)
    # Stacking on a new axis 1 and flattening axes 0 and 1 yields the order
    # s0v0, s0v1, s1v0, s1v1, ... for any H x W.
    return torch.stack((rgbd0, rgbd1), dim=1).flatten(0, 1)

def merge_img0_img1(img0,img1):
    """
    Interleave the images of views 0 and 1 into one batch.

    Row 2*i of the result is img0[i] and row 2*i + 1 is img1[i]. Labels for the
    merged batch therefore have to be repeated per sample, e.g.
    `target.repeat_interleave(2, dim=0)`, not concatenated end to end.

    Args:
        img0 (torch.Tensor): view-0 images, shape (N, C, H, W).
        img1 (torch.Tensor): view-1 images, shape (N, C, H, W).

    Returns:
        torch.Tensor: shape (2 * N, C, H, W).
    """
    # Stacking on a new axis 1 and flattening axes 0 and 1 gives the same
    # interleaved order as the per-sample loop, for any C, H and W.
    return torch.stack((img0, img1), dim=1).flatten(0, 1)

Q: In each data point, X-rgb-2.png and X-depth[2] do not seem to be consistent with each other, so we should probably not use them.


Create a dataset with 4-channel RGBD inputs.

In [9]:
def train(epoch, model, optimizer, loader=None, log_interval=1):
    """
    Train the model for one epoch, i.e. one full pass over the training data.

    Args:
        epoch (int): index of the current epoch (only used in the log line).
        model (nn.Module): model to train.
        optimizer (torch.optim.Optimizer): optimizer that updates the model.
        loader (DataLoader, optional): batches of
            ((img0, img1, img2, depth, field_id), target). Defaults to the
            notebook-level `train_loader`.
        log_interval (int): print the loss every `log_interval` batches
            (must be >= 1; the default logs every batch).
    """
    if loader is None:
        loader = train_loader

    # Send each batch to wherever the model lives, so that data and weights can
    # never end up on different devices. The notebook-level `device` is only
    # the fallback for a model without parameters.
    first_param = next(model.parameters(), None)
    run_device = first_param.device if first_param is not None else device

    model.train()  # Sets the model in training mode.

    samples_seen = 0  # samples processed before the current batch
    for batch_idx, ((img0, img1, img2, depth, field_id), target) in enumerate(loader):
        # Input selection: the first RGB view only.
        # Alternatives are `depth` or `merge_img0_img1(img0, img1)`. The merged
        # batch interleaves the two views of each sample, so its labels must be
        # `target.repeat_interleave(2, dim=0)`, not `torch.cat((target, target))`.
        data = img0

        # Labels are loaded from numpy as float64; the model outputs float32.
        target = target.float()
        data, target = data.to(run_device), target.to(run_device)

        # Zero out the old gradients; otherwise they accumulate across steps.
        optimizer.zero_grad()

        # Call the module itself rather than .forward() so that registered
        # hooks are run as well.
        output = model(data)

        # Root-mean-square error over all outputs of the batch.
        loss = torch.sqrt(F.mse_loss(output, target))

        # Compute the gradient of the loss for every trainable parameter, then
        # let the optimizer update the parameters from those gradients.
        loss.backward()
        optimizer.step()

        if batch_idx % log_interval == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, samples_seen, len(loader.dataset),
                100. * batch_idx / len(loader), loss.item()))

        # Count the real batch size instead of assuming a global constant, so
        # the progress stays correct for any loader and for a short last batch.
        samples_seen += len(data)
            
def test(model):
    """
    Test the model

    Args:
        model (nn.Module): model to test
        permute (function): function to permute the pixels (default: None)
        permutation_order (1D torch array): order of the permutation (default: None)
    """
    model.eval()  # Sets the model in evaluation mode.
    
    pred = {}
    for (img0, img1, img2, depth, field_id), target in test_loader:
        """
        data:   torch.Size([1000, 1, 28, 28]) -> 1000 images, 1 channel/image, 28*28 pixels/image 
        target: torch.Size([1000]) -> 1000 target output, (0-9), corresponding to 1000 images
        """
        # # img0 as input data
        data = img0
        #data = depth
        #data = merge_img0_img1(img0,img1)
        
        # send to device
        data, target = data.to(device), target.to(device)

        
        # compute the output generated by the model
        # output: torch.Size([1000, 10])
        output = model.forward(data)
        for i in range(len(field_id)):
            pred[field_id[i]] = output[i].data
        #print(output.shape)
    
    
    return pred

# Write a function for computing the total parameter count of the model

In [10]:
# function to count number of parameters
def get_n_params(model):
    """
    Count the trainable parameters of a model.

    Adds up the number of elements of every parameter tensor that has
    requires_grad=True, i.e. all trainable weights and bias terms.

    Args:
        model (nn.Module): model to inspect.

    Returns:
        int: total number of trainable scalar parameters.
    """
    # numel() always returns an int. np.prod(p.shape) returns the float 1.0 for
    # a 0-dim (scalar) parameter, which turned the whole count into a float.
    return sum(p.numel() for p in model.parameters() if p.requires_grad)

### Candidate convolutional networks

Optimizer: SGD with lr=0.007 and momentum=0.92 (the values used in the training cell below)

# Model 1

In [11]:
class CNN_1(nn.Module):
    """
    Two convolution/pooling stages followed by a two-layer regression head.

    Spatial sizes for a 224x224 input:
        conv1 (5x5): 224 -> 220, max-pool 2x2: 220 -> 110
        conv2 (5x5): 110 -> 106, max-pool 2x2: 106 -> 53
    The input width of `linear1` is fixed to 53*53*conv_feature, so the network
    only accepts 224x224 inputs.

    Args:
        input_size (int): number of input channels.
        conv_feature (int): number of feature maps of both convolutions.
        fc_feature (int): width of the hidden fully-connected layer.
        output_size (int): number of regression outputs.
    """
    def __init__(self, input_size, conv_feature, fc_feature, output_size):
        super().__init__()

        # Activation shared by every hidden layer.
        self.relu = nn.ReLU()

        # Convolutional feature extractor (pooling is applied in forward()).
        self.conv1 = nn.Conv2d(input_size, conv_feature, kernel_size=5)
        self.conv2 = nn.Conv2d(conv_feature, conv_feature, kernel_size=5)

        # Fully-connected head on the flattened 53x53 feature maps.
        self.flatten = nn.Flatten()
        self.linear1 = nn.Linear(conv_feature * 53 * 53, fc_feature)
        self.linear2 = nn.Linear(fc_feature, output_size)

    def forward(self, x):
        """
        Args:
            x (torch.Tensor): input batch of shape (N, input_size, 224, 224).

        Returns:
            torch.Tensor: raw regression outputs of shape (N, output_size).
        """
        # Each stage: convolution -> ReLU -> 2x2 max pooling.
        for conv in (self.conv1, self.conv2):
            x = F.max_pool2d(self.relu(conv(x)), kernel_size=2)

        hidden = self.relu(self.linear1(self.flatten(x)))

        # No activation on the output layer: this is a regression model.
        return self.linear2(hidden)

# Model 2

In [12]:
class CNN_2(nn.Module):
    """
    Three convolution/pooling stages followed by a four-layer regression head.

    Spatial sizes for a 224x224 input:
        conv1 (5x5): 224 -> 220, max-pool 2x2: 220 -> 110
        conv2 (5x5): 110 -> 106, max-pool 2x2: 106 -> 53
        conv3 (6x6):  53 ->  48, max-pool 2x2:  48 -> 24
    The head narrows conv_feature*24*24 -> conv_feature*12*12 ->
    conv_feature*6*6 -> fc_feature -> output_size. The input width of `linear1`
    is fixed, so the network only accepts 224x224 inputs.

    Args:
        input_size (int): number of input channels.
        conv_feature (int): number of feature maps of every convolution.
        fc_feature (int): width of the last hidden fully-connected layer.
        output_size (int): number of regression outputs.
    """
    def __init__(self, input_size, conv_feature, fc_feature, output_size):
        super().__init__()

        # Activation shared by every hidden layer.
        self.relu = nn.ReLU()

        # Convolutional feature extractor (pooling is applied in forward()).
        self.conv1 = nn.Conv2d(input_size, conv_feature, kernel_size=5)
        self.conv2 = nn.Conv2d(conv_feature, conv_feature, kernel_size=5)
        self.conv3 = nn.Conv2d(conv_feature, conv_feature, kernel_size=6)

        # Fully-connected head on the flattened 24x24 feature maps.
        self.flatten = nn.Flatten()
        self.linear1 = nn.Linear(conv_feature * 24 * 24, conv_feature * 12 * 12)
        self.linear2 = nn.Linear(conv_feature * 12 * 12, conv_feature * 6 * 6)
        self.linear3 = nn.Linear(conv_feature * 6 * 6, fc_feature)
        self.linear4 = nn.Linear(fc_feature, output_size)

    def forward(self, x):
        """
        Args:
            x (torch.Tensor): input batch of shape (N, input_size, 224, 224).

        Returns:
            torch.Tensor: raw regression outputs of shape (N, output_size).
        """
        # Each stage: convolution -> ReLU -> 2x2 max pooling.
        for conv in (self.conv1, self.conv2, self.conv3):
            x = F.max_pool2d(self.relu(conv(x)), kernel_size=2)

        x = self.flatten(x)

        # Hidden fully-connected layers, each followed by ReLU.
        for hidden in (self.linear1, self.linear2, self.linear3):
            x = self.relu(hidden(x))

        # No activation on the output layer: this is a regression model.
        return self.linear4(x)

# Model 3

In [13]:
class CNN_3(nn.Module):
    """
    Four convolution/pooling stages followed by a two-layer regression head.

    Spatial sizes for a 224x224 input:
        conv1 (5x5, conv_feature maps): 224 -> 220, max-pool 2x2: 220 -> 110
        conv2 (5x5, conv_feature maps): 110 -> 106, max-pool 2x2: 106 -> 53
        conv3 (6x6, 16 maps):            53 ->  48, max-pool 2x2:  48 -> 24
        conv4 (5x5, 24 maps):            24 ->  20, max-pool 2x2:  20 -> 10
    The input width of `linear1` is fixed to 24*10*10, so the network only
    accepts 224x224 inputs.

    Args:
        input_size (int): number of input channels.
        conv_feature (int): number of feature maps of the first two convolutions.
        fc_feature (int): width of the hidden fully-connected layer.
        output_size (int): number of regression outputs.
    """
    def __init__(self, input_size, conv_feature, fc_feature, output_size):
        super().__init__()

        # Activation shared by every hidden layer.
        self.relu = nn.ReLU()

        # Convolutional feature extractor (pooling is applied in forward()).
        self.conv1 = nn.Conv2d(input_size, conv_feature, kernel_size=5)
        self.conv2 = nn.Conv2d(conv_feature, conv_feature, kernel_size=5)
        self.conv3 = nn.Conv2d(conv_feature, 16, kernel_size=6)
        self.conv4 = nn.Conv2d(16, 24, kernel_size=5)

        # Fully-connected head on the flattened 10x10 feature maps.
        self.flatten = nn.Flatten()
        self.linear1 = nn.Linear(24 * 10 * 10, fc_feature)
        self.linear2 = nn.Linear(fc_feature, output_size)

    def forward(self, x):
        """
        Args:
            x (torch.Tensor): input batch of shape (N, input_size, 224, 224).

        Returns:
            torch.Tensor: raw regression outputs of shape (N, output_size).
        """
        # Each stage: convolution -> ReLU -> 2x2 max pooling.
        for conv in (self.conv1, self.conv2, self.conv3, self.conv4):
            x = F.max_pool2d(self.relu(conv(x)), kernel_size=2)

        hidden = self.relu(self.linear1(self.flatten(x)))

        # No activation on the output layer: this is a regression model.
        return self.linear2(hidden)

# Model 4

In [14]:
class CNN_4(nn.Module):
    """
    Three convolution/pooling stages followed by a two-layer regression head.

    Spatial sizes for a 224x224 input:
        conv1 (5x5, conv_feature maps): 224 -> 220, max-pool 2x2: 220 -> 110
        conv2 (5x5, conv_feature maps): 110 -> 106, max-pool 2x2: 106 -> 53
        conv3 (6x6, 16 maps):            53 ->  48, max-pool 2x2:  48 -> 24
    The input width of `linear1` is fixed to 16*24*24, so the network only
    accepts 224x224 inputs. The output layer is named `linear4`.

    Args:
        input_size (int): number of input channels.
        conv_feature (int): number of feature maps of the first two convolutions.
        fc_feature (int): width of the hidden fully-connected layer.
        output_size (int): number of regression outputs.
    """
    def __init__(self, input_size, conv_feature, fc_feature, output_size):
        super().__init__()

        # Activation shared by every hidden layer.
        self.relu = nn.ReLU()

        # Convolutional feature extractor (pooling is applied in forward()).
        self.conv1 = nn.Conv2d(input_size, conv_feature, kernel_size=5)
        self.conv2 = nn.Conv2d(conv_feature, conv_feature, kernel_size=5)
        self.conv3 = nn.Conv2d(conv_feature, 16, kernel_size=6)

        # Fully-connected head on the flattened 24x24 feature maps.
        self.flatten = nn.Flatten()
        self.linear1 = nn.Linear(16 * 24 * 24, fc_feature)
        self.linear4 = nn.Linear(fc_feature, output_size)

    def forward(self, x):
        """
        Args:
            x (torch.Tensor): input batch of shape (N, input_size, 224, 224).

        Returns:
            torch.Tensor: raw regression outputs of shape (N, output_size).
        """
        # Each stage: convolution -> ReLU -> 2x2 max pooling.
        for conv in (self.conv1, self.conv2, self.conv3):
            x = F.max_pool2d(self.relu(conv(x)), kernel_size=2)

        hidden = self.relu(self.linear1(self.flatten(x)))

        # No activation on the output layer: this is a regression model.
        return self.linear4(hidden)

### Train a ConvNet with the same number of parameters

In [18]:
# Training settings
# input_size_cnn is the number of input channels: 3 for a single RGB view,
# 4 when a depth channel is appended by one of the merge_* helpers.
input_size_cnn = 3 # number of channels
conv_features = 15 # number of feature maps
fc_features = 5000
output_size = 12

model_cnn = CNN_1(input_size_cnn, conv_features, fc_features, output_size) # create CNN model
model_cnn.to(device)
optimizer = optim.SGD(model_cnn.parameters(), lr=0.007, momentum=0.92)  # use SGD with learning rate 0.007 and momentum 0.92
print('Number of parameters: {}'.format(get_n_params(model_cnn)))

# NOTE: test_loss is created here but never filled in this cell.
test_loss = []
# Train for a single epoch.
for epoch in range(0, 1):
   
    train(epoch, model_cnn, optimizer)
        

Number of parameters: 210746792


# Predict Test Set

In [16]:
# Run the trained model on the test split; `pred` maps each field_id to its
# predicted output tensor. Run this cell only after the training cell.
pred  = test(model_cnn)

# Output csv file for submission

In [17]:
# Collect the predictions into a table, print it and write the submission file.
outfile = 'submission16.csv'

titles = ['ID', 'FINGER_POS_1', 'FINGER_POS_2', 'FINGER_POS_3', 'FINGER_POS_4', 'FINGER_POS_5', 'FINGER_POS_6',
         'FINGER_POS_7', 'FINGER_POS_8', 'FINGER_POS_9', 'FINGER_POS_10', 'FINGER_POS_11', 'FINGER_POS_12']

# One row per test sample: the ID followed by its predicted values.
preds = []
for key, item in pred.items():
    # Tensors are detached and moved to the CPU first; np.array() cannot
    # convert a tensor that lives on the GPU.
    if torch.is_tensor(item):
        values = item.detach().cpu().numpy()
    else:
        values = np.array(item)
    preds.append([key] + list(values))

df = pd.DataFrame(preds, columns=titles)
print(df)

# DataFrame.to_csv opens and closes the file itself. The extra
# open(outfile, 'w') that used to precede it was never written to nor closed,
# so it has been removed.
df.to_csv(outfile, index = False)
print("Written to csv file {}".format(outfile))

       ID  FINGER_POS_1  FINGER_POS_2  FINGER_POS_3  FINGER_POS_4  \
0    3596      0.049071      0.049423      0.125850      0.094789   
1     464      0.034041      0.048613      0.125433      0.023360   
2    3315      0.075615      0.043187      0.106857      0.097463   
3     180      0.027825      0.054264      0.128523      0.037525   
4    3488      0.069998      0.045921      0.106370      0.069000   
..    ...           ...           ...           ...           ...   
844  2408      0.080638      0.053840      0.096375      0.086239   
845  1437      0.068833      0.049884      0.119150      0.111676   
846  3189      0.035492      0.051378      0.127081      0.049323   
847  3258      0.068011      0.054254      0.084629      0.082881   
848   600      0.031115      0.052001      0.124860      0.057759   

     FINGER_POS_5  FINGER_POS_6  FINGER_POS_7  FINGER_POS_8  FINGER_POS_9  \
0        0.003020      0.083745      0.084146     -0.033510     -0.012411   
1       -0.003939