# model selection

My initial model was pretty random, somehow based on the homework for fashion MNIST.

I changed the 10 output classes to 2 and the b/w 1 channel input to 3 channels. The size of the images also had to be adjusted.

When I work with the face images I expect that a high resolution is necessary. We should need more detail than what is needed to distinguish b/w a jeans and a boot. I have images 256 x 256 which is more than enough for the human eye. I will work with 128x128 image size. 64x64 still looks ok to me personally, but I don't want to start out throwing information away too much ...
* 
* 

In [1]:
!pwd
main_dir = '/home/jovyan/LFI/bainbridge_2k/'

# location of original labels: LFI/bainbridge_2k/meta
label_file = '../meta/labels2k.xlsx'

images_dir = '../images/'
!ls $images_dir | wc -l #location for images , should output the number 2222
!echo '*********'
!pip install numpy
!pip install pandas
!pip install matplotlib
!pip install torch
!pip install torchvision

/home/jovyan/LFI/bainbridge_2k/code
2222
requirements.txt
*********


In [2]:
!pip install pytorch-model-summary
import pytorch_model_summary as pms


Collecting pytorch-model-summary
  Using cached pytorch_model_summary-0.1.2-py3-none-any.whl (9.3 kB)
Installing collected packages: pytorch-model-summary
Successfully installed pytorch-model-summary-0.1.2


In [3]:
import numpy as np
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
import os
#import cv2 # would like to remove this

import torch
from torch.autograd import Variable
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data.sampler import SubsetRandomSampler

import torchvision
from torchvision import transforms, utils

import PIL

# define the models

In [4]:
class MyNeuralNetwork1(nn.Module):
    '''original model taken from homework, for fashion MNIST.
    Number of classes reduced from 10 to 2, 
    number of channels increased from 1 (b/w) to 3 (colour images).'''
    def __init__(self):
        super(MyNeuralNetwork1, self).__init__() 
        # TODO: YOUR CODE HERE
        # no padding yet
        self.conv1 = nn.Conv2d(3, 32, 3, stride=1, padding=1) # img.size 128*128=16'384 , 32 f-maps of size 124*124=15'376
        self.conv2 = nn.Conv2d(32, 32, 3, stride=1, padding=1) # input feature.size 128*128
        self.pool1 = nn.MaxPool2d(3, stride=2) # input feature.size halved to 64*64
        self.dropout1 = nn.Dropout2d(0.25)

        self.conv3 = nn.Conv2d(32, 64, 3, stride=1, padding=1) # 62*62
        self.conv4 = nn.Conv2d(64, 64, 3, stride=1, padding=1) # 60*60
        self.pool2 = nn.MaxPool2d(3, stride=2) # features halved to size 30*30 
        self.dropout2 = nn.Dropout2d(0.25)

        # 64*29*29 = 53'824 , 61'504=64*961=64*31*31
        self.fc1 = nn.Linear(61504, 1024)
        self.fc2 = nn.Linear(1024, 2)

    def forward(self, x):
        # TODO: YOUR CODE HERE
        x = self.conv1(x)
        x = F.relu(x)
        x = self.conv2(x)
        x = F.relu(x)
        x = self.pool1(x)
        x = self.dropout1(x)
        
        x = self.conv3(x)
        x = F.relu(x)
        x = self.conv4(x)
        x = F.relu(x)
        x = self.pool2(x)
        x = self.dropout2(x)

        #flatten
        x = x.reshape(x.size(0), -1)

        x = self.fc1(x)
        x = F.relu(x)
        x = self.fc2(x)
        #x = F.relu(x) # removed this. with this i got outputs as mainly 0 - because most x before relu were negative.
        # cf https://discuss.pytorch.org/t/always-output-of-0/21784/3
        # suggesting to use no non-linearity before nn.CrossEntropyLoss.
        
        return x

In [6]:
# summary(Net(), torch.zeros((1, 1, 28, 28)), show_input=True)
# https://pypi.org/project/pytorch-model-summary/
print(pms.summary(MyNeuralNetwork1(), torch.zeros((1, 3, 128, 128)), show_input=True))
#print(pms.summary(MyNeuralNetwork1(), torch.zeros((1, 3, 128, 128)), show_input=False))

-------------------------------------------------------------------------
      Layer (type)           Input Shape         Param #     Tr. Param #
          Conv2d-1      [1, 3, 128, 128]             896             896
          Conv2d-2     [1, 32, 128, 128]           9,248           9,248
       MaxPool2d-3     [1, 32, 128, 128]               0               0
       Dropout2d-4       [1, 32, 63, 63]               0               0
          Conv2d-5       [1, 32, 63, 63]          18,496          18,496
          Conv2d-6       [1, 64, 63, 63]          36,928          36,928
       MaxPool2d-7       [1, 64, 63, 63]               0               0
       Dropout2d-8       [1, 64, 31, 31]               0               0
          Linear-9            [1, 61504]      62,981,120      62,981,120
         Linear-10             [1, 1024]           2,050           2,050
Total params: 63,048,738
Trainable params: 63,048,738
Non-trainable params: 0
---------------------------------------------

In [121]:
class MyNeuralNetwork2(nn.Module):
    def __init__(self):
        '''Only 2 conv layers with pooling and dropout
        Huge 1st fc layer, then immidiately reduce to 2 nodes in 2nd fc layer.'''
        super(MyNeuralNetwork2, self).__init__() 

        self.conv1 = nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3, stride=1, padding=1) # img.size 64*64=4096 , 32 f-maps of size 60*60=3'600
        self.conv2 = nn.Conv2d(32, 32, 3, stride=1, padding=1) #
        self.pool1 = nn.MaxPool2d(3, stride=2) # input feature.size halved to 32*32
        self.dropout1 = nn.Dropout2d(0.25)

        # 64*29*29 = 53'824 , 61'504=64*961=64*31*31
        # 32*31*31 = 30'752
        self.fc1 = nn.Linear(30752, 2)
        self.fc2 = nn.Linear(2, 2)


    def forward(self, x):
        # TODO: YOUR CODE HERE
        x = self.conv1(x)
        x = F.relu(x)
        x = self.conv2(x)
        x = F.relu(x)
        x = self.pool1(x)
        x = self.dropout1(x)

        #flatten
        print('right before flatten : ' , [x.size(i) for i in range(4)])
        print(x.size(0))
        x = x.reshape(x.size(0), -1)

        x = self.fc1(x)
        x = F.relu(x)
        x = self.fc2(x)
        #x = F.relu(x) # removed this. with this i got outputs as mainly 0 - because most x before relu were negative.
        # cf https://discuss.pytorch.org/t/always-output-of-0/21784/3
        # suggesting to use no non-linearity before nn.CrossEntropyLoss.
        
        return x

In [122]:
print(pms.summary(MyNeuralNetwork2(), torch.zeros((1, 3, 64, 64)), show_input=True))
print(pms.summary(MyNeuralNetwork2(), torch.zeros((1, 3, 64, 64)), show_input=False))

right before flatten :  [1, 32, 31, 31]
1
-----------------------------------------------------------------------
      Layer (type)         Input Shape         Param #     Tr. Param #
          Conv2d-1      [1, 3, 64, 64]             896             896
          Conv2d-2     [1, 32, 64, 64]           9,248           9,248
       MaxPool2d-3     [1, 32, 64, 64]               0               0
       Dropout2d-4     [1, 32, 31, 31]               0               0
          Linear-5          [1, 30752]          61,506          61,506
          Linear-6              [1, 2]               6               6
Total params: 71,656
Trainable params: 71,656
Non-trainable params: 0
-----------------------------------------------------------------------
right before flatten :  [1, 32, 31, 31]
1
-----------------------------------------------------------------------
      Layer (type)        Output Shape         Param #     Tr. Param #
          Conv2d-1     [1, 32, 64, 64]             896        

In [5]:
class MyNeuralNetwork3(nn.Module):
    def __init__(self):
        super(MyNeuralNetwork3, self).__init__() 
        
        # reduced to 8 feature maps
        self.conv1 = nn.Conv2d(3, 4, 3, stride=1, padding=1) # img.size 64*64=4096 , 8 f-maps
        self.pool1 = nn.MaxPool2d(3, stride=2) # input feature.size halved to 32*32
        self.dropout1 = nn.Dropout2d(0.25)

        # 64*29*29 = 53'824 , 61'504=64*961=64*31*31
        # 32*31*31 = 30'752 for 32 feature, half of it for 16 features
        self.fc1 = nn.Linear(3844, 2)
        self.fc2 = nn.Linear(2, 2)


    def forward(self, x):
        #print('initial input tensor size : ' , [x.size(i) for i in range(4)])
        x = self.conv1(x)
        #print('after conv :' , [x.size(i) for i in range(4)])
        x = F.relu(x)
        #x = self.conv2(x)
        #x = F.relu(x)
        x = self.pool1(x)
        #print('after pooling : ' , [x.size(i) for i in range(4)])
        x = self.dropout1(x)

        #flatten
        #print('right before flatten : ' , [x.size(i) for i in range(4)])
        #print(x.size(0))
        x = x.reshape(x.size(0), -1)

        x = self.fc1(x)
        x = F.relu(x)
        x = self.fc2(x)
        #x = F.relu(x) # removed this. with this i got outputs as mainly 0 - because most x before relu were negative.
        # cf https://discuss.pytorch.org/t/always-output-of-0/21784/3
        # suggesting to use no non-linearity before nn.CrossEntropyLoss.
        
        return x

In [6]:
print(pms.summary(MyNeuralNetwork3(), torch.zeros((1, 3, 64, 64)), show_input=True))
print(pms.summary(MyNeuralNetwork3(), torch.zeros((1, 3, 64, 64)), show_input=False))

-----------------------------------------------------------------------
      Layer (type)         Input Shape         Param #     Tr. Param #
          Conv2d-1      [1, 3, 64, 64]             112             112
       MaxPool2d-2      [1, 4, 64, 64]               0               0
       Dropout2d-3      [1, 4, 31, 31]               0               0
          Linear-4           [1, 3844]           7,690           7,690
          Linear-5              [1, 2]               6               6
Total params: 7,808
Trainable params: 7,808
Non-trainable params: 0
-----------------------------------------------------------------------
-----------------------------------------------------------------------
      Layer (type)        Output Shape         Param #     Tr. Param #
          Conv2d-1      [1, 4, 64, 64]             112             112
       MaxPool2d-2      [1, 4, 31, 31]               0               0
       Dropout2d-3      [1, 4, 31, 31]               0               0
      

In [140]:
class MyNeuralNetwork4(nn.Module):
    def __init__(self):
        super(MyNeuralNetwork4, self).__init__() 
        self.conv1 = nn.Conv2d(3, 32, 3, stride=1, padding=1) # img.size 128*128=16'384 , 32 f-maps of size 124*124=15'376
        self.conv2 = nn.Conv2d(32, 32, 3, stride=1, padding=1) # input feature.size 128*128
        self.pool1 = nn.MaxPool2d(3, stride=2) # input feature.size halved to 64*64
        self.dropout1 = nn.Dropout2d(0.25)

        # no increase in output channels, stick to 32 , maybe already decrease them?
        self.conv3 = nn.Conv2d(32, 32, 3, stride=1, padding=1) # 30*30
        self.conv4 = nn.Conv2d(32, 32, 3, stride=1, padding=1) # 28*28
        self.pool2 = nn.MaxPool2d(3, stride=2) # features halved to size 14*14 
        self.dropout2 = nn.Dropout2d(0.25)

        # 64*29*29 = 53'824 , 61'504=64*961=64*31*31
        self.fc1 = nn.Linear(30752, 4)
        self.fc2 = nn.Linear(4, 2)

    def forward(self, x):
        # TODO: YOUR CODE HERE
        x = self.conv1(x)
        x = F.relu(x)
        x = self.conv2(x)
        x = F.relu(x)
        x = self.pool1(x)
        x = self.dropout1(x)
        
        x = self.conv3(x)
        x = F.relu(x)
        x = self.conv4(x)
        x = F.relu(x)
        x = self.pool2(x)
        x = self.dropout2(x)

        #flatten
        print('right before flatten : ' , [x.size(i) for i in range(4)])
        x = x.reshape(x.size(0), -1)

        x = self.fc1(x)
        x = F.relu(x)
        x = self.fc2(x)
        #x = F.relu(x) # removed this. with this i got outputs as mainly 0 - because most x before relu were negative.
        # cf https://discuss.pytorch.org/t/always-output-of-0/21784/3
        # suggesting to use no non-linearity before nn.CrossEntropyLoss.
        
        return x

In [141]:
print(pms.summary(MyNeuralNetwork4(), torch.zeros((1, 3, 128, 128)), show_input=True))
print(pms.summary(MyNeuralNetwork4(), torch.zeros((1, 3, 128, 128)), show_input=False))

right before flatten :  [1, 32, 31, 31]
-------------------------------------------------------------------------
      Layer (type)           Input Shape         Param #     Tr. Param #
          Conv2d-1      [1, 3, 128, 128]             896             896
          Conv2d-2     [1, 32, 128, 128]           9,248           9,248
       MaxPool2d-3     [1, 32, 128, 128]               0               0
       Dropout2d-4       [1, 32, 63, 63]               0               0
          Conv2d-5       [1, 32, 63, 63]           9,248           9,248
          Conv2d-6       [1, 32, 63, 63]           9,248           9,248
       MaxPool2d-7       [1, 32, 63, 63]               0               0
       Dropout2d-8       [1, 32, 31, 31]               0               0
          Linear-9            [1, 30752]         123,012         123,012
         Linear-10                [1, 4]              10              10
Total params: 151,662
Trainable params: 151,662
Non-trainable params: 0
-----------

In [145]:
class MyNeuralNetwork5(nn.Module):
    def __init__(self):
        super(MyNeuralNetwork5, self).__init__() 
        # TODO: YOUR CODE HERE
        # no padding yet
        self.conv1 = nn.Conv2d(3, 24, 3, stride=1, padding=1) # img.size 128*128=16'384 , 32 f-maps of size 124*124=15'376
        self.conv2 = nn.Conv2d(24, 24, 3, stride=1, padding=1) # input feature.size 128*128
        self.pool1 = nn.MaxPool2d(3, stride=2) # input feature.size halved to 64*64
        self.dropout1 = nn.Dropout2d(0.25)

        # no increase in output channels, stick to 32 , maybe already decrease them?
        self.conv3 = nn.Conv2d(24, 24, 3, stride=1, padding=1) # 30*30
        self.conv4 = nn.Conv2d(24, 24, 3, stride=1, padding=1) # 28*28
        self.pool2 = nn.MaxPool2d(3, stride=2) # features halved to size 14*14 
        self.dropout2 = nn.Dropout2d(0.25)

        # 64*29*29 = 53'824 , 61'504=64*961=64*31*31
        self.fc1 = nn.Linear(23064, 4)
        self.fc2 = nn.Linear(4, 2)

    def forward(self, x):
        # TODO: YOUR CODE HERE
        x = self.conv1(x)
        x = F.relu(x)
        x = self.conv2(x)
        x = F.relu(x)
        x = self.pool1(x)
        x = self.dropout1(x)
        
        x = self.conv3(x)
        x = F.relu(x)
        x = self.conv4(x)
        x = F.relu(x)
        x = self.pool2(x)
        x = self.dropout2(x)

        #flatten
        #print('right before flatten : ' , [x.size(i) for i in range(4)])
        x = x.reshape(x.size(0), -1)

        x = self.fc1(x)
        x = F.relu(x)
        x = self.fc2(x)
        #x = F.relu(x) # removed this. with this i got outputs as mainly 0 - because most x before relu were negative.
        # cf https://discuss.pytorch.org/t/always-output-of-0/21784/3
        # suggesting to use no non-linearity before nn.CrossEntropyLoss.
        
        return x

In [146]:
print(pms.summary(MyNeuralNetwork5(), torch.zeros((1, 3, 128, 128)), show_input=True))
# print(pms.summary(MyNeuralNetwork5(), torch.zeros((1, 3, 128, 128)), show_input=False))

right before flatten :  [1, 24, 31, 31]
-------------------------------------------------------------------------
      Layer (type)           Input Shape         Param #     Tr. Param #
          Conv2d-1      [1, 3, 128, 128]             672             672
          Conv2d-2     [1, 24, 128, 128]           5,208           5,208
       MaxPool2d-3     [1, 24, 128, 128]               0               0
       Dropout2d-4       [1, 24, 63, 63]               0               0
          Conv2d-5       [1, 24, 63, 63]           5,208           5,208
          Conv2d-6       [1, 24, 63, 63]           5,208           5,208
       MaxPool2d-7       [1, 24, 63, 63]               0               0
       Dropout2d-8       [1, 24, 31, 31]               0               0
          Linear-9            [1, 23064]          92,260          92,260
         Linear-10                [1, 4]              10              10
Total params: 108,566
Trainable params: 108,566
Non-trainable params: 0
-----------

In [13]:
class MyNeuralNetwork6(nn.Module):
    def __init__(self):
        '''3 blocks of convolution pooling, dropout.
        Going down to 6x6 feature sizes.'''

        super(MyNeuralNetwork6, self).__init__() 
        self.conv1 = nn.Conv2d(3, 24, 3, stride=1, padding=1) # img.size 128*128=16'384 , 24 f-maps of size 124*124=15'376
        self.pool1 = nn.MaxPool2d(3, stride=2) # input feature.size halved to 62*62
        self.dropout1 = nn.Dropout2d(0.25)

        self.conv2 = nn.Conv2d(24, 24, 3, stride=1, padding=1) # 30*30
        self.pool2 = nn.MaxPool2d(3, stride=2) # features halved to size 15*15
        self.dropout2 = nn.Dropout2d(0.25)
        
        self.conv3 = nn.Conv2d(24, 24, 3, stride=1, padding=1) # 13*13
        self.pool3 = nn.MaxPool2d(3, stride=2) # features halved to size 6*6
        self.dropout3 = nn.Dropout2d(0.25)

        self.fc1 = nn.Linear(5400, 16)
        self.fc2 = nn.Linear(16, 2)

    def forward(self, x):

        x = self.conv1(x)
        x = F.relu(x)
        x = self.pool1(x)
        x = self.dropout1(x)
        
        x = self.conv2(x)
        x = F.relu(x)
        x = self.pool2(x)
        x = self.dropout2(x)

        x = self.conv3(x)
        x = F.relu(x)
        x = self.pool3(x)
        x = self.dropout3(x)

        #flatten
        print('right before flatten : ' , [x.size(i) for i in range(4)])
        x = x.reshape(x.size(0), -1)

        x = self.fc1(x)
        x = F.relu(x)
        x = self.fc2(x)
        
        return x

In [14]:
print(pms.summary(MyNeuralNetwork6(), torch.zeros((1, 3, 128, 128)), show_input=True))
# print(pms.summary(MyNeuralNetwork6(), torch.zeros((1, 3, 128, 128)), show_input=False))

right before flatten :  [1, 24, 15, 15]
-------------------------------------------------------------------------
      Layer (type)           Input Shape         Param #     Tr. Param #
          Conv2d-1      [1, 3, 128, 128]             672             672
       MaxPool2d-2     [1, 24, 128, 128]               0               0
       Dropout2d-3       [1, 24, 63, 63]               0               0
          Conv2d-4       [1, 24, 63, 63]           5,208           5,208
       MaxPool2d-5       [1, 24, 63, 63]               0               0
       Dropout2d-6       [1, 24, 31, 31]               0               0
          Conv2d-7       [1, 24, 31, 31]           5,208           5,208
       MaxPool2d-8       [1, 24, 31, 31]               0               0
       Dropout2d-9       [1, 24, 15, 15]               0               0
         Linear-10             [1, 5400]          86,416          86,416
         Linear-11               [1, 16]              34              34
Total para

I put those models in a .py file to import from

In [15]:
from myModels import myModels as mm

In [17]:
mm1 = mm.MyNeuralNetwork1()

In [18]:
print(pms.summary(mm1, torch.zeros((1, 3, 128, 128)), show_input=True))

-------------------------------------------------------------------------
      Layer (type)           Input Shape         Param #     Tr. Param #
          Conv2d-1      [1, 3, 128, 128]             896             896
          Conv2d-2     [1, 32, 128, 128]           9,248           9,248
       MaxPool2d-3     [1, 32, 128, 128]               0               0
       Dropout2d-4       [1, 32, 63, 63]               0               0
          Conv2d-5       [1, 32, 63, 63]          18,496          18,496
          Conv2d-6       [1, 64, 63, 63]          36,928          36,928
       MaxPool2d-7       [1, 64, 63, 63]               0               0
       Dropout2d-8       [1, 64, 31, 31]               0               0
          Linear-9            [1, 61504]      62,981,120      62,981,120
         Linear-10             [1, 1024]           2,050           2,050
Total params: 63,048,738
Trainable params: 63,048,738
Non-trainable params: 0
---------------------------------------------