In [1]:
import os
import cv2
import numpy as np
from tqdm import tqdm

In [4]:
import torch

# Prefer the first CUDA GPU when PyTorch can see one; otherwise fall back to the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

print(device)

# torch.cuda.device_count() 
# We can assign different parts of the network to different GPUs if several are available. Tensors on different devices cannot be combined directly; move them to the same device first.

cuda:0


In [None]:
'''
PyTorch Implementation: 
https://www.youtube.com/playlist?list=PLZbbT5o_s2xrfNyHZsM6ufI0iZENK9xgG

Neural Network theory:
https://www.youtube.com/playlist?list=PLWKotBjTDoLj3rXBL-nEIPRN9V3a9Cx07 

        Layers:
            
            Constraints:
                Conv2d input channels should be 1 (colour channels)
                Linear output channels should be 2 (Left/Right)
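                Conv2d to linear conversion has a factor to flatten tensor: the first Linear layer's input size is channels * height * width of the last conv/pool output (found here by passing a dummy image through the conv layers).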


            Recommendations:
                Have convolution output sizes increasing when you go deeper
                Invert for linear layers (output sizes decreasing as you go deeper)

                - Start small, increase as you go 
                - Generally go deeper for more accuracy, this is harder to train. Can add skip connections to aid
                - 3x3 or 1x1 kernels tend to work best. 1x1 can learn cross-channel features and perform dimensionality reduction, improving efficiency 
                - max pooling 2x2, same padding 
                - ReLU activation 
                - Use L2 weight decay and dropout for regularization 
                - batch size 32

            General:
                Conv2d size -- 
                torch.Size([<output_channels>, <input_channels>, <filter_size>, <filter_size>])

                Linear layer size -- 
                torch.Size([<output_channels>, <input_channels>])

        Training:
            Goal: 99% out of sample accuracy 
            
            Constraints:
            
            Recommendations:
                Train while removing nodes
                Train on GPU 

                - Add momentum factor (average of past steps) 
                - Decrease learning rate over time 
                - Batch normalization (normalize each layer output, then scale by trainable parameters for mean and std)
                - initialize biases to zero 
                - initialize weights to be non symmetric (use Glorot (Xavier) initializer)

        Other generally useful things I've learnt:
            __repr__(self): function. Is called when object is printed
            __call__(self): function. Is called when object is called 
'''

In [5]:
import torch.nn as nn
import torch.nn.functional as F 

# torch.set_grad_enabled(False) # Turns off gradient tracking (the autograd graph of calculations). Run this to improve performance after the model is trained. 

class Net(nn.Module):
    """Small CNN that maps a single-channel square image to two class probabilities.

    Three 5x5 convolutions (each followed by ReLU and 2x2 max pooling) feed two
    fully connected layers; the result is a softmax over the two outputs.

    Args:
        img_size: Height and width, in pixels, of the square input images.
            Defaults to 50, the size the network was originally written for.
    """

    def __init__(self, img_size: int = 50):
        super().__init__()

        self.conv1 = nn.Conv2d(1, 32, 5)  # 1 input channel, 32 output feature maps, 5x5 kernel
        self.conv2 = nn.Conv2d(32, 64, 5)
        self.conv3 = nn.Conv2d(64, 128, 5)

        # The first Linear layer needs the flattened size of the conv output
        # (channels * height * width). Measure it once by pushing a dummy image
        # through the conv stack; no_grad avoids building an autograd graph
        # for this throwaway pass.
        with torch.no_grad():
            probe = torch.randn(1, 1, img_size, img_size)
            self._to_linear = self.convs(probe).flatten(1).shape[1]

        self.fc1 = nn.Linear(self._to_linear, 512)
        self.fc2 = nn.Linear(512, 2)

    def convs(self, x):
        """Run the convolutional feature extractor.

        Each stage is conv -> ReLU -> 2x2 max pool.

        Args:
            x: Input tensor accepted by ``conv1`` (one channel).

        Returns:
            The un-flattened feature tensor produced by the third stage.
        """
        x = F.max_pool2d(F.relu(self.conv1(x)), (2, 2))
        x = F.max_pool2d(F.relu(self.conv2(x)), (2, 2))
        x = F.max_pool2d(F.relu(self.conv3(x)), (2, 2))
        return x

    def forward(self, x):
        """Compute class probabilities for a batch of images.

        Args:
            x: Tensor of shape (batch, 1, H, W), or a single unbatched image of
                shape (1, H, W). H and W must match the ``img_size`` the
                network was built with.

        Returns:
            Tensor of shape (batch, 2) whose rows sum to 1.

        Raises:
            RuntimeError: If the spatial size of ``x`` does not match the size
                the network was built for (raised by ``fc1``).
        """
        if x.dim() == 3:
            # Treat an unbatched (C, H, W) image as a batch of one.
            x = x.unsqueeze(0)

        x = self.convs(x)
        # Flatten everything except the batch dimension. Unlike view(-1, n), this
        # never folds a size mismatch into extra "batch" rows; a wrong-sized
        # input fails in fc1 with a shape error instead of producing garbage.
        x = x.flatten(1)
        x = F.relu(self.fc1(x))
        # No ReLU on the last layer: it would clamp negative scores to zero
        # before the softmax and discard information about the losing class.
        x = self.fc2(x)
        # NOTE(review): if training uses nn.CrossEntropyLoss, that loss expects raw
        # scores and applies log-softmax itself; confirm against the training loop.
        return F.softmax(x, dim=1)  # Activation function on the output

# Build the network, then move its parameters and buffers onto the selected device.
model = Net()
model = model.to(device)