<a href="https://colab.research.google.com/github/monilchheda/manning-live-project-human-pose-estimation/blob/master/week2_svhn_cnn.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

1. Downloader

In [0]:
from torchvision import datasets
from torchvision import transforms

# transforms.ToTensor rescales image values into the 0..1 range, so no extra
# normalisation step is applied here. Reference:
# https://pytorch.org/docs/master/torchvision/transforms.html#torchvision.transforms.ToTensor

train = datasets.SVHN(
    root='.',
    split='train',
    transform=transforms.Compose([transforms.ToTensor()]),
    download=True,
)
test = datasets.SVHN(
    root='.',
    split='test',
    transform=transforms.Compose([transforms.ToTensor()]),
    download=True,
)

2. DataLoader

In [0]:
import torch

# Training batches: shuffle=True asks the loader to reshuffle the data.
train_loader = torch.utils.data.DataLoader(
    train,
    batch_size=512,
    shuffle=True,
    num_workers=2,
)

# Evaluation batches: shuffling brings nothing when only measuring accuracy,
# and a fixed order keeps per-batch inspection reproducible between runs.
test_loader = torch.utils.data.DataLoader(
    test,
    batch_size=512,
    shuffle=False,
    num_workers=2,
)


3. Model Architecture

In [0]:
# https://pytorch.org/tutorials/beginner/blitz/neural_networks_tutorial.html
import torch.nn.functional as F
import torch.nn as nn

class NetworkModel(nn.Module):
    """Small three-stage CNN that maps 3x32x32 images to 10 class scores.

    Every stage is convolution -> batch norm -> 2x2 max-pool -> ReLU.
    For a 32x32 input the spatial size evolves as follows:

    * stage 1: conv (k=5, pad=2) keeps 32, pooling gives 16
    * stage 2: conv (k=5, pad=1) gives 14, pooling gives 7
    * stage 3: conv (k=3, pad=1) keeps 7, pooling floors to 3

    which is why the final linear layer takes ``24 * 3 * 3`` features.
    """

    def __init__(self):
        super(NetworkModel, self).__init__()

        # Each convolution starts with zero biases and weights drawn from a
        # narrow normal distribution (std=0.001). References:
        # https://stackoverflow.com/questions/49433936/how-to-initialize-weights-in-pytorch
        # https://pytorch.org/docs/stable/nn.init.html
        self.conv1 = nn.Conv2d(kernel_size=5, in_channels=3, out_channels=6, stride=1, padding=2)
        nn.init.zeros_(self.conv1.bias)
        nn.init.normal_(self.conv1.weight, std=0.001)
        self.conv1_bn = nn.BatchNorm2d(6)

        self.conv2 = nn.Conv2d(kernel_size=5, in_channels=6, out_channels=12, stride=1, padding=1)
        nn.init.zeros_(self.conv2.bias)
        nn.init.normal_(self.conv2.weight, std=0.001)
        self.conv2_bn = nn.BatchNorm2d(12)

        self.conv3 = nn.Conv2d(kernel_size=3, in_channels=12, out_channels=24, stride=1, padding=1)
        nn.init.zeros_(self.conv3.bias)
        nn.init.normal_(self.conv3.weight, std=0.001)
        self.conv3_bn = nn.BatchNorm2d(24)

        self.maxpool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.fc = nn.Linear(24*3*3, 10)

    def forward(self, x):
        """Return raw class scores of shape (batch, 10) for a batch of images."""
        # The batch-norm layers were registered in __init__ but previously never
        # called, leaving them as dead parameters; they now follow each conv.
        x = F.relu(self.maxpool(self.conv1_bn(self.conv1(x))))
        x = F.relu(self.maxpool(self.conv2_bn(self.conv2(x))))
        x = F.relu(self.maxpool(self.conv3_bn(self.conv3(x))))
        # Flatten everything except the batch axis. Keeping the batch size
        # explicit means a wrongly sized input fails loudly in the linear layer
        # instead of being silently regrouped into a different batch size.
        x = x.flatten(start_dim=1)
        x = self.fc(x)
        return x

# Instantiate the network, print its layer summary, then print the weight
# tensor shape of every learnable layer (one per line) as a sanity check.
mynetworkmodel = NetworkModel()
print(mynetworkmodel)
print(
    *(getattr(mynetworkmodel, layer_name).weight.shape
      for layer_name in ("conv1", "conv2", "conv3", "fc")),
    sep="\n",
)


In [0]:
import torch.optim as optim

# Use the first CUDA device when one is present; otherwise fall back to the CPU
# so this cell does not fail on a runtime without a GPU.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

# `model` is a second name for the same network object, moved onto `device`.
model = mynetworkmodel
model.to(device)

loss_func = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [0]:
# new training https://gist.github.com/gagejustins/76ab1f37b83684032566b276fe3a5289#file-simplecnn-py

import time
from torch.autograd import Variable

def trainNet(net, n_epochs, learning_rate, optimizer, loss_func, loader=None):
    """Train ``net`` for ``n_epochs`` passes over a loader of labelled batches.

    Progress is printed roughly ten times per epoch as the mean loss of the
    batches seen since the previous report.

    Args:
        net: Model to optimise. Batches are moved to the device that holds
            its parameters (CPU if it has none).
        n_epochs: Number of full passes over the loader.
        learning_rate: Only echoed in the hyperparameter banner; the step size
            actually used is whatever ``optimizer`` was built with.
        optimizer: Optimizer already bound to ``net``'s parameters.
        loss_func: Callable ``loss_func(outputs, labels)`` returning a scalar
            loss tensor.
        loader: Sized iterable yielding ``(inputs, labels)`` batches. When
            omitted, the module-level ``train_loader`` is used.

    Returns:
        None. ``net`` is updated in place through ``optimizer``.
    """
    if loader is None:
        loader = train_loader

    # Print all of the hyperparameters of the training iteration
    print("===== HYPERPARAMETERS =====")
    print("epochs=", n_epochs)
    print("learning_rate=", learning_rate)
    print("=" * 30)

    n_batches = len(loader)
    print(n_batches)

    # Follow the model's own device instead of assuming CUDA is available.
    first_param = next(net.parameters(), None)
    net_device = first_param.device if first_param is not None else torch.device('cpu')

    # Report about ten times per epoch; never less often than every batch, so
    # short loaders (fewer than 10 batches) cannot cause a zero divisor.
    print_every = max(1, n_batches // 10)

    training_start_time = time.time()

    for epoch in range(n_epochs):
        running_loss = 0.0
        start_time = time.time()

        for i, (inputs, labels) in enumerate(loader):
            inputs = inputs.to(net_device)
            labels = labels.to(net_device)

            # Set the parameter gradients to zero
            optimizer.zero_grad()

            # Forward pass, backward pass, optimize
            outputs = net(inputs)
            loss = loss_func(outputs, labels)
            loss.backward()
            optimizer.step()

            # .item() yields a plain Python float, so the running sum does not
            # hold on to device tensors.
            running_loss += loss.item()

            # The window that is averaged has exactly `print_every` batches.
            if (i + 1) % print_every == 0:
                print("Epoch {}, {:d}% \t train_loss: {:.2f} took: {:.2f}s".format(
                        epoch+1, int(100 * (i+1) / n_batches), running_loss / print_every, time.time() - start_time))
                # Reset running loss and time
                running_loss = 0.0
                start_time = time.time()

    print("Training finished, took {:.2f}s".format(time.time() - training_start_time))

# Train the baseline model for 20 epochs. The learning rate given here is only
# printed by trainNet, so it should mirror the value the optimizer was built with.
trainNet(
    mynetworkmodel,
    n_epochs=20,
    learning_rate=0.001,
    optimizer=optimizer,
    loss_func=loss_func,
)


In [0]:
# Accuracy function

def get_accuracy(model, dataloader):
    """Return the top-1 accuracy of ``model`` over ``dataloader``, in percent.

    Args:
        model: Network producing one row of class scores per input sample.
        dataloader: Iterable yielding ``(images, labels)`` batches.

    Returns:
        ``100 * correct / total`` as a float, or ``0.0`` when the loader
        yields no samples.

    The model is switched to eval mode for the measurement and afterwards put
    back into whichever mode (train or eval) it was in when called.
    """
    was_training = model.training

    # Run on the device that already holds the model instead of assuming CUDA.
    first_param = next(model.parameters(), None)
    model_device = first_param.device if first_param is not None else torch.device('cpu')

    correct = 0
    total = 0

    # https://stackoverflow.com/questions/60018578/what-does-model-eval-do-in-pytorch
    model.eval()
    try:
        # https://adventuresinmachinelearning.com/convolutional-neural-networks-tutorial-in-pytorch/
        with torch.no_grad():
            for image, labels in dataloader:
                image = image.to(model_device)
                labels = labels.to(model_device)
                outputs = model(image)
                _, predicted = torch.max(outputs, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()
    finally:
        # Restore the caller's mode even if a batch raised.
        model.train(was_training)

    if total == 0:
        return 0.0
    return (correct / total) * 100


# Accuracy in percent: training split first, then the test split.
for split_loader in (train_loader, test_loader):
    print(get_accuracy(mynetworkmodel, split_loader))

In [0]:
# https://pytorch.org/tutorials/beginner/blitz/cifar10_tutorial.html#sphx-glr-beginner-blitz-cifar10-tutorial-py

import matplotlib.pyplot as plt
import numpy as np
import torchvision


def imshow(img, unnormalize=False):
    """Display a channel-first image tensor with matplotlib.

    Args:
        img: Image tensor laid out as (channels, height, width).
        unnormalize: Pass True only for images that were normalised to the
            [-1, 1] range; they are mapped back to [0, 1] before display.
            The datasets in this notebook are only scaled to [0, 1], so the
            default shows the values untouched. Applying the mapping to them
            would compress every image into [0.5, 1] and wash it out.
    """
    if unnormalize:
        img = img / 2 + 0.5
    # Bring the data to host memory; matplotlib expects (H, W, C) ordering.
    npimg = img.detach().cpu().numpy()
    plt.imshow(np.transpose(npimg, (1, 2, 0)))
    plt.show()


# get some random training images
dataiter = iter(train_loader)
# Advance the iterator with the built-in next(); the Python-2 style
# `.next()` method is not available on DataLoader iterators in newer PyTorch.
images, labels = next(dataiter)

# Show a few samples from the batch, each followed by its label.
for i in range(100,105):
    imshow(images[i])
    print(labels[i])

#id = 23456
#im, lbl = test[id]
#print("label: ", lbl)
#imshow(test[id][0])



In [0]:
# Smoke test: classify one test image and show it next to its true label.

# Named `sample_idx` rather than `id` so the builtin id() is not shadowed.
sample_idx = 120
mynetworkmodel.eval()
with torch.no_grad():
    sample_image, sample_label = test[sample_idx]
    # Add the batch axis and move to the model's device in a single step; a
    # separate .cuda() call is redundant and breaks on CPU-only runtimes.
    image = sample_image.unsqueeze(0).to(device)
    label = torch.tensor(sample_label).to(device)
    output = mynetworkmodel(image)
    _, predicted = torch.max(output,1)
    print("label: ", label)
    print("prediction: ", predicted)
    # Reuse the sample fetched above (still on the CPU) for display.
    imshow(sample_image)

mynetworkmodel.train()

In [0]:
# Custom dataset provider

from torch.utils.data import Dataset
class SVHN_dataset(Dataset):
    """In-memory dataset built from a loaded SVHN ``.mat`` dictionary.

    Args:
        data: Mapping with an image array under ``'X'`` and a label array
            under ``'y'``. The permutation below assumes ``'X'`` is laid out
            as (height, width, channels, N) -- TODO confirm against the file.

    Each item is ``(image, label)``: a float image scaled to 0..1 in
    (channels, height, width) order, and a scalar int64 class index.
    """

    def __init__(self, data):
        # Move the sample axis first and the channel axis second.
        self.images = torch.tensor(data['X']).permute([3,2,0,1])
        # Convert to float and normalize images to the 0..1 range.
        # https://pytorch.org/docs/stable/tensors.html
        self.images = self.images.to(torch.float) / 255

        # Flatten the labels to one dimension and cast to int64:
        # nn.CrossEntropyLoss expects class indices as a 1-D long tensor per
        # batch, not a column of small unsigned integers.
        self.labels = torch.tensor(data['y']).reshape(-1).to(torch.long)
        self.size = self.labels.shape[0]

        # Replace label 10 with label 0 so classes run from 0 to 9.
        self.labels[self.labels==10] = 0

    def __len__(self):
        """Number of samples held by the dataset."""
        return self.size

    def __getitem__(self, idx):
        """Return the ``(image, label)`` pair stored at position ``idx``."""
        return self.images[idx], self.labels[idx]




In [0]:
from scipy.io import loadmat
import torch
import torch.optim as optim

# Use the first CUDA device when one is present; otherwise fall back to the CPU.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

model = mynetworkmodel
model.to(device)

loss_func = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Raw .mat contents, loaded as plain dictionaries.
train_data = loadmat('train_32x32.mat')
test_data = loadmat('test_32x32.mat')

ctrain = SVHN_dataset(train_data)
ctest = SVHN_dataset(test_data)

#print(ctrain.size, ctest.size)

# Data loaders must wrap the Dataset objects. The raw loadmat() dictionaries
# are keyed by name, so their length is the number of keys and they cannot be
# indexed by sample position.
c_train_loader = torch.utils.data.DataLoader(
    ctrain,
    batch_size=512,
    shuffle=True,
    num_workers=2,
)

c_test_loader = torch.utils.data.DataLoader(
    ctest,
    batch_size=512,
    shuffle=False,
    num_workers=2,
)

# A fresh network and optimizer for the custom-dataset experiment.
c_model = NetworkModel()
c_model.to(device)

c_optimizer = optim.Adam(c_model.parameters(), lr=0.001)

# NOTE(review): this call does not hand c_train_loader to trainNet, so training
# runs on whichever loader trainNet picks up by itself (the module-level
# train_loader) -- confirm whether the custom loader should be used instead.
trainNet(c_model,  n_epochs=10, learning_rate=0.001,  optimizer=c_optimizer, loss_func=loss_func)

# https://pytorch.org/tutorials/beginner/blitz/cifar10_tutorial.html#sphx-glr-beginner-blitz-cifar10-tutorial-py

import matplotlib.pyplot as plt
import numpy as np
import torchvision


def imshow(img, unnormalize=False):
    """Show a (channels, height, width) image tensor using matplotlib.

    Args:
        img: Channel-first image tensor.
        unnormalize: When True, map values from [-1, 1] back to [0, 1] before
            plotting. Leave it False for images that are already in [0, 1],
            as produced by the datasets in this notebook; the mapping would
            otherwise squeeze them into [0.5, 1] and fade the picture.
    """
    if unnormalize:
        img = img / 2 + 0.5
    # Copy to host memory first; matplotlib wants (H, W, C) ordering.
    npimg = img.detach().cpu().numpy()
    plt.imshow(np.transpose(npimg, (1, 2, 0)))
    plt.show()

#id = 23456
#im, lbl = test[id]
#print("label: ", lbl)
#imshow(test[id][0])


# Classify one test image with the second model and display it.
# `sample_idx` replaces `id` so the builtin id() stays usable afterwards.
sample_idx = 12000
c_model.eval()
with torch.no_grad():
    sample_image, sample_label = test[sample_idx]
    # One .to(device) both adds nothing CUDA-specific and avoids the redundant
    # .cuda() call, which fails on runtimes without a GPU.
    image = sample_image.unsqueeze(0).to(device)
    label = torch.tensor(sample_label).to(device)
    output = c_model(image)
    _, predicted = torch.max(output,1)
    print("label: ", label)
    print("prediction: ", predicted)
    # Display the CPU copy fetched above instead of indexing the dataset again.
    imshow(sample_image)

c_model.train()