Recreation of https://pytorch.org/tutorials/beginner/transfer_learning_tutorial.html

In [1]:
import torch
import torchvision
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.optim import lr_scheduler
from torch import save
from torch import load
from os import path

#import torch.utils.tensorboard as tb

from PIL import Image

from torchvision import datasets, models, transforms
from torch.utils.data import Dataset, DataLoader

import matplotlib.pyplot as plt
import pandas as pd
import numpy as np 
import random 
import os, math

import gc

import pdb
from skimage import io 


device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# device = torch.device("cpu")

Reproducibility

In [2]:
# Set manual seed.
def runRamdomSeed():
    torch.manual_seed(234)
    np.random.seed(234)
    random.seed(234)
    # Disabling the benchmarking feature with torch.backends.cudnn.benchmark = False 
    # causes cuDNN to deterministically select an algorithm, possibly at the cost of reduced performance.
    torch.backends.cudnn.benchmark = False 

runRamdomSeed()

First we need to import our data.

In [3]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


Then we need to establish how we will transform the data to fit into NN

In [4]:
unused_data_transforms = transforms.Compose([
        # transforms.RandomResizedCrop(224),
        # transforms.RandomHorizontalFlip(),
        transforms.Resize(256),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        ])

In [5]:

class OurDataset(Dataset):
    def __init__(self, data_transforms=None, valid=False):
        self.data_transforms = data_transforms
        if not valid:
          csv_path = "/content/drive/Shareddrives/GeoTracking_AI/image_data.csv"
          self.image_path = "/content/drive/Shareddrives/GeoTracking_AI/images"
        if valid:
          csv_path = "/content/drive/Shareddrives/GeoTracking_AI/image_valid_data.csv"
          self.image_path = "/content/drive/Shareddrives/GeoTracking_AI/valid_images"
        
        full_csv_frame = pd.read_csv(csv_path)
        csv_frame = full_csv_frame[["img_id", "city_id", "heading"]]
        
        self.pd_frame = csv_frame

        np_frame = csv_frame.to_numpy()

        

    def __len__(self):
        return self.pd_frame.iloc[:, 0].size


    def __getitem__(self, idx):
        image_path = self.image_path + "/" + str(self.pd_frame.iloc[idx, 0]).zfill(5) 
        img1 = Image.open(image_path + "_" + str( (self.pd_frame.iloc[idx, 2]) ).zfill(3) + ".jpg")
        img2 = Image.open(image_path + "_" + str( (self.pd_frame.iloc[idx, 2] + 120)%360 ).zfill(3) + ".jpg")
        img3 = Image.open(image_path + "_" + str( (self.pd_frame.iloc[idx, 2] + 240)%360 ).zfill(3) + ".jpg")
      
        img1 = torch.from_numpy(np.asarray(img1))     # convert to PyTorch Tensor
        img2 = torch.from_numpy(np.asarray(img2))  
        img3 = torch.from_numpy(np.asarray(img3)) 


        if self.data_transforms != None:
          img1 = self.data_transforms(img1)
          img2 = self.data_transforms(img2)
          img3 = self.data_transforms(img3)
        else:
          # Change from 640, 640, 3 to 3, 640, 640
          img1 = np.asarray(img1).transpose(-1, 0, 1)
          img2 = np.asarray(img2).transpose(-1, 0, 1)
          img3 = np.asarray(img3).transpose(-1, 0, 1)
        
        # img1 = torch.from_numpy(np.asarray(img1))     # convert to PyTorch Tensor (covered in transforms?)
        # img2 = torch.from_numpy(np.asarray(img2))     
        # img3 = torch.from_numpy(np.asarray(img3))     


        return (img1, img2, img3) , self.pd_frame.iloc[idx, 1]


Set Arguments

In [84]:
class Args(object):
    pass

args = Args();

args.learning_rate = .001
args.max_epochs = 3
args.batch_size = 64


Start by grabbing the already existing ResNet18 library

In [88]:
def conv3x3(in_planes, out_planes, stride=1, groups=1, dilation=1):
    """3x3 convolution with padding"""
    return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
                     padding=dilation, groups=groups, bias=False, dilation=dilation)


class BasicBlock(nn.Module):
    expansion = 1

    def __init__(self, inplanes, planes, stride=1, downsample=None, groups=1,
                 base_width=64, dilation=1):
        super(BasicBlock, self).__init__()
        if groups != 1 or base_width != 64:
            raise ValueError('BasicBlock only supports groups=1 and base_width=64')
        if dilation > 1:
            raise NotImplementedError("Dilation > 1 not supported in BasicBlock")
        # Both self.conv1 and self.downsample layers downsample the input when stride != 1
        self.conv1 = conv3x3(inplanes, planes, stride)
        self.bn1 = nn.BatchNorm2d(planes)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = conv3x3(planes, planes)
        self.bn2 = nn.BatchNorm2d(planes)
        self.stride = stride

    def forward(self, x):
        identity = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)
        out = self.conv2(out)
        out = self.bn2(out)
        out += identity
        out = self.relu(out)

        return out

class Model(nn.Module):
  def __init__(self):
      super(Model, self).__init__()
      self.conv0 = nn.Conv2d(3, 8, 3)
      self.conv1 = BasicBlock(8, 8)
      self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
      self.conv2 = BasicBlock(8, 8)
      self.conv3 = BasicBlock(8, 8)
      self.conv4 = BasicBlock(8, 8)
      self.conv5 = BasicBlock(8, 8)
      self.fc1 = nn.Linear(2888, 120)
      self.fc2 = nn.Linear(120, 84)
      self.fc3 = nn.Linear(84, 10)            # Note that output has dimension 10
  
  def forward(self, x):
      x = self.pool(F.relu(self.conv0(x)))
      x = self.pool(F.relu(self.conv1(x)))
      x = self.pool(F.relu(self.conv2(x)))
      x = self.pool(F.relu(self.conv3(x)))
      x = self.pool(F.relu(self.conv4(x)))
      x = F.relu(self.conv5(x))
      x = x.view(-1, 2888)
      x = F.relu(self.fc1(x))
      x = F.relu(self.fc2(x))
      x = self.fc3(x)
      return x 

model_ft = Model()

model_ft = model_ft.to(device)

In [69]:
def save_model(model, name):
  if (path.exists(name + ".pth")): raise Exception("already exists")
  else: save(model.state_dict(), name + ".pth")


In [70]:
train_dataset = OurDataset(data_transforms=None,valid=False)

trainloader = torch.utils.data.DataLoader(train_dataset, 
                                          batch_size = args.batch_size, 
                                          shuffle = True, 
                                          num_workers = 2)

valid_dataset = OurDataset(data_transforms=None,valid=True)
validloader = torch.utils.data.DataLoader(valid_dataset, 
                                          batch_size = args.batch_size, 
                                          shuffle = True, 
                                          num_workers = 2)

Train new model

In [82]:
def train_model(args, model):


    model = model.to(device) 

    #transform = transforms.Compose([transforms.ToTensor(),
    #transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
   
    #train_dataset = OurDataset(data_transforms=transform)
    #valid_dataset = OurDataset(data_transforms=transform)

    
    optimizer = optim.Adam(model.parameters(), lr=args.learning_rate)
    criterion = nn.CrossEntropyLoss()

    prev_val_acc = 0
    running_loss = 0.0
    best_val_acc = -1000
    for epoch in range(args.max_epochs):     # will get interupted by convergence test if validation acc drops
      for i, data in enumerate(trainloader, 0):
        # print("started i loop", i)
        for j in range(3):
          img = data[0][j]
          # get the inputs; data is a list of [inputs, labels]
          inputs, labels = img.to(device), data[1].to(device)
          inputs = inputs.float()         # Broke our RAM (?)

          # zero the parameter gradients
          optimizer.zero_grad()

          # forward + backward + optimize
          outputs = model(inputs)
          loss = criterion(outputs, labels)
          loss.backward()
          optimizer.step()

          running_loss += loss.item()
          del inputs, labels, loss        # added to reduce RAM issues
          gc.collect()

        # if i % 1000 == 999:    # print every 1000 mini-batches
        if i % 50 == 49:
            print('[%d, %5d] loss: %.3f' %(epoch + 1, i + 1, running_loss / 1000))
            running_loss = 0.0

        # print("finished i loop", i)
        # print()

    save_model(model, "/content/drive/Shareddrives/GeoTracking_AI/custom_attempt_skip2")

        
    print('Finished Training')

In [85]:
model_ft = train_model(args, model_ft)



[1,    50] loss: 0.131




[2,    50] loss: 0.121




[3,    50] loss: 0.111
Finished Training


In [86]:
def accuracy_labels(preds, labels):
    return np.sum(preds == labels)/len(preds)

In [89]:
val_model = model_ft
val_dict = load("/content/drive/Shareddrives/GeoTracking_AI/custom_attempt_skip2.pth")
val_model.load_state_dict(val_dict)

<All keys matched successfully>

In [90]:
  # Test on Val set
  test_batch_size = 64
  out_preds = []
  out_labels = []
  for (X,Y) in validloader:
    X = X[0].to(device)
    Y = Y.to(device)
    X = X.float()         # Broke our RAM (working at 64, not 256)
    y_pred = torch.argmax(val_model(X), dim = 1).tolist()
    print(y_pred)
    y_pred = map(int, y_pred)
    print(y_pred)
    out_preds.extend(list(y_pred))
    out_labels.extend(Y.tolist())
      
  this_val_acc = accuracy_labels(np.array(out_preds), np.array(out_labels))

  print('new_val_acc: %.3f'  %( this_val_acc))




[0, 4, 3, 7, 7, 0, 0, 0, 8, 0, 7, 8, 5, 0, 3, 8, 0, 3, 3, 3, 0, 0, 7, 7, 5, 7, 7, 3, 5, 5, 5, 0, 1, 5, 3, 5, 3, 7, 7, 5, 5, 5, 5, 7, 8, 3, 1, 3, 8, 3, 7, 0, 0, 0, 5, 7, 5, 5, 5, 5, 5, 0, 4, 1]
<map object at 0x7fc46b289050>
[7, 5, 3, 5, 0, 4, 3, 6, 5, 0, 5, 5, 0, 0, 1, 3, 0, 8, 1, 5, 8, 7, 7, 0, 6, 3, 8, 4, 5, 7, 0, 8, 5, 5, 0, 5, 5, 0, 0, 5, 3, 0, 3, 5, 5, 0, 3, 6, 1, 0, 7, 3, 1, 7, 0, 5, 7, 3, 8, 0, 5, 5, 3, 0]
<map object at 0x7fc4060786d0>
[0, 4, 7, 7, 5, 3, 1, 5, 0, 0, 5, 5, 0, 5, 5, 3, 8, 7, 3, 3, 0, 8, 5, 3, 0, 0, 1, 5, 0, 1, 4, 1, 0, 3, 8, 5, 5, 3, 1, 8, 5, 5, 6, 8, 5, 8, 7, 1, 6, 0, 4, 7, 0, 0, 0, 0, 8, 3, 7, 7, 0, 0, 0, 5]
<map object at 0x7fc46b289050>
[5, 3, 3, 3, 7, 5, 7, 7, 3, 3, 4, 6, 7, 1, 3, 3, 0, 8, 0, 0, 3, 3, 3, 0, 7, 6, 0, 3, 6, 7, 5, 1, 0, 5, 7, 6, 7, 7, 5, 7, 4, 7, 8, 8, 3, 4, 5, 8, 6, 3, 5, 3, 8, 3, 0, 5, 5, 3, 0, 1, 5, 0, 6, 8]
<map object at 0x7fc4060786d0>
[1, 3, 8, 0, 0, 8, 6, 1, 1, 0, 1, 1, 5, 7, 0, 3, 0, 0, 7, 1, 7, 7, 8, 7, 6, 3, 7, 5, 0, 5, 4, 5, 5, 0, 5

Report:

* Made our own dataset - DONE

* Transfer Model - 4/15
  * Hyper-parameter testing
* Model from Scratch (do auto-tuning or hand-tune)
  * Hyper-parameter testing
* Have one of two models work well (compared to human benchmark)

Write Report - 4/23-4/30

* Visualization that we can see images, their correct labels, model's guess for label
* Percentage correct for each city bar graph