<a href="https://colab.research.google.com/github/katL7/GTSRB-APS360/blob/delete_line_where_I_shuffle_dict/APS360_Traffic_Sign_Project.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Imports

In [4]:
import numpy as np
import time
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
from torchvision import datasets, transforms
from torch.utils.data.sampler import SubsetRandomSampler
from google.colab import drive
import os
import matplotlib.pyplot as plt

## Dataset

In [4]:
data_transform = transforms.Compose([transforms.Resize(50), 
                                      transforms.ToTensor()])

gtsrb_data= datasets.GTSRB('data', download=True, 
                           transform=data_transform)

Downloading https://sid.erda.dk/public/archives/daaeac0d7ce1152aea9b61d9f1e19370/GTSRB-Training_fixed.zip to data/gtsrb/GTSRB-Training_fixed.zip


  0%|          | 0/187490228 [00:00<?, ?it/s]

Extracting data/gtsrb/GTSRB-Training_fixed.zip to data/gtsrb


In [2]:
# split into training, validation and test - stratify each
def get_data_loader(batch_size=64):
    # Rescale images to all be the same size
    data_transform = transforms.Compose([transforms.Resize(50), 
                                         transforms.ToTensor()])

    # Get paths to data in folder
    data = datasets.GTSRB('data', download=True, 
                           transform=data_transform)

    class_idxs = {}
    #Loop through filenames and sort into classes
    for i, data in enumerate(data):
        img, label = data
        if label in class_idxs:
          class_idxs[label].append(i)
        else:
          class_idxs[label] = [i]

    np.random.seed(1000)
    train_indices = []
    val_indices = []
    test_indices = []

    for class_key in class_idxs:
      #Split training/validation/test indices as 0.7/0.20/0.10 split by class
      np.random.shuffle(class_idxs[class_key])
      split1,split2 = int(len(class_idxs[class_key]) * 0.7), int(len(class_idxs[class_key]) * 0.9)
      train_indices += class_idxs[class_key][:split1]
      val_indices += class_idxs[class_key][split1:split2]
      test_indices += class_idxs[class_key][split2:]

    #Shuffle the training/validation/test indices
    np.random.shuffle(train_indices)
    np.random.shuffle(val_indices)
    np.random.shuffle(test_indices)

    #Training data loader
    train_sampler = SubsetRandomSampler(train_indices)
    train_loader = torch.utils.data.DataLoader(data, batch_size=batch_size,
          num_workers=0, sampler=train_sampler)

    #Validation data loader
    val_sampler = SubsetRandomSampler(val_indices)
    val_loader = torch.utils.data.DataLoader(data, batch_size=batch_size,
          num_workers=0, sampler=val_sampler)

    #Test data loader
    test_sampler = SubsetRandomSampler(test_indices)
    test_loader = torch.utils.data.DataLoader(data, batch_size=batch_size,
          num_workers=0, sampler=test_sampler)
    
    return train_loader, val_loader, test_loader

In [5]:
train_loader, val_loader, test_loader = get_data_loader(64)

Downloading https://sid.erda.dk/public/archives/daaeac0d7ce1152aea9b61d9f1e19370/GTSRB-Training_fixed.zip to data/gtsrb/GTSRB-Training_fixed.zip


  0%|          | 0/187490228 [00:00<?, ?it/s]

Extracting data/gtsrb/GTSRB-Training_fixed.zip to data/gtsrb




torch.utils.data.dataloader.DataLoader

## Jordan just playing around to try to get a dataloader object
I'm using Lab 2 as a guide

Figuring out the structure of the dataset

In [None]:
print(type(gtsrb_data))
print(gtsrb_data)
print(len(gtsrb_data))
print(gtsrb_data[10000][1])

<class 'torchvision.datasets.gtsrb.GTSRB'>
Dataset GTSRB
    Number of datapoints: 26640
    Root location: data
26640
10


Using some helper functions that I've slightly modified from Lab 2

In [None]:
def get_relevant_indices(dataset, classes, target_classes):
    """ Return the indices for datapoints in the dataset that belongs to the
    desired target classes, a subset of all possible classes.

    Args:
        dataset: Dataset object
        classes: A list of strings denoting the name of each class
        target_classes: A list of strings denoting the name of desired classes
                        Should be a subset of the 'classes'
    Returns:
        indices: list of indices that have labels corresponding to one of the
                 target classes
    """
    indices = []
    for i in range(len(dataset)):
        # Check if the label is in the target classes
        label_index = dataset[i][1] # ex: 3
        # label_class = classes[label_index] # ex: 'cat'
        if label_index in target_classes:
            indices.append(i)
    return indices

def get_data_loader(target_classes, batch_size):
    """ Loads images of signs, splits the data into training, validation
    and testing datasets. Returns data loaders for the three preprocessed datasets.

    Args:
        target_classes: A list of ints denoting the index of the desired
                        classes. Should be a subset of the argument 'classes'
        batch_size: A int representing the number of samples per batch
    
    Returns:
        train_loader: iterable training dataset organized according to batch size
        val_loader: iterable validation dataset organized according to batch size
        test_loader: iterable testing dataset organized according to batch size
        classes: A list of ints denoting the index of each class
    """

    classes = tuple(range(0,43))
    ########################################################################
    # The output of torchvision datasets are PILImage images of range [0, 1].
    # We transform them to Tensors of normalized range [-1, 1].
    transform = transforms.Compose(
        [transforms.ToTensor(),
         transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
    # Load GTSRB training data
    trainset = torchvision.datasets.GTSRB(root='./data', split='train', transform=transform, download=True)
    # Get the list of indices to sample from
    relevant_indices = get_relevant_indices(trainset, classes, target_classes)
    
    # Split into train and validation
    np.random.seed(1000) # Fixed numpy random seed for reproducible shuffling
    np.random.shuffle(relevant_indices)
    split = int(len(relevant_indices) * 0.8) #split at 80%
    
    # split into training and validation indices
    relevant_train_indices, relevant_val_indices = relevant_indices[:split], relevant_indices[split:]  
    train_sampler = SubsetRandomSampler(relevant_train_indices)
    train_loader = torch.utils.data.DataLoader(trainset, batch_size=batch_size,
                                               num_workers=1, sampler=train_sampler)
    val_sampler = SubsetRandomSampler(relevant_val_indices)
    val_loader = torch.utils.data.DataLoader(trainset, batch_size=batch_size,
                                              num_workers=1, sampler=val_sampler)
    # Load GTSRB testing data
    testset = torchvision.datasets.GTSRB(root='./data', split='test', transform=transform, download=True)
    # Get the list of indices to sample from
    relevant_test_indices = get_relevant_indices(testset, classes, target_classes)
    test_sampler = SubsetRandomSampler(relevant_test_indices)
    test_loader = torch.utils.data.DataLoader(testset, batch_size=batch_size,
                                             num_workers=1, sampler=test_sampler)
    return train_loader, val_loader, test_loader, classes

In [None]:
classes = tuple(range(0,43))
target_classes = (0,)
print(type(classes[0]))
print(classes)
indices_class_0 = get_relevant_indices(gtsrb_data, classes, target_classes)
print(indices_class_0) # Now I have the indices in gtsrb_data of ClassId=0

# Get data loaders for training, validation, and test sets (images are only of ClassId=0)
train_loader, val_loader, test_loader, classes = get_data_loader(target_classes, batch_size=5)

<class 'int'>
(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42)
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149]
Downloading https://sid.erda.dk/public/archives/daaeac0d7ce1152aea9b61d9f1e19370/GTSRB_Final_Test_Images.zip to data/gtsrb/GTSRB_Final_Test_Images.zip


  0%|          | 0/88978620 [00:00<?, ?it/s]

Extracting data/gtsrb/GTSRB_Final_Test_Images.zip to data/gtsrb
Downloading https://sid.erda.dk/public/archives/daaeac0d7ce1152aea9b61d9f1e19370/GTSRB_Final_Test_GT.zip to data/gtsrb/GTSRB_Final_Test_GT.zip


  0%|          | 0/99620 [00:00<?, ?it/s]

Extracting data/gtsrb/GTSRB_Final_Test_GT.zip to data/gtsrb


In [None]:
# Playing around with data loaders I created above
print(len(train_loader)) # There are 24 batches (5 images/batch) in the train loader -> This is 120 images which is 80% of 150!
print(len(val_loader)) # There are 6 batches (5 images/batch) in the val loader -> This is 30 images which is 20% of 150
print(len(test_loader)) # There are 12 batches (5 images/batch) in the test loader -> This is 60 images

24
6
12


Next I'll try to plot some of the images, just to see what we're working with

I don't think this is working because the images are of different sizes, we'll have to figure out what we're doing about this.

In [None]:
k = 0
for images, labels in enumerate(train_loader):
    # since batch_size = 5, there are 5 images in 'images'
    for i in range(0,5):
      image = images[i]
      # place the colour channel at the end, instead of at the beginning
      img = np.transpose(image, [1,2,0])
      # normalize pixel intensity values to [0, 1]
      img = img / 2 + 0.5
      plt.subplot(3, 5, k+1)
      plt.axis('off')
      plt.imshow(img)

    k += 1
    if k > 14:
        break

RuntimeError: ignored

**Helper Functions**

Will need functions to


*   Get model name (for training)
*   Evaluate network on validation set
*   Plot the training curves




**Define Model Architecture**

In [None]:
class TrafficSignModel(nn.Module):
  def __init__(self):
     super(TrafficSignModel, self).__init__()
     self.name = "TrafficSignModel"
     self.conv1 = nn.Conv2d(3, 5, 5) # 3 input channels, 5 output channels, kernel size of 5
     self.pool = nn.MaxPool2d(2, 2), # kernel size of 2, stride of 2
     self.conv2 = nn.Conv2d(5, 10, 5), # 5 input channels, 10 output channels, kernel size of 5
     self.fc1 = nn.Linear() # Will need to figure out these dimensions (based on input dimensions of image - how are we handling this?)
     self.fc2 = nn.Linear() # Will need to figure out these dimensions (based on how many different classes we are working with - do we know?)
         
  def forward(self, x):
    x = self.pool(F.relu(self.conv1(x)))
    x = self.pool(F.relu(self.conv2(x)))
    x = x.view(-1, ) # Will need to figure out these dimensions - see above
    x = F.relu(self.fc1(x))
    x = self.fc2(x)
    x = x.squeeze(1) # Flatten to [batch_size]
    return x

**Training**

Function to train the neural network
Need to decide loss function and optimizer

A lot of this code can be adopted from the Labs and tutorials

## Katherine trying to augment the data

In [None]:
augmented_datasets = []

my_transform = transforms.Compose([
    transforms.RandomRotation(25),
    transforms.ToTensor(),
])

for _ in range(2):
    gtsrb_new = datasets.GTSRB('data', download=True, transform=my_transform)
    augmented_datasets.append(gtsrb_new)

concat = torch.utils.data.ConcatDataset(augmented_datasets)

In [None]:
# mount our Google Drive
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
SyRa_folder = "drive/MyDrive/APS360/Project/100000_nets_ema.ckpt"

SyRa = torch.load(SyRa_folder)

In [None]:
os.chdir('drive/MyDrive/APS360/Project/SyRa-Synthesized_Rain_dataset-main')

In [None]:
%%shell
python main.py --img_size 256 --mode syn --checkpoint_dir expr/checkpoint/SyRa --out_dir expr/result --data folder_of_your_data --resume_iter 100000

Traceback (most recent call last):
  File "main.py", line 5, in <module>
    from munch import Munch
ModuleNotFoundError: No module named 'munch'


CalledProcessError: ignored