# Imports

In [None]:
import pandas as pd
import os
# Print the full path of every file found under the Kaggle input directory.
for root, _subdirs, names in os.walk('/kaggle/input'):
    for name in names:
        print(os.path.join(root, name))

import torch
import torchvision
import torchvision.transforms as transforms
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from albumentations import Compose, ShiftScaleRotate, Blur
from albumentations.pytorch import ToTensor

import matplotlib.pyplot as plt
import numpy as np

from mlxtend.plotting import plot_confusion_matrix
from sklearn.metrics import confusion_matrix, classification_report
def make_report(y_pred , y_true, fig_size = (20,20)):
    """Print a classification report and display the confusion matrix.

    Args:
        y_pred: predicted class labels.
        y_true: ground-truth class labels, aligned with ``y_pred``.
        fig_size: figure size forwarded to ``plot_confusion_matrix``.
    """
    # A blank line followed by the heading, exactly as two separate prints would emit.
    print("\nClassification Report: ")
    report_text = classification_report(y_true, y_pred)
    print(report_text)

    matrix = confusion_matrix(y_true, y_pred)
    _fig, _ax = plot_confusion_matrix(conf_mat=matrix, figsize=fig_size)
    plt.show()

# Loading data

In [None]:
# NOTE(review): read_pickle executes pickle code; only load files from a trusted source.
data = pd.read_pickle('/kaggle/input/traffic-signs-preprocessed/data0.pickle')
print('keys:', data.keys())
# Report the array shape of each split.
for split in ('train', 'test', 'validation'):
    print('{} shape:'.format(split), data['x_' + split].shape)

In [None]:
# Preview the first two training images.
for i in range(2):
    # `.T` reverses every axis, which moves the channels last but also swaps rows
    # and columns, so the picture was drawn transposed. Move only the channel axis.
    # NOTE(review): assumes samples are stored channels-first (3, H, W) — confirm
    # against the shapes printed by the loading cell.
    img = np.transpose(data['x_train'][i], (1, 2, 0))
    # Same uint8 cast the dataset class applies; matplotlib clips float RGB data
    # to 0..1, so 0..255 floats would otherwise render almost entirely white.
    plt.imshow(img.astype(np.uint8))
    plt.axis('off')
    plt.show()

In [None]:
class MyDataset(torch.utils.data.Dataset):
    """Map-style dataset yielding ``{'x': image tensor, 'y': one-hot label}``.

    Args:
        x: indexable collection of 32x32 RGB samples (channels-first
           ``(3, 32, 32)``, channels-last ``(32, 32, 3)`` or flat).
        y: indexable collection of integer class ids aligned with ``x``.
        num_classes: length of the one-hot label vector.
    """

    def __init__(self, x, y, num_classes):
        self.x = x
        self.y = y
        self.n_class = num_classes

    def __len__(self):
        return len(self.x)

    def __getitem__(self, idx):
        """Return sample ``idx`` as a float32 ``(3, 32, 32)`` tensor in 0..1 plus its one-hot label."""
        pixels = np.asarray(self.x[idx]).astype(np.uint8)
        if pixels.shape == (3, 32, 32):
            # Already channels-first. Reshaping this to (32, 32, 3) would only
            # reinterpret memory and scramble the colour planes across pixels.
            chw = pixels
        else:
            # Channels-last / flat input: view as HWC, then move channels first.
            chw = pixels.reshape((32, 32, 3)).transpose(2, 0, 1)
        # Same result as torchvision's to_tensor on a uint8 HWC array:
        # float32, CHW, scaled by 1/255.
        x = torch.from_numpy(np.ascontiguousarray(chw)).float().div(255)

        # One-hot label (float64, as produced from np.zeros); the training loop
        # converts it back to a class index with an arg-max.
        label_idx = self.y[idx]
        label = np.zeros(self.n_class)
        label[label_idx] = 1
        label = torch.tensor(label)
        return {'x': x, 'y': label}
    
BATCH_SIZE = 256

# One dataset and one DataLoader per split; every split uses 43 classes,
# the same batch size and shuffle=True.
train_dataset = MyDataset(x=data['x_train'], y=data['y_train'], num_classes=43)
dataLoader_train = torch.utils.data.DataLoader(
    dataset=train_dataset, shuffle=True, batch_size=BATCH_SIZE)

test_dataset = MyDataset(x=data['x_test'], y=data['y_test'], num_classes=43)
dataLoader_test = torch.utils.data.DataLoader(
    dataset=test_dataset, shuffle=True, batch_size=BATCH_SIZE)

validation_dataset = MyDataset(x=data['x_validation'], y=data['y_validation'], num_classes=43)
dataLoader_validation = torch.utils.data.DataLoader(
    dataset=validation_dataset, shuffle=True, batch_size=BATCH_SIZE)

# EDA

In [None]:
# Histogram of the training labels; 43 bins to match the 43 classes used for the datasets.
plt.hist(data['y_train'], bins = 43)

The histogram shows that the classes are balanced.

# Modeling

In [None]:
class Model(torch.nn.Module):
    """Small CNN classifier for 3-channel images.

    Three unpadded 3x3 convolutions (3 -> 32 -> 32 -> 64 channels), each followed
    by ReLU and BatchNorm2d, then global max pooling and a 64 -> 512 -> num_classes
    fully connected head.
    """

    def __init__(self, num_classes):
        super().__init__()

        # Conv/ReLU/BatchNorm triples are appended in order, so the Sequential
        # indices (conv.0 ... conv.8) match previously saved state dicts.
        stages = []
        for c_in, c_out in ((3, 32), (32, 32), (32, 64)):
            stages.append(torch.nn.Conv2d(in_channels=c_in, out_channels=c_out, kernel_size=(3, 3)))
            stages.append(torch.nn.ReLU())
            stages.append(torch.nn.BatchNorm2d(c_out))
        self.conv = torch.nn.Sequential(*stages)

        # Global max pool to 1x1, then drop the spatial dimensions.
        self.flatten = torch.nn.Sequential(
            torch.nn.AdaptiveMaxPool2d(1),
            torch.nn.Flatten(),
        )

        self.fc = torch.nn.Sequential(
            torch.nn.Linear(64, 512),
            torch.nn.ReLU(),
            torch.nn.Linear(512, num_classes),
        )

    def forward(self, x):
        """Return the class logits for a batch of images."""
        return self.fc(self.flatten(self.conv(x)))
    
# Fresh 43-class network, cross-entropy loss and an Adam optimiser over all its weights.
model = Model(num_classes=43)
print(model)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=5e-4)

In [None]:
%%time

n_epochs = 3
print('started!')
for epoch in range(n_epochs):
    # ---- training pass ----
    train_batch_loss = 0
    model.train()
    for step, batch in enumerate(dataLoader_train):
        x = batch["x"]
        y = batch["y"]

        optimizer.zero_grad()
        outputs = model(x)
        # Labels arrive one-hot; CrossEntropyLoss expects class indices.
        loss = criterion(outputs, y.argmax(dim=1))
        loss.backward()
        # Exactly one parameter update per batch. A second step() here would
        # re-apply the same gradients and advance Adam's moment estimates twice.
        optimizer.step()
        train_batch_loss += loss.item()

    # ---- per-epoch loss on the test loader (no gradient tracking) ----
    test_batch_loss = 0
    model.eval()
    with torch.no_grad():
        for step, batch in enumerate(dataLoader_test):
            x = batch["x"]
            y = batch["y"]
            outputs = model(x)
            loss = criterion(outputs, y.argmax(dim=1))
            test_batch_loss += loss.item()

    # Losses are averaged over the number of batches in each loader.
    print('epoch {}/{} finished with train loss: {} and test loss: {}'.format(epoch+1, n_epochs,
                                                                              train_batch_loss / len(dataLoader_train),
                                                                              test_batch_loss / len(dataLoader_test)))

torch.save(model.state_dict(), './model_RGB')

In [None]:
def res(dataLoader):
    """Evaluate the global ``model`` on ``dataLoader`` and hand the labels to ``make_report``.

    Each batch is a dict with an ``"x"`` input tensor and a one-hot ``"y"``
    tensor; true and predicted classes are the arg-max along dimension 1.
    """
    y_true, y_hat = [], []
    model.eval()
    with torch.no_grad():
        for batch in dataLoader:
            inputs, targets = batch["x"], batch["y"]
            logits = model(inputs)
            y_true.extend(torch.max(targets, 1)[1].tolist())
            y_hat.extend(torch.max(logits, 1)[1].tolist())

    make_report(y_pred=y_hat, y_true=y_true)

# Report on the test split first, then on the validation split.
for title, loader in (('Test', dataLoader_test), ('validation', dataLoader_validation)):
    print(title)
    res(loader)

# Grayscale

In [None]:
# NOTE(review): read_pickle executes pickle code; only load files from a trusted source.
data = pd.read_pickle('/kaggle/input/traffic-signs-preprocessed/data4.pickle')
print('keys:', data.keys())
# Report the array shape of each split.
for split in ('train', 'test', 'validation'):
    print('{} shape:'.format(split), data['x_' + split].shape)

In [None]:
# Preview the first two grayscale training images.
for i in range(2):
    # `.T` reverses every axis and therefore also swaps rows and columns, which
    # drew the sign transposed. Dropping the size-1 channel axis keeps orientation.
    # NOTE(review): assumes samples are stored as (1, H, W) — confirm against the
    # shapes printed by the loading cell.
    img = np.squeeze(data['x_train'][i])
    plt.imshow(img, cmap='gray', vmin=0, vmax=255)
    plt.axis('off')
    plt.show()

In [None]:
class MyDataset(torch.utils.data.Dataset):
    """Map-style dataset yielding ``{'x': image tensor, 'y': one-hot label}`` for grayscale samples."""

    def __init__(self, x, y, num_classes):
        self.x, self.y = x, y
        self.n_class = num_classes

    def __len__(self):
        return len(self.x)

    def __getitem__(self, idx):
        """Return sample ``idx``: the image via torchvision's ``to_tensor`` and a one-hot label."""
        # Cast to uint8 and view as a single-channel 32x32 HWC image.
        pixels = self.x[idx].astype(np.uint8)
        image_hwc = pixels.reshape((32, 32, 1))
        x = torchvision.transforms.functional.to_tensor(image_hwc)

        # One-hot encode the class id (float64, as produced from np.zeros).
        one_hot = np.zeros(self.n_class)
        one_hot[self.y[idx]] = 1
        return {'x': x, 'y': torch.tensor(one_hot)}
    
BATCH_SIZE = 256

# One dataset and one DataLoader per split; every split uses 43 classes,
# the same batch size and shuffle=True.
train_dataset = MyDataset(x=data['x_train'], y=data['y_train'], num_classes=43)
dataLoader_train = torch.utils.data.DataLoader(
    dataset=train_dataset, shuffle=True, batch_size=BATCH_SIZE)

test_dataset = MyDataset(x=data['x_test'], y=data['y_test'], num_classes=43)
dataLoader_test = torch.utils.data.DataLoader(
    dataset=test_dataset, shuffle=True, batch_size=BATCH_SIZE)

validation_dataset = MyDataset(x=data['x_validation'], y=data['y_validation'], num_classes=43)
dataLoader_validation = torch.utils.data.DataLoader(
    dataset=validation_dataset, shuffle=True, batch_size=BATCH_SIZE)

In [None]:
# Disabled variant (two conv layers, 32 channels each); noted result: 68 percent.
# class Model(torch.nn.Module):
    
#     def __init__(self, num_classes):
#         super().__init__()
        
#         self.conv = torch.nn.Sequential(
#             torch.nn.Conv2d(in_channels=1, out_channels=32, kernel_size=(3, 3)),
#             torch.nn.ReLU(),
#             torch.nn.BatchNorm2d(32),
#             torch.nn.Conv2d(in_channels=32, out_channels=32, kernel_size=(3, 3)),
#             torch.nn.ReLU(),
#             torch.nn.BatchNorm2d(32),
#         )
        
#         self.flatten = torch.nn.Sequential(torch.nn.AdaptiveMaxPool2d(1), torch.nn.Flatten())
        
#         self.fc = torch.nn.Sequential(
#             torch.nn.Linear(32, 256),
#             torch.nn.ReLU(),
#             torch.nn.Dropout(0.3),
#             torch.nn.Linear(256, num_classes)
#         )
        
#     def forward(self, x):
#         x = self.conv(x)
#         x = self.flatten(x) 
#         x = self.fc(x)
#         return x
    
# model = Model(43)
# print(model)
# criterion = torch.nn.CrossEntropyLoss()
# optimizer = torch.optim.Adam(model.parameters(), lr=5e-4)

In [None]:
# Disabled variant (two conv layers, 32 then 64 channels); noted result: 81 percent.
# class Model(torch.nn.Module):
    
#     def __init__(self, num_classes):
#         super().__init__()
        
#         self.conv = torch.nn.Sequential(
#             torch.nn.Conv2d(in_channels=1, out_channels=32, kernel_size=(3, 3)),
#             torch.nn.ReLU(),
#             torch.nn.BatchNorm2d(32),
#             torch.nn.Conv2d(in_channels=32, out_channels=64, kernel_size=(3, 3)),
#             torch.nn.ReLU(),
#             torch.nn.BatchNorm2d(64),
#         )
        
#         self.flatten = torch.nn.Sequential(torch.nn.AdaptiveMaxPool2d(1), torch.nn.Flatten())
        
#         self.fc = torch.nn.Sequential(
#             torch.nn.Linear(64, 512),
#             torch.nn.ReLU(),
#             torch.nn.Linear(512, num_classes)
#         )
        
#     def forward(self, x):
#         x = self.conv(x)
#         x = self.flatten(x) 
#         x = self.fc(x)
#         return x
    
# model = Model(43)
# print(model)
# criterion = torch.nn.CrossEntropyLoss()
# optimizer = torch.optim.Adam(model.parameters(), lr=5e-4)

In [None]:
# Active variant (three conv layers); noted result: 90.
class Model(torch.nn.Module):
    """Small CNN classifier for single-channel images.

    Three unpadded 3x3 convolutions (1 -> 32 -> 32 -> 64 channels), each followed
    by ReLU and BatchNorm2d, then global max pooling and a 64 -> 512 -> num_classes
    fully connected head.
    """

    def __init__(self, num_classes):
        super().__init__()

        # Conv/ReLU/BatchNorm triples are appended in order, so the Sequential
        # indices (conv.0 ... conv.8) match previously saved state dicts.
        stages = []
        for c_in, c_out in ((1, 32), (32, 32), (32, 64)):
            stages.append(torch.nn.Conv2d(in_channels=c_in, out_channels=c_out, kernel_size=(3, 3)))
            stages.append(torch.nn.ReLU())
            stages.append(torch.nn.BatchNorm2d(c_out))
        self.conv = torch.nn.Sequential(*stages)

        # Global max pool to 1x1, then drop the spatial dimensions.
        self.flatten = torch.nn.Sequential(
            torch.nn.AdaptiveMaxPool2d(1),
            torch.nn.Flatten(),
        )

        self.fc = torch.nn.Sequential(
            torch.nn.Linear(64, 512),
            torch.nn.ReLU(),
            torch.nn.Linear(512, num_classes),
        )

    def forward(self, x):
        """Return the class logits for a batch of images."""
        return self.fc(self.flatten(self.conv(x)))
    
# Fresh 43-class network, cross-entropy loss and an Adam optimiser over all its weights.
model = Model(num_classes=43)
print(model)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=5e-4)

In [None]:
%%time

n_epochs = 3
print('started!')
for epoch in range(n_epochs):
    # ---- training pass ----
    train_batch_loss = 0
    model.train()
    for step, batch in enumerate(dataLoader_train):
        x = batch["x"]
        y = batch["y"]

        optimizer.zero_grad()
        outputs = model(x)
        # Labels arrive one-hot; CrossEntropyLoss expects class indices.
        loss = criterion(outputs, y.argmax(dim=1))
        loss.backward()
        # Exactly one parameter update per batch. A second step() here would
        # re-apply the same gradients and advance Adam's moment estimates twice.
        optimizer.step()
        train_batch_loss += loss.item()

    # ---- per-epoch loss on the test loader (no gradient tracking) ----
    test_batch_loss = 0
    model.eval()
    with torch.no_grad():
        for step, batch in enumerate(dataLoader_test):
            x = batch["x"]
            y = batch["y"]
            outputs = model(x)
            loss = criterion(outputs, y.argmax(dim=1))
            test_batch_loss += loss.item()

    # Losses are averaged over the number of batches in each loader.
    print('epoch {}/{} finished with train loss: {} and test loss: {}'.format(epoch+1, n_epochs,
                                                                              train_batch_loss / len(dataLoader_train),
                                                                              test_batch_loss / len(dataLoader_test)))

torch.save(model.state_dict(), './model_gray')

In [None]:
def res(dataLoader):
    """Evaluate the global ``model`` on ``dataLoader`` and hand the labels to ``make_report``.

    Each batch is a dict with an ``"x"`` input tensor and a one-hot ``"y"``
    tensor; true and predicted classes are the arg-max along dimension 1.
    """
    y_true, y_hat = [], []
    model.eval()
    with torch.no_grad():
        for batch in dataLoader:
            inputs, targets = batch["x"], batch["y"]
            logits = model(inputs)
            y_true.extend(torch.max(targets, 1)[1].tolist())
            y_hat.extend(torch.max(logits, 1)[1].tolist())

    make_report(y_pred=y_hat, y_true=y_true)

# Report on the test split first, then on the validation split.
for title, loader in (('Test', dataLoader_test), ('validation', dataLoader_validation)):
    print(title)
    res(loader)

# Visualization of filters and outputs

In [None]:
# Rebuild the network and restore the weights saved as './model_gray'.
model = Model(num_classes=43)
model.load_state_dict(state_dict=torch.load('./model_gray'))

In [None]:
def imshow_filter(img,row,col):
    """Draw every filter in ``img`` on a ``row`` x ``col`` grid of subplots.

    Args:
        img: array of filters indexed as (filter, channel, height, width).
             The function now plots this argument; it previously ignored it
             and read the global ``filters`` instead.
        row: number of subplot rows.
        col: number of subplot columns.
    """
    print('-------------------------------------------------------------')
    plt.figure()
    for i, kernel in enumerate(img):
        # channels-first -> channels-last for imshow
        kernel = np.transpose(kernel, (1, 2, 0))
        # Min-max scale to 0..1; a constant filter maps to zeros instead of
        # dividing by zero.
        low = kernel.min()
        span = kernel.max() - low
        kernel = (kernel - low) / span if span > 0 else np.zeros_like(kernel)
        if kernel.shape[-1] == 1:
            # single-channel filter: hand imshow a plain 2-D array
            kernel = kernel[..., 0]
        plt.subplot(row,col,i+1)
        plt.imshow(kernel,cmap= 'gray')
        plt.xticks([])
        plt.yticks([])
    plt.show()

# (title, index into model.conv, input-channel selection, grid columns) per conv layer.
# Layer 1 plots all of its input channels; layers 2 and 3 plot only input channel 0.
for title, layer_idx, channel_sel, n_cols in (
        ('layer1', 0, slice(None), 4),
        ('layer2', 3, slice(0, 1), 4),
        ('layer3', 6, slice(0, 1), 8)):
    print(title)
    filters = model.conv[layer_idx].weight.data.cpu().numpy()[:, channel_sel, :, :]
    imshow_filter(filters, 8, n_cols)

## Layer outputs

In [None]:
class VizModel(torch.nn.Module):
    """Copy of the grayscale ``Model`` architecture whose forward pass also plots activations.

    After each layer of ``self.conv`` the first 16 feature maps of the first
    sample in the batch are drawn, saved to a JPEG and shown.
    """

    def __init__(self, num_classes):
        super().__init__()

        self.conv = torch.nn.Sequential(
            torch.nn.Conv2d(in_channels=1, out_channels=32, kernel_size=(3, 3)),
            torch.nn.ReLU(),
            torch.nn.BatchNorm2d(32),
            torch.nn.Conv2d(in_channels=32, out_channels=32, kernel_size=(3, 3)),
            torch.nn.ReLU(),
            torch.nn.BatchNorm2d(32),
            torch.nn.Conv2d(in_channels=32, out_channels=64, kernel_size=(3, 3)),
            torch.nn.ReLU(),
            torch.nn.BatchNorm2d(64),
        )

        self.flatten = torch.nn.Sequential(torch.nn.AdaptiveMaxPool2d(1), torch.nn.Flatten())

        self.fc = torch.nn.Sequential(
            torch.nn.Linear(64, 512),
            torch.nn.ReLU(),
            torch.nn.Linear(512, num_classes)
        )

    def forward(self, x):
        """Return class logits; as a side effect plot and save each conv-stack layer's output."""
        for idx, layer in enumerate(self.conv):
            x = layer(x)
            # .cpu() so this also works when the model lives on an accelerator.
            maps = x.detach().cpu().numpy()
            for i in range(min(16, maps.shape[1])):
                img = np.transpose(maps[0, i:i+1, :, :], (1, 2, 0))
                # Min-max scale with the epsilon inside the denominator, so a
                # flat map (e.g. all zeros after ReLU) gives zeros, not NaN/inf.
                low = img.min()
                img = (img - low) / (img.max() - low + 0.001)
                plt.subplot(8,4,i+1)
                plt.imshow(img,cmap= 'gray')
                plt.xticks([])
                plt.yticks([])
            # The position in the stack is part of the file name: the layer
            # repr alone repeats (every ReLU is 'ReLU()'), which overwrote files.
            plt.savefig('./{}_{}.jpg'.format(idx, type(layer).__name__))
            plt.show()

        x = self.flatten(x)
        x = self.fc(x)
        return x

model = VizModel(43)
# Inference mode: BatchNorm then normalises with the stored running statistics
# rather than per-batch statistics, and the visualisation forward pass no longer
# updates those running statistics.
model.eval()
model.load_state_dict(torch.load('./model_gray'))

In [None]:
# Take one batch and keep sample 4 as a batch of size one.
a = next(iter(dataLoader_train))
x = a['x'][4:5,:,:,:]
# The forward pass is run for its plotting side effect; no gradients are needed.
with torch.no_grad():
    model(x)

# Plot the input sign as a 2-D (H, W) array. `.T` on a 3-D tensor is deprecated
# in PyTorch and reversed all axes, which showed the sign transposed.
img = x[0, 0].cpu().numpy()
plt.imshow(img, cmap='gray', vmin=0, vmax=1)
plt.axis('off')
plt.savefig('./sign.jpg')
plt.show()

# Data augmentation

In [None]:
# NOTE(review): read_pickle executes pickle code; only load files from a trusted source.
data = pd.read_pickle('/kaggle/input/traffic-signs-preprocessed/data4.pickle')
print('keys:', data.keys())
# Report the array shape of each split.
for split in ('train', 'test', 'validation'):
    print('{} shape:'.format(split), data['x_' + split].shape)

In [None]:
class MyDataset(torch.utils.data.Dataset):
    """Grayscale dataset with an optional albumentations-style transform.

    Args:
        x: indexable collection of 32x32 single-channel samples.
        y: indexable collection of integer class ids aligned with ``x``.
        num_classes: length of the one-hot label vector.
        transform: callable invoked as ``transform(image=hwc_array)`` that
            returns a dict whose ``'image'`` entry is the model input. When
            ``None`` the image is only converted to a float tensor in 0..1.
    """

    def __init__(self, x, y, num_classes, transform = None):
        self.x = x
        self.y = y
        self.n_class = num_classes
        self.transform = transform

    def __len__(self):
        return len(self.x)

    def __getitem__(self, idx):
        """Return ``{'x': image tensor, 'y': one-hot label}`` for sample ``idx``."""
        # Cast to uint8 as the un-augmented dataset classes in this notebook do
        # for the same pickle, so all pipelines start from the same pixel type.
        # NOTE(review): the legacy albumentations ToTensor is expected to rescale
        # uint8 input to 0..1 — confirm for the installed version.
        image = np.asarray(self.x[idx]).astype(np.uint8).reshape((32, 32, 1))

        if self.transform is None:
            # transform defaults to None; calling it unconditionally raised
            # TypeError. Fall back to plain HWC -> CHW float conversion in 0..1.
            x = torch.from_numpy(np.moveaxis(image, -1, 0).copy()).float().div(255)
        else:
            augmented = self.transform(image=image)
            x = augmented['image']

        # One-hot label (float64, as produced from np.zeros).
        label_idx = self.y[idx]
        label = np.zeros(self.n_class)
        label[label_idx] = 1
        label = torch.tensor(label)
        return {'x': x, 'y': label}

# Training pipeline: blur with probability 0.1, shift/scale/rotate (up to 45
# degrees) with probability 0.3, then tensor conversion.
transform_train = Compose(
    [Blur(blur_limit=3, p=0.1), ShiftScaleRotate(rotate_limit=45, p=0.3), ToTensor()]
)

# Evaluation pipeline: tensor conversion only.
transform_test = Compose([ToTensor()])

BATCH_SIZE = 256

# Only the training split is augmented; test and validation share transform_test.
train_dataset = MyDataset(x=data['x_train'], y=data['y_train'], num_classes=43,
                          transform=transform_train)
dataLoader_train = torch.utils.data.DataLoader(
    dataset=train_dataset, shuffle=True, batch_size=BATCH_SIZE)

test_dataset = MyDataset(x=data['x_test'], y=data['y_test'], num_classes=43,
                         transform=transform_test)
dataLoader_test = torch.utils.data.DataLoader(
    dataset=test_dataset, shuffle=True, batch_size=BATCH_SIZE)

validation_dataset = MyDataset(x=data['x_validation'], y=data['y_validation'], num_classes=43,
                               transform=transform_test)
dataLoader_validation = torch.utils.data.DataLoader(
    dataset=validation_dataset, shuffle=True, batch_size=BATCH_SIZE)

In [None]:
# Same three-conv-layer architecture as the grayscale model; original note: 90.
class Model(torch.nn.Module):
    """Small CNN classifier for single-channel images.

    Three unpadded 3x3 convolutions (1 -> 32 -> 32 -> 64 channels), each followed
    by ReLU and BatchNorm2d, then global max pooling and a 64 -> 512 -> num_classes
    fully connected head.
    """

    def __init__(self, num_classes):
        super().__init__()

        # Conv/ReLU/BatchNorm triples are appended in order, so the Sequential
        # indices (conv.0 ... conv.8) match previously saved state dicts.
        stages = []
        for c_in, c_out in ((1, 32), (32, 32), (32, 64)):
            stages.append(torch.nn.Conv2d(in_channels=c_in, out_channels=c_out, kernel_size=(3, 3)))
            stages.append(torch.nn.ReLU())
            stages.append(torch.nn.BatchNorm2d(c_out))
        self.conv = torch.nn.Sequential(*stages)

        # Global max pool to 1x1, then drop the spatial dimensions.
        self.flatten = torch.nn.Sequential(
            torch.nn.AdaptiveMaxPool2d(1),
            torch.nn.Flatten(),
        )

        self.fc = torch.nn.Sequential(
            torch.nn.Linear(64, 512),
            torch.nn.ReLU(),
            torch.nn.Linear(512, num_classes),
        )

    def forward(self, x):
        """Return the class logits for a batch of images."""
        return self.fc(self.flatten(self.conv(x)))
    
# Fresh 43-class network, cross-entropy loss and an Adam optimiser over all its weights.
model = Model(num_classes=43)
print(model)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=5e-4)

In [None]:
%%time

n_epochs = 3
print('started!')
for epoch in range(n_epochs):
    # ---- training pass ----
    train_batch_loss = 0
    model.train()
    for step, batch in enumerate(dataLoader_train):
        x = batch["x"]
        y = batch["y"]

        optimizer.zero_grad()
        outputs = model(x)
        # Labels arrive one-hot; CrossEntropyLoss expects class indices.
        loss = criterion(outputs, y.argmax(dim=1))
        loss.backward()
        # Exactly one parameter update per batch. A second step() here would
        # re-apply the same gradients and advance Adam's moment estimates twice.
        optimizer.step()
        train_batch_loss += loss.item()

    # ---- per-epoch loss on the test loader (no gradient tracking) ----
    test_batch_loss = 0
    model.eval()
    with torch.no_grad():
        for step, batch in enumerate(dataLoader_test):
            x = batch["x"]
            y = batch["y"]
            outputs = model(x)
            loss = criterion(outputs, y.argmax(dim=1))
            test_batch_loss += loss.item()

    # Losses are averaged over the number of batches in each loader.
    print('epoch {}/{} finished with train loss: {} and test loss: {}'.format(epoch+1, n_epochs,
                                                                              train_batch_loss / len(dataLoader_train),
                                                                              test_batch_loss / len(dataLoader_test)))

torch.save(model.state_dict(), './model_aug')

In [None]:
def res(dataLoader):
    """Evaluate the global ``model`` on ``dataLoader`` and hand the labels to ``make_report``.

    Each batch is a dict with an ``"x"`` input tensor and a one-hot ``"y"``
    tensor; true and predicted classes are the arg-max along dimension 1.
    """
    y_true, y_hat = [], []
    model.eval()
    with torch.no_grad():
        for batch in dataLoader:
            inputs, targets = batch["x"], batch["y"]
            logits = model(inputs)
            y_true.extend(torch.max(targets, 1)[1].tolist())
            y_hat.extend(torch.max(logits, 1)[1].tolist())

    make_report(y_pred=y_hat, y_true=y_true)

# Report on the test split first, then on the validation split.
for title, loader in (('Test', dataLoader_test), ('validation', dataLoader_validation)):
    print(title)
    res(loader)