## **Prepare Dataset**

### Download Dataset

In [1]:
! pip install -q kaggle

In [2]:
from google.colab import files

files.upload()

Saving kaggle.json to kaggle.json


{'kaggle.json': b'{"username":"s00hyun00","key":"521460db8cc76b0d510d56c9bd51eb71"}'}

In [3]:
! mkdir ~/.kaggle

! cp kaggle.json ~/.kaggle/

In [4]:
! chmod 600 ~/.kaggle/kaggle.json

In [5]:
! kaggle datasets list -s german

ref                                                     title                                               size  lastUpdated          downloadCount  voteCount  usabilityRating  
------------------------------------------------------  -------------------------------------------------  -----  -------------------  -------------  ---------  ---------------  
uciml/german-credit                                     German Credit Risk                                  11KB  2016-12-14 21:25:02          41745        373  0.8235294        
meowmeowmeowmeowmeow/gtsrb-german-traffic-sign          GTSRB - German Traffic Sign Recognition Benchmark  612MB  2018-11-25 18:12:34          83437        969  0.8235294        
sterby/german-recipes-dataset                           German Recipes Dataset                               5MB  2019-03-06 16:25:22           4012         68  1.0              
mpwolke/cusersmarildownloadsgermancsv                   Creditability - German Credit Data               

In [6]:
! kaggle datasets download -d meowmeowmeowmeowmeow/gtsrb-german-traffic-sign

Downloading gtsrb-german-traffic-sign.zip to /content
 99% 603M/612M [00:06<00:00, 106MB/s]
100% 612M/612M [00:06<00:00, 101MB/s]


In [7]:
!unzip gtsrb-german-traffic-sign.zip

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
  inflating: train/5/00005_00053_00010.png  
  inflating: train/5/00005_00053_00011.png  
  inflating: train/5/00005_00053_00012.png  
  inflating: train/5/00005_00053_00013.png  
  inflating: train/5/00005_00053_00014.png  
  inflating: train/5/00005_00053_00015.png  
  inflating: train/5/00005_00053_00016.png  
  inflating: train/5/00005_00053_00017.png  
  inflating: train/5/00005_00053_00018.png  
  inflating: train/5/00005_00053_00019.png  
  inflating: train/5/00005_00053_00020.png  
  inflating: train/5/00005_00053_00021.png  
  inflating: train/5/00005_00053_00022.png  
  inflating: train/5/00005_00053_00023.png  
  inflating: train/5/00005_00053_00024.png  
  inflating: train/5/00005_00053_00025.png  
  inflating: train/5/00005_00053_00026.png  
  inflating: train/5/00005_00053_00027.png  
  inflating: train/5/00005_00053_00028.png  
  inflating: train/5/00005_00053_00029.png  
  inflating: train/5/00005_00054_00

In [8]:
import pandas as pd

train = pd.read_csv('./Train.csv')
test = pd.read_csv('./Test.csv')

In [9]:
train.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 39209 entries, 0 to 39208
Data columns (total 8 columns):
 #   Column   Non-Null Count  Dtype 
---  ------   --------------  ----- 
 0   Width    39209 non-null  int64 
 1   Height   39209 non-null  int64 
 2   Roi.X1   39209 non-null  int64 
 3   Roi.Y1   39209 non-null  int64 
 4   Roi.X2   39209 non-null  int64 
 5   Roi.Y2   39209 non-null  int64 
 6   ClassId  39209 non-null  int64 
 7   Path     39209 non-null  object
dtypes: int64(7), object(1)
memory usage: 2.4+ MB


## Pre-process Dataset

In [10]:
num_classes = len(train['ClassId'].unique())
print("Number of classes: ", num_classes)

Number of classes:  43


In [11]:
data_dir = './'
train_path = './Train'
test_path = './Test'

# Resizing the images to 30x30x3
IMG_HEIGHT = 30
IMG_WIDTH = 30
channels = 3

# Label Overview
classes = { 0:'Speed limit (20km/h)',
            1:'Speed limit (30km/h)', 
            2:'Speed limit (50km/h)', 
            3:'Speed limit (60km/h)', 
            4:'Speed limit (70km/h)', 
            5:'Speed limit (80km/h)', 
            6:'End of speed limit (80km/h)', 
            7:'Speed limit (100km/h)', 
            8:'Speed limit (120km/h)', 
            9:'No passing', 
            10:'No passing veh over 3.5 tons', 
            11:'Right-of-way at intersection', 
            12:'Priority road', 
            13:'Yield', 
            14:'Stop', 
            15:'No vehicles', 
            16:'Veh > 3.5 tons prohibited', 
            17:'No entry', 
            18:'General caution', 
            19:'Dangerous curve left', 
            20:'Dangerous curve right', 
            21:'Double curve', 
            22:'Bumpy road', 
            23:'Slippery road', 
            24:'Road narrows on the right', 
            25:'Road work', 
            26:'Traffic signals', 
            27:'Pedestrians', 
            28:'Children crossing', 
            29:'Bicycles crossing', 
            30:'Beware of ice/snow',
            31:'Wild animals crossing', 
            32:'End speed + passing limits', 
            33:'Turn right ahead', 
            34:'Turn left ahead', 
            35:'Ahead only', 
            36:'Go straight or right', 
            37:'Go straight or left', 
            38:'Keep right', 
            39:'Keep left', 
            40:'Roundabout mandatory', 
            41:'End of no passing', 
            42:'End no passing veh > 3.5 tons' }

##**Train set**

In [12]:
# Collecting the Training Data
import os
import cv2
import numpy as np
from PIL import Image
import torch

image_data = []
image_labels = []

for i in range(num_classes):
    path = data_dir + '/Train/' + str(i)
    images = os.listdir(path)

    for img in images:
        try:
            image = cv2.imread(path + '/' + img)
            image_fromarray = Image.fromarray(image, 'RGB')
            resize_image = image_fromarray.resize((IMG_HEIGHT, IMG_WIDTH))
            image_data.append(np.array(resize_image))
            image_labels.append(i)
        except:
            print("Error in " + img)

# Changing the list to numpy array
image_data = torch.FloatTensor(np.array(image_data))
image_data = np.transpose(image_data,(0,3,1,2))
image_labels = torch.FloatTensor(np.array(image_labels))

print(image_data.shape, image_labels.shape)

torch.Size([39209, 3, 30, 30]) torch.Size([39209])


In [13]:
# Split dataset
from sklearn.model_selection import train_test_split
from tensorflow import keras
from torch.utils.data import TensorDataset 


X_train, X_val, y_train, y_val = train_test_split(image_data, image_labels, test_size=0.2, random_state=42, shuffle=True)

X_train = X_train/255 
X_val = X_val/255

print("X_train.shape", X_train.shape)
print("X_valid.shape", X_val.shape)
print("y_train.shape", y_train.shape)
print("y_valid.shape", y_val.shape)

y_train = torch.FloatTensor(keras.utils.to_categorical(y_train, num_classes))
y_val = torch.FloatTensor(keras.utils.to_categorical(y_val, num_classes))

train_dataset = TensorDataset(X_train, y_train)
val_dataset = TensorDataset(X_val, y_val)

dataloaders = {'train': torch.utils.data.DataLoader(train_dataset, batch_size=64,shuffle=True, num_workers=2),
              'val': torch.utils.data.DataLoader(val_dataset, batch_size=64,shuffle=True, num_workers=2)}

X_train.shape torch.Size([31367, 3, 30, 30])
X_valid.shape torch.Size([7842, 3, 30, 30])
y_train.shape torch.Size([31367])
y_valid.shape torch.Size([7842])


## **Test set**

In [14]:
test.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 12630 entries, 0 to 12629
Data columns (total 8 columns):
 #   Column   Non-Null Count  Dtype 
---  ------   --------------  ----- 
 0   Width    12630 non-null  int64 
 1   Height   12630 non-null  int64 
 2   Roi.X1   12630 non-null  int64 
 3   Roi.Y1   12630 non-null  int64 
 4   Roi.X2   12630 non-null  int64 
 5   Roi.Y2   12630 non-null  int64 
 6   ClassId  12630 non-null  int64 
 7   Path     12630 non-null  object
dtypes: int64(7), object(1)
memory usage: 789.5+ KB


In [15]:
import shutil

# Read the dataframe from CSV or other sources
df = pd.read_csv('Test.csv')

# Path to the source directory containing PNG files
source_directory = '/content/'

# Iterate through the dataframe rows
for index, row in df.iterrows():
    class_id = row['ClassId']
    file_path = row['Path']

    # Create the destination folder for the class ID if it doesn't exist
    destination_folder = os.path.join('/content/Test', str(class_id))
    os.makedirs(destination_folder, exist_ok=True)

    # Move the PNG file to the destination folder
    source_file = os.path.join(source_directory, file_path)
    destination_file = os.path.join(destination_folder, os.path.basename(file_path))
    shutil.move(source_file, destination_file)

In [16]:
image_data = []
image_labels = []


num_classes2 = len(test['ClassId'].unique())
print(num_classes2)


data_dir = '/content/'

for i in range(num_classes2):
    path = data_dir + '/Test/' + str(i)
    images = os.listdir(path)

    for img in images:
        try:
            image = cv2.imread(path + '/' + img)
            image_fromarray = Image.fromarray(image, 'RGB')
            resize_image = image_fromarray.resize((IMG_HEIGHT, IMG_WIDTH))
            image_data.append(np.array(resize_image))
            image_labels.append(i)
        except:
            print("Error in " + img)

# Changing the list to numpy array
image_data = torch.FloatTensor(np.array(image_data))
image_data = np.transpose(image_data,(0,3,1,2))
image_labels = torch.FloatTensor(np.array(image_labels))

print(image_data.shape, image_labels.shape)

X = image_data
y = image_labels
                                                  
X = image_data/255 
y = torch.FloatTensor(keras.utils.to_categorical(y, num_classes))

dataset = TensorDataset(X, y)

test_dataloaders = torch.utils.data.DataLoader(dataset, batch_size=64,shuffle=True, num_workers=2)

43
torch.Size([12630, 3, 30, 30]) torch.Size([12630])


In [17]:
import torch


def evaluate(model, dataloader):
    model.eval()
    running_loss = 0.0
    total_samples = 0
    correct = 0
    total = 0

    with torch.no_grad():
        for inputs, labels in dataloader:
            inputs = inputs.to(device)
            labels = labels.to(device)

            # Forward pass
            outputs = model(inputs)
            loss = F.cross_entropy(outputs, torch.max(labels, 1)[1])

            running_loss += loss.item() * inputs.size(0)
            total_samples += inputs.size(0)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += torch.sum(predicted == torch.max(labels.data,1).indices)

    # Calculate accuracy and loss
    accuracy = 100 * correct / total
    avg_loss = running_loss / total_samples

    return avg_loss, accuracy

# ResNet-50

In [36]:
import torchvision
from torchvision import models
import torch.nn as nn
import torch.optim as optim

learning_rate = 0.0008861

# Setting device
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Load pre-trained Resnet-50 
base_resnet50 = models.resnet50(pretrained=True)
input = base_resnet50.fc.in_features

# Change FC layer outputs to number of classes
base_resnet50.fc = nn.Linear(input, num_classes)

# Send model to cuda
base_resnet50.to(device)


optimizer = optim.Adam(base_resnet50.parameters(), lr=learning_rate)

num_epochs = 30


Downloading: "https://download.pytorch.org/models/resnet50-0676ba61.pth" to /root/.cache/torch/hub/checkpoints/resnet50-0676ba61.pth
100%|██████████| 97.8M/97.8M [00:00<00:00, 152MB/s]


In [37]:
import torchvision
from torchvision import models
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

# Training loop
for epoch in range(num_epochs):
    base_resnet50.train()
    running_loss = 0.0
    correct = 0
    total = 0
    for batch_idx, samples in enumerate(dataloaders['train']): 
        inputs, labels = samples
        inputs = inputs.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()

        # Forward pass
        outputs = base_resnet50(inputs)
        loss = F.cross_entropy(outputs, torch.max(labels, 1)[1])

        # Backward pass and optimization
        loss.backward()
        optimizer.step()

        running_loss += loss.item() * inputs.size(0)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += torch.sum(predicted == torch.max(labels.data,1).indices)

    val_loss = evaluate(base_resnet50, dataloaders['val'])

In [38]:
from sklearn.metrics import f1_score

base_resnet50.eval()

# Initialize variables
correct = 0
total = 0
running_loss = 0.0

# Loss function
criterion = nn.CrossEntropyLoss()

# Initialize lists to store true and predicted labels
true_labels = []
pred_labels = []

with torch.no_grad():
    for inputs, samples in enumerate(test_dataloaders): 
        inputs, labels = samples
        inputs = inputs.to(device)
        labels = labels.to(device)
        
        # Forward pass
        outputs = base_resnet50(inputs)
        _, labels = torch.max(labels, 1)
        
        # Calculate loss
        loss = criterion(outputs, labels)
        running_loss += loss.item() * inputs.size(0)

        # Get predictions
        _, predicted = torch.max(outputs.data, 1)

        # Update total and correct predictions
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

        # Store true and predicted labels
        true_labels.append(labels.cpu().numpy())
        pred_labels.append(predicted.cpu().numpy())

# Convert lists to numpy arrays
true_labels = np.concatenate(true_labels, axis=0)
pred_labels = np.concatenate(pred_labels, axis=0)

# Calculate accuracy
accuracy = 100 * correct / total

# Calculate average loss
average_loss = running_loss / total

# Calculate F1 score
f1 = f1_score(true_labels, pred_labels, average='macro')

# Print accuracy, loss, and F1 score
print(f"Accuracy: {accuracy:.2f}%")
print(f"Loss: {average_loss:.4f}")
print(f"F1 Score: {f1:.4f}")

Accuracy: 95.60%
Loss: 0.2084
F1 Score: 0.9317


# LeNet-5

In [30]:
learning_rate = 0.0008741

# Setting device
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Load pre-trained Lenet5
base_lenet5 = models.googlenet(pretrained=True)
input = base_lenet5.fc.in_features

# Change FC layer outputs to number of classes
base_lenet5.fc = nn.Linear(input, num_classes)

# Send model to cuda
base_lenet5.to(device)





optimizer = optim.Adam(base_lenet5.parameters(), lr=learning_rate)

num_epochs = 30

Downloading: "https://download.pytorch.org/models/googlenet-1378be20.pth" to /root/.cache/torch/hub/checkpoints/googlenet-1378be20.pth
100%|██████████| 49.7M/49.7M [00:00<00:00, 71.1MB/s]


In [31]:
for epoch in range(num_epochs):
    base_lenet5.train()
    running_loss = 0.0
    correct = 0
    total = 0
    for inputs, samples in enumerate(dataloaders['train']): 
        inputs, labels = samples
        inputs = inputs.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()

        # Forward pass
        outputs = base_lenet5(inputs)
        loss = F.cross_entropy(outputs, torch.max(labels, 1)[1])

        # Backward pass and optimization
        loss.backward()
        optimizer.step()

        running_loss += loss.item() * inputs.size(0)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += torch.sum(predicted == torch.max(labels.data,1).indices)

    val_loss = evaluate(base_lenet5, dataloaders['val'])

In [33]:
base_lenet5.eval()

# Initialize variables
correct = 0
total = 0
running_loss = 0.0

# Loss function
criterion = nn.CrossEntropyLoss()

# Initialize lists to store true and predicted labels
true_labels = []
pred_labels = []

with torch.no_grad():
    for inputs, samples in enumerate(test_dataloaders): 
        inputs, labels = samples
        inputs = inputs.to(device)
        labels = labels.to(device)
        
        # Forward pass
        outputs = base_lenet5(inputs)
        _, labels = torch.max(labels, 1)
        
        # Calculate loss
        loss = criterion(outputs, labels)
        running_loss += loss.item() * inputs.size(0)

        # Get predictions
        _, predicted = torch.max(outputs.data, 1)

        # Update total and correct predictions
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

        # Store true and predicted labels
        true_labels.append(labels.cpu().numpy())
        pred_labels.append(predicted.cpu().numpy())

# Convert lists to numpy arrays
true_labels = np.concatenate(true_labels, axis=0)
pred_labels = np.concatenate(pred_labels, axis=0)

# Calculate accuracy
accuracy = 100 * correct / total

# Calculate average loss
average_loss = running_loss / total

# Calculate F1 score
f1 = f1_score(true_labels, pred_labels, average='macro')

# Print accuracy, loss, and F1 score
print(f"Accuracy: {accuracy:.2f}%")
print(f"Loss: {average_loss:.4f}")
print(f"F1 Score: {f1:.4f}")


Accuracy: 95.24%
Loss: 0.2068
F1 Score: 0.9357


# DenseNet-161

In [19]:
learning_rate = 0.0008344

# Setting device
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Load pre-trained Lenet5
densenet161 = models.densenet161(pretrained=True)
input = densenet161.classifier.in_features

# Change FC layer outputs to number of classes
densenet161.classifier = nn.Linear(input, num_classes)

# Send model to cuda
densenet161.to(device)





optimizer = optim.Adam(densenet161.parameters(), lr=learning_rate)

num_epochs = 30

Downloading: "https://download.pytorch.org/models/densenet161-8d451a50.pth" to /root/.cache/torch/hub/checkpoints/densenet161-8d451a50.pth
100%|██████████| 110M/110M [00:00<00:00, 185MB/s]


In [20]:
for epoch in range(num_epochs):
    densenet161.train()
    running_loss = 0.0
    correct = 0
    total = 0
    for inputs, samples in enumerate(dataloaders['train']): 
        inputs, labels = samples
        inputs = inputs.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()

        # Forward pass
        outputs = densenet161(inputs)
        loss = F.cross_entropy(outputs, torch.max(labels, 1)[1])

        # Backward pass and optimization
        loss.backward()
        optimizer.step()

        running_loss += loss.item() * inputs.size(0)
        _, predicted = torch.max(outputs.data, 1)
        
        total += labels.size(0)
        correct += torch.sum(predicted == torch.max(labels.data,1).indices)

    val_loss = evaluate(densenet161, dataloaders['val'])

In [34]:
densenet161.eval()

# Initialize variables
correct = 0
total = 0
running_loss = 0.0

# Loss function
criterion = nn.CrossEntropyLoss()

# Initialize lists to store true and predicted labels
true_labels = []
pred_labels = []

with torch.no_grad():
    for inputs, samples in enumerate(test_dataloaders): 
        inputs, labels = samples
        inputs = inputs.to(device)
        labels = labels.to(device)
        
        # Forward pass
        outputs = densenet161(inputs)
        _, labels = torch.max(labels, 1)
        
        # Calculate loss
        loss = criterion(outputs, labels)
        running_loss += loss.item() * inputs.size(0)

        # Get predictions
        _, predicted = torch.max(outputs.data, 1)

        # Update total and correct predictions
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

        # Store true and predicted labels
        true_labels.append(labels.cpu().numpy())
        pred_labels.append(predicted.cpu().numpy())

# Convert lists to numpy arrays
true_labels = np.concatenate(true_labels, axis=0)
pred_labels = np.concatenate(pred_labels, axis=0)

# Calculate accuracy
accuracy = 100 * correct / total

# Calculate average loss
average_loss = running_loss / total

# Calculate F1 score
f1 = f1_score(true_labels, pred_labels, average='macro')

# Print accuracy, loss, and F1 score
print(f"Accuracy: {accuracy:.2f}%")
print(f"Loss: {average_loss:.4f}")
print(f"F1 Score: {f1:.4f}")


Accuracy: 95.97%
Loss: 0.1757
F1 Score: 0.9382


# MobileNet-v2

In [22]:
learning_rate = 0.0007877

# Setting device
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Load pre-trained Lenet5
mobilenet_v2 = models.mobilenet_v2(pretrained=True)
input = mobilenet_v2.classifier[-1].in_features

# Change FC layer outputs to number of classes
mobilenet_v2.classifier[-1] = nn.Linear(input, num_classes)

# Send model to cuda
mobilenet_v2.to(device)





optimizer = optim.Adam(mobilenet_v2.parameters(), lr=learning_rate)

num_epochs = 30

Downloading: "https://download.pytorch.org/models/mobilenet_v2-b0353104.pth" to /root/.cache/torch/hub/checkpoints/mobilenet_v2-b0353104.pth
100%|██████████| 13.6M/13.6M [00:00<00:00, 187MB/s]


In [23]:
true_labels = []
predicted_labels = []


for epoch in range(num_epochs):
    mobilenet_v2.train()
    running_loss = 0.0
    correct = 0
    total = 0
    for inputs, samples in enumerate(dataloaders['train']): 
        inputs, labels = samples
        inputs = inputs.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()

        # Forward pass
        outputs = mobilenet_v2(inputs)
        loss = F.cross_entropy(outputs, torch.max(labels, 1)[1])

        # Backward pass and optimization
        loss.backward()
        optimizer.step()

        running_loss += loss.item() * inputs.size(0)
        _, predicted = torch.max(outputs.data, 1)
        true_labels.extend(labels.cpu().numpy())
        total += labels.size(0)
        correct += torch.sum(predicted == torch.max(labels.data,1).indices)

    val_loss = evaluate(mobilenet_v2, dataloaders['val'])

In [35]:
mobilenet_v2.eval()

# Initialize variables
correct = 0
total = 0
running_loss = 0.0

# Loss function
criterion = nn.CrossEntropyLoss()

# Initialize lists to store true and predicted labels
true_labels = []
pred_labels = []

with torch.no_grad():
    for inputs, samples in enumerate(test_dataloaders): 
        inputs, labels = samples
        inputs = inputs.to(device)
        labels = labels.to(device)
        
        # Forward pass
        outputs = mobilenet_v2(inputs)
        _, labels = torch.max(labels, 1)
        
        # Calculate loss
        loss = criterion(outputs, labels)
        running_loss += loss.item() * inputs.size(0)

        # Get predictions
        _, predicted = torch.max(outputs.data, 1)

        # Update total and correct predictions
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

        # Store true and predicted labels
        true_labels.append(labels.cpu().numpy())
        pred_labels.append(predicted.cpu().numpy())

# Convert lists to numpy arrays
true_labels = np.concatenate(true_labels, axis=0)
pred_labels = np.concatenate(pred_labels, axis=0)

# Calculate accuracy
accuracy = 100 * correct / total

# Calculate average loss
average_loss = running_loss / total

# Calculate F1 score
f1 = f1_score(true_labels, pred_labels, average='macro')

# Print accuracy, loss, and F1 score
print(f"Accuracy: {accuracy:.2f}%")
print(f"Loss: {average_loss:.4f}")
print(f"F1 Score: {f1:.4f}")


Accuracy: 95.45%
Loss: 0.2372
F1 Score: 0.9208
