In [28]:
#importing necessary libraries.
import numpy as np 
import pandas as pd 
import torch #calling torch to store tensors
import os
#image related libraries
import cv2
from PIL import Image
#performace metrics using data visualization
from sklearn import metrics
from sklearn.metrics import f1_score, accuracy_score,confusion_matrix,classification_report
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
#deeplearning model and training
from sklearn.model_selection import train_test_split
import torch.nn as nn #used for weight and bias tensors
import torch.nn.functional as F #for activation functions
from torch.optim import SGD #stochastic gradient descent
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset

#miscellaneous libraries
import time
import datetime
import random

In [29]:
#reading and visualizing data
#print(os.listdir('/Users/nikhilkunapareddy/Documents/projects/6_traffic_signal_detection/Data/archive')) #print statement to check the input folder contents

In [30]:
#setting data structures
data = []
labels = []
classes = 43
cur_path = os.getcwd()

#extracting dataset 
for i in range(classes):
    path = os.path.join('/Users/nikhilkunapareddy/Documents/projects/6_traffic_signal_detection/Data/archive','train',str(i))
    images = os.listdir(path)
    for a in images:
        try:
            image = Image.open(path + '/'+ a)
            image = image.resize((30,30))
            image = np.array(image)
            data.append(image)
            labels.append(i)
        except:
            print("Error loading image")

#converting lists to numpy arrays
data = np.array(data)
labels = np.array(labels)

In [31]:
#print(data.shape, labels.shape)

In [32]:
#splitting the dataset
X_train, X_test, y_train, y_test = train_test_split(data, labels, test_size=0.2, random_state=42)
y_train = torch.tensor(y_train)
y_test = torch.tensor(y_test)
y_train = F.one_hot(y_train, num_classes=43)
y_test = F.one_hot(y_test, num_classes=43)
X_train = torch.tensor(X_train, dtype=torch.float32)
X_test = torch.tensor(X_test, dtype=torch.float32)

In [33]:
#print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)

In [93]:
#defining the neural network structure.
class ConvNet(nn.Module): #creating a new class is like creating a new neural netwrok. This is inherited from the pytorch class nn.Module
    def __init__(self): #inititalization method for the new class
        super(ConvNet, self).__init__() #call initialization method for the parent class
        self.conv1 = nn.Conv2d(in_channels = 3, out_channels = 8, kernel_size = 3, padding = 1) #applies a 2D convolution over an input signal composed of several input planes #https://pytorch.org/docs/stable/generated/torch.nn.Conv2d.html
        self.bn1 = nn.BatchNorm2d(8) #applies batch normalization over a 4D input
        self.conv2 = nn.Conv2d(in_channels = 8, out_channels = 16, kernel_size = 3, padding = 1) 
        self.bn2 = nn.BatchNorm2d(16) #applies batch normalization over a 4D input
        self.conv3 = nn.Conv2d(in_channels = 16, out_channels = 32, kernel_size = 3, padding = 1) #what is meant by stride?
        self.bn3 = nn.BatchNorm2d(32)
        #self.pool = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
        #self.dropout1 = nn.Dropout(0.15)
        self.conv4 = nn.Conv2d(in_channels = 32, out_channels = 64, kernel_size = 3, padding = 1)
        self.bn4 = nn.BatchNorm2d(64)
        self.conv5 = nn.Conv2d(in_channels = 64, out_channels = 128, kernel_size = 3, padding = 1)
        self.bn5 = nn.BatchNorm2d(128)
        #self.dropout2 = nn.Dropout(0.20)
        #self.flatten = nn.Flatten()
        self.fc1 = nn.Linear(128, 64)  #applies a linear transformation to the incoming data y = XAT+b
        self.bn6 = nn.BatchNorm2d(64)
        #self.dropout3 = nn.Dropout(0.25)
        self.fc2 = nn.Linear(64, 32)
        self.bn7 = nn.BatchNorm2d(32)
        self.fc3 = nn.Linear(32, classes)
        #self.dropout = nn.Dropout(0.5) #during training, randomly zeroes some of the elements of the input tensor with probability 'p'
        self.pool = nn.MaxPool2d(kernel_size = 2, stride = 2) #applies a 2D max pooling over an input signal composed of several input planes
#forward pass funtion. this takes the image information as an input vector and makes changes.
    def forward(self, x):
        print(x.shape)
        x = self.pool(F.relu(self.bn1(self.conv1(x)))) #first convolution
        print(x.shape)
        x = F.relu(self.bn2(self.conv2(x))) #second convolution
        print(x.shape)
        x = self.pool(F.relu(self.bn3(self.conv3(x)))) #third convolution
        print(x.shape)
        x = F.relu(self.bn4(self.conv4(x))) #fourth convolution
        print(x.shape)
        x = self.pool(F.relu(self.bn5(self.conv5(x)))) #fifth convolution
        x = x.permute(0, 2, 3, 1)
        print(x.shape)
        print("done")
        #x = x.view(-1, 128 * 16 * 16)
        #x = x.permute(0, 3, 1, 2)
        x = F.relu(self.bn6(self.fc1(x))) #fully connected refers to the point that every neuron in this layer is going to be fully connected to attaching neurons
        print(x.shape)
        #x = self.dropout(x) #dropout rate, the probability of a neuron being deactivated
        print(x.shape)
        x = F.relu(self.bn7(self.fc2(x))) 
        x = self.dropout(x)
        print(x.shape)
        x = self.fc3(x)
        print(x.shape)
        return F.log_softmax(x, dim=1)

In [94]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model = ConvNet().to(device) #is this correct?

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

train_dataset = TensorDataset(X_train, y_train)
train_loader = DataLoader(train_dataset, shuffle=True)

val_dataset = TensorDataset(X_test, y_test)
val_loader = DataLoader(val_dataset, shuffle=False)

In [72]:
"""
for inputs, labels in train_loader:
    print(labels.shape)
"""

'\nfor inputs, labels in train_loader:\n    print(labels.shape)\n'

In [95]:
#training the model
epochs = 100
for epoch in range(epochs):
    model.train()
    running_loss = 0.0
    for inputs, labels in train_loader:
        #moving inputs and labels into the GPU
        inputs, labels = inputs.to(device), labels.to(device)
        inputs = inputs.permute(0, 3, 1, 2) #a fix to rearrange the color channels
        #inputs = inputs.unsqueeze(0)
        optimizer.zero_grad()
        #forward pass
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        #backward pass and optimization
        loss.backward()
        optimizer.step() 
        #loss accumilation
        running_loss += loss.item() * inputs.size(0)
    #calculating the avergae loss over epoch
    epoch_loss = running_loss / len(train_loader.dataset)
    print(f"Epoch {epoch+1}/{epochs}, Loss: {epoch_loss:.4f}")

torch.Size([1, 3, 30, 30])
torch.Size([1, 8, 15, 15])
torch.Size([1, 16, 15, 15])
torch.Size([1, 32, 7, 7])
torch.Size([1, 64, 7, 7])
torch.Size([1, 3, 3, 128])
done


RuntimeError: running_mean should contain 3 elements not 64