# Steps to solve Emergency vs Non-Emergency vehicle classification problem

<ol>1. Import necessary libraries</ol>
<ol>2. Loading and pre-processing the Data</ol>
<ol>3. Load weights of pretrained model</ol>
<ol>4. Fine tune the model for the current problem</ol>
<ol>5. Validate if it works fine, iterate again if it does not</ol>

## 1. Import necessary libraries

In [1]:
# importing the required libraries
%matplotlib inline
import numpy as np
import pandas as pd
from tqdm import tqdm
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from PIL import Image

# importing libraries for defining the architecture of model
import torch
import torch.nn as nn ## nn PyTorch's neural network module
from torch.optim import Adam # provides various optimization algorithms for training neural networks. SGD ADAM ADAGRAD RMSProp
from torch.nn import Linear, ReLU, BCELoss, Sequential, Sigmoid

# import torchvision
from torchvision.models import googlenet #consists of popular datasets, model architectures, and common image transformations for computer vision tasks
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader# provides classes and functions for working with datasets and data loaders.

#DataLoader, it is a class from the torch.utils.data module in PyTorch. The DataLoader class is used to load data from a dataset in batches.

## 2. Loading and pre-processing the Dataset



In [2]:
#mounting the drive
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
#extract the images
!unzip '/content/drive/MyDrive/CNN_RNN_SIT/Dataset.zip'

Archive:  /content/drive/MyDrive/CNN_RNN_SIT/Dataset.zip
   creating: Dataset/
  inflating: Dataset/emergency_classification.csv  
   creating: Dataset/images/
  inflating: Dataset/images/0.jpg    
  inflating: Dataset/images/10.jpg   
  inflating: Dataset/images/1002.jpg  
  inflating: Dataset/images/1006.jpg  
  inflating: Dataset/images/1007.jpg  
  inflating: Dataset/images/1009.jpg  
  inflating: Dataset/images/1013.jpg  
  inflating: Dataset/images/1014.jpg  
  inflating: Dataset/images/1016.jpg  
  inflating: Dataset/images/1019.jpg  
  inflating: Dataset/images/102.jpg  
  inflating: Dataset/images/1020.jpg  
  inflating: Dataset/images/1022.jpg  
  inflating: Dataset/images/1023.jpg  
  inflating: Dataset/images/1026.jpg  
  inflating: Dataset/images/1031.jpg  
  inflating: Dataset/images/1035.jpg  
  inflating: Dataset/images/1036.jpg  
  inflating: Dataset/images/1037.jpg  
  inflating: Dataset/images/104.jpg  
  inflating: Dataset/images/1040.jpg  
  inflating: Dataset/imag

In [4]:
# defining the pre-processing steps
normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],# Red green Blue channel
                                 std=[0.229, 0.224, 0.225])

preprocessing = transforms.Compose([transforms.ToTensor(),
                                    normalize,
])

In [5]:
# defining the class to load dataset
class EmergencyDataset(Dataset): #It defines a new(Custom) class called EmergencyDataset inherits from Dataset class
    """Custom Dataset for loading Emergency Dataset"""

    # defining the init function
    def __init__(self, csv_path, img_dir, transform): #constructor method for the EmergencyDataset class. It initializes a new instance of the dataset.

        df = pd.read_csv(csv_path) #reads a CSV file
        self.img_dir = img_dir #assigns the img_dir parameter (which is the directory containing the images) to the img_dir attribute of the dataset object
        self.csv_path = csv_path #This line assigns the csv_path parameter (which is the path to the CSV file) to the csv_path attribute of the dataset object
        self.img_names = df.image_names.values #This line extracts the image names from the DataFrame df and assigns them to the img_names attribute of the dataset object
        self.y = df['emergency_or_not'].values #extracts the labels ('emergency_or_not') from the DataFrame df and assigns them to the y attribute of the dataset object.
        self.transform = transform

    # # Define the method to get an item from the dataset
    def __getitem__(self, index):
        img = Image.open(self.img_dir + self.img_names[index]) ## Open the image using PIL (Python Imaging Library)
        ## Apply transformations to the image if they are provided
        if self.transform is not None:
            img = self.transform(img)
        # Get the label for the image
        label = self.y[index]
        return img, label ## Return the transformed image and its label

    # defining the len function
    def __len__(self):
        return self.y.shape[0]

In [6]:
# loading the dataset
train_dataset = EmergencyDataset(csv_path='/content/Dataset/emergency_classification.csv',
                              img_dir='/content/Dataset/images/',
                              transform=preprocessing)

In [7]:
# using the defined dataset to load data in batch using Dataloader
train_loader = DataLoader(dataset=train_dataset,
                          batch_size=32,
                          shuffle=True)

In [8]:
# getting the first batch
for batch_idx, (batch_X, batch_y) in enumerate(train_loader):
  break

In [9]:
# shape of the image and label
batch_X.shape, batch_y.shape

(torch.Size([32, 3, 224, 224]), torch.Size([32]))

## 3. Load weights of pretrained model

In [10]:
# define model architecture along with pretrained weights of googlenet / inception_v1
googlenet_model = googlenet(pretrained=True)

Downloading: "https://download.pytorch.org/models/googlenet-1378be20.pth" to /root/.cache/torch/hub/checkpoints/googlenet-1378be20.pth
100%|██████████| 49.7M/49.7M [00:00<00:00, 92.8MB/s]


In [11]:
# print architecture of googlenet
googlenet_model

GoogLeNet(
  (conv1): BasicConv2d(
    (conv): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (bn): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
  )
  (maxpool1): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=True)
  (conv2): BasicConv2d(
    (conv): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
    (bn): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
  )
  (conv3): BasicConv2d(
    (conv): Conv2d(64, 192, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (bn): BatchNorm2d(192, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
  )
  (maxpool2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=True)
  (inception3a): Inception(
    (branch1): BasicConv2d(
      (conv): Conv2d(192, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track

In [12]:
# architecture in form of a list
list(googlenet_model.children())

[BasicConv2d(
   (conv): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
   (bn): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
 ),
 MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=True),
 BasicConv2d(
   (conv): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
   (bn): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
 ),
 BasicConv2d(
   (conv): Conv2d(64, 192, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
   (bn): BatchNorm2d(192, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
 ),
 MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=True),
 Inception(
   (branch1): BasicConv2d(
     (conv): Conv2d(192, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
     (bn): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
   )
   (branch2): Sequential(
     (0): BasicConv2d(
       (conv): 

In [13]:
# removing the fully connected layers
list(googlenet_model.children())[:-3]

[BasicConv2d(
   (conv): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
   (bn): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
 ),
 MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=True),
 BasicConv2d(
   (conv): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
   (bn): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
 ),
 BasicConv2d(
   (conv): Conv2d(64, 192, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
   (bn): BatchNorm2d(192, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
 ),
 MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=True),
 Inception(
   (branch1): BasicConv2d(
     (conv): Conv2d(192, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
     (bn): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
   )
   (branch2): Sequential(
     (0): BasicConv2d(
       (conv): 

In [14]:
# defining the class to extract features
class FeatureExtractor(nn.Module):#This is the constructor method for the FeatureExtractor class. nn.Module provides a flexible and convenient way to define neural network architectures in PyTorch
    def __init__(self):
        super(FeatureExtractor, self).__init__()#calls the constructor of the parent class (nn.Module) to initialize the FeatureExtractor class.
        self.net = googlenet(pretrained=True)#loads the pre-trained GoogLeNet model.
        #push to cuda
        if torch.cuda.is_available():#checks if a CUDA-enabled GPU is available.
            self.net = self.net.cuda()#If a GPU is available, this line moves the model to the GPU.
        for p in self.net.parameters():
            p.requires_grad = False #Loop -freezes (disables gradient computation for) all parameters of the model so that they are not updated during training.
        # Define which layers you are going to extract
        self.features = nn.Sequential(*list(self.net.children())[:-3])

    def forward(self, x): #defines the forward pass of the FeatureExtractor class.
        return self.features(x)

In [16]:
# prepare input
input = batch_X[:2]
input = input.cuda()

# pass the input - Python idiom that checks if the script is being run as the main program.
# This block of code will only run if the script is executed directly, not if it is imported as a module.
if __name__ == "__main__":
    fe = FeatureExtractor()
    output = fe(input)

# shape of the output
output.shape

torch.Size([2, 1024, 7, 7])

## 4. Fine tune the model for the current problem
Steps:-
1. Extract the features
2. Flatten the data
3. Define a Neural Network Model
4. Compile the model
5. Train the model


### 4.1 Extract the features

In [17]:
# extract features using pretrained model

# create an empty array to store features
features = []
target = []
time_elapsed = []

# set model to eval
googlenet_model.eval()
#context manager temporarily sets all the requires_grad flags to False inside the block.
#This is useful during inference when you don't need to compute gradients, saving memory and computation.
#deactivates autograd
with torch.no_grad():

  # getting the data in batches using defined data loader
  for batch_idx, (batch_X, batch_y) in enumerate(train_loader):
    if torch.cuda.is_available():
        batch_X = batch_X.cuda()  #iterates over the batches of data (batch_X and batch_y) from the train_loader.

    # to record the time for extracting features
    start = torch.cuda.Event(enable_timing=True)#line creates a CUDA event start for recording the start time of the feature extraction process.
    end = torch.cuda.Event(enable_timing=True)

    start.record()##records the start time of the feature extraction process.
    # extracting features
    if __name__ == "__main__":

        fe = FeatureExtractor()
        batch_features = fe(batch_X)

    end.record()

    # Waits for everything to finish running
    torch.cuda.synchronize() #waits for all CUDA stream tasks to complete, ensuring that the timing measurement is accurate.

    time_elapsed.append(start.elapsed_time(end)) #calculates the elapsed time for the feature extraction process and appends it to the time_elapsed list.
    #converting to numpy
    batch_features = batch_features.data.cpu().numpy()

    # append in list
    features.append(batch_features)
    target.append(batch_y)


#save to the array
features = np.concatenate(features, axis=0)
target = np.concatenate(target, axis=0)

In [18]:
# time taken to extract features
print('Time taken in seconds: ', torch.sum(torch.tensor(time_elapsed))*0.001)

Time taken in seconds:  tensor(18.0230)


In [19]:
# shape of the features
features.shape

(2352, 1024, 7, 7)

### 4.2 Flatten the data


In [20]:
#flattening the features
features = features.reshape(len(features),-1)
features.shape

(2352, 50176)

In [21]:
# creating the training and validation data
X_train, X_valid, y_train, y_valid = train_test_split(features, target, test_size=0.3, stratify=target, random_state=42)

In [22]:
# shape of training and validation set
(X_train.shape, y_train.shape), (X_valid.shape, y_valid.shape)

(((1646, 50176), (1646,)), ((706, 50176), (706,)))

In [23]:
# converting training and validation set to PyTorch tensor
X_train = torch.FloatTensor(X_train)
y_train = torch.FloatTensor(y_train)

X_valid = torch.FloatTensor(X_valid)
y_valid = torch.FloatTensor(y_valid)

### 4.3 Define a Neural Network Model


In [24]:
# defining the model architecture
model = Sequential(Linear(1024 * 7 * 7, 64),
                   ReLU(),
                   Linear(64, 1),
                   Sigmoid()
                   )

In [25]:
  # summary of the model
  model

Sequential(
  (0): Linear(in_features=50176, out_features=64, bias=True)
  (1): ReLU()
  (2): Linear(in_features=64, out_features=1, bias=True)
  (3): Sigmoid()
)

In [26]:
# pass an input to the model to understand the output
model(X_train[0].view(1,1024*7*7))

tensor([[0.5295]], grad_fn=<SigmoidBackward0>)

## 4.4. Compile the model

In [27]:
# define optimizer and loss function
optimizer = torch.optim.Adam(model.parameters())
criterion = nn.BCELoss()

# checking if GPU is available
if torch.cuda.is_available():
    model = model.cuda()
    criterion = criterion.cuda()

### 4.5 Train the model


In [28]:
#define metric
def binary_accuracy(preds, y):

    #round predictions to the closest integer
    rounded_preds = torch.round(preds)

    #no. of correctly classified
    correct = (rounded_preds == y).float()

    #compute accuracy
    acc = correct.sum() / len(correct)
    return acc

In [29]:
# define training function
def train(X,y,batch_size):

  #activate training phase
  model.train()

  #initialization
  epoch_loss, epoch_acc= 0, 0
  no_of_batches = 0

  #randomly create indices
  indices= torch.randperm(len(X))

  #loading in batches
  for i in range(0,len(indices),batch_size):

    #indices for a batch
    ind = indices[i:i+batch_size]

    #batch
    batch_x=X[ind]
    batch_y=y[ind]

    #push to cuda
    if torch.cuda.is_available():
        batch_x, batch_y = batch_x.cuda(), batch_y.cuda()

    #clear gradients
    optimizer.zero_grad()

    #forward pass
    outputs = model(batch_x)

    #converting to a 1 dimensional tensor
    outputs = outputs.squeeze()

    #calculate loss and accuracy
    loss = criterion(outputs, batch_y)
    acc = binary_accuracy(outputs, batch_y)

    #Backward pass
    loss.backward()

    #Update weights
    optimizer.step()

    #Keep track of the loss and accuracy of a epoch
    epoch_loss = epoch_loss + loss.item()
    epoch_acc  = epoch_acc  + acc.item()

    #No. of batches
    no_of_batches = no_of_batches+1

  return epoch_loss/no_of_batches, epoch_acc/no_of_batches #he average loss and accuracy per batch for an epoch

Evalution Function

In [30]:
# define evaluation function
def evaluate(X,y,batch_size):

  #deactivate training phase
  model.eval()

  #initialization
  epoch_loss, epoch_acc= 0, 0
  no_of_batches = 0

  #randomly create indices
  indices= torch.randperm(len(X))

  #deactivates autograd
  with torch.no_grad():

    #loading in batches
    for i in range(0,len(indices),batch_size):

      #indices for a batch
      ind = indices[i:i+batch_size]

      #batch
      batch_x= X[ind]
      batch_y= y[ind]

      #push to cuda
      if torch.cuda.is_available():
          batch_x, batch_y = batch_x.cuda(), batch_y.cuda()

      #Forward pass
      outputs = model(batch_x)

      #converting the output to 1 Dimensional tensor
      outputs = outputs.squeeze()

      # Calculate loss and accuracy
      loss = criterion(outputs, batch_y)
      acc = binary_accuracy(outputs, batch_y)

      #keep track of loss and accuracy of an epoch
      epoch_loss = epoch_loss + loss.item()
      epoch_acc  = epoch_acc  + acc.item()

      #no. of batches
      no_of_batches = no_of_batches + 1

    return epoch_loss/no_of_batches, epoch_acc/no_of_batches

Prediction Function

In [31]:
# define prediction function
def predict(X,batch_size):

  #deactivate training phase
  model.eval()

  # initialization
  predictions = []

  # create indices
  indices = torch.arange(len(X))

  #deactivates autograd
  with torch.no_grad():

      for i in range(0, len(X), batch_size):

        #indices for a batch
        ind = indices[i:i+batch_size]

        # batch
        batch_x = X[ind]

        #push to cuda
        if torch.cuda.is_available():
            batch_x = batch_x.cuda()

        #Forward pass
        outputs = model(batch_x)

        #converting the output to 1 Dimensional tensor
        outputs = outputs.squeeze()

        # convert to numpy array
        prediction = outputs.data.cpu().numpy()
        predictions.append(prediction)

  # convert to single numpy array
  predictions = np.concatenate(predictions, axis=0)

  return predictions

Training

In [38]:
N_EPOCHS = 50
batch_size = 32

# intialization
best_valid_acc = 0

for epoch in range(N_EPOCHS):

    #train the model
    train_loss, train_acc  = train(X_train, y_train, batch_size) #Train function applied

    #evaluate the model
    valid_loss, valid_acc = evaluate(X_valid, y_valid, batch_size)

    print('\nEpoch :',epoch,
          'Training loss:',round(train_loss,4),
          '\tTrain Accuracy:',round(train_acc,4),
          '\tValidation loss:',round(valid_loss,4),
          '\tValidation Accuracy:',round(valid_acc,4))

    #save the best model
    if best_valid_acc <= valid_acc:
        best_valid_acc = valid_acc
        torch.save(model.state_dict(), 'saved_weights.pt')
        print("\n----------------------------------------------------Saved best model------------------------------------------------------------------")


Epoch : 0 Training loss: 0.0001 	Train Accuracy: 1.0 	Validation loss: 0.3653 	Validation Accuracy: 0.9307

----------------------------------------------------Saved best model------------------------------------------------------------------

Epoch : 1 Training loss: 0.0001 	Train Accuracy: 1.0 	Validation loss: 0.3656 	Validation Accuracy: 0.9293

Epoch : 2 Training loss: 0.0001 	Train Accuracy: 1.0 	Validation loss: 0.368 	Validation Accuracy: 0.9321

----------------------------------------------------Saved best model------------------------------------------------------------------

Epoch : 3 Training loss: 0.0001 	Train Accuracy: 1.0 	Validation loss: 0.3673 	Validation Accuracy: 0.9307

Epoch : 4 Training loss: 0.0001 	Train Accuracy: 1.0 	Validation loss: 0.5387 	Validation Accuracy: 0.8859

Epoch : 5 Training loss: 0.0001 	Train Accuracy: 1.0 	Validation loss: 0.3703 	Validation Accuracy: 0.9266

Epoch : 6 Training loss: 0.0001 	Train Accuracy: 1.0 	Validation loss: 0.4258 	V

## 5. Validate if it works fine, iterate again if it does not

In [39]:
 #load weights of best model
path='saved_weights.pt'
model.load_state_dict(torch.load(path))

<All keys matched successfully>

In [40]:
valid_loss, valid_accuracy = evaluate(X_valid,y_valid,batch_size)

print("Validation Accuracy:",(valid_accuracy)*100)

Validation Accuracy: 93.20652173913044
