In [None]:
from google.colab import drive

# Mount Google Drive at /content/drive; the dataset and the saved models used by
# later cells live under /content/drive/MyDrive.
# NOTE(review): google.colab is presumably only importable inside a Colab runtime.
drive.mount('/content/drive')

In [None]:
import os
import random
import math
from datetime import datetime
from collections import Counter
import pandas as pd
import numpy as np

import cv2
from PIL import Image
from pathlib import Path
import matplotlib.pyplot as plt
from matplotlib.patches import Rectangle
from sklearn.model_selection import train_test_split
import xml.etree.ElementTree as ET

import torch
from torch.utils.data import Dataset, DataLoader
import torch.optim as optim
import torch.nn as nn
import torch.nn.functional as F
from torchvision import models


**Formulation of the problem**

For an image consisting of a road sign, predict the bounding box around the road sign and determine the type of road sign. These signs can belong to four different classes:

- Traffic light
- Stop
- Speed limit
- Crosswalk

This is a multi-task learning problem because it involves two tasks:
1) regression to find the coordinates of the bounding box,
2) classification to determine the type of road sign.

**Dataset**
It consists of 877 images. The dataset is fairly imbalanced (most of the images belong to the speed limit class), but since we are mainly focused on bounding box prediction, we can ignore the imbalance.

**Loading data**

Descriptions for each image are stored in separate XML files. Let's take the following steps to prepare data for training:

1. Walk the annotations directory to collect all the .xml files.
2. Read the information we need from each .xml file using xml.etree.ElementTree.
3. Build a dictionary containing the file path (path to the image), width, height, the bounding box coordinates (xmin, ymin, xmax, ymax) and the class, and append it to a list.
4. Create a pandas DataFrame from that list of dictionaries.

In [None]:
# Dataset location on the mounted Google Drive: one directory holding the images
# and one holding the per-image XML annotation files.
images_path = Path('/content/drive/MyDrive/Fall 2023/ML/Project/traffic_Data/road-sign-detection/images')
anno_path = Path('/content/drive/MyDrive/Fall 2023/ML/Project/traffic_Data/road-sign-detection/annotations')

# Alternative Drive layout (disabled, kept for reference):
#images_path = Path('/content/drive/MyDrive/ML project/Fall 2023/ML/Project/traffic_Data/road-sign-detection/images')
#anno_path = Path('/content/drive/MyDrive/ML project/Fall 2023/ML/Project/traffic_Data/road-sign-detection/annotations')


def filelist(root, file_type):
    """Recursively collect the paths of all files under `root` whose name ends with `file_type`.

    Each returned path is the containing directory (as yielded by os.walk) joined
    with the file name.
    """
    matches = []
    for directory_path, _subdirs, file_names in os.walk(root):
        for file_name in file_names:
            if file_name.endswith(file_type):
                matches.append(os.path.join(directory_path, file_name))
    return matches

def generate_train_df(anno_path):
    """Parse every .xml annotation under `anno_path` into one DataFrame row per file.

    Columns, in this order (create_bb_array indexes rows by position, so the order
    matters): filename, width, height, class, xmin, ymin, xmax, ymax.

    `filename` is the image path built from the global `images_path` and the
    <filename> tag. Only the first <object> of each file is read, because
    ElementTree's find() returns the first match.
    """
    annotation_files = filelist(anno_path, '.xml')
    # Report only the count; printing every path floods the cell output.
    print(f"Found {len(annotation_files)} annotation files")

    anno_list = []
    for annotation_file in annotation_files:
        root = ET.parse(annotation_file).getroot()
        anno_list.append({
            'filename': Path(str(images_path) + '/' + root.find("./filename").text),
            # Stored as ints, like the box coordinates below.
            'width': int(root.find("./size/width").text),
            'height': int(root.find("./size/height").text),
            'class': root.find("./object/name").text,
            'xmin': int(root.find("./object/bndbox/xmin").text),
            'ymin': int(root.find("./object/bndbox/ymin").text),
            'xmax': int(root.find("./object/bndbox/xmax").text),
            'ymax': int(root.find("./object/bndbox/ymax").text),
        })
    return pd.DataFrame(anno_list)



In [None]:
# Build the annotation table (one row per XML file) and display it.
df_train = generate_train_df(anno_path)
df_train

In [None]:
df_train['class'].value_counts()     # number of annotated images per class label

**Let's convert our labels into classes:**

In [None]:
# Map each class name to the integer id used as the classification target.
class_dict = {'speedlimit': 0, 'stop': 1, 'crosswalk': 2, 'trafficlight': 3}
df_train['class'] = df_train['class'].map(lambda label: class_dict[label])

print(df_train.shape)
df_train.head()

**Resizing images and bounding boxes**
Since images must be the same size to train a computer vision model, we need to resize our images and their corresponding bounding boxes. Resizing an image is easy, but resizing a bounding box is a little more difficult because each rectangle depends on the image and its dimensions.

Here's the basic idea:

Let's transform the bounding box into an image (mask) of the same size as the image corresponding to this rectangle. This mask will simply have 0 for the background and 1 for the area covering the bounding box.

Let's read the image first:

In [None]:
def read_image(path):
    """Load the image at `path` with OpenCV and return it converted from BGR to RGB.

    Raises:
        FileNotFoundError: if OpenCV could not read the file.
    """
    bgr = cv2.imread(str(path))
    if bgr is None:
        # cv2.imread reports a missing/unreadable file by returning None; fail
        # here with a clear message instead of inside cvtColor.
        raise FileNotFoundError(f"Could not read image: {path}")
    return cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)


def create_mask(bb, x):
    """Return a 2-D float mask with the same height/width as `x`, set to 1 inside the box.

    Args:
        bb: box as [row_start, col_start, row_end, col_end]; values are truncated
            to integers and used as half-open slice bounds.
        x: array whose first two dimensions give the mask size.
    """
    rows, cols, *_ = x.shape
    Y = np.zeros((rows, cols))
    # The builtin int replaces np.int, which was removed from NumPy.
    bb = np.asarray(bb).astype(int)
    Y[bb[0]:bb[2], bb[1]:bb[3]] = 1.
    return Y

def mask_to_bb(Y):
    """Convert a 2-D mask into the tight box around its non-zero entries.

    Returns a float32 array [min_row, min_col, max_row, max_col] (maxima are the
    last non-zero indices, i.e. inclusive), or four zeros when the mask is empty.
    """
    row_idx, col_idx = np.nonzero(Y)
    if row_idx.size == 0:
        return np.zeros(4, dtype=np.float32)
    box = [row_idx.min(), col_idx.min(), row_idx.max(), col_idx.max()]
    return np.array(box, dtype=np.float32)


def create_bb_array(x):
    """Build the box array [x[5], x[4], x[7], x[6]] from a positional row.

    With the column order produced by generate_train_df this is
    [ymin, xmin, ymax, xmax].
    """
    ymin, xmin, ymax, xmax = x[5], x[4], x[7], x[6]
    return np.array([ymin, xmin, ymax, xmax])

def resize_image_bb(read_path, write_path, bb, sz):
    """Resize an image to sz x sz, save it under `write_path`, and return its new box.

    The box is resized by rasterising it into a mask, resizing the mask the same
    way as the image, and reading the box back from the mask.

    Returns:
        (path of the written image as str, resized box from mask_to_bb).
    """
    original = read_image(read_path)
    target_size = (sz, sz)
    resized_image = cv2.resize(original, target_size)
    resized_mask = cv2.resize(create_mask(bb, original), target_size)
    new_path = str(write_path / read_path.parts[-1])
    # read_image returned RGB; convert back to BGR for cv2.imwrite.
    cv2.imwrite(new_path, cv2.cvtColor(resized_image, cv2.COLOR_RGB2BGR))
    return new_path, mask_to_bb(resized_mask)

**Let's apply all our written functions:**

In [None]:
IM_SIZE = 300  # side length in pixels; every image is resized to IM_SIZE x IM_SIZE

In [None]:
# Resize every image to IM_SIZE x IM_SIZE and recompute its bounding box to match.
train_path_resized = Path('./images_resized')
train_path_resized.mkdir(exist_ok=True)

resized = [
    resize_image_bb(row['filename'], train_path_resized, create_bb_array(row.values), IM_SIZE)
    for _, row in df_train.iterrows()
]
new_paths = [resized_path for resized_path, _ in resized]
new_bbs = [resized_bb for _, resized_bb in resized]

df_train['new_path'] = new_paths   # location of the resized image
df_train['new_bb'] = new_bbs       # bounding box in resized-image coordinates

df_train.head()

**Example of the resulting sample**

In [None]:
# Report GPU availability; later cells move the model and the batches to CUDA.
print(torch.cuda.is_available())
num_gpus = torch.cuda.device_count()
print(num_gpus)
if num_gpus > 0:
    # Select a device only when one exists: set_device(0) raises on a CPU-only runtime.
    torch.cuda.set_device(0)

In [None]:
# Preview one sample: original image, its box, and the mask built from the box.
sample_row = df_train.iloc[30]
# read_image returns RGB, which is what plt.imshow expects (cv2.imread alone gives BGR).
im = read_image(sample_row['filename'])
print(sample_row['filename'])
bb = create_bb_array(sample_row.values)

print(im.shape)

Y = create_mask(bb, im)
# Round-trip check: the box recovered from the mask.
print(mask_to_bb(Y))

plt.imshow(im)

In [None]:
plt.imshow(Y, cmap='gray')  # show the bounding-box mask Y built in the previous cell

**Data Augmentation**
Data augmentation is a technique that helps our model generalize better by creating new training images from variations of existing ones. Our dataset only has about 800 images, so augmenting the data is important to keep the model from overfitting.
For this task we will use flipping, rotation, center cropping and random cropping.

The only thing to remember here is to make sure that the bounding box also transforms in the same way as the image. To do this, we follow the same approach as resizing - convert the bounding box to a mask, apply the same transformations to the mask as the original image, and extract the coordinates of the bounding box.

In [None]:

def crop(im, r, c, target_r, target_c):
    """Return the window of `im` starting at row `r`, column `c`, spanning target_r x target_c."""
    row_window = slice(r, r + target_r)
    col_window = slice(c, c + target_c)
    return im[row_window, col_window]

def center_crop(x, r_pix=8):
    """Trim `r_pix` rows from the top and bottom of `x` and a proportional margin from each side.

    The column margin is r_pix scaled by the width/height ratio, rounded.
    """
    height, width, *_ = x.shape
    c_pix = round(r_pix * width / height)
    kept_rows = height - 2 * r_pix
    kept_cols = width - 2 * c_pix
    return crop(x, r_pix, c_pix, kept_rows, kept_cols)

def rotate_cv(im, deg, y=False, mode=cv2.BORDER_REFLECT):
    """Rotate `im` by `deg` degrees about its centre, keeping the output size.

    Args:
        im: image or mask array.
        deg: rotation angle passed to cv2.getRotationMatrix2D (scale 1).
        y: True when `im` is a mask; a constant border is used so the area
           rotated in from outside is not filled with reflected mask pixels.
        mode: border mode used for images (y=False).
    """
    height, width, *_ = im.shape
    rotation = cv2.getRotationMatrix2D((width / 2, height / 2), deg, 1)
    out_size = (width, height)
    if not y:
        return cv2.warpAffine(im, rotation, out_size, borderMode=mode, flags=cv2.WARP_FILL_OUTLIERS)
    return cv2.warpAffine(im, rotation, out_size, borderMode=cv2.BORDER_CONSTANT)

def random_cropXY(x, Y, r_pix=8):
    """Apply the same random crop to image `x` and mask `Y`.

    The crop has the same size as center_crop's output (2*r_pix fewer rows and a
    proportional number fewer columns); its top-left corner is drawn uniformly
    within that margin.
    """
    height, width, *_ = x.shape
    c_pix = round(r_pix * width / height)
    # Two draws from the stdlib `random` module, row offset first.
    rand_r = random.uniform(0, 1)
    rand_c = random.uniform(0, 1)
    start_r = np.floor(2 * rand_r * r_pix).astype(int)
    start_c = np.floor(2 * rand_c * c_pix).astype(int)
    out_rows = height - 2 * r_pix
    out_cols = width - 2 * c_pix
    xx = crop(x, start_r, start_c, out_rows, out_cols)
    YY = crop(Y, start_r, start_c, out_rows, out_cols)
    return xx, YY


def transformsXY(path, bb, is_transforms):
    """Load an image scaled to [0, 1] RGB and return it with its (possibly augmented) box.

    With is_transforms falsy, image and box mask are only center-cropped. Otherwise
    they are jointly rotated by a random angle in (-10, 10) degrees, flipped
    left-right with probability 0.5, and randomly cropped.

    Returns:
        (image array, box from mask_to_bb of the transformed mask).
    """
    bgr = cv2.imread(str(path)).astype(np.float32)
    x = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB) / 255
    Y = create_mask(bb, x)

    if not is_transforms:
        x, Y = center_crop(x), center_crop(Y)
        return x, mask_to_bb(Y)

    rdeg = (np.random.random() - .50) * 20
    x = rotate_cv(x, rdeg)
    Y = rotate_cv(Y, rdeg, y=True)
    flip = np.random.random() > 0.5
    if flip:
        x = np.fliplr(x).copy()
        Y = np.fliplr(Y).copy()
    x, Y = random_cropXY(x, Y)
    return x, mask_to_bb(Y)

def create_corner_rect(bb, color='red'):
    """Build an unfilled matplotlib Rectangle outlining `bb`.

    Args:
        bb: box as [row_min, col_min, row_max, col_max]; the rectangle is anchored
            at (bb[1], bb[0]) with width bb[3]-bb[1] and height bb[2]-bb[0].
        color: outline colour (red by default).
    """
    bb = np.array(bb, dtype=np.float32)
    return plt.Rectangle((bb[1], bb[0]), bb[3]-bb[1], bb[2]-bb[0], color=color,
                         fill=False, lw=3)

def create_corner_rect_pred(bb, color='green'):
    """Same as create_corner_rect, but defaulting to green for predicted boxes."""
    # Delegate instead of duplicating the drawing code.
    return create_corner_rect(bb, color=color)

def show_corner_bb(im, bb):
    """Display `im` on the current axes and overlay the outline of box `bb`."""
    plt.imshow(im)
    axes = plt.gca()
    axes.add_patch(create_corner_rect(bb))

**Example image**
Original:

In [None]:
# Show one resized sample with its resized bounding box.
number = 45
# Look the path up by column name rather than by positional index into .values.
sample_path = str(df_train['new_path'].values[number])
print(sample_path)
im = cv2.cvtColor(cv2.imread(sample_path), cv2.COLOR_BGR2RGB)
show_corner_bb(im, df_train['new_bb'].values[number])

**After transformation:**

In [None]:
# Same sample (index `number`) after the random augmentation pipeline; the box is
# recomputed from the transformed mask.
im, bb = transformsXY(str(df_train['new_path'].values[number]),
                      df_train['new_bb'].values[number],
                      is_transforms=True)
show_corner_bb(im, bb)

**Dataset**
Now that we have our data augmentations, we can create a PyTorch dataset. We normalize the images using ImageNet statistics because we will be using a pre-trained ResNet model. Augmentation is applied on the fly when the dataset is created with `is_transforms=True`; otherwise only a deterministic center crop is applied.

In [None]:
# drop=True discards the old index instead of inserting it as an extra 'index'
# column, so re-running this cell does not keep adding columns.
df_train = df_train.reset_index(drop=True)
X = df_train[['new_path', 'new_bb']]    # inputs: resized image path and its bounding box
Y = df_train['class']                   # targets: integer class label

# First hold out 20% as the test set, then 20% of the remainder as validation.
X_training, X_test, y_training, y_test = train_test_split(X, Y, test_size=0.2, random_state=42)
X_train, X_val, y_train, y_val = train_test_split(X_training, y_training, test_size=0.2, random_state=42)


def normalize(im):
    """Standardise an image whose last axis has 3 channels with the ImageNet mean and std."""
    channel_mean = np.array([0.485, 0.456, 0.406])
    channel_std = np.array([0.229, 0.224, 0.225])
    return (im - channel_mean) / channel_std

class RoadDataset(Dataset):
    """Dataset yielding (image, class label, bounding box) for each road-sign sample.

    Args:
        paths: pandas Series of image paths.
        bb: pandas Series of bounding boxes.
        y: pandas Series of class labels.
        is_transforms: when True, transformsXY applies random augmentation;
            otherwise only the center crop.
    """

    def __init__(self, paths, bb, y, is_transforms=False):
        self.is_transforms = is_transforms
        # Keep the underlying arrays so samples are addressed by position.
        self.paths, self.bb, self.y = paths.values, bb.values, y.values

    def __len__(self):
        """Number of samples."""
        return len(self.paths)

    def __getitem__(self, idx):
        """Return (channels-first normalised image, class label, box) for sample `idx`."""
        path = self.paths[idx]
        y_class = self.y[idx]
        image, y_bb = transformsXY(path, self.bb[idx], self.is_transforms)
        # HWC -> CHW, the layout the convolutional model expects.
        image = normalize(image).transpose(2, 0, 1)
        return image, y_class, y_bb

# Training dataset.
# NOTE(review): is_transforms=False disables the random augmentation, so training
# images only get the deterministic center crop - confirm this is intended.
train_ds = RoadDataset(X_train['new_path'], X_train['new_bb'], y_train, is_transforms=False)
# Validation dataset (is_transforms defaults to False).
valid_ds = RoadDataset(X_val['new_path'], X_val['new_bb'], y_val)

**Let's load all this into our dataloader:**


In [None]:
batch_size = 16
# Batches of (image, class label, box); the training loader shuffles, the validation loader does not.
train_dl = DataLoader(train_ds, batch_size=batch_size, shuffle=True)
valid_dl = DataLoader(valid_ds, batch_size=batch_size)

**Model Definition**
As a model we will use a simple pre-trained ResNet-34 backbone. Since we have two tasks, there are two final layers (heads): one for bounding box regression and one for image classification.

In [None]:
class BB_model(nn.Module):
    """ResNet-34 feature extractor with two linear heads: class logits and box coordinates.

    The earlier, disabled variant of this class (kept as a string literal) was
    removed: it could not run (`_init_`, `nn.linear`). Deeper MLP heads with an
    optional BatchNorm1d were also tried; the current heads are single linear layers.
    """

    def __init__(self):
        super().__init__()

        # NOTE(review): `pretrained=True` is the legacy torchvision argument; newer
        # releases prefer `weights=...` - confirm against the installed version.
        resnet = models.resnet34(pretrained=True)

        # Keep the first 8 child modules of the ResNet as the feature extractor
        # (presumably everything before its final pooling and fully connected layers).
        layers = list(resnet.children())[:8]
        self.features = nn.Sequential(*layers)

        # Classification head: 512 pooled features -> 4 class logits.
        self.classifier = nn.Sequential(nn.Linear(512, 4))

        # Regression head: 512 pooled features -> 4 box coordinates.
        self.bb = nn.Sequential(nn.Linear(512, 4))

    def forward(self, x):
        """Return (class logits, box coordinates) for a batch of images `x`."""
        x = self.features(x)
        x = F.relu(x)
        # Functional pooling avoids building a new module on every forward pass.
        x = F.adaptive_avg_pool2d(x, (1, 1))
        x = x.view(x.shape[0], -1)

        # Both heads read the same pooled feature vector.
        return self.classifier(x), self.bb(x)


In [None]:
# Inspect the ResNet-34 child modules that BB_model reuses as its feature extractor.
resnet = models.resnet34(pretrained=True)

list(resnet.children())[:8]

**Training**
To calculate the loss, we need to take into account both the classification loss and the bounding box regression loss, so we use a combination of cross-entropy and L1 loss (the sum of all absolute differences between the ground truth and the predicted coordinates).

In [None]:
# Instantiate the model on the GPU and set up Adam over its trainable parameters.
model = BB_model().cuda()

params = list(filter(lambda p: p.requires_grad, model.parameters()))
optimizer = optim.Adam(params, lr=0.006)
epochs = 100

In [None]:
import matplotlib.pyplot as plt
# Not filled by the current code: only referenced from commented-out lines in train().
training_loss = []
validation_loss = []

# Per-epoch average training / validation loss, appended by train() and plotted afterwards.
their_train_loss =[]
their_val_loss = []

def train():
    """Train the global `model` for `epochs` epochs, validating after each one.

    Uses the notebook globals model, optimizer, epochs, train_dl and valid_dl.
    Per-epoch average losses are appended to their_train_loss / their_val_loss.
    Whenever the validation loss improves, the whole model object is saved to
    `model_path`.

    The loss is the sum of the cross-entropy of the class logits and the L1
    distance of the box coordinates, both summed over the batch and then averaged
    over the number of samples in the epoch.
    """
    best_loss = float("inf")
    # Naming scheme from the original author: pT = pretrained, NT = no transform,
    # AL = added layer, BLRM = batch norm removed, IT = saved in training.
    model_path = '/content/drive/MyDrive/Fall 2023/ML/Project/loss_100epoch_NpT_ResNet_IT_NT_BLRM.pth'

    for i in range(epochs):
        # ---- training pass ----
        model.train()
        total = 0
        sum_loss = 0
        for x, y_class, y_bb in train_dl:
            len_batch = y_class.shape[0]
            x = x.cuda().float()
            y_class = y_class.cuda()
            y_bb = y_bb.cuda().float()

            out_class, out_bb = model(x)

            loss_class = F.cross_entropy(out_class, y_class, reduction="sum")
            loss_bb = F.l1_loss(out_bb, y_bb, reduction="sum")
            loss = loss_class + loss_bb

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            total += len_batch
            sum_loss += loss.item()

        train_loss = sum_loss / total
        their_train_loss.append(train_loss)

        # ---- validation pass ----
        model.eval()
        val_total = 0
        val_sum_loss = 0
        correct = 0

        # No optimizer step here, so skip gradient tracking as well.
        with torch.no_grad():
            for x, y_class, y_bb in valid_dl:
                len_batch = y_class.shape[0]

                x = x.cuda().float()
                y_class = y_class.cuda()
                y_bb = y_bb.cuda().float()
                out_class, out_bb = model(x)

                loss_class = F.cross_entropy(out_class, y_class, reduction="sum")
                loss_bb = F.l1_loss(out_bb, y_bb, reduction="sum")
                loss = loss_class + loss_bb

                _, pred = torch.max(out_class, 1)          # predicted class per sample
                correct += (pred == y_class).sum().item()  # running count of correct labels

                val_sum_loss += loss.item()
                val_total += len_batch

        val_loss = val_sum_loss / val_total
        val_acc = correct / val_total

        their_val_loss.append(val_loss)

        if val_loss < best_loss:
            print("found better")
            best_loss = val_loss
            # Checkpoint the best model so far (whole object, not just the state_dict).
            torch.save(model, model_path)

        print(f"Epoch [{i+1}/{epochs}]. train_loss {train_loss:.3f} val_loss {val_loss:.3f} val_acc {val_acc:.3f}")



**The trick:** after a round of training, we can change the learning rate and continue training:

In [None]:
# Set the learning rate of every optimizer parameter group to 0.001.
for param_group in optimizer.param_groups:
    param_group["lr"] = 0.001

In [None]:
# Run the training loop with the optimizer settings currently in effect.
train()

In [None]:
import matplotlib.pyplot as plt

# Per-epoch average losses recorded by train() (one float per epoch).
training_loss_values = list(their_train_loss)
validation_loss_values = list(their_val_loss)

# One curve per split, in the same order as before: training, then validation.
for curve_label, curve_values in (('Training Loss', training_loss_values),
                                  ('Validation Loss', validation_loss_values)):
    plt.plot(curve_values, label=curve_label, marker='o')

plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.title('Training and Validation Loss Over Epochs')
plt.legend()

plt.show()

Saving the model

In [None]:
# Persist the whole model object (not just its state_dict) in its state after training.
# NOTE(review): this path ('.../ML project/Fall 2023/...', '..._AT_...') differs from the
# best-validation checkpoint written inside train() ('.../Fall 2023/...', '..._IT_...').
model_path = '/content/drive/MyDrive/ML project/Fall 2023/ML/Project/loss_100epoch_NpT_ResNet_AT_NT_BLRM.pth'

# Save the model
torch.save(model, model_path)

**Testing**

load model

In [None]:
# Earlier checkpoints (disabled, kept for reference):
#model_path = '/content/drive/MyDrive/Fall 2023/ML/Project/30epoch_pretained_ResNet_model_intraining.pth'
#model_path = '/content/drive/MyDrive/Fall 2023/ML/Project/30epoch_pretained_ResNet_model_aftertraining.pth'

# NOTE(review): this loads '..._pT_...' while the save cell writes '..._NpT_...';
# confirm the intended checkpoint.
model_name="loss_100epoch_pT_ResNet_AT_NT_BLRM.pth"
model_path = '/content/drive/MyDrive/ML project/Fall 2023/ML/Project/'+model_name

# torch.load unpickles a whole model object here, which can execute arbitrary code:
# only load checkpoints you created yourself. The BB_model class must be defined first.
# NOTE(review): recent PyTorch releases default to weights_only=True, which rejects
# whole-model pickles; pass weights_only=False there for a trusted checkpoint.
model = torch.load(model_path)
# Make inference mode explicit instead of relying on the flag stored in the pickle.
model.eval()

Use the held-out test split for evaluation.

In [None]:
# Wrap the held-out test split in the same Dataset class, without augmentation.
test_ds = RoadDataset(X_test['new_path'], X_test['new_bb'], y_test, is_transforms=False)

# test_ds is consumed by the evaluation loop further down.

In [None]:
# Disabled scratch code, kept as real comments (as string literals the last one was
# echoed as the cell's output).

# Copy one resized image into ./road_signs_test:
# im = read_image('./images_resized/road789.png')
# Path.mkdir(Path('./road_signs_test'), exist_ok=True)
# cv2.imwrite('./road_signs_test/road789.jpg', cv2.cvtColor(im, cv2.COLOR_RGB2BGR))

# Prepare a single external image for a one-off check:
# image = cv2.imread('/content/drive/MyDrive/Fall 2023/ML/Project/traffic_Data/road_sign_det_test/road57.png')
# im = cv2.resize(image,(300,300))
#
# Path.mkdir(Path('./road_signs_test'), exist_ok=True)
# cv2.imwrite('./road_signs_test/road61.jpg', cv2.cvtColor(im, cv2.COLOR_RGB2BGR))


In [None]:
class_dict  # display the class-name -> integer id mapping, to read the predictions below

In [None]:
def is_black(pixel):
    """Return True when the first three channels of `pixel` are all 0."""
    return bool(pixel[0] == 0 and pixel[1] == 0 and pixel[2] == 0)

def is_white(pixel):
    """Return True when the first three channels of `pixel` are all 255."""
    return bool(pixel[0] == 255 and pixel[1] == 255 and pixel[2] == 255)
def mask(rectangle_coords, image_shape):
    """Return a black 3-channel uint8 image with a filled white rectangle.

    Args:
        rectangle_coords: the rectangle's diagonal corners (x1, y1, x2, y2);
            each value is truncated to int.
        image_shape: (height, width) of the image to create.
    """
    canvas = np.zeros((image_shape[0], image_shape[1], 3), dtype=np.uint8)
    x1, y1, x2, y2 = (int(rectangle_coords[k]) for k in range(4))
    return cv2.rectangle(canvas, (x1, y1), (x2, y2), (255, 255, 255), thickness=cv2.FILLED)

def IoU(image, bb_pred, bb_gt, use_union=False):
    """Overlap score between a predicted box and a ground-truth box.

    NOTE: despite its name, by default this returns
    intersection area / ground-truth area (the value the evaluation cell reports
    as "Intersected_fraction"), not a true intersection-over-union. Pass
    use_union=True to get intersection / union instead.

    The overlap is computed directly from the coordinates rather than by counting
    mask pixels one by one, with the same half-open area convention for numerator
    and denominator, so a perfect prediction scores exactly 1.0.

    Args:
        image: array whose shape[0] / shape[1] give the image height / width; the
            intersection is clipped to these bounds.
        bb_pred: predicted box as 4 numbers (two diagonal corners); values are
            truncated to int and the corners may be given in either order.
        bb_gt: ground-truth box in the same layout as bb_pred.
        use_union: when True, divide by the union of the two boxes.

    Returns:
        float score; 0.0 when the ground-truth box has zero area.
    """
    height, width = image.shape[0], image.shape[1]

    def _corners(bb):
        # Truncate to int and order the corners so that (a1, b1) <= (a2, b2).
        a1, b1, a2, b2 = (int(bb[k]) for k in range(4))
        return min(a1, a2), min(b1, b2), max(a1, a2), max(b1, b2)

    px1, py1, px2, py2 = _corners(bb_pred)
    gx1, gy1, gx2, gy2 = _corners(bb_gt)

    gt_area = (gx2 - gx1) * (gy2 - gy1)
    if gt_area == 0:
        # Degenerate ground truth (e.g. the all-zero box of an empty mask).
        return 0.0

    # Overlap rectangle, clipped to the image (first coordinate runs along the width).
    ix1, iy1 = max(px1, gx1, 0), max(py1, gy1, 0)
    ix2, iy2 = min(px2, gx2, width), min(py2, gy2, height)
    intersect = max(0, ix2 - ix1) * max(0, iy2 - iy1)

    if use_union:
        pred_area = (px2 - px1) * (py2 - py1)
        return intersect / (pred_area + gt_area - intersect)
    return intersect / gt_area

In [None]:
# Evaluate the loaded model on the held-out test set.
# NOTE(review): this import shadows the PIL `Image` imported at the top of the
# notebook, and neither name is used in this cell.
from IPython.display import display, Image

# For every test sample: x is the network input, y_class / y_bb are the
# ground-truth label and box returned by the dataset.

# Annotated prediction figures are written here; create the folder if needed.
output_dir = "/content/drive/MyDrive/ML project/100epoch_pT_ResNet_AT_NT_BLRM/"
os.makedirs(output_dir, exist_ok=True)

correct_labled = 0

iou=0
k=0

for i,n in enumerate(test_ds):

  x, y_class, y_bb = n

  # Show the same pixels the model saw: RGB, center-cropped like the dataset
  # does, so the boxes (in cropped coordinates) line up with the image.
  im_path = X_test['new_path'].values[i]
  im = center_crop(read_image(im_path))

  xx = torch.FloatTensor(x[None,])

  # prediction (inference only, no gradient tracking)
  with torch.no_grad():
    out_class, out_bb = model(xx.cuda())

  # predicted class
  out_class =out_class.detach().cpu().numpy()
  y_pred = np.argmax(out_class)
  print("real value=",y_class,"Pred for"+str(i)+"=",y_pred)

  # predicted bounding box, shape (1, 4)
  bb_hat = out_bb.detach().cpu().numpy()
  bb_hat = bb_hat.astype(int)

  temp=IoU(im,bb_hat.flatten(),y_bb)
  k=k+1
  iou+=temp
  print("Intersected_fraction="+str(temp))

  if y_pred == y_class:
    correct_labled+=1

  # Figure: image, predicted box (green), ground-truth box (red), summary text.
  text1="real value="+str(y_class)+"  Pred for"+str(i)+"="+str(y_pred)
  text2="Intersected_fraction="+str(temp)
  plt.imshow(im)
  plt.gca().add_patch(create_corner_rect_pred(bb_hat[0]))
  plt.gca().add_patch(create_corner_rect(y_bb))

  plt.text(0, 2, text1, color='blue', ha='left', va='top')
  plt.text(0, 12, text2, color='blue', ha='left', va='top')

  plt.savefig(output_dir+str(i))
  plt.show()  # Display the current image

# k counts the evaluated samples; guard against an empty test set.
accuracy = correct_labled/k if k else 0.0

print("---------------------------------------------")
print("total average class label accuracy:" , accuracy)
avg_intersected_fraction= iou/k if k else 0.0

print("Average Intersected area=",avg_intersected_fraction)



In [None]:
# Re-print the average overlap score computed by the evaluation cell.
print("Average Intersected area=",avg_intersected_fraction)
