In [1]:
!pip install yolov5


Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [2]:
!git clone https://github.com/ultralytics/yolov5.git
!pip install -r yolov5/requirements.txt


fatal: destination path 'yolov5' already exists and is not an empty directory.
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [3]:
# Include all packages
import os
import cv2
from time import time
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from yolov5.models.yolo import Model
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval
import torchvision


In [4]:
from google.colab import drive
drive.mount('/content/drive')
import zipfile
with zipfile.ZipFile('/content/drive/MyDrive/DL Project/DataSet.zip', 'r') as zip_ref:
    zip_ref.extractall('./')


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [5]:

def ResizeImage(image: np.ndarray, x1: int, y1: int, x2: int, y2: int, newWidth: int, newHeight: int) -> tuple:
    originalHeight, originalWidth = image.shape[:2]
    widthScale = newWidth / originalWidth
    heightScale = newHeight / originalHeight
    resizedImage = cv2.resize(
        image, (newWidth, newHeight), interpolation=cv2.INTER_LINEAR)
    x1New, y1New = int(x1 * widthScale), int(y1 * heightScale)
    x2New, y2New = int(x2 * widthScale), int(y2 * heightScale)
    return resizedImage, x1New, y1New, x2New, y2New


In [6]:
def LoadDataSet(dataSetFolderPath: str) -> tuple:
    images = []
    annotations = []
    annotationsFilePath = dataSetFolderPath+"/allAnnotations.csv"
    annotationsDataFrame = pd.read_csv(annotationsFilePath, sep=";")
    uniqueSigns = annotationsDataFrame['Annotation tag'].unique().tolist()
    for index, row in annotationsDataFrame[1:].iterrows():
        image = cv2.imread(dataSetFolderPath+"/"+row[0])
        images.append(image)
        annotations.append(
            [uniqueSigns.index(row[1]), row[2], row[3], row[4], row[5]])

    del annotationsDataFrame

    return images, annotations, len(uniqueSigns)


In [7]:
def PreProcessDataSet(images: list, annotations: list, batchSize: int, resize: tuple) -> tuple:
    resizedImages = []
    newAnnotations = []
    for i, image in enumerate(images):
        [label, x1, y1, x2, y2] = annotations[i]
        resizedImage, x1New, y1New, x2New, y2New = ResizeImage(
            image, x1, y1, x2, y2, resize[0], resize[1])
        resizedImages.append(resizedImage)
        newAnnotations.append(
            [(i % batchSize), label, x1New, y1New, x2New, y2New])

    X_train, X_val, y_train, y_val = train_test_split(
        resizedImages, newAnnotations, test_size=0.3, random_state=42)

    return X_train, X_val, y_train, y_val


In [8]:
class CustomDataset(Dataset):
    def __init__(self, data, transform=None):
        self.data = data
        self.transform = transform

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        inputData, label = self.data[idx]

        if self.transform:
            inputData = self.transform(inputData)
        inputData = torch.from_numpy(inputData).float()
        label = torch.tensor(label).float()
        return inputData, label


In [9]:
def CreateDataLoaders(X_train, X_val, y_train, y_val, batchSize):
    trainDataSet = []
    valDataSet = []
    for i in range(len(X_train)):
        trainDataSet.append((X_train[i], y_train[i]))

    for i in range(len(X_val)):
        valDataSet.append((X_val[i], y_val[i]))

    trainDataSet = CustomDataset(trainDataSet)
    valDataSet = CustomDataset(valDataSet)
    trainDataLoader = DataLoader(
        trainDataSet, batch_size=batchSize, shuffle=True, num_workers=4)
    valDataLoader = DataLoader(
        valDataSet, batch_size=batchSize, shuffle=False, num_workers=4)

    return trainDataLoader, valDataLoader


In [10]:
def TargetstoTensors(targets, batch_size, numAnchors, gridSizes, numClasses):
    targetObj = []
    targetClass = []
    targetBox = []
    # batch_size = 32
    for grid_size in gridSizes:
        targetObj.append(torch.zeros(
            (batch_size, numAnchors, grid_size, grid_size, 1)))
        targetClass.append(torch.zeros(
            (batch_size, numAnchors, grid_size, grid_size, numClasses)))
        targetBox.append(torch.zeros(
            (batch_size, numAnchors, grid_size, grid_size, 4)))
    # current_batch_size = int(targets[:, 0].max()) + 1
    # print("current_batch_size",targets.size(0))
    for target in targets:
        batchIndex, cls, xCenter, yCenter, width, height = target.long()

        for i, grid_size in enumerate(gridSizes):

            x_cell, y_cell = int(
                xCenter * grid_size), int(yCenter * grid_size)
            anchor = 0
            try:
                targetObj[i][batchIndex, anchor, y_cell, x_cell, 0] = 1
                targetClass[i][batchIndex, anchor, y_cell, x_cell, cls] = 1
                targetBox[i][batchIndex, anchor, y_cell, x_cell] = torch.tensor(
                    [xCenter, yCenter, width, height])
            except Exception as e:
                # print(e)
                pass
    return targetObj, targetClass, targetBox


In [11]:
class YOLOv5Loss(nn.Module):
    def __init__(self, numClasses, numAnchors=3):
        super(YOLOv5Loss, self).__init__()
        self.numClasses = numClasses
        self.numAnchors = numAnchors

    def forward(self, preds, targets):
        objectLoss = torch.tensor(0.0, device=preds[0].device)
        classLoss = torch.tensor(0.0, device=preds[0].device)
        boxLoss = torch.tensor(0.0, device=preds[0].device)
        batch_size = preds[0].size(0)
        gridSizes = [pred.size(2) for pred in preds]
        targetObjList, targetClassList, targetBoxList = TargetstoTensors(
            targets, batch_size, self.numAnchors, gridSizes, self.numClasses)
        for i, pred in enumerate(preds):
            # pred_obj = pred[..., 4].sigmoid()
            # pred_cls = pred[..., 5:].sigmoid()
            # pred_box = pred[..., :4]

            targetObj = targetObjList[i].to(pred.device)
            targetClass = targetClassList[i].to(pred.device)
            targetBox = targetBoxList[i].to(pred.device)

            objectLoss += nn.BCEWithLogitsLoss()(pred[..., 4:5], targetObj)
            classLoss += nn.BCEWithLogitsLoss()(pred[..., 5:], targetClass)
            boxLoss += nn.MSELoss()(pred[..., :4], targetBox)

        totalLoss = objectLoss + classLoss + boxLoss
        return totalLoss


In [12]:
def CreateYolov5Model(numClasses: int, version="s"):
    congfigFile = "yolov5/models/yolov5{}.yaml".format(version)
    model = Model(congfigFile, ch=3, nc=numClasses)
    return model


In [13]:
def TrainModel(model, dataLoader, epochs, optimizer, lossFunction, device):
    model.train()
    for epoch in range(epochs):
        print("Epoch {}/{}:".format(epoch+1, epochs))
        startTime = time()
        totalLoss = 0
        dataLoaderLen = len(dataLoader)
        for i, (inputs, targets) in enumerate(dataLoader):
            inputs = inputs.permute(0, 3, 1, 2)
            inputs = inputs.to(device)
            targets = targets.to(device)
            optimizer.zero_grad()
            with torch.set_grad_enabled(True):
                outputs = model(inputs)
                loss = lossFunction(outputs, targets)
                loss.backward()
                optimizer.step()

            totalLoss += loss.item() * inputs.size(0)
            if(((i*100)//dataLoaderLen) % 10 == 0):
                print((i*100//dataLoaderLen), end="%,")

        endTime = time()
        timeTaken = endTime-startTime
        epochLoss = totalLoss / dataLoaderLen
        print()
        print("Training Loss: {:.4f}".format(epochLoss))
        print("Time taken: {}min, {}, secs".format(timeTaken//60, timeTaken % 60))

    print("Training complete.")
    return model


In [34]:
def ProcessYoloOutput(outputs, inputShape, conf_threshold=0.5, nms_threshold=0.5):
    """
    Converts YOLOv5 outputs into a list of bounding box predictions.
    """
    predictions = []
    for output in outputs:
        # perform non-maximum suppression
        boxes = output[:, :4]
        scores = output[:, 4]
        mask = scores >= conf_threshold
        boxes = boxes[mask]
        scores = scores[mask]
        keep = torchvision.ops.boxes.batched_nms(
            boxes, scores, torch.zeros_like(scores), nms_threshold)
        boxes = boxes[keep]
        scores = scores[keep]

        # convert boxes to x1, y1, x2, y2 format
        boxes[:, [0, 2]] *= inputShape[0]
        boxes[:, [1, 3]] *= inputShape[1]
        boxes[:, [0, 2]] = boxes[:, [0, 2]].clamp(0, inputShape[0] - 1)
        boxes[:, [1, 3]] = boxes[:, [1, 3]].clamp(0, inputShape[1] - 1)
        boxes = boxes[:, [1, 0, 3, 2]]

        # convert scores to confidence values
        conf = scores

        # add predictions to list
        for box, score in zip(boxes, conf):
            predictions.append(
                {'bbox': box.tolist(), 'score': score.item(), 'category_id': 1})

    return predictions


def CalculatemAPScore(predictions, labels):
    """
    Computes the mAP between the predictions and the ground truth labels.
    """
    # create a COCO-format annotations file for the ground truth labels
    coco_gt = COCO()
    for i, (image, boxes) in enumerate(labels):
        for box in boxes:
            x1, y1, x2, y2 = box
            w, h = x2 - x1, y2 - y1
            coco_gt.add_annotation({
                'id': len(coco_gt.dataset['annotations']),
                'image_id': i,
                'category_id': 1,
                'bbox': [x1, y1, w, h],
                'area': w * h,
                'iscrowd': 0,
            })

    # create a COCO-format predictions file
    coco_dt = coco_gt.loadRes(predictions)

    # compute mAP using COCOeval
    coco_eval = COCOeval(coco_gt, coco_dt, 'bbox')
    coco_eval.evaluate()
    coco_eval.accumulate()
    coco_eval.summarize()

    return coco_eval.stats[0]

def EvaluateModel(model, dataLoader, device):
    print("Evaluateing Model:")
    startTime = time()
    dataLoaderLen = len(dataLoader)
    mAPScore = 0
    for i, (inputs, targets) in enumerate(dataLoader):
        try:
            inputs = inputs.permute(0, 3, 1, 2)
            inputs = inputs.to(device)
            targets = targets.to(device)
            with torch.no_grad():
                outputs = model(inputs)
                predictions = ProcessYoloOutput(outputs, inputs.shape)
                print(predictions)
                mAPScore = CalculatemAPScore(predictions, targets)
        except:
            pass
        if(((i*100)//dataLoaderLen) % 10 == 0):
            print((i*100//dataLoaderLen), end="%,")

    endTime = time()
    timeTaken = endTime-startTime
    print("mAP score on validation set: {:.4f}".format(mAPScore))
    print("Time taken: {}min, {}, secs".format(timeTaken//60, timeTaken % 60))


In [15]:
batchSize = 32
inputShape = (416, 416)
epochs = 100
numAnchors = 3
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [16]:
print("Using {} device".format(device))

Using cuda device


In [17]:
images, annotations, numClasses = LoadDataSet("./DataSet")


In [18]:
X_train, X_val, y_train, y_val = PreProcessDataSet(
    images, annotations, batchSize, inputShape)
del images
del annotations


In [19]:
trainDataLoader, valDataLoader = CreateDataLoaders(
    X_train, X_val, y_train, y_val, batchSize)
del X_train
del y_train
del X_val
del y_val


In [20]:
yolov5Model = CreateYolov5Model(numClasses)
optimizer = optim.Adam(yolov5Model.parameters(), lr=0.001)
yolov5LossFunction= YOLOv5Loss(numClasses=numClasses)
yolov5Model = yolov5Model.to(device)
yolov5LossFunction = yolov5LossFunction.to(device)

Overriding model.yaml nc=80 with nc=47

                 from  n    params  module                                  arguments                     
  0                -1  1      3520  yolov5.models.common.Conv               [3, 32, 6, 2, 2]              
  1                -1  1     18560  yolov5.models.common.Conv               [32, 64, 3, 2]                
  2                -1  1     18816  yolov5.models.common.C3                 [64, 64, 1]                   
  3                -1  1     73984  yolov5.models.common.Conv               [64, 128, 3, 2]               
  4                -1  2    115712  yolov5.models.common.C3                 [128, 128, 2]                 
  5                -1  1    295424  yolov5.models.common.Conv               [128, 256, 3, 2]              
  6                -1  3    625152  yolov5.models.common.C3                 [256, 256, 3]                 
  7                -1  1   1180672  yolov5.models.common.Conv               [256, 512, 3, 2]            

In [21]:
trainedModel = TrainModel(yolov5Model, trainDataLoader, epochs, optimizer, yolov5LossFunction, device)

Epoch 1/100:
0%,0%,10%,20%,20%,30%,30%,40%,40%,50%,50%,60%,70%,70%,80%,80%,90%,90%,
Training Loss: 0.8768
Time taken: 1.0min, 35.81042766571045, secs
Epoch 2/100:
0%,0%,10%,20%,20%,30%,30%,40%,40%,50%,50%,60%,70%,70%,80%,80%,90%,90%,
Training Loss: 0.0331
Time taken: 1.0min, 28.69503402709961, secs
Epoch 3/100:
0%,0%,10%,20%,20%,30%,30%,40%,40%,50%,50%,60%,70%,70%,80%,80%,90%,90%,
Training Loss: 0.0130
Time taken: 1.0min, 28.74847102165222, secs
Epoch 4/100:
0%,0%,10%,20%,20%,30%,30%,40%,40%,50%,50%,60%,70%,70%,80%,80%,90%,90%,
Training Loss: 0.0075
Time taken: 1.0min, 29.01439332962036, secs
Epoch 5/100:
0%,0%,10%,20%,20%,30%,30%,40%,40%,50%,50%,60%,70%,70%,80%,80%,90%,90%,
Training Loss: 0.0053
Time taken: 1.0min, 28.862300157546997, secs
Epoch 6/100:
0%,0%,10%,20%,20%,30%,30%,40%,40%,50%,50%,60%,70%,70%,80%,80%,90%,90%,
Training Loss: 0.0040
Time taken: 1.0min, 28.716861486434937, secs
Epoch 7/100:
0%,0%,10%,20%,20%,30%,30%,40%,40%,50%,50%,60%,70%,70%,80%,80%,90%,90%,
Training Loss:

In [22]:
torch.save(trainedModel.state_dict(), 'trained_yolov5Modelv3.pth')

In [35]:
EvaluateModel(yolov5Model, valDataLoader, device)


Evaluateing Model:
0%,10%,20%,40%,50%,60%,70%,90%,mAP score on validation set: 0.0000
Time taken: 0.0min, 9.598888874053955, secs
