# **This Colab notebook demonstrates reinforcement learning for object detection using YOLOv5 and the SKU110K-6799_good_images dataset (a subset of the SKU110K dataset, SKU110k_6799, from which the corrupted images have been removed)**

This code is copied from the link below (reference link):

1. https://github.com/hsahib2912/Object-detection-reinforcement-learning

# Import Required Libraries

In [None]:
# Importing libraries
import torch
import torch.nn as nn
from torch.optim import SGD
from torch.utils.data import DataLoader
from shapely.geometry import Polygon
import os
import PIL
import pandas as pd
import numpy as np
from sklearn.metrics import average_precision_score
import seaborn as sn
import matplotlib.pyplot as plt
import cv2

# To Use the Dataset 

* ### There are three ways to use the dataset:

1. Download the dataset to your local PC and upload it directly to the Colab notebook by clicking the `Files icon in left panel-->right click and upload--->choose dataset path-->click open`. The dataset is then available inside the Colab notebook. Keep in mind that whenever the notebook is disconnected and reconnected, you need to upload the dataset again.

2. Download the dataset using the `!wget` command and unzip it using the `!unzip` command. With this option you need to run the commands again every time the notebook is disconnected, so downloading and unzipping can take a long time, depending on the dataset size.

3. The best way to use a large dataset is to save it in your Google Drive and use it directly after mounting Google Drive.

In [None]:
# Method 1 is directly upload in colab as explained in above point 1
# Method 2
# To download and unzip the SKU110K dataset

#!wget {URL link to download dataset} # To download the dataset
#!tar -xvf {path to untar the dataset} #To untar the dataset
#!unzip {path to unzip the dataset} # to unzip the dataset

In [None]:
# Method 3
# Mount Google Drive so that the SKU110K-6799_good_images dataset stored there can be read.
# All paths used later in this notebook live under the mount point '/content/gdrive/'.

from google.colab import drive
drive.mount('/content/gdrive/')

Drive already mounted at /content/gdrive/; to attempt to forcibly remount, call drive.mount("/content/gdrive/", force_remount=True).


# Initializing the Pre - trained Model


In [None]:
# Initialize the pre-trained model that we want to use for reinforcement learning.
# torch.hub loads the 'custom' entry point of ultralytics/yolov5 with the weights
# file given by `path`; the resulting global `yolo` is called by the helper
# functions below to obtain detections for an image path.

yolo = torch.hub.load('ultralytics/yolov5','custom', path = '/content/gdrive/MyDrive/Object_Detection_in_Dense_Conditions/SKU110K-6799_16_1024_50_good_images_best.pt')

Using cache found in /root/.cache/torch/hub/ultralytics_yolov5_master
YOLOv5 🚀 2021-9-15 torch 1.9.0+cu102 CUDA:0 (Tesla K80, 11441.1875MB)

Fusing layers... 
Model Summary: 224 layers, 7053910 parameters, 0 gradients
Adding AutoShape... 


# Set all Path to the Dataset and Results Folder

1. Set all train, test and validation folders' path from dataset.
2. Set results folder path, where you want to save the results of detected images as well as reinforcement results.


In [None]:
# Set all paths. Every directory string ends with '/' because the functions
# below build file paths by plain string concatenation (directory + file name).
# Training images and their label files.
base_img_train_path = '/content/gdrive/MyDrive/Object_Detection_in_Dense_Conditions/SKU110K-6799-good_images/SKU110K-6799-good_images/training_pano/images/'
base_label_train_path = '/content/gdrive/MyDrive/Object_Detection_in_Dense_Conditions/SKU110K-6799-good_images/SKU110K-6799-good_images/training_pano/labels/'
# NOTE: the "test" paths point at the validation_pano split of the dataset.
base_img_test_path = '/content/gdrive/MyDrive/Object_Detection_in_Dense_Conditions/SKU110K-6799-good_images/SKU110K-6799-good_images/validation_pano/images/'
base_label_test_path = '/content/gdrive/MyDrive/Object_Detection_in_Dense_Conditions/SKU110K-6799-good_images/SKU110K-6799-good_images/validation_pano/labels/'
# Per-image CSV files with the Q-network predictions (written by write_csv).
base_img_q_results_path = '/content/gdrive/MyDrive/Object_Detection_in_Dense_Conditions/Reinforcement_results/test_results/'
# Root results folder: tp/fp/fn/map CSVs and the plots are written here.
yolo_path = '/content/gdrive/MyDrive/Object_Detection_in_Dense_Conditions/Reinforcement_results/'
# Output folder for the images annotated by get_images().
detected_images_path = '/content/gdrive/MyDrive/Object_Detection_in_Dense_Conditions/Reinforcement_results/detected_images/'

# Define all the Functions that are Used to Draw Evaluation Curves and also used while Training, Testing and Detecting Procedure

1. Function to Get the Ground Truth Co - Ordinates of Bounding Boxes in Numpy Array.

In [None]:
def get_labels_into_pandas(img,pth):
    """Load the ground-truth rows that belong to the image file ``img``.

    The label file is looked up under ``pth`` using the image's base name
    with a ``.txt`` extension and is parsed as space-separated values
    without a header row.

    Args:
        img: Image file name, e.g. ``'shelf_12.jpg'``.
        pth: Directory prefix of the label files. It is concatenated directly
            with the file name, so it must end with a path separator.

    Returns:
        A NumPy array with one row per line of the label file, containing
        every column except the first one.
    """
    # splitext handles extensions of any length ('.jpeg', '.png', ...);
    # chopping the last three characters only worked for 3-letter extensions.
    stem, _ext = os.path.splitext(img)
    path = pth + stem + '.txt'
    df = pd.read_csv(path, delimiter=' ', header=None)
    # Column 0 is discarded (presumably the class id of a YOLO-format label
    # file - TODO confirm); the remaining columns are the box description.
    df.drop(0, inplace=True, axis=1)
    return np.array(df)

2. Function to Get the Results from YOLOV5 Pre - trained Model

In [None]:
def get_yolo_results(img,base_img_path):
    """Run the global ``yolo`` model on one image and return its boxes.

    Args:
        img: Image file name.
        base_img_path: Directory prefix that is concatenated with ``img``.

    Returns:
        A NumPy array built from the first ``xywhn`` result table after the
        'confidence', 'class' and 'name' columns have been removed.
    """
    detections = yolo(base_img_path + img).pandas().xywhn[0]
    # Strip the non-geometric columns one by one, modifying the table in place.
    for column in ('confidence', 'class', 'name'):
        detections.drop(column, axis=1, inplace=True)
    return np.array(detections)

3. This Function Sorts Ground Truth Labels According to the Results We Get While Passing Images to YOLOV5 Model; Such that Absolute Distance between Ground Truth and Prediction is Minimum.

In [None]:
def get_sorted(x,y):
    """Pick, for every row of ``x``, the closest row of ``y``.

    Closeness is the sum of absolute differences over the first four columns
    (an L1 distance). The same row of ``y`` may be selected for several rows
    of ``x``; ties go to the row that appears first in ``y``.

    Args:
        x: 2-D array-like (NumPy array or CPU torch tensor) of predicted
            boxes, at least four columns wide.
        y: 2-D array-like of ground-truth boxes, at least four columns wide.

    Returns:
        A NumPy array with ``len(x)`` rows taken from ``y``, ordered to line
        up with ``x``. If either input is empty, an array with zero rows is
        returned instead of raising.
    """
    y_arr = np.asarray(y)
    n_cols = y_arr.shape[1] if y_arr.ndim == 2 else 4
    if len(x) == 0 or len(y_arr) == 0:
        # Nothing to match; keep a 2-D shape so column indexing still works.
        return np.empty((0, n_cols), dtype=float)

    x_arr = np.asarray(x, dtype=float)[:, :4]
    # dist[i, j] = L1 distance between prediction i and ground-truth row j.
    dist = np.abs(x_arr[:, None, :] - y_arr[None, :, :4].astype(float)).sum(axis=2)
    # argmin returns the first minimum, matching a strict '<' scan over y.
    nearest = dist.argmin(axis=1)
    return y_arr[nearest]

4. Function to Compute Intersection over Union (IoU)

In [None]:
def compute_IOU(b1,b2):
    """Return the intersection-over-union of two axis-aligned boxes.

    Both boxes are given as ``[x_a, x_b, y_a, y_b]``. The two x values (and
    the two y values) may come in either order; they are sorted before use,
    so a box describes the same rectangle regardless of corner order.

    The value is computed in closed form, which avoids building geometry
    objects for every box pair and is well defined for zero-area boxes.

    Args:
        b1: First box; any indexable of four numbers (list, NumPy row,
            1-D tensor).
        b2: Second box, same layout.

    Returns:
        Intersection area divided by union area, or 0 when the union has no
        area.
    """
    ax_lo, ax_hi = sorted((float(b1[0]), float(b1[1])))
    ay_lo, ay_hi = sorted((float(b1[2]), float(b1[3])))
    bx_lo, bx_hi = sorted((float(b2[0]), float(b2[1])))
    by_lo, by_hi = sorted((float(b2[2]), float(b2[3])))

    overlap_w = min(ax_hi, bx_hi) - max(ax_lo, bx_lo)
    overlap_h = min(ay_hi, by_hi) - max(ay_lo, by_lo)
    # Boxes that only touch, or do not meet at all, share no area.
    intersection = overlap_w * overlap_h if overlap_w > 0 and overlap_h > 0 else 0.0

    area_a = (ax_hi - ax_lo) * (ay_hi - ay_lo)
    area_b = (bx_hi - bx_lo) * (by_hi - by_lo)
    union = area_a + area_b - intersection
    if union > 0:
        return intersection / union
    return 0

5. Function to Transform Labels from : [ x_center, y_center, width, height ] to => [ x_min, x_max, y_min, y_max ] 

In [None]:
def transform_y(y,w,h):
    """Convert boxes in place from centre/size form to min/max form.

    Each row ``[x_center, y_center, width, height]`` is overwritten with
    ``[x_min, x_max, y_min, y_max]``. The input values are kept in their
    original (normalised) scale, so the result is on the same scale.

    Fixes over the previous version: the height of row ``i`` is used for row
    ``i`` (not always row 1, which also raised IndexError for single-box
    images), and ``y_max`` is ``y_center + height/2`` rather than a copy of
    ``y_min``.

    Args:
        y: 2-D NumPy array of boxes; modified in place.
        w: Image width. Unused by the arithmetic (scaling to pixels and back
            cancels out) but kept so existing callers keep working.
        h: Image height. Kept for the same reason as ``w``.

    Returns:
        The same array object ``y`` after conversion.
    """
    if len(y) == 0:
        # Nothing to convert; also covers a 1-D empty array.
        return y
    # Copy the centres first: columns 0 and 1 are overwritten below.
    x_center = y[:,0].copy()
    y_center = y[:,1].copy()
    half_w = y[:,2] / 2
    half_h = y[:,3] / 2
    y[:,0] = x_center - half_w
    y[:,1] = x_center + half_w
    y[:,2] = y_center - half_h
    y[:,3] = y_center + half_h
    return y

In [None]:
len(os.listdir(base_img_test_path)) # number of entries in the test image folder, i.e. the number of test samples

350

6. Function to Write Results We Get from DeepQNetwork into CSV

In [None]:
def write_csv(full_y,img):
  """Store the predicted boxes of one image as a CSV file.

  The output file name is the image name with its last three characters
  replaced by 'csv'; the file is written into the test_results folder
  without an index column.

  Args:
    full_y: Sequence of predicted boxes (one row per box).
    img: Image file name the predictions belong to.
  """
  results_dir = '/content/gdrive/MyDrive/Object_Detection_in_Dense_Conditions/Reinforcement_results/test_results/'
  stem_length = len(img) - 3
  csv_name = img[:stem_length] + 'csv'
  pd.DataFrame(full_y).to_csv(results_dir + csv_name, index=False)

7. This Function Computes the Mean Squared Error

In [None]:
def get_mse(y1,y2):
    """Return the mean squared error between two equally long sequences.

    The previous version assigned instead of accumulating, so only the last
    element contributed to the result.

    Args:
        y1: Indexable sequence of numbers (list, NumPy array or 1-D tensor).
        y2: Indexable sequence with at least ``len(y1)`` elements.

    Returns:
        ``sum((y1[i] - y2[i]) ** 2) / len(y1)``; the result type follows the
        element type (e.g. a 0-d tensor for tensor inputs).

    Raises:
        ZeroDivisionError: If ``y1`` is empty.
    """
    s = 0
    for i in range(len(y1)):
        s += (y1[i] - y2[i])**2
    return s/len(y1)

8. Function to Define the Deep Q Network

In [None]:
class DeepQNetwork(nn.Module):
    """Three-layer fully connected network: 5 inputs -> 100 -> 1000 -> 4 outputs.

    Every linear layer has Xavier-uniform initialised weights and is followed
    by a Tanh; the two hidden stages additionally apply dropout (p = 0.2).
    """

    def __init__(self):
        super().__init__()
        # Stage 1: 5 -> 100
        self.hidden1 = nn.Linear(in_features=5, out_features=100)
        nn.init.xavier_uniform_(self.hidden1.weight)
        self.activation1 = nn.Tanh()
        self.d1 = nn.Dropout(p=0.2)
        # Stage 2: 100 -> 1000
        self.hidden2 = nn.Linear(in_features=100, out_features=1000)
        nn.init.xavier_uniform_(self.hidden2.weight)
        self.activation2 = nn.Tanh()
        self.d2 = nn.Dropout(p=0.2)
        # Output stage: 1000 -> 4, squashed into [-1, 1] by Tanh
        self.hidden3 = nn.Linear(in_features=1000, out_features=4)
        nn.init.xavier_uniform_(self.hidden3.weight)
        self.activation3 = nn.Tanh()

    def forward(self,X):
        """Apply the layers in construction order and return the 4 outputs."""
        pipeline = (
            self.hidden1, self.activation1, self.d1,
            self.hidden2, self.activation2, self.d2,
            self.hidden3, self.activation3,
        )
        out = X
        for layer in pipeline:
            out = layer(out)
        return out

9. Function to Train the Model

In [None]:
def train(model):
    """Train ``model`` for one pass over the training images.

    For every image the YOLO detections are paired with their nearest
    ground-truth boxes. Each detection, with the current reward appended as
    a fifth input, is fed to the network, which is optimised with MSE loss
    against the matched ground-truth box in min/max form. After each step
    the reward becomes +1 if the IoU of the prediction and the target
    exceeds 0.8, otherwise -1.

    Images without detections or without labels are skipped, so they no
    longer crash the run or log a stale loss.

    Side effects: saves the model weights to ``DenseQNet.pt`` and the
    per-image losses to ``loss.csv`` in the Reinforcement_results folder.

    Args:
        model: The network to train; updated in place.
    """
    mse = nn.MSELoss()
    opt = SGD(model.parameters(),lr = 0.01,momentum = 0.9)
    cnt = 0
    img_list = os.listdir(base_img_train_path)
    loss_list = []
    for img in img_list:
        loss = None  # stays None when this image yields no training step
        # Close the file handle right away; only the size is needed.
        with PIL.Image.open(base_img_train_path+img) as im:
            w,h = im.size
        detections = get_yolo_results(img,base_img_train_path)
        labels = get_labels_into_pandas(img,base_label_train_path)
        # Check for empty inputs before matching/transforming, which index
        # into the arrays.
        if (len(detections)!=0 and len(labels)!=0):
            x = torch.from_numpy(detections)
            y = get_sorted(x,labels)
            y = transform_y(y,w,h)
            y = torch.from_numpy(y)
            reward = torch.tensor([1])
            for i in range(len(x)):
                opt.zero_grad()
                new_x = torch.cat((x[i],reward))
                yhat = model(new_x.float())
                loss = mse(yhat.float(),y[i].float())
                loss.backward()
                iou = compute_IOU(yhat,y[i])
                if(iou>0.8):# To set threshold value at 0.8
                    reward = torch.tensor([1])
                else:
                    reward = torch.tensor([-1])
                opt.step()
        if loss is not None:
            # The tensor itself is stored (not a float) so loss.csv keeps the
            # 'tensor(...)' text format that the loss-plot cell parses.
            loss_list.append(loss)
        if(cnt%200==0):
            print('Training sample = ',cnt)
            print('loss = ',loss)
        cnt+=1
    torch.save(model.state_dict(),'/content/gdrive/MyDrive/Object_Detection_in_Dense_Conditions/Reinforcement_results/DenseQNet.pt')
    loss_df = pd.DataFrame(loss_list)
    loss_df.to_csv('/content/gdrive/MyDrive/Object_Detection_in_Dense_Conditions/Reinforcement_results/loss.csv',index = False)

10. Function to Test the Model. It Stores the Co-Ordinates of the Bounding Boxes in a CSV File.

In [None]:
def test(model):
  """Run ``model`` over the test images and write its predictions to CSV.

  For every image the YOLO detections are paired with their nearest
  ground-truth boxes, each detection (plus a constant reward of 1) is passed
  through the network, and the absolute values of the four outputs are
  written per image via ``write_csv``. Per-image mean MSE and mean IoU
  between prediction and ground truth are accumulated in local lists.

  NOTE(review): ``mse_l`` and ``iou_l`` are collected but not returned, as in
  the original; return them if callers need the numbers.

  Images that fail for any reason are skipped and the reason is printed.

  Args:
    model: The trained network. It is used as-is; switch it to eval mode
      before calling if dropout should be disabled.
  """
  mse_l = []
  iou_l = []
  img_list = os.listdir(base_img_test_path)
  cnt = 0
  for img in img_list:
    try:
      with PIL.Image.open(base_img_test_path+img) as im:
        w,h = im.size
      x = torch.from_numpy(get_yolo_results(img,base_img_test_path))
      y = get_sorted(x,get_labels_into_pandas(img,base_label_test_path))
      y = transform_y(y,w,h)
      y = torch.from_numpy(y)
      reward = torch.tensor([1])
      iou = mse = 0
      full_y = []
      if (len(x)!=0 and len(y)!=0):
          # Inference only: no autograd graph is needed.
          with torch.no_grad():
              for i in range(len(x)):
                  new_x = torch.cat((x[i],reward))
                  yhat = model(new_x.float())
                  mse += get_mse(yhat,y[i]).float()
                  # Compare prediction and target, both in min/max layout.
                  # The earlier code compared the transformed labels with the
                  # raw centre/size YOLO boxes, mixing two box layouts.
                  iou += compute_IOU(yhat.tolist(),y[i].tolist())
                  full_y.append(list(map(abs,yhat.tolist())))
      iou_l.append(iou/len(y))
      mse_l.append(mse/len(y))
      write_csv(full_y,img)
      if(cnt%20==0):
        print('Testing Sample = ',cnt)
      cnt+=1
    except Exception as exc:
      # Best-effort loop: keep going, but say why the image was dropped.
      print("Skipping ",img)
      print("  reason: ",repr(exc))

# Train the Model

In [None]:
# Build a freshly initialised network and train it; train() also saves the
# weights (DenseQNet.pt) and the loss history (loss.csv).
model = DeepQNetwork()
train(model)

Training sample =  0
loss =  tensor(0.09775, grad_fn=<MseLossBackward>)
Training sample =  200
loss =  tensor(6.10891e-05, grad_fn=<MseLossBackward>)
Training sample =  400
loss =  tensor(0.00784, grad_fn=<MseLossBackward>)
Training sample =  600
loss =  tensor(0.00414, grad_fn=<MseLossBackward>)
Training sample =  800
loss =  tensor(0.00126, grad_fn=<MseLossBackward>)
Training sample =  1000
loss =  tensor(0.00392, grad_fn=<MseLossBackward>)
Training sample =  1200
loss =  tensor(0.00037, grad_fn=<MseLossBackward>)
Training sample =  1400
loss =  tensor(0.00145, grad_fn=<MseLossBackward>)
Training sample =  1600
loss =  tensor(0.00467, grad_fn=<MseLossBackward>)
Training sample =  1800
loss =  tensor(0.00029, grad_fn=<MseLossBackward>)
Training sample =  2000
loss =  tensor(0.00228, grad_fn=<MseLossBackward>)
Training sample =  2200
loss =  tensor(0.00054, grad_fn=<MseLossBackward>)
Training sample =  2400
loss =  tensor(0.00399, grad_fn=<MseLossBackward>)
Training sample =  2600
loss

# To initialize the Evaluation Mode

In [None]:
# Re-create the network, restore the weights saved by train(), and switch to
# evaluation mode so the dropout layers are inactive during testing.
model = DeepQNetwork()
model.load_state_dict(torch.load('/content/gdrive/MyDrive/Object_Detection_in_Dense_Conditions/Reinforcement_results/DenseQNet.pt'))
model.eval()

DeepQNetwork(
  (hidden1): Linear(in_features=5, out_features=100, bias=True)
  (activation1): Tanh()
  (d1): Dropout(p=0.2, inplace=False)
  (hidden2): Linear(in_features=100, out_features=1000, bias=True)
  (activation2): Tanh()
  (d2): Dropout(p=0.2, inplace=False)
  (hidden3): Linear(in_features=1000, out_features=4, bias=True)
  (activation3): Tanh()
)

# Test the Model

In [None]:
# Run the model over the test images; predictions are written as one CSV per image.
test(model)

Testing Sample =  0
Testing Sample =  20
Testing Sample =  40
Testing Sample =  60
Testing Sample =  80
Testing Sample =  100
Testing Sample =  120
Testing Sample =  140
Testing Sample =  160
Testing Sample =  180
Testing Sample =  200
Testing Sample =  220
Testing Sample =  240
Testing Sample =  260
Testing Sample =  280
Testing Sample =  300
Testing Sample =  320
Testing Sample =  340


11. Function to Compute the Confusion Matrix

In [None]:
def compute_confusion_matrix(model):
  """Compute per-image TP / FP / FN counts and an average-precision score.

  For each test image the prediction CSV written during testing is compared
  row by row with the ground-truth boxes. The ground truth is first matched
  to the YOLO detections with ``get_sorted`` (the same ordering the
  prediction CSV was produced in), so row ``i`` of both arrays refers to the
  same detection. A prediction counts as a false positive if its MSE against
  the ground truth exceeds 0.1, otherwise as a true positive. False negatives
  are the absolute difference between the number of detections and labels.

  Results are written to tp.csv, fp.csv, fn.csv and map.csv under
  ``yolo_path``.

  NOTE(review): the average-precision term scores each predicted box against
  the fixed vector [1, 1, 0, 0]; this is kept as in the original - confirm it
  is the intended metric.

  Args:
    model: Unused; kept so existing calls keep working.
  """
  map_l = []
  tp_l = []
  fp_l = []
  fn_l = []
  img_list = os.listdir(base_img_test_path)
  vec = [1,1,0,0]
  for cnt, img in enumerate(img_list):
    tp = fp = 0
    ap_sum = 0  # named ap_sum so the builtin map() is not shadowed
    csv_path = img[:len(img)-3]+'csv'
    with PIL.Image.open(base_img_test_path+img) as im:
      w,h = im.size
    x = get_yolo_results(img,base_img_test_path)
    labels = get_labels_into_pandas(img,base_label_test_path)
    fn = abs(len(x)-len(labels))
    if len(x) != 0 and len(labels) != 0:
      # Align ground truth with the detection order used for the CSV rows.
      y = transform_y(get_sorted(x,labels),w,h)
    else:
      y = []
    try:
      yhat = pd.read_csv(base_img_q_results_path+csv_path).to_numpy()
    except (FileNotFoundError, pd.errors.EmptyDataError):
      # No usable predictions for this image (e.g. it was skipped in testing).
      print("No predictions for ",img)
      yhat = []
    for i in range(min(len(yhat),len(y))):
      err = get_mse(yhat[i],y[i])
      if(err>0.1):
        fp+=1
      else:
        tp+=1
      ap_sum += average_precision_score(vec,yhat[i])
    tp_l.append(tp)
    fp_l.append(fp)
    fn_l.append(fn)
    # Guard against images without any predicted rows.
    map_l.append(ap_sum/len(yhat) if len(yhat) else 0.0)
    if(cnt%20 == 0):
      print("At image = ",cnt)
  pd.DataFrame(tp_l).to_csv(yolo_path+'tp.csv',index = False)
  pd.DataFrame(fp_l).to_csv(yolo_path+'fp.csv',index = False)
  pd.DataFrame(fn_l).to_csv(yolo_path+'fn.csv',index = False)
  pd.DataFrame(map_l).to_csv(yolo_path+'map.csv',index = False)

In [None]:
# Produce tp.csv, fp.csv, fn.csv and map.csv from the prediction CSVs written by test().
compute_confusion_matrix(model)

At image =  0
At image =  20
At image =  40
At image =  60
At image =  80
At image =  100
At image =  120
At image =  140
At image =  160
At image =  180
At image =  200
At image =  220
At image =  240
At image =  260
At image =  280
At image =  300
At image =  320
At image =  340


# To Find the Evaluation Metrics

In [None]:
# Reference links:
# 1)https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.read_csv.html
# 2)https://www.geeksforgeeks.org/python-pandas-series-tolist/
# 3)https://www.analyticsvidhya.com/blog/2020/09/precision-recall-machine-learning/

# Aggregate the per-image counts written by compute_confusion_matrix() into
# dataset-level metrics. `tp`, `fp` and `fn` stay module-level names because
# the confusion-matrix plot cell reads them.
tp_l = pd.read_csv(yolo_path+'tp.csv')['0'].to_list()
fp_l = pd.read_csv(yolo_path+'fp.csv')['0'].to_list()
fn_l = pd.read_csv(yolo_path+'fn.csv')['0'].to_list()
map_l = pd.read_csv(yolo_path+'map.csv')['0'].to_list()
tp = sum(tp_l)
fp = sum(fp_l)
fn = sum(fn_l)
total = tp+fp+fn
# Stored as `mean_ap`, not `map`: a module-level `map` would replace the
# builtin map() that test() calls, breaking any later re-run of test().
mean_ap = sum(map_l)/len(map_l) if map_l else 0.0


def _percent(part, whole):
  """Return ``part`` as a percentage of ``whole`` (0.0 when ``whole`` is 0)."""
  return 100*part/whole if whole else 0.0


print('True Positives = ',tp)
print('False Positives = ',fp)
print('False Neagtives = ',fn)
print('Mean Average Precision = ',mean_ap)
print("Accuracy = ",_percent(tp,total),'%')
print("Precision = ",_percent(tp,fp+tp),'%')
print('Recall = ',_percent(tp,fn+tp),'%')

True Positives =  49148
False Positives =  2368
False Neagtives =  6923
Mean Average Precision =  0.6561117988969201
Accuracy =  84.10137066000445 %
Precision =  95.40336982684991 %
Recall =  87.65315403684615 %


# To Draw and Save Confusion Matrix Figure

In [None]:
# Confusion matrix laid out as rows = predicted class, columns = actual class:
#   [[TP, FP],
#    [FN, TN]]   (TN is not counted by this pipeline, so it is shown as 0)
arr = [[tp,fp],[fn,0]]
confusion_df = pd.DataFrame(arr,index = ['Positive','Negative'],columns = ['Positive','Negative'])
plt.figure(figsize = (10,7))
# fmt='d' prints the integer counts in full; the default format would
# abbreviate large counts to scientific notation (e.g. 4.9e+04).
sn.heatmap(confusion_df,annot=True,fmt='d',annot_kws={"size": 15})
plt.ylabel('Predicted Values')
plt.xlabel('Actual Values')
plt.savefig(yolo_path+'confusion.png')

# To Draw and Save Loss Curve

In [None]:
import re

plt.clf()

# loss.csv holds either plain numbers or the text form of a tensor such as
# 'tensor(6.10891e-05, grad_fn=<MseLossBackward>)'. The number is extracted
# with a pattern that understands scientific notation; slicing a fixed
# character range dropped the exponent and turned 6.1e-05 into 6.1.
_LOSS_NUMBER = re.compile(r'[-+]?(?:\d+\.?\d*|\.\d+)(?:[eE][-+]?\d+)?|nan|inf')


def _loss_to_float(entry):
  """Convert one loss.csv cell (number or tensor text) to a float."""
  if not isinstance(entry, str):
    return float(entry)
  found = _LOSS_NUMBER.search(entry)
  if found is None:
    raise ValueError('cannot parse loss value: ' + repr(entry))
  return float(found.group(0))


loss_l = pd.read_csv(yolo_path+'loss.csv')['0'].to_list()
loss_l = [_loss_to_float(i) for i in loss_l]
plt.plot(loss_l)
plt.xlabel('Iteration')
plt.ylabel('Loss')
plt.title('Training Loss')
plt.savefig(yolo_path+'loss.png')

# To Draw and Save mAP(Mean Average Precision) Curve

In [None]:
plt.clf()
map_l = pd.read_csv(yolo_path+'map.csv')['0'].to_list()
# Horizontal reference line at the mean of the plotted values, computed from
# the data instead of a number copied from an earlier run.
mean_value = sum(map_l)/len(map_l) if map_l else 0.0
mean = [mean_value for i in range(len(map_l))]
# Plot the per-image average precision (the loss history was plotted here by mistake).
plt.plot(map_l)
plt.plot(mean)
plt.xlabel('Image')
plt.ylabel('Average Precision')
plt.savefig(yolo_path+'map.png')

12. Function to detect objects from Images

In [None]:
def k():
  """Draw one value from a normal distribution (mean 0, std 30), truncated to int."""
  sample = np.random.normal(loc=0, scale=30, size=1)
  return int(sample[0])
def get_images():
  """Draw the YOLO detections onto every test image and save the result.

  Each image in ``base_img_test_path`` is run through the global ``yolo``
  model; every detected box is drawn together with its confidence (first
  five characters) and the annotated image is written under the same file
  name into ``detected_images_path``. Files that OpenCV cannot read are
  reported and skipped.

  NOTE(review): every box coordinate is shifted by an independent random
  offset from ``k()`` before drawing, so the drawn boxes are not the exact
  YOLO output - confirm this perturbation is intended.
  """
  img_list = os.listdir(base_img_test_path)
  for cnt, img in enumerate(img_list):
    path = base_img_test_path+img
    cvimg = cv2.imread(path)
    if cvimg is None:
      # imread signals an unreadable or non-image file by returning None.
      print('Skipping unreadable image ',img)
      continue
    res = yolo(path).pandas().xyxy[0]
    for i in range(len(res)):
      xmin = int(res['xmin'][i]+k())
      xmax = int(res['xmax'][i]+k())
      ymax = int(res['ymax'][i]+k())
      ymin = int(res['ymin'][i]+k())
      cvimg = cv2.rectangle(cvimg, (xmin,ymin), (xmax,ymax), (120,255,255), 10)
      cv2.putText(cvimg, str(res['confidence'][i])[:5], (xmin, ymin), cv2.FONT_HERSHEY_SIMPLEX, 1.5, (120,255,255), 5)
    cv2.imwrite(detected_images_path+img, cvimg)
    if(cnt%20 == 0):
      print('At image = ',cnt)

# To Save Detected Images

In [None]:
# Annotate every test image with its detections and save it to detected_images_path.
get_images()

At image =  0
At image =  20
At image =  40
At image =  60
At image =  80
At image =  100
At image =  120
At image =  140
At image =  160
At image =  180
At image =  200
At image =  220
At image =  240
At image =  260
At image =  280
At image =  300
At image =  320
At image =  340
