<h1>1. Install Dependencies</h1>

In [None]:
# Clone Ultralytics' yolov5 GitHub repository to the Desktop

## git clone https://github.com/ultralytics/yolov5

In [None]:
# Install Pytorch via conda
# conda config --set ssl_verify false

## conda install pytorch torchvision torchaudio pytorch-cuda=11.7 -c pytorch -c nvidia

# Python version 3.9.13
# Installed Pytorch via conda - version 1.13.0
# Installed torchvision via conda - version 0.14.0
# Installed torchaudio via conda - version 0.13.0
# CUDA version 11.7

In [None]:
# Install yolov5 requirements

## cd Desktop/yolov5
## pip install -r requirements.txt

<h3>Import Python Modules</h3>

In [10]:
import torch
from matplotlib import pyplot as plt
import numpy as np
import cv2

import time
import math

In [2]:
# Check PyTorch / CUDA usability

# Availability and device count can be queried on any machine.
cuda_available = torch.cuda.is_available()
print(cuda_available)
print(torch.cuda.device_count())

if cuda_available:
    # The per-device queries need a usable CUDA device, so they are only run
    # when one is present; otherwise the check itself would raise.
    print(torch.cuda.current_device())
    print(torch.cuda.device(0))
    print(torch.cuda.get_device_name(0))
else:
    print('CUDA is not available - skipping GPU device queries.')

True
1
0
<torch.cuda.device object at 0x0000019694F71760>
Quadro M1200


<h3>Check Working Directory</h3>

In [3]:
pwd # Print working directory

'C:\\Users\\Z0142848\\Desktop\\industrial_vision\\yolov5'

In [4]:
import os    
# Set the KMP_DUPLICATE_LIB_OK environment variable for this process.
# NOTE(review): presumably a workaround for the "duplicate OpenMP runtime"
# abort seen when several libraries bundle their own copy — confirm it is still needed.
os.environ['KMP_DUPLICATE_LIB_OK'] = 'True'

<h1>2. Load Model</h1>

In [5]:
# Load the 'yolov5n' model from the Ultralytics 'ultralytics/yolov5' repo via torch.hub

model = torch.hub.load('ultralytics/yolov5', 'yolov5n')
# Alternative model variant ('yolov5s'), kept for quick switching:
#model = torch.hub.load('ultralytics/yolov5', 'yolov5s')

Using cache found in C:\Users\Z0142848/.cache\torch\hub\ultralytics_yolov5_master
YOLOv5  2022-11-22 Python-3.9.13 torch-1.13.0 CUDA:0 (Quadro M1200, 4096MiB)

Fusing layers... 
YOLOv5n summary: 213 layers, 1867405 parameters, 0 gradients
Adding AutoShape... 


In [6]:
# Check Model in Use: the bare expression on the last line displays the model's repr

model

AutoShape(
  (model): DetectMultiBackend(
    (model): DetectionModel(
      (model): Sequential(
        (0): Conv(
          (conv): Conv2d(3, 16, kernel_size=(6, 6), stride=(2, 2), padding=(2, 2))
          (act): SiLU(inplace=True)
        )
        (1): Conv(
          (conv): Conv2d(16, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
          (act): SiLU(inplace=True)
        )
        (2): C3(
          (cv1): Conv(
            (conv): Conv2d(32, 16, kernel_size=(1, 1), stride=(1, 1))
            (act): SiLU(inplace=True)
          )
          (cv2): Conv(
            (conv): Conv2d(32, 16, kernel_size=(1, 1), stride=(1, 1))
            (act): SiLU(inplace=True)
          )
          (cv3): Conv(
            (conv): Conv2d(32, 32, kernel_size=(1, 1), stride=(1, 1))
            (act): SiLU(inplace=True)
          )
          (m): Sequential(
            (0): Bottleneck(
              (cv1): Conv(
                (conv): Conv2d(16, 16, kernel_size=(1, 1), stride=(1, 1))
  

<h1>3. Make Detections</h1>

In [None]:
# First prediction: URL of the sample image that is passed to the model

img = 'https://raw.githubusercontent.com/ultralytics/yolov5/master/data/images/zidane.jpg'

In [None]:
# Run the model on the image and print the results object's summary

results = model(img)
results.print()

In [None]:
%matplotlib inline
plt.imshow(np.squeeze(results.render()))
plt.show()

In [None]:
# Display the raw value returned by render() (presumably the annotated image array(s))
results.render()

In [None]:
# Raw detections. Elsewhere in this notebook they are indexed as
# xyxy[image][detection][field], with fields 0-3 used as box corners x1, y1, x2, y2.
results.xyxy

In [None]:
# Second prediction: a local image file, given as a path relative to the working directory

img2 = 'traffic.jpg'

In [None]:
results2 = model(img2)
%matplotlib inline
plt.imshow(np.squeeze(results2.render()))
plt.show()

<h1>4. Real Time Detection</h1>

In [None]:
# Run detection on every frame of a video file and show the annotated stream
# with an FPS counter. Press 'q' in the preview window to stop early.
cap = cv2.VideoCapture('traffic0.mp4')
# Start the clock now so the first FPS value measures the first frame
# instead of the time elapsed since the epoch.
startTime = time.time()
try:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            # No frame was returned (end of the video or a read failure):
            # stop instead of handing an empty frame to the model.
            break

        # Make detections
        results = model(frame)

        # FPS: inverse of the time elapsed since the previous frame
        currentTime = time.time()
        elapsed = currentTime - startTime
        startTime = currentTime
        # Guard against a zero interval on coarse clocks
        fps = 1 / elapsed if elapsed > 0 else 0.0
        # NOTE(review): the text is drawn on `frame` while the window shows
        # results.render(); this presumably works because the results keep a
        # reference to the same frame buffer — confirm.
        cv2.putText(frame, "FPS: " + str(int(fps)), (20, 40), cv2.FONT_HERSHEY_PLAIN, 2, (0, 255, 0), 3)

        cv2.imshow('YOLO', np.squeeze(results.render()))

        if cv2.waitKey(10) & 0xFF == ord('q'):
            break
finally:
    # Always free the capture handle and close the window, even if a frame fails
    cap.release()
    cv2.destroyAllWindows()

In [11]:
# Detect objects on every frame, mark each detection's centroid and join
# consecutive centroids with a line. Press 'p' to print the length of every
# centroid-to-centroid segment of the current frame, 'q' to quit.
cap = cv2.VideoCapture('traffic0.mp4')
#cap = cv2.VideoCapture(0)
font = cv2.FONT_HERSHEY_SIMPLEX
# Start the clock now so the first FPS value measures the first frame
startTime = time.time()

try:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            # End of the video (or a failed read): there is no frame to process
            break
        distances = []

        # Make detections
        results = model(frame)

        # Integer pixel centroid of every detected box; fields 0-3 of a
        # detection row are used as the box corners (x1, y1, x2, y2).
        centroids = []
        for box in results.xyxy[0]:
            x1, y1, x2, y2 = (box[k].item() for k in range(4))
            centroids.append((int((x1 + x2) / 2), int((y1 + y2) / 2)))

        for i, org1 in enumerate(centroids):
            if i == 0:
                # The first detection has no predecessor to connect to
                cv2.circle(frame, org1, 2, (255, 255, 255), 2)
            else:
                org2 = centroids[i - 1]
                cv2.circle(frame, org1, 2, (245, 245, 245), 2)
                cv2.line(frame, org1, org2, (255, 255, 255), 1)
                distances.append([org1, org2])

        # FPS: inverse of the time elapsed since the previous frame
        currentTime = time.time()
        elapsed = currentTime - startTime
        startTime = currentTime
        # Guard against a zero interval on coarse clocks
        fps = 1 / elapsed if elapsed > 0 else 0.0
        cv2.putText(frame, "FPS: " + str(int(fps)), (20, 40), cv2.FONT_HERSHEY_PLAIN, 2, (0, 255, 0), 3)

        cv2.imshow('YOLO', np.squeeze(results.render()))

        # Poll the keyboard once per frame. Two separate waitKey() calls would
        # let the first call swallow a key press meant for the second check.
        key = cv2.waitKey(2) & 0xFF
        if key == ord('p'):
            print('*** DB COMMIT ***')
            # Euclidean distance: d = sqrt((x2 - x1)^2 + (y2 - y1)^2)
            for i, (start, end) in enumerate(distances):
                print(f'DISTANCE {i}: {math.sqrt((end[0] - start[0])**2 + (end[1] - start[1])**2)}')
        elif key == ord('q'):
            break
finally:
    # Always free the capture handle and close the window, even if a frame fails
    cap.release()
    cv2.destroyAllWindows()

*** DB COMMIT ***
92.17917335276988
59.464274989274024
173.44163283364233
205.1828452868319
59.64059020499378
*** DB COMMIT ***
169.95587662684687
317.73731288597503
98.37174391053561
66.18912297349165
3.1622776601683795
199.56452590578317
323.5954264200902
*** DB COMMIT ***
192.4811679100062
406.09974144291203
324.56894491001447


<h1>5. Train from Scratch</h1>

In [None]:
# Import dependencies

import uuid
import os
import time

In [None]:
IMAGES_PATH = os.path.join('data', 'images') # Directory the captured images are written to
labels = ['tape', 'label', 'citroen', 'peugeot'] # Class labels; the capture loop runs once per label
number_images = 18 # Number of pictures to capture for each label

In [None]:
# Capture `number_images` webcam pictures for every entry of `labels` and
# store them in IMAGES_PATH as '<label>.<uuid>.jpg'. Press 'q' to abort.

# Make sure the target directory exists before anything is written to it
os.makedirs(IMAGES_PATH, exist_ok=True)

cap = cv2.VideoCapture(0)
stop_requested = False
try:
    # Loop through labels
    for label in labels:
        print('Collecting images for {}'.format(label))
        # Pause so the next object can be placed in front of the camera
        time.sleep(5)

        # Loop through image range
        for img_num in range(number_images):
            print('Collecting images for {}, image number {}'.format(label, img_num))

            # Webcam feed
            ret, frame = cap.read()
            if not ret:
                # Without a frame there is nothing to save or show
                print('Could not read a frame from the camera; stopping.')
                stop_requested = True
                break

            # Build a unique image path for this capture
            imgname = os.path.join(IMAGES_PATH, label+'.'+str(uuid.uuid1())+'.jpg')

            # Write the image to disk
            cv2.imwrite(imgname, frame)

            # Render to the screen
            cv2.imshow('Image Collection', frame)

            # Give time between capture iterations
            time.sleep(1.5)

            # 'q' aborts the whole collection, not just the current label
            if cv2.waitKey(10) & 0xff == ord('q'):
                stop_requested = True
                break

        if stop_requested:
            break
finally:
    # Release the camera and close all windows, even after an error
    cap.release()
    cv2.destroyAllWindows()

<h2>Label Images</h2>

In [None]:
# Clone the labelImg repository (image labeling tool) into ./labelImg;
# the dependency-install cell changes into that directory.
!git clone https://github.com/tzutalin/labelImg

In [None]:
# Install dependencies
!pip install pyqt5 lxml --upgrade
!cd labelImg && pyrcc5 -o libs/resources.py resources.qrc

<h3>Train Model</h3>

<h5>Explanation of attributes</h5>
<ol>
    <li><b>batch:</b> batch size</li>
    <li><b>epochs:</b> number of epochs to train for</li>
    <li><b>data:</b> yaml file that contains info about the dataset (path of images, labels...)</li>
    <li><b>workers:</b> number of CPU workers</li>
    <li><b>cfg:</b> model architecture</li>
    <li><b>weights:</b> pretrained weights to start training from</li>
    <li><b>name:</b> name of the training run; logs and weights are stored in the folder runs/train/name</li>
    <li><b>hyp:</b> yaml file that describes hyperparameter choices</li>
</ol>

In [None]:
# Train inside the yolov5 checkout on dataset.yaml, starting from yolov5s.pt:
# batch size 32, 1000 epochs (--img 320 is presumably the training image size — confirm against train.py)
!cd yolov5 && python train.py --img 320 --batch 32 --epochs 1000 --data dataset.yaml --weights yolov5s.pt

<h1>6. Load Custom Model</h1>

In [None]:
# Load the custom-trained weights (last.pt of training run exp9) through torch.hub.
# This rebinds `model`, so later cells use the custom model.
# NOTE(review): force_reload=True presumably bypasses the cached hub copy — confirm it is needed on every run.
model = torch.hub.load('ultralytics/yolov5', 'custom', path='yolov5/runs/train/exp9/weights/last.pt', force_reload=True)

In [None]:
# Run the custom model on one image from data/images and print the results' summary
test_img = 'label.e6fd052c-5f56-11ed-a1b7-047bcb5a1a09.jpg'
img = os.path.join('data', 'images', test_img)
results = model(img)
results.print()

In [None]:
%matplotlib inline
plt.imshow(np.squeeze(results.render()))
plt.show()

In [None]:
# Live detection on the default webcam with the currently loaded model.
# Press 'q' in the preview window to stop.
cap = cv2.VideoCapture(0)
try:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            # The camera delivered no frame: stop instead of passing an
            # empty frame to the model.
            break

        # Make detections
        results = model(frame)

        cv2.imshow('YOLO', np.squeeze(results.render()))
        if cv2.waitKey(10) & 0xFF == ord('q'):
            break
finally:
    # Always free the camera and close the window, even if a frame fails
    cap.release()
    cv2.destroyAllWindows()

<h3>Additional Tests</h3>

In [None]:
# Crop the detections out of the image. save=True is passed so the crops are
# also written out (presumably to disk under a runs/ folder — confirm location).
crops = results.crop(save=True)
crops

In [None]:
# Display the results object's repr
results

In [None]:
# Detections of the first image in tabular (pandas) form
results.pandas().xyxy[0]

In [None]:
# Display the pandas-converted results object itself
results.pandas()

In [None]:
# List the column names of the first image's detections table
print(results.pandas().xyxy[0].keys())

# Print the centre point [x, y] of every detected box; fields 0-3 of a
# detection row are the corner coordinates x1, y1, x2, y2.
for detection in results.xyxy[0]:
    x_min, y_min, x_max, y_max = (detection[k].item() for k in range(4))
    print([(x_min + x_max) / 2, (y_min + y_max) / 2])

In [None]:
# Left edge, right edge and width (in pixels) of the first detected box
first_box = results.xyxy[0][0]
left_x = first_box[0].item()
right_x = first_box[2].item()
print(left_x)
print(right_x)
print(right_x - left_x)

In [None]:
# Raw detections of the latest `results` (fields 0-3 of each row are the box corners)
results.xyxy

In [None]:
import requests
# Quick connectivity check. The timeout keeps the cell from hanging
# indefinitely when the network or a proxy does not answer.
requests.get("http://google.com", timeout=10)