# YOLOv5s BIN Tracking Thermal

This repository covers training a binary (single-class) YOLOv5s object detection model on thermal imagery and combining it with a SORT tracker.

# Setup

In [None]:
# Fetch the YOLOv5 code base, install its requirements and work from inside the repo.
# NOTE(review): the clone is not pinned to a tag or commit, while this notebook imports
# utils.google_utils and later runs test.py — confirm the YOLOv5 revision that still
# provides both and pin it.
!git clone https://github.com/ultralytics/yolov5  # clone repo
!pip install -qr yolov5/requirements.txt  # install dependencies (ignore errors)
%cd yolov5

import torch
from IPython.display import Image, clear_output  # to display images
from utils.google_utils import gdrive_download  # to download models/datasets

# Hide the installation log, then report the torch version and the device in use.
clear_output()
print('Setup complete. Using torch %s %s' % (torch.__version__, torch.cuda.get_device_properties(0) if torch.cuda.is_available() else 'CPU'))

Setup complete. Using torch 1.7.0+cu101 CPU


# Download Correctly Formatted Custom Dataset 

This project uses Roboflow.com to convert the data to the format YOLOv5 expects.

In [None]:
# Export code snippet and paste here
# NOTE: the URL between the quotes below is a blank placeholder; replace it with the
# Roboflow export link before running. The archive is unpacked into /content and then deleted.
%cd /content
!curl -L " " > roboflow.zip; unzip roboflow.zip; rm roboflow.zip

In [None]:
# Print the data.yaml that came with the Roboflow export (read from the current
# working directory) so its paths and class list can be inspected.
%cat data.yaml

# Change classes from multi-class to binary

In [None]:
%%writefile /content/data.yaml
# Single-class ("binary") dataset definition. The image folders are relative paths,
# resolved from the directory train.py / test.py are started in.
train: ../train/images
val: ../valid/images
# Needed by `test.py --task test`, which reads the 'test' entry of this file.
test: ../test/images

nc: 1
names: ['Maritime Object']

Writing /content/data.yaml


In [None]:
import os

directory = '/content/valid/labels/'

# Collapse every annotation of the validation split to the single class id 0 while
# keeping the box values (x_center, y_center, width, height) as parsed.
for navn in os.listdir(directory):
    if not navn.endswith(".txt"):
        continue

    label_path = os.path.join(directory, navn)

    # Read inside a context manager so the handle is closed before the same
    # file is reopened for writing.
    with open(label_path, "r") as label_file:
        rows = [line.split() for line in label_file]

    new_file = []
    for fields in rows:
        if not fields:
            # Blank line (e.g. a stray trailing newline): nothing to convert.
            continue
        # fields[0] is the original class id, which is discarded.
        x_cen, y_cen, width, height = (float(value) for value in fields[1:5])
        new_file.append(' '.join(['0', str(x_cen), str(y_cen), str(width), str(height)]))

    # Rewrite the label file in place: one box per line, no trailing newline.
    with open(label_path, "w") as f:
        f.write('\n'.join(new_file))

In [None]:
import os

directory = '/content/train/labels/'

# Collapse every annotation of the training split to the single class id 0 while
# keeping the box values (x_center, y_center, width, height) as parsed.
for navn in os.listdir(directory):
    if not navn.endswith(".txt"):
        continue

    label_path = os.path.join(directory, navn)

    # Read inside a context manager so the handle is closed before the same
    # file is reopened for writing.
    with open(label_path, "r") as label_file:
        rows = [line.split() for line in label_file]

    new_file = []
    for fields in rows:
        if not fields:
            # Blank line (e.g. a stray trailing newline): nothing to convert.
            continue
        # fields[0] is the original class id, which is discarded.
        x_cen, y_cen, width, height = (float(value) for value in fields[1:5])
        new_file.append(' '.join(['0', str(x_cen), str(y_cen), str(width), str(height)]))

    # Rewrite the label file in place: one box per line, no trailing newline.
    with open(label_path, "w") as f:
        f.write('\n'.join(new_file))

In [None]:
import os

directory = '/content/test/labels/'

# Collapse every annotation of the test split to the single class id 0 while
# keeping the box values (x_center, y_center, width, height) as parsed.
for navn in os.listdir(directory):
    if not navn.endswith(".txt"):
        continue

    label_path = os.path.join(directory, navn)

    # Read inside a context manager so the handle is closed before the same
    # file is reopened for writing.
    with open(label_path, "r") as label_file:
        rows = [line.split() for line in label_file]

    new_file = []
    for fields in rows:
        if not fields:
            # Blank line (e.g. a stray trailing newline): nothing to convert.
            continue
        # fields[0] is the original class id, which is discarded.
        x_cen, y_cen, width, height = (float(value) for value in fields[1:5])
        new_file.append(' '.join(['0', str(x_cen), str(y_cen), str(width), str(height)]))

    # Rewrite the label file in place: one box per line, no trailing newline.
    with open(label_path, "w") as f:
        f.write('\n'.join(new_file))

# Define Model Configuration and Architecture

Write a model configuration YAML file that is tailored to the custom dataset (in particular, its number of classes).

In [None]:
# define number of classes based on YAML
# "data.yaml" is a relative path, so it is resolved against the current working
# directory (an earlier cell ran `%cd /content`, where data.yaml was written).
import yaml
with open("data.yaml", 'r') as stream:
    # The 'nc' entry is converted to a string before it is stored.
    num_classes = str(yaml.safe_load(stream)['nc'])

In [None]:
#customize iPython writefile so we can write variables
from IPython.core.magic import register_line_cell_magic

@register_line_cell_magic
def writetemplate(line, cell):
    """Write the cell body to a file after filling in ``{name}`` placeholders.

    Args:
        line: text following the magic name; used as the path of the file to write.
        cell: the cell body; passed through ``str.format`` with this module's
            global variables, so a literal brace must be written as ``{{`` / ``}}``.

    Raises:
        KeyError: if the body references a placeholder that is not a global variable.
    """
    # Render before opening the file: if a placeholder cannot be resolved the
    # exception is raised while the existing target file is still untouched.
    rendered = cell.format(**globals())
    with open(line, 'w') as f:
        f.write(rendered)


In [None]:
%%writetemplate /content/yolov5/models/custom_yolov5s.yaml

# YOLOv5s model definition for the custom dataset. The file name matches the
# --cfg argument of the training command (./models/custom_yolov5s.yaml).

# parameters
nc: {num_classes}  # number of classes, filled in from data.yaml via the num_classes variable
depth_multiple: 0.33  # model depth multiple (YOLOv5s)
width_multiple: 0.50  # layer channel multiple (YOLOv5s)

# anchors
anchors:
  - [10,13, 16,30, 33,23]  # P3/8
  - [30,61, 62,45, 59,119]  # P4/16
  - [116,90, 156,198, 373,326]  # P5/32

# YOLOv5 backbone
backbone:
  # [from, number, module, args]
  [[-1, 1, Focus, [64, 3]],  # 0-P1/2
   [-1, 1, Conv, [128, 3, 2]],  # 1-P2/4
   [-1, 3, BottleneckCSP, [128]],
   [-1, 1, Conv, [256, 3, 2]],  # 3-P3/8
   [-1, 9, BottleneckCSP, [256]],
   [-1, 1, Conv, [512, 3, 2]],  # 5-P4/16
   [-1, 9, BottleneckCSP, [512]],
   [-1, 1, Conv, [1024, 3, 2]],  # 7-P5/32
   [-1, 1, SPP, [1024, [5, 9, 13]]],
   [-1, 3, BottleneckCSP, [1024, False]],  # 9
  ]

# YOLOv5 head
head:
  [[-1, 1, Conv, [512, 1, 1]],
   [-1, 1, nn.Upsample, [None, 2, 'nearest']],
   [[-1, 6], 1, Concat, [1]],  # cat backbone P4
   [-1, 3, BottleneckCSP, [512, False]],  # 13

   [-1, 1, Conv, [256, 1, 1]],
   [-1, 1, nn.Upsample, [None, 2, 'nearest']],
   [[-1, 4], 1, Concat, [1]],  # cat backbone P3
   [-1, 3, BottleneckCSP, [256, False]],  # 17 (P3/8-small)

   [-1, 1, Conv, [256, 3, 2]],
   [[-1, 14], 1, Concat, [1]],  # cat head P4
   [-1, 3, BottleneckCSP, [512, False]],  # 20 (P4/16-medium)

   [-1, 1, Conv, [512, 3, 2]],
   [[-1, 10], 1, Concat, [1]],  # cat head P5
   [-1, 3, BottleneckCSP, [1024, False]],  # 23 (P5/32-large)

   [[17, 20, 23], 1, Detect, [nc, anchors]],  # Detect(P3, P4, P5)
  ]


# Train the YOLOv5 Detector

Arguments:
- **img:** define input image size
- **batch:** determine batch size
- **epochs:** define the number of training epochs. (Note: often, 3000+ are common here!)
- **data:** set the path to our yaml file
- **cfg:** specify our model configuration
- **weights:** specify a custom path to weights. (Note: you can download weights from the Ultralytics Google Drive [folder](https://drive.google.com/open?id=1Drs_Aiu7xx6S-ix95f9kNsA6ueKRpN2J))
- **name:** result names
- **nosave:** only save the final checkpoint
- **cache:** cache images for faster training

In [None]:
# time its performance
%%time
%cd /content/yolov5/
!python train.py --img 640 --batch 16 --epochs 1500 --data '../data.yaml' --cfg ./models/custom_yolov5s.yaml --weights '' --name yolov5s_results  --cache

# Evaluate Custom YOLOv5 Detector Performance

In [None]:
# Start TensorBoard inside the notebook.
# Launch it once training has started so there are logs to show.
# --logdir is the relative folder "runs", resolved against the current working directory.
%load_ext tensorboard
%tensorboard --logdir runs

# Evaluate on test data

In [None]:
# Evaluate on the split selected by --task 'test', using the dataset file /content/data.yaml.
# NOTE(review): --weights is an empty string here, while the recorded output below was
# produced with a best.pt checkpoint on Google Drive — presumably the path of the trained
# checkpoint has to be filled in before this cell can run; confirm.
# NOTE(review): --img-size is 720 although training used --img 640 and the recorded
# output reports img_size=640 — confirm which size is intended.
%cd /content/yolov5/
!python test.py --weights '' --img-size 720 --conf-thres 0.01 --data /content/data.yaml --task 'test' --verbose

/content/yolov5
Namespace(augment=False, batch_size=32, conf_thres=0.01, data='/content/data.yaml', device='', exist_ok=False, img_size=640, iou_thres=0.6, name='exp', project='runs/test', save_conf=False, save_json=False, save_txt=False, single_cls=False, task='test', verbose=True, weights=['/content/drive/MyDrive/0Thesis/Hi-Res/TRACK/BIN_v5-640/best.pt'])
Using torch 1.7.0+cu101 CUDA:0 (Tesla P100-PCIE-16GB, 16280MB)

Fusing layers... 
Model Summary: 232 layers, 7246518 parameters, 0 gradients
Scanning '../test/labels.cache' for images and labels... 81 found, 0 missing, 27 empty, 0 corrupted: 100% 81/81 [00:00<00:00, 630312.85it/s]
               Class      Images     Targets           P           R      mAP@.5  mAP@.5:.95: 100% 3/3 [00:05<00:00,  1.78s/it]
                 all          81          89       0.747       0.955       0.948       0.513
Speed: 4.2/3.4/7.7 ms inference/NMS/total per 640x640 image at batch-size 32
Results saved to runs/test/exp3


# Install SORT Tracker functions

In [None]:
# Clone the SORT tracker next to the yolov5 checkout (into /content/sort).
%cd /content/
!git clone https://github.com/abewley/sort.git  # clone repo

In [None]:
# Install the packages SORT lists in its requirements file, then switch into the sort folder.
# NOTE(review): imgaug is pinned to 0.2.5 before the requirements are installed;
# the reason for this pin is not visible here — confirm it is still needed.
%cd /content/
!pip install imgaug==0.2.5
!pip install -qr sort/requirements.txt  # install dependencies (ignore errors)
%cd sort

In [None]:
# Change the folder structure so that SORT is accessible from detect.py:
# copy the sort checkout into the yolov5 directory.
%cp -r /content/sort/ /content/yolov5/

In [None]:
%%writefile /content/yolov5/sort/sort.py

from __future__ import print_function

import os
import numpy as np
import matplotlib
#matplotlib.use('TkAgg')
import matplotlib.pyplot as plt
import matplotlib.patches as patches
from skimage import io

import glob
import time
import argparse
from filterpy.kalman import KalmanFilter

np.random.seed(0)


def linear_assignment(cost_matrix):
  """
  Solves the assignment problem for cost_matrix and returns the selected index
  pairs as an array with one [row, column] pair per line.
  lap.lapjv is used when the lap package can be imported; otherwise the result
  comes from scipy.optimize.linear_sum_assignment.
  """
  try:
    import lap
    _, assigned_x, assigned_y = lap.lapjv(cost_matrix, extend_cost=True)
    # negative entries of assigned_x are skipped
    pairs = [[assigned_y[idx], idx] for idx in assigned_x if idx >= 0]
    return np.array(pairs)
  except ImportError:
    from scipy.optimize import linear_sum_assignment
    row_idx, col_idx = linear_sum_assignment(cost_matrix)
    return np.array(list(zip(row_idx, col_idx)))


def iou_batch(bb_test, bb_gt):
  """
  From SORT: computes the pairwise IOU between two sets of boxes given as
  [x1,y1,x2,y2,...] rows. The result has one row per box in bb_test and one
  column per box in bb_gt.
  """
  # add a broadcast axis to each set so every test box meets every gt box
  gt = np.expand_dims(bb_gt, 0)
  test = np.expand_dims(bb_test, 1)

  # corners of the intersection rectangle
  left = np.maximum(test[..., 0], gt[..., 0])
  top = np.maximum(test[..., 1], gt[..., 1])
  right = np.minimum(test[..., 2], gt[..., 2])
  bottom = np.minimum(test[..., 3], gt[..., 3])

  # a negative extent means the boxes do not overlap
  intersection = np.maximum(0., right - left) * np.maximum(0., bottom - top)

  area_test = (test[..., 2] - test[..., 0]) * (test[..., 3] - test[..., 1])
  area_gt = (gt[..., 2] - gt[..., 0]) * (gt[..., 3] - gt[..., 1])
  return intersection / (area_test + area_gt - intersection)


def convert_bbox_to_z(bbox):
  """
  Converts a box [x1,y1,x2,y2,...] into the column vector [x,y,s,r] of shape
  (4,1): x,y is the box centre, s the area (width*height) and r the aspect
  ratio (width/height).
  """
  x1, y1, x2, y2 = bbox[0], bbox[1], bbox[2], bbox[3]
  width = x2 - x1
  height = y2 - y1
  centre_x = x1 + width/2.
  centre_y = y1 + height/2.
  area = width * height
  ratio = width / float(height)
  return np.array([centre_x, centre_y, area, ratio]).reshape((4, 1))


def convert_x_to_bbox(x,score=None):
  """
  Takes a bounding box in the centre form [x,y,s,r] (s = area, r = width/height)
  and returns it in the form [x1,y1,x2,y2] where x1,y1 is the top left and x2,y2
  is the bottom right.

  Returns an array of shape (1,4), or (1,5) with score appended as the last
  column when a score is given.
  """
  # s*r = (w*h)*(w/h) = w^2, so the width is its square root
  w = np.sqrt(x[2] * x[3])
  h = x[2] / w
  # identity test: `score == None` would compare element-wise for array scores
  if score is None:
    return np.array([x[0]-w/2.,x[1]-h/2.,x[0]+w/2.,x[1]+h/2.]).reshape((1,4))
  else:
    return np.array([x[0]-w/2.,x[1]-h/2.,x[0]+w/2.,x[1]+h/2.,score]).reshape((1,5))


class KalmanBoxTracker(object):
  """
  This class represents the internal state of individual tracked objects observed as bbox.

  Each instance owns a KalmanFilter with a 7-element state and a 4-element
  measurement. The first four state entries are initialised from
  convert_bbox_to_z, i.e. [x, y, s, r]; the last three are the rate terms that
  the transition matrix adds to x, y and s on every step.
  """
  # class-wide counter; its current value becomes the id of the next tracker
  count = 0
  def __init__(self,bbox):
    """
    Initialises a tracker using initial bounding box.

    bbox is indexed as [x1,y1,x2,y2,...] by convert_bbox_to_z.
    """
    #define constant velocity model
    self.kf = KalmanFilter(dim_x=7, dim_z=4) 
    # F: state entries 0-2 each gain entries 4-6 per step; entry 3 has no rate term
    self.kf.F = np.array([[1,0,0,0,1,0,0],[0,1,0,0,0,1,0],[0,0,1,0,0,0,1],[0,0,0,1,0,0,0],  [0,0,0,0,1,0,0],[0,0,0,0,0,1,0],[0,0,0,0,0,0,1]])
    # H: the measurement is the first four state entries
    self.kf.H = np.array([[1,0,0,0,0,0,0],[0,1,0,0,0,0,0],[0,0,1,0,0,0,0],[0,0,0,1,0,0,0]])

    # rescale parts of the filter's R, P and Q matrices from their initial values
    self.kf.R[2:,2:] *= 10.
    self.kf.P[4:,4:] *= 1000. #give high uncertainty to the unobservable initial velocities
    self.kf.P *= 10.
    self.kf.Q[-1,-1] *= 0.01
    self.kf.Q[4:,4:] *= 0.01

    self.kf.x[:4] = convert_bbox_to_z(bbox)
    self.time_since_update = 0  # number of predict() calls since the last update()
    self.id = KalmanBoxTracker.count
    KalmanBoxTracker.count += 1
    self.history = []  # predicted boxes since the last update()
    self.hits = 0  # total number of update() calls
    self.hit_streak = 0  # update() calls since the streak was last reset in predict()
    self.age = 0  # total number of predict() calls

  def update(self,bbox):
    """
    Updates the state vector with observed bbox.

    Also resets time_since_update and the prediction history, and increments
    hits and hit_streak.
    """
    self.time_since_update = 0
    self.history = []
    self.hits += 1
    self.hit_streak += 1
    self.kf.update(convert_bbox_to_z(bbox))

  def predict(self):
    """
    Advances the state vector and returns the predicted bounding box estimate.

    The returned box is also appended to self.history.
    """
    # if adding the area rate would make the area non-positive, zero the rate first
    if((self.kf.x[6]+self.kf.x[2])<=0):
      self.kf.x[6] *= 0.0
    self.kf.predict()
    self.age += 1
    # a predict() that follows another predict() without an update() breaks the streak
    if(self.time_since_update>0):
      self.hit_streak = 0
    self.time_since_update += 1
    self.history.append(convert_x_to_bbox(self.kf.x))
    return self.history[-1]

  def get_state(self):
    """
    Returns the current bounding box estimate.

    The box is produced by convert_x_to_bbox from the filter state, so it has
    shape (1,4) in [x1,y1,x2,y2] form.
    """
    return convert_x_to_bbox(self.kf.x)


def associate_detections_to_trackers(detections,trackers,iou_threshold = 0.3):
  """
  Pairs detections with tracked objects (both given as bounding boxes) by IOU.
  Returns three arrays: matches, with one [detection index, tracker index] row
  per accepted pair, followed by the indices of the unmatched detections and of
  the unmatched trackers.
  """
  if len(trackers) == 0:
    # nothing to match against: every detection is unmatched
    no_matches = np.empty((0,2),dtype=int)
    no_trackers = np.empty((0,5),dtype=int)
    return no_matches, np.arange(len(detections)), no_trackers

  iou_matrix = iou_batch(detections, trackers)

  if min(iou_matrix.shape) == 0:
    matched_indices = np.empty(shape=(0,2))
  else:
    above = (iou_matrix > iou_threshold).astype(np.int32)
    # when thresholding already yields a one-to-one pairing, take it directly
    one_to_one = above.sum(1).max() == 1 and above.sum(0).max() == 1
    if one_to_one:
      matched_indices = np.stack(np.where(above), axis=1)
    else:
      matched_indices = linear_assignment(-iou_matrix)

  paired_dets = matched_indices[:,0]
  paired_trks = matched_indices[:,1]
  unmatched_detections = [d for d in range(len(detections)) if d not in paired_dets]
  unmatched_trackers = [t for t in range(len(trackers)) if t not in paired_trks]

  # pairs whose IOU is below the threshold are split up again
  accepted = []
  for pair in matched_indices:
    det_idx, trk_idx = pair[0], pair[1]
    if iou_matrix[det_idx, trk_idx] < iou_threshold:
      unmatched_detections.append(det_idx)
      unmatched_trackers.append(trk_idx)
    else:
      accepted.append(pair.reshape(1,2))

  matches = np.concatenate(accepted,axis=0) if len(accepted) > 0 else np.empty((0,2),dtype=int)

  return matches, np.array(unmatched_detections), np.array(unmatched_trackers)


class Sort(object):
  # Keeps a list of KalmanBoxTracker objects and updates it once per frame from
  # the detections passed to update().
  def __init__(self, max_age=1, min_hits=3, iou_threshold=0.3):
    """
    Sets key parameters for SORT

    max_age - a tracker is removed once its time_since_update exceeds this value
    min_hits - hit_streak a tracker needs before it is reported (not enforced
      while frame_count <= min_hits)
    iou_threshold - passed on to associate_detections_to_trackers
    """
    self.max_age = max_age
    self.min_hits = min_hits
    self.iou_threshold = iou_threshold
    self.trackers = []
    self.frame_count = 0

  def update(self, dets=np.empty((0, 5))):
    """
    Params:
      dets - a numpy array of detections in the format [[x1,y1,x2,y2,score],[x1,y1,x2,y2,score],...]
    Requires: this method must be called once for each frame even with empty detections (use np.empty((0, 5)) for frames without detections).
    Returns a similar array, where the last column is the object ID.
    NOTE: The number of objects returned may differ from the number of detections provided.
    """
    self.frame_count += 1
    # get predicted locations from existing trackers.
    trks = np.zeros((len(self.trackers), 5))
    to_del = []
    ret = []
    for t, trk in enumerate(trks):
      pos = self.trackers[t].predict()[0]
      trk[:] = [pos[0], pos[1], pos[2], pos[3], 0]
      # remember trackers whose prediction contains NaN so they can be dropped
      if np.any(np.isnan(pos)):
        to_del.append(t)
    # drop the rows that contain invalid values, and the trackers that produced NaN
    trks = np.ma.compress_rows(np.ma.masked_invalid(trks))
    for t in reversed(to_del):
      self.trackers.pop(t)
    matched, unmatched_dets, unmatched_trks = associate_detections_to_trackers(dets,trks, self.iou_threshold)

    # update matched trackers with assigned detections
    # (each row m is [detection index, tracker index])
    for m in matched:
      self.trackers[m[1]].update(dets[m[0], :])

    # create and initialise new trackers for unmatched detections
    for i in unmatched_dets:
        trk = KalmanBoxTracker(dets[i,:])
        self.trackers.append(trk)
    # walk the list from the end; i follows the index of trk so pop(i) removes trk itself
    i = len(self.trackers)
    for trk in reversed(self.trackers):
        d = trk.get_state()[0]
        # report only trackers updated in this call that have a long enough hit
        # streak (the streak requirement is waived during the first min_hits frames)
        if (trk.time_since_update < 1) and (trk.hit_streak >= self.min_hits or self.frame_count <= self.min_hits):
          ret.append(np.concatenate((d,[trk.id+1])).reshape(1,-1)) # +1 as MOT benchmark requires positive
        i -= 1
        # remove dead tracklet
        if(trk.time_since_update > self.max_age):
          self.trackers.pop(i)
    if(len(ret)>0):
      return np.concatenate(ret)
    return np.empty((0,5))


def parse_args():
    """Build the command-line parser of the SORT demo and parse sys.argv with it."""
    # one (flags, keyword arguments) entry per command-line option
    option_specs = [
        (('--display',), dict(dest='display', help='Display online tracker output (slow) [False]', action='store_true')),
        (('--seq_path',), dict(help="Path to detections.", type=str, default='data')),
        (('--phase',), dict(help="Subdirectory in seq_path.", type=str, default='train')),
        (('--max_age',), dict(help="Maximum number of frames to keep alive a track without associated detections.",
                              type=int, default=1)),
        (('--min_hits',), dict(help="Minimum number of associated detections before track is initialised.",
                               type=int, default=3)),
        (('--iou_threshold',), dict(help="Minimum IOU for match.", type=float, default=0.3)),
    ]
    parser = argparse.ArgumentParser(description='SORT demo')
    for flags, settings in option_specs:
        parser.add_argument(*flags, **settings)
    return parser.parse_args()

if __name__ == '__main__':
  # The MOT-benchmark command-line demo that used to live here had been disabled
  # by wrapping it in a string literal, so running this file as a script did
  # nothing. The dead code is dropped; running the file is still a no-op and the
  # tracker is meant to be used by importing Sort from this module.
  pass

Overwriting /content/yolov5/sort/sort.py


# Adding the SORT Tracker to the detection model with binary classes

In [None]:
%%writefile /content/yolov5/detect.py

import argparse
import time
from pathlib import Path

import cv2
import torch
import torch.backends.cudnn as cudnn
from numpy import random

from models.experimental import attempt_load
from utils.datasets import LoadStreams, LoadImages
from utils.general import check_img_size, non_max_suppression, apply_classifier, scale_coords, xyxy2xywh, \
    strip_optimizer, set_logging, increment_path
from utils.plots import plot_one_box
from utils.torch_utils import select_device, load_classifier, time_synchronized

from sort.sort import *

def detect(save_img=False):
    """Run the detector on every frame of ``opt.source`` and draw SORT tracks on the output.

    All settings are read from the module-level ``opt`` namespace (source, weights,
    img_size, conf_thres, iou_thres, classes, agnostic_nms, augment, device,
    project, name, exist_ok, view_img, save_txt). For each frame the detections
    are padded, passed to one shared Sort tracker, and every returned track is
    drawn with its track id plus the confidence and class name of the detection
    it overlaps most. Annotated frames are written below ``opt.project/opt.name``:
    as image files for image sources, otherwise into an 'mp4v' video.

    Args:
        save_img: kept for interface compatibility; it is overwritten with True,
            so results are always saved.
    """
    source, weights, view_img, save_txt, imgsz = opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size

    # Directories
    save_dir = Path(increment_path(Path(opt.project) / opt.name, exist_ok=opt.exist_ok))  # increment run
    (save_dir / 'labels' if save_txt else save_dir).mkdir(parents=True, exist_ok=True)  # make dir

    # Initialize
    set_logging()
    device = select_device(opt.device)
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    model = attempt_load(weights, map_location=device)  # load FP32 model
    imgsz = check_img_size(imgsz, s=model.stride.max())  # check img_size
    if half:
        model.half()  # to FP16

    # Set Dataloader
    vid_path, vid_writer = None, None

    save_img = True
    dataset = LoadImages(source, img_size=imgsz)

    # Get names and colors (ten colours; a track picks one by id modulo 10)
    names = model.module.names if hasattr(model, 'module') else model.names
    colors = [[random.randint(0, 255) for _ in range(3)] for _ in range(10)]

    # Run inference
    t0 = time.time()
    img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
    _ = model(img.half() if half else img) if device.type != 'cpu' else None  # run once

    # One tracker instance for the whole source, so ids persist from frame to frame
    mot_tracker = Sort(max_age=10, min_hits=10, iou_threshold=0.1)

    for path, img, im0s, vid_cap in dataset:
        # im0s: frame as loaded (BGR); img: the array prepared for the network
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        t1 = time_synchronized()
        pred = model(img, augment=opt.augment)[0]

        # Apply NMS
        pred = non_max_suppression(pred, opt.conf_thres, opt.iou_thres, classes=opt.classes, agnostic=opt.agnostic_nms)
        t2 = time_synchronized()

        # Process detections (pred holds one entry per image in the batch)
        for det in pred:
            p, s, im0 = Path(path), '', im0s

            save_path = str(save_dir / p.name)
            s += '%gx%g ' % img.shape[2:]  # print string

            if len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()

                # Columns: x1, y1, x2, y2, confidence, class
                det_np = det.cpu().detach().numpy()
                confidences = det_np[:, 4]
                labels = det_np[:, 5]

                # Work on a float64 copy: Tensor.numpy() can share memory with a CPU
                # tensor, so padding a view would also modify det.
                dets = det_np[:, :5].astype(np.float64)

                # Pad every box by 20 px on each side (easier IOU matching for SORT)
                dets[:, :2] -= 20
                dets[:, 2:4] += 20

                # Pull padded corners back inside the frame
                for row in dets:
                    if row[0] <= 0:
                        row[0] = 1
                    if row[1] <= 0:
                        row[1] = 1
                    if row[2] >= im0.shape[1]:
                        row[2] = im0.shape[1]
                    if row[3] >= im0.shape[0]:
                        row[3] = (im0.shape[0]-1)

                trackers = mot_tracker.update(dets)

                # Number of detections per class, for the status line
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += '%g %ss, ' % (n, names[int(c)])  # add to string

                # Write results. SORT returns rows [x1, y1, x2, y2, track id] whose
                # number and order need not match the detections, so each track takes
                # the confidence and class of the detection it overlaps most.
                if len(trackers) and (save_img or view_img):
                    best_det = iou_batch(trackers[:, :4], dets[:, :4]).argmax(axis=1)
                    for obj, j in zip(trackers, best_det):
                        label = '%s %.2f %s' % (int(obj[4]), confidences[j], names[int(labels[j])])
                        plot_one_box(np.array(obj[:4]), im0, label=label, color=colors[int(obj[4]%10)], line_thickness=3)

            else:
                # The tracker has to be stepped on frames without detections as well
                mot_tracker.update(np.empty((0, 5)))

            # Print time (inference + NMS)
            print('%sDone. (%.3fs)' % (s, t2 - t1))

            # Save results (image with detections)
            if save_img:
                if dataset.mode == 'images':
                    cv2.imwrite(save_path, im0)
                else:
                    if vid_path != save_path:  # new video
                        vid_path = save_path
                        if isinstance(vid_writer, cv2.VideoWriter):
                            vid_writer.release()  # release previous video writer

                        fourcc = 'mp4v'  # output video codec
                        fps = vid_cap.get(cv2.CAP_PROP_FPS)
                        w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                        h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        vid_writer = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*fourcc), fps, (w, h))
                    vid_writer.write(im0)

    # Close the last video so the output file is finalised
    if isinstance(vid_writer, cv2.VideoWriter):
        vid_writer.release()

    if save_txt or save_img:
        print('Results saved to %s' % save_dir)

    print('Done. (%.3fs)' % (time.time() - t0))



def plot_one_box(x, img, color=None, label=None, line_thickness=None):
    """Draw the box x (x[0..3] = x1, y1, x2, y2) onto img in place.

    A random colour is used when color is not given, and the line thickness is
    derived from the image size when line_thickness is not given. When label is
    given, it is drawn on a filled tag starting at the box's top-left corner.
    """
    thickness = line_thickness or round(0.002 * (img.shape[0] + img.shape[1]) / 2) + 1  # line/font thickness
    if not color:
        color = [random.randint(0, 255) for _ in range(3)]

    top_left = (int(x[0]), int(x[1]))
    bottom_right = (int(x[2]), int(x[3]))
    cv2.rectangle(img, top_left, bottom_right, color, thickness=thickness, lineType=cv2.LINE_AA)

    if not label:
        return

    font_thickness = max(thickness - 1, 1)
    text_size = cv2.getTextSize(label, 0, fontScale=thickness / 3, thickness=font_thickness)[0]
    # opposite corner of the filled tag that carries the text
    tag_corner = top_left[0] + text_size[0], top_left[1] - text_size[1] - 3
    cv2.rectangle(img, top_left, tag_corner, color, -1, cv2.LINE_AA)  # filled
    cv2.putText(img, label, (top_left[0], top_left[1] - 2), 0, thickness / 3, [225, 255, 255],
                thickness=font_thickness, lineType=cv2.LINE_AA)



if __name__ == '__main__':
    # Command-line options as (flags, keyword arguments) pairs, registered in this order.
    cli_options = [
        (('--weights',), dict(nargs='+', type=str, default='yolov5s.pt', help='model.pt path(s)')),
        (('--source',), dict(type=str, default='data/images', help='source')),  # file/folder, 0 for webcam
        (('--img-size',), dict(type=int, default=640, help='inference size (pixels)')),
        (('--conf-thres',), dict(type=float, default=0.25, help='object confidence threshold')),
        (('--iou-thres',), dict(type=float, default=0.45, help='IOU threshold for NMS')),
        (('--device',), dict(default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')),
        (('--view-img',), dict(action='store_true', help='display results')),
        (('--save-txt',), dict(action='store_true', help='save results to *.txt')),
        (('--save-conf',), dict(action='store_true', help='save confidences in --save-txt labels')),
        (('--classes',), dict(nargs='+', type=int, help='filter by class: --class 0, or --class 0 2 3')),
        (('--agnostic-nms',), dict(action='store_true', help='class-agnostic NMS')),
        (('--augment',), dict(action='store_true', help='augmented inference')),
        (('--update',), dict(action='store_true', help='update all models')),
        (('--project',), dict(default='runs/detect', help='save results to project/name')),
        (('--name',), dict(default='exp', help='save results to project/name')),
        (('--exist-ok',), dict(action='store_true', help='existing project/name ok, do not increment')),
    ]
    parser = argparse.ArgumentParser()
    for flags, settings in cli_options:
        parser.add_argument(*flags, **settings)

    # detect() reads its settings from this module-level namespace
    opt = parser.parse_args()
    print(opt)

    with torch.no_grad():
        if not opt.update:
            detect()
        else:
            # update all models (to fix SourceChangeWarning)
            for opt.weights in ['yolov5s.pt', 'yolov5m.pt', 'yolov5l.pt', 'yolov5x.pt']:
                detect()
                strip_optimizer(opt.weights)

Overwriting /content/yolov5/detect.py


# Evaluate model with tracker on video

In [None]:
# Run the tracking-enabled detect.py on a video; --conf 0.25 is the standard confidence threshold.
# NOTE(review): weights, source video and output name all point below /content/drive —
# presumably Google Drive has to be mounted first (no mount cell is visible in this part
# of the notebook); confirm.
%cd /content/yolov5/
!python detect.py --weights /content/drive/MyDrive/0thesis/Synch/Best/weights/best.pt --img 720 --conf 0.25 --source /content/drive/MyDrive/0thesis/Synch/Sped-Up_IR1.mp4 --name /content/drive/MyDrive/0thesis/Synch/4A_EO

# ---------------------

# Adding the SORT Tracker to the detection model with multiple classes

In [None]:
%%writefile /content/yolov5/detect.py

import argparse
import time
from pathlib import Path

import cv2
import numpy as np
import torch
import torch.backends.cudnn as cudnn
from numpy import random

from models.experimental import attempt_load
from utils.datasets import LoadStreams, LoadImages
from utils.general import check_img_size, non_max_suppression, apply_classifier, scale_coords, xyxy2xywh, \
    strip_optimizer, set_logging, increment_path
from utils.plots import plot_one_box
from utils.torch_utils import select_device, load_classifier, time_synchronized

from sort.sort import *

def _match_tracks_to_detections(tracks, dets):
    """Return, for each track row, the index of the detection row with the highest IoU.

    Args:
        tracks: array-like of shape (T, >=4); columns 0-3 are x1, y1, x2, y2.
        dets: array-like of shape (D, >=4) in the same layout. Must contain at
            least one row whenever ``tracks`` does.

    Returns:
        Integer ndarray of length T whose entries index rows of ``dets``.
    """
    # float64 so that area products cannot overflow a half-precision input
    tracks = np.asarray(tracks, dtype=np.float64)
    dets = np.asarray(dets, dtype=np.float64)
    if len(tracks) == 0:
        return np.zeros(0, dtype=int)

    trk = tracks[:, None, :4]  # (T, 1, 4)
    box = dets[None, :, :4]  # (1, D, 4)
    overlap_w = np.clip(np.minimum(trk[..., 2], box[..., 2]) - np.maximum(trk[..., 0], box[..., 0]), 0, None)
    overlap_h = np.clip(np.minimum(trk[..., 3], box[..., 3]) - np.maximum(trk[..., 1], box[..., 1]), 0, None)
    intersection = overlap_w * overlap_h
    trk_area = (trk[..., 2] - trk[..., 0]) * (trk[..., 3] - trk[..., 1])
    box_area = (box[..., 2] - box[..., 0]) * (box[..., 3] - box[..., 1])
    union = trk_area + box_area - intersection
    iou = intersection / np.maximum(union, 1e-9)  # guard against degenerate (zero-area) boxes
    return iou.argmax(axis=1)


def detect(save_img=False):
    """Run YOLOv5 detection on ``opt.source`` and draw SORT track boxes on every frame.

    All configuration is read from the module-level ``opt`` namespace (source,
    weights, image size, NMS thresholds, output project/name, ...). Input is
    always read through ``LoadImages``. Annotated frames are written with
    ``cv2.imwrite`` when ``dataset.mode == 'images'`` and otherwise appended to
    an ``mp4v`` video in the run directory.

    Each drawn box is labelled ``"<track id> <confidence> <class name>"``. The
    confidence and class are taken from the detection that overlaps the track
    the most (IoU) in the current frame.

    Note: ``--save-txt`` only causes the ``labels`` directory to be created;
    this variant does not write any label files.

    Args:
        save_img: Kept for interface compatibility. The value is ignored
            because annotated output is always saved.
    """
    source, weights, view_img, save_txt, imgsz = opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size

    # Directories
    save_dir = Path(increment_path(Path(opt.project) / opt.name, exist_ok=opt.exist_ok))  # increment run
    (save_dir / 'labels' if save_txt else save_dir).mkdir(parents=True, exist_ok=True)  # make dir

    # Initialize
    set_logging()
    device = select_device(opt.device)
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    model = attempt_load(weights, map_location=device)  # load FP32 model
    imgsz = check_img_size(imgsz, s=model.stride.max())  # check img_size
    if half:
        model.half()  # to FP16

    # Set Dataloader
    vid_path, vid_writer = None, None
    save_img = True  # annotated output is always written, whatever the caller passed
    dataset = LoadImages(source, img_size=imgsz)

    # Get names and colors
    names = model.module.names if hasattr(model, 'module') else model.names
    colors = [[random.randint(0, 255) for _ in range(3)] for _ in names]

    # Run inference
    t0 = time.time()
    img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
    _ = model(img.half() if half else img) if device.type != 'cpu' else None  # run once

    # SORT tracker instance shared by all frames of this run.
    # (An alternative configuration that was tried: max_age=1, min_hits=3, iou_threshold=0.3.)
    mot_tracker = Sort(max_age=10, min_hits=10, iou_threshold=0.1)
    box_pad = 20  # pixels added on every side of a detection before tracking (bigger boxes -> better IoU)

    try:
        for path, img, im0s, vid_cap in dataset:  # iterating calls dataset.__next__
            # im0s / im0: original BGR frame (e.g. 1280x720 for the video input)
            # img: RGB, contiguous array resized for detection

            print('\n\nFØR-------------------------')
            img = torch.from_numpy(img).to(device)
            img = img.half() if half else img.float()  # uint8 to fp16/32
            img /= 255.0  # 0 - 255 to 0.0 - 1.0
            if img.ndimension() == 3:
                img = img.unsqueeze(0)

            # Inference
            t1 = time_synchronized()
            pred = model(img, augment=opt.augment)[0]

            # Apply NMS
            pred = non_max_suppression(pred, opt.conf_thres, opt.iou_thres, classes=opt.classes,
                                       agnostic=opt.agnostic_nms)
            t2 = time_synchronized()

            # Process detections
            print('\nlen pred')
            print(len(pred))
            print(pred)
            print('\n')
            for det_idx, det in enumerate(pred):  # detections per image (one entry per input image)
                print('hva er så iter av det?: i, det')
                print(det_idx)
                print(det)

                p, s, im0 = Path(path), '', im0s

                save_path = str(save_dir / p.name)
                s += '%gx%g ' % img.shape[2:]  # print string

                # det columns: x1, y1, x2, y2, confidence, class
                if len(det):  # at least one detection in the current frame
                    print('Trackers 1 Blir Kjørt')

                    # Rescale boxes from img_size to im0 size
                    det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()

                    print("1240, 720 x,y,x,y conf, object_num (1-5)")
                    print(det[:, :4])

                    # Work on a float64 copy: padding must not write back into `det`, and
                    # float16 (CUDA half precision) overflows on box-area products.
                    det_np = det.detach().cpu().numpy().astype(np.float64)
                    dets = det_np[:, :5].copy()  # [[x1, y1, x2, y2, score], ...] in im0 pixels

                    # Enlarge every box, then keep it inside the frame
                    frame_h, frame_w = im0.shape[:2]
                    dets[:, :2] -= box_pad
                    dets[:, 2:4] += box_pad
                    dets[:, :2] = np.where(dets[:, :2] <= 0, 1, dets[:, :2])  # x1 / y1 out of frame
                    dets[:, 2] = np.minimum(dets[:, 2], frame_w - 1)  # x2 out of frame
                    dets[:, 3] = np.minimum(dets[:, 3], frame_h - 1)  # y2 out of frame

                    trackers = mot_tracker.update(dets)

                    # Attach class label and confidence back to each returned track. The
                    # tracker's output order is not the detection order, so associate by
                    # overlap instead of by position.
                    matched = _match_tracks_to_detections(trackers, dets)
                    labels = det_np[:, 5]
                    print(labels)

                    confidense = det_np[:, 4]
                    print(confidense)

                    # Append the number of detections per class to the log string
                    for c in det[:, -1].unique():
                        n = (det[:, -1] == c).sum()  # detections per class
                        s += '%g %ss, ' % (n, names[int(c)])  # add to string

                    # Draw one box per track: rows are [x1, y1, x2, y2, track id]
                    for obj, det_i in zip(trackers, matched):
                        if save_img or view_img:
                            track_id = int(obj[4])
                            label = '%s %.2f %s' % (track_id, confidense[det_i], names[int(labels[det_i])])
                            # Cycle through the available colours; there is one per class name.
                            plot_one_box(np.array(obj[:4]), im0, label=label,
                                         color=colors[track_id % len(colors)], line_thickness=3)

                else:  # no detections: still step the tracker with an empty detection array
                    print('Trackers 2 Blir Kjørt')
                    trackers = mot_tracker.update(np.empty((0, 5)))

                # Print time (inference + NMS)
                print('%sDone. (%.3fs)' % (s, t2 - t1))

                # Live preview (cv2.imshow) is disabled in this script; view_img is only
                # consulted above when deciding whether to draw boxes.

                # Save results (image with detections)
                if save_img:
                    if dataset.mode == 'images':
                        cv2.imwrite(save_path, im0)
                    else:
                        if vid_path != save_path:  # new video
                            vid_path = save_path
                            if isinstance(vid_writer, cv2.VideoWriter):
                                vid_writer.release()  # release previous video writer

                            fourcc = 'mp4v'  # output video codec
                            fps = vid_cap.get(cv2.CAP_PROP_FPS)  # vid_cap is the capture of the source video
                            w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                            h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                            vid_writer = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*fourcc), fps, (w, h))
                        vid_writer.write(im0)
    finally:
        # Release the writer even if a frame fails, so the output file is closed.
        if isinstance(vid_writer, cv2.VideoWriter):
            vid_writer.release()

    if save_txt or save_img:
        print('Results saved to %s' % save_dir)

    print('Done. (%.3fs)' % (time.time() - t0))



def plot_one_box(x, img, color=None, label=None, line_thickness=None):
    """Draw one bounding box, and optionally a filled label tag, onto ``img`` in place.

    Args:
        x: Indexable with four entries ``x1, y1, x2, y2``; each is truncated with ``int()``.
        img: Image passed straight to the OpenCV drawing calls. Its ``shape`` is
            only read when ``line_thickness`` is falsy.
        color: Colour passed to OpenCV. A random 3-component colour is generated
            when this is falsy.
        label: Text drawn in a filled tag at the box's first corner. Skipped when falsy.
        line_thickness: Line thickness in pixels. Derived from the image size when falsy.
    """
    if line_thickness:
        thickness = line_thickness
    else:
        # scale the stroke with the mean of the first two image dimensions
        thickness = round(0.002 * (img.shape[0] + img.shape[1]) / 2) + 1
    if not color:
        color = [random.randint(0, 255) for _ in range(3)]

    top_left = (int(x[0]), int(x[1]))
    bottom_right = (int(x[2]), int(x[3]))
    cv2.rectangle(img, top_left, bottom_right, color, thickness=thickness, lineType=cv2.LINE_AA)

    if not label:
        return

    font_thickness = max(thickness - 1, 1)
    font_scale = thickness / 3
    text_w, text_h = cv2.getTextSize(label, 0, fontScale=font_scale, thickness=font_thickness)[0]
    # the tag extends up and to the right from the box's first corner
    tag_corner = (top_left[0] + text_w, top_left[1] - text_h - 3)
    cv2.rectangle(img, top_left, tag_corner, color, -1, cv2.LINE_AA)  # filled
    cv2.putText(img, label, (top_left[0], top_left[1] - 2), 0, font_scale, [225, 255, 255],
                thickness=font_thickness, lineType=cv2.LINE_AA)



if __name__ == '__main__':
    # Command-line options as (flag, add_argument keyword options), listed in
    # the order they should appear in --help.
    cli_spec = [
        ('--weights', dict(nargs='+', type=str, default='yolov5s.pt', help='model.pt path(s)')),
        ('--source', dict(type=str, default='data/images', help='source')),  # file or folder
        ('--img-size', dict(type=int, default=640, help='inference size (pixels)')),
        ('--conf-thres', dict(type=float, default=0.25, help='object confidence threshold')),
        ('--iou-thres', dict(type=float, default=0.45, help='IOU threshold for NMS')),
        ('--device', dict(default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')),
        ('--view-img', dict(action='store_true', help='display results')),
        ('--save-txt', dict(action='store_true', help='save results to *.txt')),
        ('--save-conf', dict(action='store_true', help='save confidences in --save-txt labels')),
        ('--classes', dict(nargs='+', type=int, help='filter by class: --class 0, or --class 0 2 3')),
        ('--agnostic-nms', dict(action='store_true', help='class-agnostic NMS')),
        ('--augment', dict(action='store_true', help='augmented inference')),
        ('--update', dict(action='store_true', help='update all models')),
        ('--project', dict(default='runs/detect', help='save results to project/name')),
        ('--name', dict(default='exp', help='save results to project/name')),
        ('--exist-ok', dict(action='store_true', help='existing project/name ok, do not increment')),
    ]

    parser = argparse.ArgumentParser()
    for flag, options in cli_spec:
        parser.add_argument(flag, **options)

    # `opt` is a module-level name; detect() reads its settings from it.
    opt = parser.parse_args()
    print(opt)

    with torch.no_grad():
        if not opt.update:
            detect()
        else:
            # update all models (to fix SourceChangeWarning): run each stock
            # checkpoint once, then strip its optimizer state
            for checkpoint in ['yolov5s.pt', 'yolov5m.pt', 'yolov5l.pt', 'yolov5x.pt']:
                opt.weights = checkpoint
                detect()
                strip_optimizer(opt.weights)

Overwriting /content/yolov5/detect.py


In [None]:
# Scratch cell: prints a fixed string. Presumably a quick check that the kernel responds — TODO confirm, otherwise safe to delete.
print('Hey')

Hey
