<a href="https://colab.research.google.com/github/kluless13/paper/blob/main/yolov7.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## 1. Install dependencies

#### 1.1. Installing dependencies

In [5]:
import os
# Root working directory used by every later cell (interpolated as {HOME} in the
# %cd / !wget lines below).
# NOTE(review): the path looks like a Google Drive mount point, but no cell shown
# here mounts Drive — confirm it is mounted before running.
HOME = "/content/gdrive/MyDrive"
print(HOME)

/content/gdrive/MyDrive


In [None]:
!pip install -r reqs1.txt
!pip install roboflow

## 2. Path to training  
There are five versions of YOLOv7, so there are five directories, each containing its own train/val/test.py scripts.

### 2.1. Training on the dataset

In [None]:
# Setting path to operational directory

# For yolov7 base:
 
%cd {HOME}
%mkdir yolov7-base
%cd {HOME}/yolov7-base
!gitclone 

# Getting the weights too
!wget -P {HOME}/yolov7-base https://github.com/WongKinYiu/yolov7/releases/download/v0.1/yolov7.pt

# Roboflow API into yolov7-base
from Roboflow import roboflow
# and so on

In [None]:
# Now that we have our data and scripts in the same place, we run training.
# Runs train.py from the current directory, starting from the 'yolov7.pt' weights
# with the cfg/training/yolov7.yaml model config, batch size 16, 55 epochs, on
# device 0. The dataset description is read from {dataset.location}/data.yaml.
# NOTE(review): `dataset` is not defined in any cell shown here — it must be
# created beforehand (presumably by the Roboflow download); confirm.

!python train.py --batch 16 --cfg cfg/training/yolov7.yaml --epochs 55 --data {dataset.location}/data.yaml --weights 'yolov7.pt' --device 0

### 2.2. Running evaluation metrics

The evaluation metrics are the F1 score, mean average precision (mAP), precision (P), recall (R), the precision–recall (PR) curve, and accuracy.

In [None]:
%cd {HOME}/yolov7-base

from IPython.display import Image

display(Image("{HOME}/yolov7/runs/train/exp/F1_curve.png", width=400, height=400)) #F1 score
display(Image("{HOME}/yolov7/runs/train/exp/PR_curve.png", width=400, height=400)) #PR curve
display(Image("{HOME}/yolov7/runs/train/exp/confusion_matrix.png", width=500, height=500)) # Confusion matrix
display(Image("{HOME}/yolov7/runs/train/exp/Accuracy.png", width=500, height=500)) # Accuracy
display(Image("{HOME}/yolov7/runs/train/exp/P_curve.png", width=500, height=500)) # Precision
display(Image("{HOME}/yolov7/runs/train/exp/R_curve.png", width=500, height=500)) # Recall
display(Image("{HOME}/yolov7/runs/train/exp/mAP_curve.png", width=500, height=500)) # mAP score

### 2.3. Running inferences on custom video

Provide the path to a video file so that the trained model can be seen in action on real footage.

In [None]:
%cd {HOME}/yolo7-base

video_path = ""

In [None]:
# Run the model over every frame of the input video, draw the detections on the
# frames and write the annotated frames to output.mp4.
#
# Expects from earlier cells:
#   video_path - path of the video to process
#   opt        - dict with the keys 'weights', 'img-size', 'device',
#                'conf-thres', 'iou-thres' and 'classes' (class names to keep;
#                empty or None keeps every class)
# NOTE(review): `opt` is not defined in any cell shown here — define it first.

import random

import cv2
import numpy as np
import torch

# Helpers from the YOLOv7 code base; importable when the working directory is
# the repository root.
# NOTE(review): module paths follow the upstream WongKinYiu/yolov7 layout —
# confirm if a fork is used.
from models.experimental import attempt_load
from utils.datasets import letterbox
from utils.general import check_img_size, non_max_suppression, scale_coords, set_logging
from utils.plots import plot_one_box
from utils.torch_utils import select_device

# Initializing video object
video = cv2.VideoCapture(video_path)
if not video.isOpened():
    # Fail early with a clear message instead of silently writing an empty file.
    video.release()
    raise IOError(f"Could not open video: {video_path!r}")

# Video information
fps = video.get(cv2.CAP_PROP_FPS)
w = int(video.get(cv2.CAP_PROP_FRAME_WIDTH))
h = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))
nframes = int(video.get(cv2.CAP_PROP_FRAME_COUNT))

# Initializing object for writing video output.
# 'mp4v' is the codec tag that matches the .mp4 container; the original 'DIVX'
# tag is typically rejected for mp4 by OpenCV's FFmpeg backend, which then falls
# back to 'mp4v' with a warning.
output = cv2.VideoWriter('output.mp4', cv2.VideoWriter_fourcc(*'mp4v'), fps, (w, h))

try:
    torch.cuda.empty_cache()

    # Initializing model and setting it for inference
    with torch.no_grad():
        weights, imgsz = opt['weights'], opt['img-size']
        set_logging()
        device = select_device(opt['device'])
        half = device.type != 'cpu'  # half precision only when not on CPU
        model = attempt_load(weights, map_location=device)  # load FP32 model
        stride = int(model.stride.max())  # model stride
        imgsz = check_img_size(imgsz, s=stride)  # check img_size
        if half:
            model.half()

        names = model.module.names if hasattr(model, 'module') else model.names
        # One random colour per class for the bounding boxes.
        colors = [[random.randint(0, 255) for _ in range(3)] for _ in names]
        if device.type != 'cpu':
            # One forward pass on a dummy input before processing real frames.
            model(torch.zeros(1, 3, imgsz, imgsz).to(device).type_as(next(model.parameters())))

        # Translate the requested class names into the model's class indices.
        # The original looked each name up in opt['classes'] itself, which gives
        # its position in the filter list (0, 1, 2, ...) rather than the class
        # index, so the wrong classes were kept.
        classes = None
        if opt['classes']:
            classes = [names.index(class_name) for class_name in opt['classes']]

        for j in range(nframes):
            ret, img0 = video.read()
            if not ret:
                # No more frames can be read (the reported frame count may be
                # larger than what is actually readable).
                break

            # Resize/pad the frame to the model input size.
            img = letterbox(img0, imgsz, stride=stride)[0]
            img = img[:, :, ::-1].transpose(2, 0, 1)  # BGR to RGB, HWC to CHW
            img = np.ascontiguousarray(img)
            img = torch.from_numpy(img).to(device)
            img = img.half() if half else img.float()  # uint8 to fp16/32
            img /= 255.0  # 0 - 255 to 0.0 - 1.0
            if img.ndimension() == 3:
                img = img.unsqueeze(0)  # add the batch dimension

            # Inference
            pred = model(img, augment=False)[0]
            pred = non_max_suppression(pred, opt['conf-thres'], opt['iou-thres'], classes=classes, agnostic=False)

            for det in pred:
                if len(det):
                    # Map the boxes from the resized input back to the original frame.
                    det[:, :4] = scale_coords(img.shape[2:], det[:, :4], img0.shape).round()

                    for *xyxy, conf, cls in reversed(det):
                        label = f'{names[int(cls)]} {conf:.2f}'
                        plot_one_box(xyxy, img0, label=label, color=colors[int(cls)], line_thickness=3)

            print(f"{j+1}/{nframes} frames processed")
            output.write(img0)
finally:
    # Always close the writer and the capture, even when inference fails;
    # otherwise output.mp4 may be left unfinalized.
    output.release()
    video.release()