## Clone YOLOv3
Clone the GitHub repository containing the YOLOv3 PyTorch implementation by Ultralytics.

In [None]:
# Fetch the YOLOv3 code; the next cell changes into the resulting `yolov3` directory.
!git clone https://github.com/stegianna/yolov3.git

## Imports and Setups

In [None]:
%cd yolov3
import time
import glob
import torch
import os

import argparse
from sys import platform

from models import *
from utils.datasets import *
from utils.utils import *

from IPython.display import HTML
from base64 import b64encode

## Prepare YOLOv3 and Define Functions
The default configuration values, such as the image size, confidence threshold, and IoU threshold, are initialized here.

`process_video` captures each video frame, processes it, and writes it to a new video file. The function returns the path where the processed video (with bounding boxes) is located.



In [None]:
# Default configurations
cfg = 'cfg/yolov3-spp.cfg'
names = 'data/coco.names'
weights = 'weights/yolov3-spp-ultralytics.pt'
img_size = 416
conf_thresh = 0.3
iou_thresh = 0.6
person_class = [0]           # 0 correspond to class "person"
agnostic_nms = False         # by default

# Initialize
device = 'cuda:0' if torch.cuda.is_available() else 'cpu'

# Initialize model
model = Darknet(cfg, img_size)

# Load weights
attempt_download(weights)
if weights.endswith('.pt'):  # pytorch format
    model.load_state_dict(torch.load(weights, map_location=device)['model'])
else:  # darknet format
    load_darknet_weights(model, weights)

model.to(device).eval();

# Get names and colors
names = load_classes(names)
colors = [[random.randint(0, 255) for _ in range(3)] for _ in range(len(names))]

%cd .. 

def process_video(path_video, output_dir='output'):
    """Run person detection on every frame of a video and save an annotated copy.

    Each frame is letterboxed to ``img_size``, passed through the global
    ``model``, filtered with non-max suppression restricted to
    ``person_class``, and the surviving boxes are drawn on the original frame
    before it is written to the output file.

    Relies on the notebook-level globals ``model``, ``device``, ``img_size``,
    ``conf_thresh``, ``iou_thresh``, ``person_class``, ``agnostic_nms``,
    ``names`` and ``colors``.

    Args:
        path_video: Path of the input video, opened with ``cv2.VideoCapture``.
        output_dir: Directory for the annotated video; created if missing.

    Returns:
        Path of the written video: ``output_dir`` joined with the input's
        file name.

    Raises:
        IOError: If the input video cannot be opened.
    """
    os.makedirs(output_dir, exist_ok=True)

    cap = cv2.VideoCapture(path_video)
    if not cap.isOpened():
        cap.release()
        raise IOError(f"Cannot open video: {path_video}")

    save_path = os.path.join(output_dir, os.path.basename(path_video))
    vid_writer = None
    try:
        # Mirror the input's frame rate and frame size in the output file.
        fps = cap.get(cv2.CAP_PROP_FPS)
        w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        vid_writer = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*'MP4V'), fps, (w, h))

        # Inference only: no_grad avoids building autograd graphs for every frame.
        with torch.no_grad():
            while True:
                ok, img0 = cap.read()
                if not ok or img0 is None:  # end of stream
                    break

                # Padded resize
                img = letterbox(img0, new_shape=img_size)[0]

                # Convert
                img = img[:, :, ::-1].transpose(2, 0, 1)  # BGR to RGB, to 3xHxW
                img = np.ascontiguousarray(img)

                img = torch.from_numpy(img).to(device)
                img = img.float()  # uint8 to float
                img /= 255.0  # 0 - 255 to 0.0 - 1.0
                if img.ndimension() == 3:
                    img = img.unsqueeze(0)  # add batch dimension

                pred = model(img)[0]
                # Apply NMS, keeping only the requested classes
                pred = non_max_suppression(pred, conf_thresh, iou_thresh,
                                           classes=person_class, agnostic=agnostic_nms)

                # Process detections (one entry per image in the batch)
                for det in pred:
                    if det is not None and len(det):
                        # Rescale boxes from the letterboxed size to the original frame size
                        det[:, :4] = scale_coords(img.shape[2:], det[:, :4], img0.shape).round()

                        # Draw results; plot_one_box is expected to draw on img0 in place
                        for *xyxy, conf, cls in det:
                            label = '%s %.2f' % (names[int(cls)], conf)
                            plot_one_box(xyxy, img0, label=label, color=colors[int(cls)])

                vid_writer.write(img0)
    finally:
        # Always free the capture and finalize the output file, even on error.
        cap.release()
        if vid_writer is not None:
            vid_writer.release()

    return save_path


## Git clone to get video
The video is stored in the `input_video` folder of my GitHub repository.\
It is trimmed here, so the new version has a duration of 20 s.

In [None]:
%%bash
# Fetch the repository holding the input video (skipped when it is already
# cloned, so the cell can be re-run without git failing).
[ -d AI_project ] || git clone https://github.com/stegianna/AI_project.git
mkdir -p input_video
mkdir -p output_compressed
# Copy the first 20 seconds without re-encoding. -y overwrites a previous
# result instead of aborting; -nostdin keeps ffmpeg from waiting on stdin.
ffmpeg -y -nostdin -ss 00:00:0.0 -i AI_project/input_video/pedestrian.mp4 -c copy -t 00:00:20.0 input_video/pedestrian.mp4

## Process Video

In [None]:
import warnings
warnings.filterwarnings("ignore")  # keep library warnings out of the notebook output

# Run detection on the trimmed clip; `save_path` points at the annotated copy.
path_video = os.path.join("input_video","pedestrian.mp4")
save_path = process_video(path_video)

# Show the original (unprocessed) video, embedded as a base64 data URL.
# The context manager closes the file handle as soon as the bytes are read.
with open(path_video, 'rb') as video_file:
    mp4 = video_file.read()
data_url = "data:video/mp4;base64," + b64encode(mp4).decode()
HTML("""
<video width=700 controls>
      <source src="%s" type="video/mp4">
</video>
""" % data_url)


Before it can be shown, the processed video must be compressed with H.264 encoding. Unlike the original video, this step is necessary because the cv2 library has decoded the frames and re-encoded them (with the `MP4V` codec) when writing the output file.

In [None]:
import subprocess

# Re-encode the annotated video with H.264.
# Arguments are passed as a list (no shell), so paths with spaces are safe.
# check=True raises if ffmpeg fails instead of continuing with a missing file.
# -y overwrites a previous result; -nostdin keeps ffmpeg from waiting on stdin.
compressed_path = os.path.join("output_compressed", os.path.basename(save_path))
os.makedirs(os.path.dirname(compressed_path), exist_ok=True)
subprocess.run(
    ["ffmpeg", "-y", "-nostdin", "-i", save_path, "-vcodec", "libx264", compressed_path],
    check=True,
)

# Show the re-encoded video, embedded as a base64 data URL.
with open(compressed_path, 'rb') as video_file:
    mp4 = video_file.read()
data_url = "data:video/mp4;base64," + b64encode(mp4).decode()
HTML("""
<video width=700 controls>
      <source src="%s" type="video/mp4">
</video>
""" % data_url)