<a href="https://colab.research.google.com/github/healthonrails/annolid/blob/main/docs/tutorials/yolov8_tracking_tutorial.ipynb" target="_blank"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>
# Annolid on YOLOv8
This notebook shows examples of how to upload a custom dataset, train a new model on that dataset, and run inference on the provided videos.


# Setup

Clone repo, install dependencies and check PyTorch and GPU.

In [None]:
# Clone the tracking repo (with its submodules) and make it the working
# directory; relative paths used by later cells resolve against it.
!git clone --recurse-submodules https://github.com/healthonrails/yolov8_tracking.git  # clone repo
%cd /content/yolov8_tracking
# Python packages used by the cells below, plus the repo's own requirements.
%pip install pycocotools
%pip install ultralytics
%pip install -qr requirements.txt  # install dependencies
%pip install thop                  # install dependencies
%pip install sentry_sdk
import torch
from IPython.display import Image, clear_output  # to display images
from google.colab import files
# Hide the clone/pip output, then report the torch version and the device in use
# (name of GPU 0 when CUDA is available, otherwise 'CPU').
clear_output()
print(f"Setup complete. Using torch {torch.__version__} ({torch.cuda.get_device_properties(0).name if torch.cuda.is_available() else 'CPU'})")

# Download FastSAM code and weights.
https://github.com/CASIA-IVA-Lab/FastSAM

In [None]:
# Clone the FastSAM sources into the current working directory.
!git clone https://github.com/CASIA-IVA-Lab/FastSAM.git

In [None]:
!wget https://huggingface.co/spaces/An-619/FastSAM/resolve/main/checkpoints/FastSAM.pt

In [None]:
!pip install -r FastSAM/requirements.txt
!pip install git+https://github.com/openai/CLIP.git

# Predict your video on a pretrained segmentation model

In [None]:
import numpy as np
import cv2
import matplotlib.pyplot as plt
from pycocotools import mask as mask_util

In [None]:
# Load the example frame. cv2.imread returns None instead of raising when the
# file is missing or unreadable, so fail early with a clear message.
image_path = '/content/R2202_02-10-2023_000100275.png'
image = cv2.imread(image_path)
if image is None:
    raise FileNotFoundError(f'Could not read image: {image_path}')
# Convert from OpenCV's BGR channel order to RGB for display.
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)


In [None]:
# Keep the frame size (first two shape entries: height, width) and preview it.
original_h, original_w = image.shape[:2]
plt.figure(figsize=(10, 10))
plt.imshow(image)

In [None]:
# Run FastSAM's inference script on the example frame with the downloaded
# checkpoint on device 0. The next cell loads the result from the output/
# folder under the working directory.
!python FastSAM/Inference.py  --model_path FastSAM.pt --img_path /content/R2202_02-10-2023_000100275.png --device 0

In [None]:
# Load the segmented frame written by the inference cell. cv2.imread returns
# None when the file does not exist, so check before converting.
result_path = '/content/yolov8_tracking/output/R2202_02-10-2023_000100275.png'
image = cv2.imread(result_path)
if image is None:
    raise FileNotFoundError(f'Could not read image: {result_path}')
# Convert from OpenCV's BGR channel order to RGB for display.
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
plt.figure(figsize=(10, 10))
plt.imshow(image)

In [None]:
# Path of the video that the prediction cells below read from.
source = '/content/birds_dance_bbc.mp4'  #@param predict on a video

In [None]:
# Open the video once to read its frame size; the mask encoder resizes every
# mask to (width, height).
video = cv2.VideoCapture(source)
try:
    # VideoCapture does not raise on a bad path; it just reports 0 x 0 frames.
    if not video.isOpened():
        raise FileNotFoundError(f'Could not open video: {source}')
    # Get the video width and height
    width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))
finally:
    # Release the video even if the check above fails
    video.release()

print(f'Video width: {width}')
print(f'Video height: {height}')

In [None]:
def numpy_mask_to_rle(np_mask):
    """Encode one instance mask as a COCO run-length encoding (RLE).

    The mask is resized to the notebook-level ``width`` x ``height`` (the video
    frame size) and binarised before encoding.

    Args:
        np_mask: 2-D mask array; values above 0.5 count as foreground.

    Returns:
        The RLE object returned by ``pycocotools.mask.encode``.
    """
    # Resizing interpolates, so edge pixels end up with fractional values.
    resized = cv2.resize(np_mask.astype(np.float32), (width, height))
    # pycocotools encodes *binary* masks; threshold back to {0, 1} so those
    # intermediate edge values cannot corrupt the run lengths.
    binary_mask = (resized > 0.5).astype(np.uint8)
    return mask_util.encode(np.asfortranarray(binary_mask))

In [None]:
from ultralytics import YOLO
try:
    # Module layout of the ultralytics release this notebook was written for.
    from ultralytics.yolo.utils.ops import scale_boxes, scale_image
except ImportError:
    # Newer ultralytics releases expose the same helpers from ultralytics.utils.ops.
    from ultralytics.utils.ops import scale_boxes, scale_image
# Load a model
model_name = 'yolov8n-seg.pt' #@param ['YOLOv8l-seg','yolov8n-seg.pt','yolov8x-seg.pt',"/content/yolov8_tracking/FastSAM.pt"]
model = YOLO(model_name)  # load an official model
class_names = model.names # 80 coco class names

# Predict with the model
results = model(source, stream=True)  # generator of Results objects
frame_number = 0
tracking_id = 0  # plain prediction assigns no identities; every row gets 0
tracking_results = []
for r in results:
    # Summary of the last detection in *this* frame, used only for the progress
    # line. Reset per frame so it is never undefined or left over from an
    # earlier frame.
    last_detection = None
    if r:
        boxes = r.boxes.cpu().numpy()  # Boxes object for bbox outputs
        # Masks object for segmentation outputs; None when the model has none.
        masks = r.masks.cpu().numpy() if r.masks is not None else None
        for i in range(boxes.shape[0]):
            bbox = boxes[i, :]
            box = bbox.xyxy[0]
            classes = bbox.cls
            scores = bbox.conf
            rles = numpy_mask_to_rle(masks.data[i, :, :]) if masks is not None else None
            tracking_results.append({
                'frame_number': frame_number,
                'x1': box[0],
                'y1': box[1],
                'x2': box[2],
                'y2': box[3],
                'instance_name': class_names[int(classes[0])],
                'class_score': scores[0],
                'segmentation': rles,
                'tracking_id': tracking_id,
            })
            last_detection = (scores[0], int(classes[0]), box, rles)
    else:
        # Keep one placeholder row so frames without detections stay in the table.
        tracking_results.append({
            'frame_number': frame_number,
            'x1': None,
            'y1': None,
            'x2': None,
            'y2': None,
            'instance_name': None,
            'class_score': None,
            'segmentation': None,
            'tracking_id': None,
        })
    # Progress line every 1000 frames.
    if frame_number % 1000 == 0:
        if last_detection is None:
            print(frame_number, 'no detections')
        else:
            print(frame_number, *last_detection)
    frame_number += 1


In [None]:
import pandas as pd

# One table row per detection record collected by the prediction loop.
df = pd.DataFrame.from_records(tracking_results)
df.head()

In [None]:
import os

from google.colab import files

# Name the CSV after the video. os.path.splitext strips only the final
# extension, so a dot elsewhere in the path (e.g. "/content/my.data/clip.mp4")
# no longer truncates the name the way source.split('.')[0] did.
tracking_csv = f"{os.path.splitext(source)[0]}_tracking_results.csv"
df.to_csv(tracking_csv)
files.download(tracking_csv)

# Upload and train on your custom dataset

In [None]:
# Prompt for the dataset archive; the first key of the returned mapping is used
# as the archive name.
upload = files.upload()
dataset = list(upload)[0]

In [None]:
#!unzip /content/datasets/test_mouse.zip -d /content/datasets/
!unzip $dataset -d /content/datasets

## Find the data.yaml file and change the train, val, and test paths to absolute paths

In [None]:
# find data.yaml file
import glob
import os  # os is otherwise first imported in a later cell; needed here for os.path

data_ymals = glob.glob('/content/datasets/*/data.yaml')
if not data_ymals:
    # Fail with an explicit message instead of an IndexError on data_ymals[0].
    raise FileNotFoundError('No data.yaml found under /content/datasets/*/')
print("The data.yaml file is located at: ", data_ymals)
dataset_dir = os.path.dirname(data_ymals[0])
print("The dataset directory is located at :", dataset_dir)

This function first searches for a data.yaml file in the specified root directory and its subdirectories. If the file is found, it reads its contents and replaces every line that contains the old_line text with the new_line argument. It then saves the modified file and prints a message confirming the replacement. If the file is not found, it raises a ValueError.

In [None]:
import os
def replace_yaml_lines(root_dir, old_line, new_line):
    """Replace lines of the first ``data.yaml`` found under ``root_dir``.

    Every line that contains ``old_line`` is replaced by ``new_line`` followed
    by a newline. The file is rewritten only when at least one line matched,
    and the printed message says whether anything was replaced.

    Args:
        root_dir: Directory searched (recursively) for ``data.yaml``.
        old_line: Text to look for inside each line.
        new_line: Full replacement line, without the trailing newline.

    Raises:
        ValueError: If no ``data.yaml`` exists under ``root_dir``.
    """
    # Find data.yaml file (first match in os.walk order)
    for root, _dirs, filenames in os.walk(root_dir):
        if 'data.yaml' in filenames:
            yaml_path = os.path.join(root, 'data.yaml')
            break
    else:
        raise ValueError('No data.yaml file found in root directory or subdirectories')

    with open(yaml_path, 'r') as f:
        lines = f.readlines()

    # Replace specified lines
    updated = [new_line + '\n' if old_line in line else line for line in lines]

    if updated == lines:
        # Nothing matched (e.g. the yaml has no such entry): leave the file
        # untouched and do not report a replacement that never happened.
        print(f'No line containing {old_line!r} in {yaml_path}; nothing changed')
        return

    with open(yaml_path, 'w') as f:
        f.writelines(updated)

    print(f'Replaced {old_line} with {new_line} in {yaml_path}')


In [None]:
# Point each split entry of data.yaml at an absolute folder inside the dataset.
# Each pair is (yaml key, folder it should point to); 'test' is mapped to the
# val folder.
for split_key, split_folder in (('train', 'train'), ('val', 'val'), ('test', 'val')):
    replace_yaml_lines(
        dataset_dir,
        f'{split_key}: ../{split_key}/images',
        f'{split_key}: {dataset_dir}/{split_folder}/images',
    )

## Train a new model based on the custom new dataset


In [None]:
from ultralytics import YOLO

# Load a model
#model = YOLO("yolov8n-seg.yaml")  # build a new model from scratch
# load a pretrained model (recommended for training)
model_name = "yolov8n-seg.pt" #@param ['yolov8n-seg.pt','yolov8x-seg.pt','yolov8n.pt','yolov8x.pt']
model = YOLO(model_name)
# Use the first data.yaml located by the dataset-discovery cell.
data_yaml_file = data_ymals[0]
# Number of training epochs (Colab form field).
num_epoches = 100 #@param
# Train the model
model.train(data=data_yaml_file, epochs=num_epoches)

# Download data

Get the test video from the repo and extract the first second of it, or upload your own video by clicking the file upload button under the Files menu on the left.

In [None]:

# upload your video or get the test video from the repo
# (-nc: skip the download if test.avi is already present)
!wget -nc https://github.com/mikel-brostrom/yolov8_tracking/releases/download/v.2.0/test.avi
# extract the first second of the video (-ss 00:00:00, -t 00:00:01) at 30 fps into out.avi;
# `yes |` answers ffmpeg's overwrite prompt when out.avi already exists
!yes | ffmpeg -ss 00:00:00 -i test.avi -t 00:00:01 -vf fps=30 out.avi

## Run inference on video

The ``cv2.imshow()`` and ``cv.imshow()`` functions from the [opencv-python](https://github.com/skvark/opencv-python) package are incompatible with Jupyter notebook; see https://github.com/jupyter/notebook/issues/3935.

Hence we chose to save the result to a file in this notebook. Locally, you can use the ``--show-vid`` flag in order to visualize the tracking in real time.

In [None]:
# Video to track (Colab form field).
video_file = '/content/directedsong_20221007_clip.mp4' #@param
# Run the repo's tracker on video_file with the yolov8m-seg weights and the
# OC-SORT tracking method on device 0. Results are saved to disk
# (--save-vid / --save-crop / --save-txt) rather than shown in a window.
!python track.py --yolo-weights yolov8m-seg.pt --tracking-method ocsort --reid-weights osnet_x0_25_msmt17.pt --device 0 --source $video_file --save-vid --save-crop --save-txt

# Download and predict a video from YouTube

In [None]:
!pip install pytube

In [None]:
from pytube import YouTube
import os

def downloadYouTube(videourl, path, filename=None):
    """Download the highest-resolution progressive mp4 stream of a YouTube video.

    Args:
        videourl: URL of the YouTube video.
        path: Directory to save into; created if it does not exist.
        filename: Optional file name for the saved video. When omitted, pytube
            chooses the name.

    Returns:
        Path of the downloaded file, as returned by pytube's ``Stream.download``.

    Raises:
        ValueError: If the video offers no progressive mp4 stream.
    """
    stream = (
        YouTube(videourl)
        .streams.filter(progressive=True, file_extension='mp4')
        .order_by('resolution')
        .desc()
        .first()
    )
    # .first() yields None when the filter matches nothing; fail with a clear
    # message instead of an AttributeError on None.download.
    if stream is None:
        raise ValueError(f'No progressive mp4 stream available for {videourl}')
    os.makedirs(path, exist_ok=True)
    return stream.download(output_path=path, filename=filename)

# Download the example video into /content.
# NOTE(review): the next cell expects /content/birds_dance_bbc.mp4 — confirm the
# downloaded file ends up under that name (rename it if necessary).
downloadYouTube('https://youtu.be/W7QZnwKqopo', '/content')

In [None]:
# Video to track and segmentation weights to use (Colab form fields).
video_file = '/content/birds_dance_bbc.mp4' #@param
model_name = 'yolov8x-seg.pt' #@param ['yolov8n-seg.pt','yolov8x-seg.pt','yolov8n.pt','yolov8x.pt']
# Run the tracker with the OC-SORT method on device 0 and save the video and
# text results. The following cell loads a per-video .txt from the run's
# tracks/ folder — presumably the --save-txt output; confirm against track.py.
!python track.py --yolo-weights $model_name --tracking-method ocsort --reid-weights osnet_x0_25_msmt17.pt --device 0 --source $video_file --save-vid --save-txt --retina-masks


In [None]:
import glob
import os

import pandas as pd
##https://github.com/mikel-brostrom/yolov8_tracking/blob/master/track.py#L243
## Write MOT compliant results to file with mask RLE
# define the column names
columns = ['Frame_ID', 'Object_ID', 'bbox_left', 'bbox_top', 'Width', 'Height', 'Confidence', 'Class_ID','Visibility_ratio','idx','segmentation']

# Run folders are numbered (this notebook references both exp and exp11), so a
# fixed ".../exp/..." path breaks or reads stale results once track.py has been
# run more than once. Search every exp* folder and take the newest tracks file
# for this video.
video_stem = os.path.basename(video_file).split(".")[0]
track_files = glob.glob(
    f'/content/yolov8_tracking/runs/track/exp*/tracks/{glob.escape(video_stem)}.txt'
)
if not track_files:
    raise FileNotFoundError(
        f'No tracks file for {video_stem!r} under /content/yolov8_tracking/runs/track/exp*/tracks/'
    )
tracks_path = max(track_files, key=os.path.getmtime)
df = pd.read_csv(tracks_path, header=None, delimiter='\t')

In [None]:
# Label the header-less tracks table with the MOT column names, then preview it.
df = df.set_axis(columns, axis="columns")
df.head()

### Please replace the name of the animal being tracked with the appropriate species name, such as mouse, fish, vole, or other relevant species.

In [None]:
# Name written into the instance_name column for every tracked row.
class_name = 'bird' #@param

In [None]:
# Columns, in order, of the table exported for Annolid.
annolid_columns = [
    'frame_number',
    'x1',
    'y1',
    'x2',
    'y2',
    'instance_name',
    'class_score',
    'segmentation',
    'tracking_id',
]

In [None]:
# Derive the Annolid columns from the MOT ones: corner coordinates from
# left/top + width/height, a constant instance name and score, and the
# object id as the tracking id.
df = df.assign(
    x1=df.bbox_left,
    y1=df.bbox_top,
    x2=df.bbox_left + df.Width,
    y2=df.bbox_top + df.Height,
    frame_number=df.Frame_ID,
    instance_name=f"{class_name}",
    class_score=1,
    segmentation=df.segmentation,
    tracking_id=df.Object_ID,
)



In [None]:
# Keep only the Annolid columns, in their export order, and preview the result.
df_annolid = df.loc[:, annolid_columns]
df_annolid.head()

In [None]:
import os

# Build "<video path without extension>_<model name>_tracking_results.csv".
# splitext strips only the final extension (a dot elsewhere in the path no
# longer truncates the name), and basename keeps a model given as a full path
# (e.g. ".../FastSAM.pt") from injecting directories into the file name.
video_stem = os.path.splitext(video_file)[0]
model_stem = os.path.splitext(os.path.basename(model_name))[0]
tracking_csv_file = f'{video_stem}_{model_stem}_tracking_results.csv'
df_annolid.to_csv(tracking_csv_file)
files.download(tracking_csv_file)

# Show results

https://stackoverflow.com/questions/60977179/how-to-play-avi-file-in-google-colab

https://stackoverflow.com/questions/57377185/how-play-mp4-video-in-google-colab

Compress the video file to a format supported by Google Colab (mpeg4 (native) -> h264 (libx264))

In [None]:
!ffmpeg -i /content/yolov8_tracking/runs/track/exp11/bird.mp4 -vf fps=30 -vcodec libx264 output.mp4

Get the file content into data_url

In [None]:
from IPython.display import HTML
from base64 import b64encode

# Read the re-encoded video and embed it as a base64 data URL. The `with`
# block closes the file handle, which the bare open(...).read() left open.
with open('output.mp4', 'rb') as video_fh:
    mp4 = video_fh.read()
data_url = "data:video/mp4;base64," + b64encode(mp4).decode()

Display it with HTML

In [None]:
# HTML5 player markup; %s is filled with the base64 data URL built above.
video_markup = """
<video controls>
      <source src="%s" type="video/mp4">
</video>
"""
HTML(video_markup % data_url)

## Reference: https://github.com/mikel-brostrom/yolov8_tracking