# YOLOv5 ball detection
### References

* YOLOv5 repository - https://github.com/ultralytics/yolov5
* https://www.kaggle.com/code/eneszvo/yolov5-helmet-detection-train-and-inference

In [1]:
!nvidia-smi

Failed to initialize NVML: Unknown Error


In [2]:
import torch
# Report the torch version and the compute device that will be used:
# the name of CUDA device 0 when CUDA is available, otherwise 'CPU'.
device_label = torch.cuda.get_device_properties(0).name if torch.cuda.is_available() else 'CPU'
print(f"Setup complete. Using torch {torch.__version__} ({device_label})")

Setup complete. Using torch 1.10.2+cu113 (CPU)


In [3]:
import os
import gc
import cv2
import numpy as np
import pandas as pd
from tqdm import tqdm
from shutil import copyfile
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

import subprocess

import glob

In [4]:
class CFG:
    """Static settings shared by the detection and cropping cells."""

    # Experiment name; also used as the detect.py --project directory.
    EXP_NAME = "BALL_DET_SAMPLE_SAVECONF"

    # Inference size passed to detect.py, and the frame size (pixels) that
    # frames are resized to before cropping.
    IMG_SIZE = 1280
    IMG_WIDTH = 1280
    IMG_HEIGHT = 768
    CONF_THR = 0.25

    YOLO_PATH = "./yolov5"
    # Trained weights file; located in the yolov5 folder.
    MODEL_PATH = "yolov5l6_trained_600images.pt"

    # Clip identifier and the matching movie file (in the yolov5 folder).
    movie_id = "08fd33_0"
    input_movie = movie_id + ".mp4"
    orig_movie = "../input/clips/" + input_movie

    # Directory holding the per-frame detection label files.
    DET_PATH = "./" + EXP_NAME + "/exp/labels"

    # Root directory for the cropped images.
    CROPED_IMG_DIR = "../croped_image"
    

In [5]:
!ls

Dockerfile  cuda-keyring_1.0-1_all.deb	requirements.txt  work
README.md   docker-compose.yaml		run.sh


In [6]:
%cd ./work/yolov5

/workdir/work/yolov5


In [7]:
!ls

08fd33_0.mp4		  classify	       requirements.txt
BALL_DET_SAMPLE_SAVECONF  data		       setup.cfg
CONTRIBUTING.md		  detect.py	       train.py
DFL			  export.py	       tutorial.ipynb
LICENSE			  hubconf.py	       utils
README.md		  models	       val.py
__pycache__		  output_ball_det.mp4  yolov5l6_trained_600images.pt


# YOLOv5 Inference with clip/08fd33_0.mp4

In [8]:
# !python detect.py --img {CFG.IMG_SIZE} \
#                   --weights {CFG.MODEL_PATH} \
#                   --source {CFG.input_movie} \
#                   --project {CFG.EXP_NAME} \
#                   --save-txt \
#                   --save-conf

# ボールの移動距離が長いところだけ切り取ってクロップしたい

In [9]:
def yolobbox_to_pixel(bbox_, WIDTH=CFG.IMG_WIDTH, HEIHGT=CFG.IMG_HEIGHT):
    """Convert a normalized YOLO box into pixel corner coordinates.

    Args:
        bbox_: Sequence whose first four items are
            (x_center, y_center, width, height), each normalized to the
            image size (the YOLO label layout).
        WIDTH: Image width in pixels.
        HEIHGT: Image height in pixels (spelling kept for compatibility
            with existing keyword callers).

    Returns:
        [x_min, y_min, x_max, y_max] as ints, in pixels.
    """
    x_center, y_center, box_w, box_h = bbox_[:4]
    # The box extends half its size on each side of the centre; adding the
    # full width/height would double the box.
    half_w = box_w / 2
    half_h = box_h / 2
    x_min = int((x_center - half_w) * WIDTH)
    y_min = int((y_center - half_h) * HEIHGT)
    x_max = int((x_center + half_w) * WIDTH)
    y_max = int((y_center + half_h) * HEIHGT)
    return [x_min, y_min, x_max, y_max]

In [85]:
def _slide_into_range(low, high, limit):
    """Shift the interval (low, high) to lie within [0, limit], keeping its length."""
    if low < 0:
        # Capture the overshoot BEFORE zeroing `low`; otherwise `high`
        # would be shifted by 0 and the window would shrink.
        return 0, high - low
    if high > limit:
        overshoot = high - limit
        return low - overshoot, limit
    return low, high


def yolobbox_to_croparea(bbox_, WIDTH=CFG.IMG_WIDTH, HEIHGT=CFG.IMG_HEIGHT):
    """Compute the crop rectangle centred on a detected ball.

    The rectangle is (WIDTH // 4) * 2 pixels wide and (HEIHGT // 4) * 2
    pixels high, centred on the box centre. When it would extend past an
    image edge it is slid back inside the image so its size is preserved.

    Args:
        bbox_: Sequence whose first two items are the normalized
            (x_center, y_center) of the ball.
        WIDTH: Image width in pixels.
        HEIHGT: Image height in pixels (spelling kept for compatibility
            with existing keyword callers).

    Returns:
        [x_min, y_min, x_max, y_max] as ints, in pixels.
    """
    x_mid = int(bbox_[0] * WIDTH)
    y_mid = int(bbox_[1] * HEIHGT)

    # Half-extent of the crop window along each axis.
    CROP_AREA_X = WIDTH // 4
    CROP_AREA_Y = HEIHGT // 4

    x_min, x_max = _slide_into_range(x_mid - CROP_AREA_X, x_mid + CROP_AREA_X, WIDTH)
    y_min, y_max = _slide_into_range(y_mid - CROP_AREA_Y, y_mid + CROP_AREA_Y, HEIHGT)

    return [x_min, y_min, x_max, y_max]

In [86]:
def calculate_distance(bbox1_, bbox2_):
    """Return the squared Euclidean distance between two box centres.

    Only the first two items (x, y) of each box are used, and no square
    root is taken. The original author noted this may be revised later.
    """
    dx = bbox1_[0] - bbox2_[0]
    dy = bbox1_[1] - bbox2_[1]
    return dx**2 + dy**2

In [87]:
def calculate_speed(bbox1_, bbox2_):
    """Return a speed score for the ball between two boxes.

    For now the (squared) distance between the two box centres is treated
    as the speed. The vertical term is multiplied by a rough correction
    factor because vertical on-screen movement corresponds to a longer
    real-world distance than horizontal movement.
    """
    height_rate = 4  # ad-hoc weight applied to the vertical component
    dx = bbox1_[0] - bbox2_[0]
    dy = bbox1_[1] - bbox2_[1]
    horizontal_term = dx**2
    vertical_term = height_rate * dy**2
    return horizontal_term + vertical_term

In [98]:
def pickup_play_scene(bbox_frames_list_, ball_bbox_, image_, speed_thr=0.0010):
    """Update the ball-box history and report whether the ball moved.

    The history window length is read from the module-level
    ``calspeed_frame_num``.

    Args:
        bbox_frames_list_: Ball boxes from the frames preceding the current
            one, oldest first. Not mutated; a new list is returned.
        ball_bbox_: Ball box detected in the current frame.
        image_: Current frame image. Currently unused; kept so existing
            callers keep working.
        speed_thr: Threshold on the value returned by ``calculate_speed``.

    Returns:
        (bbox_frames_list_, is_move_detected):
            bbox_frames_list_ is the history with ``ball_bbox_`` appended,
            holding at most ``calspeed_frame_num`` entries (the newest ones).
            is_move_detected is True when the largest speed between the
            current box and any retained earlier box exceeds ``speed_thr``.
    """
    window = max(int(calspeed_frame_num), 1)
    # Keep the NEWEST (window - 1) earlier boxes so that, once the current
    # box is appended, the history holds at most `window` entries. The
    # previous slice [1:window] discarded the most recent entry instead.
    history = bbox_frames_list_[-(window - 1):] if window > 1 else []

    speed = 0
    for previous_bbox in history:
        speed = max(calculate_speed(previous_bbox, ball_bbox_), speed)

    history.append(ball_bbox_)
    is_move_detected = speed > speed_thr

    return history, is_move_detected

In [121]:
def CropImage_and_MakeFrameBboxList(movie_id_, frame_id_, fps_, bbox_frames_list_, framebbox_list_, saved_frame_list_, rewind_time=1.0):
    """Crop and save the frames leading up to a detected ball movement.

    Rewinds the module-level ``cap`` video capture by up to ``rewind_time``
    seconds, crops each not-yet-saved frame around its ball box, writes the
    crop as a JPEG under ``CFG.CROPED_IMG_DIR/<movie_id_>/`` and records the
    frame. The capture position is set back to ``frame_id_`` before
    returning.

    Args:
        movie_id_: Clip identifier, used in the output directory and file names.
        frame_id_: Frame position at which movement was detected.
        fps_: Frames per second of the clip.
        bbox_frames_list_: Recent ball boxes, oldest first; the last entry
            is paired with frame ``frame_id_ - 1``.
        framebbox_list_: Accumulated ``[frame, *bbox]`` rows; appended in place.
        saved_frame_list_: Frames already cropped and saved; appended in place.
        rewind_time: How far back to go, in seconds.

    Returns:
        (framebbox_list_, saved_frame_list_) after the update.
    """
    rewind_idx = int(rewind_time * fps_)
    if frame_id_ - rewind_idx < 1:
        rewind_idx = frame_id_
    # Never reach further back than the box history goes; otherwise the
    # negative index below raises IndexError.
    rewind_idx = min(rewind_idx, len(bbox_frames_list_))
    # NOTE(review): pairing frames with history entries assumes one history
    # entry per consecutive frame — confirm against the caller.

    image_save_dir = f"{CFG.CROPED_IMG_DIR}/{movie_id_}"
    first_frame = frame_id_ - rewind_idx

    for offset, cropping_frame in enumerate(range(first_frame, frame_id_)):
        # Skip frames that were already cropped and saved.
        if cropping_frame in saved_frame_list_:
            continue

        # The history index is derived from the loop offset, so a skipped
        # or unreadable frame can no longer shift the pairing of later frames.
        bbox = bbox_frames_list_[offset - rewind_idx]
        crop_area = yolobbox_to_croparea(bbox)  # crop rectangle in pixels

        # Rewind the video to the frame to crop and read it.
        cap.set(cv2.CAP_PROP_POS_FRAMES, cropping_frame)
        ret, frame_image = cap.read()
        if not ret:
            continue

        # Crop the requested area from the resized frame.
        frame_image = cv2.resize(frame_image, dsize=(CFG.IMG_WIDTH, CFG.IMG_HEIGHT))
        croped_image = frame_image[crop_area[1]:crop_area[3], crop_area[0]:crop_area[2], :]

        # Create the output directory if needed and save the crop.
        os.makedirs(image_save_dir, exist_ok=True)
        croped_image_filename = f"{image_save_dir}/croped_{movie_id_}_{cropping_frame}.jpg"
        if not cv2.imwrite(croped_image_filename, croped_image):
            continue

        # Record the frame only once its image is actually on disk, so the
        # row list and the saved-frame list stay consistent with the files.
        framebbox_list_.append([cropping_frame, *bbox])
        saved_frame_list_.append(cropping_frame)

    cap.set(cv2.CAP_PROP_POS_FRAMES, frame_id_)  # restore the current frame position

    return framebbox_list_, saved_frame_list_

In [131]:
# Walk through the clip frame by frame, pick the ball box from each frame's
# detection label file, and crop/save the preceding frames whenever the ball
# is judged to have moved. Finally write the list of cropped frames as CSV.
bbox_files = glob.glob(f"{CFG.DET_PATH}/*.txt")
bbox_file_set = set(bbox_files)  # constant-time lookup inside the frame loop

movie_id = CFG.movie_id
cap = cv2.VideoCapture(f"{movie_id}.mp4")
frame_num = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
fps = int(cap.get(cv2.CAP_PROP_FPS))

bbox_frames_list = []  # ball boxes of the most recent frames
framebbox_list = []  # rows of [frame_id, *bbox]
saved_frame_list = []  # frame ids that were already saved
calspeed_frame_num = 25
# Ball box chosen in the previous detected frame. It must live outside the
# loop: resetting it every iteration made the nearest-box branch unreachable.
bbox_tmp = []
# Debug limit: stop once the capture position exceeds this value.
# Set to None to process the whole clip.
debug_max_frame_pos = 30

print(f"Cropping image if ball is moved. Movie_id={movie_id}, frames={frame_num}.")
try:
    for frame_idx in tqdm(range(frame_num)):
        frame_id = frame_idx + 1
        # Read the frame image (each cap.read() call advances by one frame).
        ret, image = cap.read()
        if not ret:
            print(f"frame:{frame_id} doesn's exist.")
            continue

        det_file_name = f"{CFG.DET_PATH}/{CFG.movie_id}_{frame_id}.txt"
        if det_file_name not in bbox_file_set:
            print("not detected.")
            continue

        # Each non-empty line is "<class> <x> <y> <w> <h> <conf>"; drop the
        # class id and keep the numeric fields.
        with open(det_file_name, encoding="utf-8") as f:
            detections = [
                np.array(line.split()[1:], dtype=float)
                for line in f
                if line.strip()
            ]
        if not detections:
            print("not detected.")
            continue

        if len(detections) == 1:
            chosen = detections[0]
        elif len(bbox_tmp) == 0:
            # First detection: take the box with the highest confidence.
            chosen = max(detections, key=lambda det: det[4])
        else:
            # Afterwards: take the box closest to the previous ball position.
            chosen = min(detections, key=lambda det: calculate_distance(det, bbox_tmp))
        ball_bbox = chosen[0:4]

        # Remember this position for the next frame.
        bbox_tmp = ball_bbox
        # NOTE(review): the original comment said the previous value should
        # be reused when nothing is detected, but undetected frames are
        # skipped entirely — confirm the intended behaviour.
        bbox_frames_list, is_move_detected = pickup_play_scene(bbox_frames_list, ball_bbox, image)

        # When movement is detected, crop and save the frames from the
        # rewind span (set inside the function) up to the current frame.
        if is_move_detected:
            framebbox_list, saved_frame_list = CropImage_and_MakeFrameBboxList(movie_id, frame_id, fps, bbox_frames_list, framebbox_list, saved_frame_list)

        if debug_max_frame_pos is not None and cap.get(cv2.CAP_PROP_POS_FRAMES) > debug_max_frame_pos:
            break
finally:
    # Always free the video handle, even if the loop raises.
    cap.release()

# The box layout is x_center, y_center, width, height, so the width column
# comes before the height column.
croped_frame_df = pd.DataFrame(framebbox_list, columns=["frame_id", "bbox_xmid", "bbox_ymid", "bbox_width", "bbox_height"])
croped_csvfile_name = f"{CFG.CROPED_IMG_DIR}/{movie_id}/croped_frame.csv"
# The directory does not exist yet when no frame was cropped.
os.makedirs(os.path.dirname(croped_csvfile_name), exist_ok=True)
croped_frame_df.to_csv(croped_csvfile_name, index=False)

Cropping image if ball is moved. Movie_id=08fd33_0, frames=750.


  4% 30/750 [00:02<00:48, 14.92it/s]
