In [1]:
import os
import sys
from pathlib import Path

import cv2
import numpy as np
import torch
import torch.backends.cudnn as cudnn
import easydict
from source.model.yolo.detect import run as yolo
import glob

In [2]:
args = easydict.EasyDict(
    {
        "source": './dataset/train/video_0000.mp4',# 전체 동영상 갖고오기: './dataset/train/*.mp4',
        # "save_crop": True,
        "imgsz": [1024, 1024],
        "device": '0, 1',
        "project": './dataset/train_yolo',
        "save_txt": True,
        # "classes": 0,
        "clip_len": 5,
    }
)

In [3]:
names = ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light',
        'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow',
        'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',
        'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard',
        'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple',
        'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch',
        'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 
        'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 
        'teddy bear', 'hair drier', 'toothbrush']

In [4]:
print(names[0], names[25], names[1], names[3])

person umbrella bicycle motorcycle


In [5]:
import pandas as pd

In [6]:
cls_li = {
    0: "driveway_walk", # person
    1: "fall_down", # person
    2: "fighting", # person
    3: "jay_walk", # person
    4: "normal", # nothing..?
    5: "putup_umbrella", # person + umbrella
    6: "ride_cycle", # person + bicycle
    7: "ride_kick", # person
    8: "ride_moto", # person + motorcycle
}

In [7]:
# make string of class to numbers.
def string_to_num(row):
    if row['class'] == cls_li[0]:
        row['class'] = 0
    elif row['class'] == cls_li[1]:
        row['class'] = 1
    elif row['class'] == cls_li[2]:
        row['class'] = 2
    elif row['class'] == cls_li[3]:
        row['class'] = 3
    elif row['class'] == cls_li[4]:
        row['class'] = 4
    elif row['class'] == cls_li[5]:
        row['class'] = 5
    elif row['class'] == cls_li[6]:
        row['class'] = 6
    elif row['class'] == cls_li[7]:
        row['class'] = 7
    elif row['class'] == cls_li[8]:
        row['class'] = 8
    return row

In [8]:
df = pd.read_csv("./dataset/train_data.csv").set_index('video_filename')
df = df.apply(string_to_num, axis='columns')
df = df.drop(['id'], axis=1)

In [9]:
df.head(5)

Unnamed: 0_level_0,class
video_filename,Unnamed: 1_level_1
video_0000.mp4,5
video_0001.mp4,7
video_0002.mp4,1
video_0003.mp4,0
video_0004.mp4,0


In [10]:
path = yolo(
    source=args.source,
    clip_len=args.clip_len,
    # save_crop=args.save_crop,
    imgsz=args.imgsz,
    device=args.device,
    project=args.project,
    save_txt=args.save_txt,
    # classes=args.classes,
)

YOLOv5 🚀 179335f torch 1.8.2 CUDA:0 (NVIDIA GeForce RTX 3090, 24268.3125MB)
                              CUDA: 1 (NVIDIA Graphics Device, 16117.3125MB)

Fusing layers... 
Model Summary: 213 layers, 7225885 parameters, 0 gradients


video 1/1 (1/75) /home/stephencha/Hub/ai-challenge/dataset/train/video_0000.mp4: 576x1024 2 persons, 4 cars, 1 traffic light, 2 umbrellas, Done. (0.008s)
video 1/1 (16/75) /home/stephencha/Hub/ai-challenge/dataset/train/video_0000.mp4: 576x1024 2 persons, 4 cars, 1 traffic light, 2 umbrellas, Done. (0.006s)
video 1/1 (31/75) /home/stephencha/Hub/ai-challenge/dataset/train/video_0000.mp4: 576x1024 2 persons, 4 cars, 1 traffic light, 3 umbrellas, Done. (0.006s)
video 1/1 (46/75) /home/stephencha/Hub/ai-challenge/dataset/train/video_0000.mp4: 576x1024 3 persons, 5 cars, 2 umbrellas, Done. (0.006s)
video 1/1 (61/75) /home/stephencha/Hub/ai-challenge/dataset/train/video_0000.mp4: 576x1024 3 persons, 4 cars, 1 traffic light, 2 umbrellas, Done. (0.006s)
Speed: 0.3ms pre-process, 6.0ms inference, 0.7ms NMS per image at shape (1, 3, 1024, 1024)
Results saved to [1mdataset/train_yolo[0m
5 labels saved to dataset/train_yolo/labels
[(0, 1, 2, [283.0, 135.0, 303.0, 149.0]), (0, 1, 2, [97.0, 122.0

In [11]:
filenames = os.listdir("/home/stephencha/Hub/ai-challenge/"+str(path)+"/labels")
filenames.sort()
for f in filenames:
    a = f.split("_", 2)
    b = a[2].split(".")
    print("video number: ", a[1])
    print("frame number: ", b[0])
    
    file = open(f, "r")
    while True:
        line = file.readline()
        line = line.split()
        if not line: break
        
        cls_obj = int(line[0])
        centre_obj_x = float(line[1])
        centre_obj_y = float(line[2])
        width = float(line[3])
        height = float(line[4])
        print("class: ", names[cls_obj], ", Center Coordinate: ", (centre_obj_x, centre_obj_y), ", Width and Height: ", (width, height))
    

IndexError: list index out of range