# 動画とアノテーションデータからBBox付き動画を作成

In [4]:
import os
import json
import numpy as np
import cv2 as cv
import random
from timeit import default_timer as timer
from PIL import Image, ImageFont, ImageDraw

In [76]:
path_labels    = r'D:\SIGNATE\Signate_3rd_AI_edge_competition\test\annotation\Akiba3.mp4.json'   

classes = ['Car', 'Pedestrian', 'Truck', 'Signal']
#classes = ['車', '歩行者']

In [None]:
import pandas as pd

data       = json.load(open(path_labels))
test = path_labels.split("\\")[-1].split(".")

annotetion_name =test[0] + "." + test[1]
print(annotetion_name)

ids_counter = np.zeros((4, 100000), dtype=int)

Box_Frame = []
video_fps = 29.97

for v in range(0,int(video_fps) * 120):
    BBoxes = []
    labels       = data[annotetion_name][v]

    for c in range (0, len(classes)):
        try:
            for inst in data[annotetion_name][v][classes[c]]:
                box           = inst['box2d']

                if ((box[2]-box[0])*(box[3]-box[1])) < 900: #we do not want small boxes
                    print("除外 :v={}, c={}, box[2]-box[0]={}, box[3]-box[1]={}".format(v,c,box[2]-box[0],box[3]-box[1]))
                    continue

                act_id        = int(inst['id'])
                id_counter     = ids_counter[c,act_id]
                ids_counter[c,act_id] += 1

                BBox_Dict = {
                    "class": c, 
                    "id":act_id, 
                    "BBox":box
                    }

                BBoxes.append(BBox_Dict)
                print(" v={}, c={} :".format(v,c))

        except Exception as e:
            print("v={}, c={} :{}".format(v,c,e))
            continue #nothing, the class is just not presented in the frame

    Box_Frame.append(BBoxes)    

In [80]:
import colorsys
hsv_tuples = [(x / len(classes), 1., 1.) for x in range(len(classes))]
colors = list(map(lambda x: colorsys.hsv_to_rgb(*x), hsv_tuples))
colors = list(map(lambda x: (int(x[0] * 255), int(x[1] * 255), int(x[2] * 255)), colors))
colors = [(255, 0, 255), (0, 255, 255)]

In [81]:
print(colors)

[(255, 0, 255), (0, 255, 255)]


In [82]:
from PIL import Image, ImageFont, ImageDraw
def detect_image(image, BBox):
    start = timer()

    size1=np.floor(3e-2 * image.size[1] + 0.5).astype('int32')

    font = ImageFont.truetype(font='fonts/arial.ttf', size=np.floor(3e-2 * image.size[1] + 0.5).astype('int32'))
    thickness = (image.size[0] + image.size[1]) // 300


    for i in range(len(BBox)):

        class_no = BBox[i]["class"]
        predicted_class = classes[class_no]
        box = BBox[i]["BBox"]

        label = '{}'.format(int(BBox[i]["id"]))
        draw = ImageDraw.Draw(image)
        label_size = draw.textsize(label, font)

        left, top, right, bottom = box

        top = max(0, np.floor(top + 0.5).astype('int32'))
        left = max(0, np.floor(left + 0.5).astype('int32'))
        bottom = min(image.size[1], np.floor(bottom + 0.5).astype('int32'))
        right = min(image.size[0], np.floor(right + 0.5).astype('int32'))
        print(label, (left, top), (right, bottom))

        if top - label_size[1] >= 0:
            text_origin = np.array([left, top - label_size[1]])
        else:
            text_origin = np.array([left, top + 1])

        # My kingdom for a good redistributable image drawing library.
        for i in range(thickness):
            draw.rectangle([left + i, top + i, right - i, bottom - i], outline=colors[class_no])

        draw.rectangle([tuple(text_origin), tuple(text_origin + label_size)], fill=colors[class_no])
        
        draw.text(text_origin, label, fill=(0, 0, 0), font=font)
        del draw

    end = timer()
    print(end - start)
    return image

In [74]:
def detect_video(video_path, output_path):
    import cv2
    vid = cv2.VideoCapture(video_path)
    if not vid.isOpened():
        raise IOError("Couldn't open webcam or video")
    video_FourCC    = int(vid.get(cv2.CAP_PROP_FOURCC))
    video_fps       = vid.get(cv2.CAP_PROP_FPS)
    video_size      = (int(vid.get(cv2.CAP_PROP_FRAME_WIDTH)),
                        int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT)))
    print("video_fps={}, video_size={}".format(video_fps, video_size))
    isOutput = True if output_path != "" else False
    if isOutput:
        print("!!! TYPE:", type(output_path), type(video_FourCC), type(video_fps), type(video_size))
        out = cv2.VideoWriter(output_path, video_FourCC, video_fps, video_size)
    accum_time = 0
    curr_fps = 0
    fps = "FPS: ??"
    prev_time = timer()
    

    
    #for v in range(0,600):
    for v in range(0,int(video_fps) * 120):
        return_value, frame = vid.read()
        image = Image.fromarray(frame)
        image = detect_image(image, Box_Frame[v])
        
        result = np.asarray(image)
        
        curr_time = timer()
        exec_time = curr_time - prev_time
        prev_time = curr_time
        accum_time = accum_time + exec_time
        curr_fps = curr_fps + 1
        
        if accum_time > 1:
            accum_time = accum_time - 1
            fps = "FPS: " + str(curr_fps)
            curr_fps = 0
        cv2.putText(result, text="", org=(3, 15), fontFace=cv2.FONT_HERSHEY_SIMPLEX,
                    fontScale=0.50, color=(255, 0, 0), thickness=1)
        cv2.namedWindow("result", cv2.WINDOW_NORMAL)
        cv2.imshow("result", result)
        if isOutput:
            out.write(result)
            print("\r frame={} write(result)".format(v), end="")
            
        if cv2.waitKey(1) & 0xFF == ord('q'):
            print("break")
            break
    print("\n Finish!!")

In [None]:
detect_video(video_path=r"D:\SIGNATE\Signate_3rd_AI_edge_competition\test\video\Akiba3.mp4", output_path=r"D:\SIGNATE\Signate_3rd_AI_edge_competition\out_movie\out_Akiba3.mp4")

In [112]:
from yolo import YOLO, detect_video

## 動画切り出し

長い動画をカットします。

In [59]:
def detect_video(video_path, output_path):
    import cv2
    vid = cv2.VideoCapture(video_path)
    if not vid.isOpened():
        raise IOError("Couldn't open webcam or video")
    video_FourCC    = int(vid.get(cv2.CAP_PROP_FOURCC))
    video_fps       = vid.get(cv2.CAP_PROP_FPS)
    video_size      = (int(vid.get(cv2.CAP_PROP_FRAME_WIDTH)),
                        int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT)))
    print("video_fps={}, video_size={}".format(video_fps, video_size))
    isOutput = True if output_path != "" else False
    if isOutput:
        print("!!! TYPE:", type(output_path), type(video_FourCC), type(video_fps), type(video_size))
        out = cv2.VideoWriter(output_path, video_FourCC, video_fps, video_size)
    accum_time = 0
    curr_fps = 0
    fps = "FPS: ??"
    prev_time = timer()
    
    for v in range(0, int(video_fps) * 180):
        return_value, frame = vid.read()
        if v < (int(video_fps) * 60):
            continue
        
        image = Image.fromarray(frame)
        
        result = np.asarray(image)
        
        curr_time = timer()
        exec_time = curr_time - prev_time
        prev_time = curr_time
        accum_time = accum_time + exec_time
        curr_fps = curr_fps + 1
        
        if accum_time > 1:
            accum_time = accum_time - 1
            fps = "FPS: " + str(curr_fps)
            curr_fps = 0
        cv2.putText(result, text="", org=(3, 15), fontFace=cv2.FONT_HERSHEY_SIMPLEX,
                    fontScale=0.50, color=(255, 0, 0), thickness=1)
        cv2.namedWindow("result", cv2.WINDOW_NORMAL)
        cv2.imshow("result", result)
        if isOutput:
            out.write(result)
            #out.write(frame)
            print("\r frame={} write(result)".format(v), end="")
            
        if cv2.waitKey(1) & 0xFF == ord('q'):
            print("break")
            break
    print("\n Finish!!")

In [60]:
detect_video(video_path=r"D:\SIGNATE\Signate_3rd_AI_edge_competition\test\video\Akiba2018.mp4", output_path=r"D:\SIGNATE\Signate_3rd_AI_edge_competition\test\video\Akiba3.mp4")

video_fps=29.97002997002997, video_size=(1280, 720)
!!! TYPE: <class 'str'> <class 'int'> <class 'float'> <class 'tuple'>
 frame=5219 write(result)
 Finish!!
