# DFL benchmark - training

In [7]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sn
# from IPython.display import Video
import cv2

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

# Configuration

In [8]:
DEBUG = False
ALLDATA_USE = False

In [9]:
class CFG:    
    IMAGE_SIZE = 256
    NUM_IMAGES = 8
    INPUT_CHANNEL = 3
    
    SAVE_IMAGE_INTERVAL = 1.5

## train valid data setting

In [10]:
err_tol = {
    'challenge': [ 0.30, 0.40, 0.50, 0.60, 0.70 ],
    'play': [ 0.15, 0.20, 0.25, 0.30, 0.35 ],
    'throwin': [ 0.15, 0.20, 0.25, 0.30, 0.35 ]
}

if DEBUG:
    video_id_split = {
        'val':['3c993bd2_0'],
        'train':['1606b0e6_0']
    }
else:
    video_id_split = {
        'val':['3c993bd2_0','3c993bd2_1'],
        'train':['1606b0e6_0','1606b0e6_1']
    }
    
#                  '35bd9041_0','35bd9041_1',
#                  '407c5a9e_1','4ffd5986_0','cfbe2e94_0','cfbe2e94_1',
#                  '9a97dae4_1','ecf251d4_0',]
    
    
if ALLDATA_USE:
    video_id_split = {
        'val':['3c993bd2_0','3c993bd2_1'],
        'train':['1606b0e6_0','1606b0e6_1','35bd9041_0','35bd9041_1',
                 '407c5a9e_1','4ffd5986_0','cfbe2e94_0','cfbe2e94_1',
                 '9a97dae4_1','ecf251d4_0']
    }

event_names = ['challenge', 'throwin', 'play']

# Load Data

In [11]:
df = pd.read_csv("/workdir/work/input/train.csv")
df = df[["video_id", "time", "event"]]
display(df)

Unnamed: 0,video_id,time,event
0,1606b0e6_0,200.265822,start
1,1606b0e6_0,201.150000,challenge
2,1606b0e6_0,202.765822,end
3,1606b0e6_0,210.124111,start
4,1606b0e6_0,210.870000,challenge
...,...,...,...
11213,ecf251d4_0,3056.587000,challenge
11214,ecf251d4_0,3058.072895,end
11215,ecf251d4_0,3068.280519,start
11216,ecf251d4_0,3069.547000,throwin


In [12]:
result_dir = f"/workdir/work/output/3dcnn_traindata"
if not os.path.exists(result_dir):
    os.makedirs(result_dir, exist_ok=True)

In [13]:
def extract_training_images(args):
    saved_frame_list = []
    saved_img_list = []
    video_id, split = args
    video_path = f"/workdir/work/input/train/{video_id}.mp4"
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        TODO
    fps = cap.get(cv2.CAP_PROP_FPS)
    total_frame = cap.get(cv2.CAP_PROP_FRAME_COUNT)
    time_interval = 1/fps
    df_video = df[df.video_id == video_id]

    #crr_statu => background, play, challenge, throwin
    time_and_event = df_video[['time','event']].values
    print(f"total frame={total_frame}, {len(time_and_event)} events")

    event_idx = 0
    frame_num = 0
    while True:
        if DEBUG and frame_num > 100:
            break
        
        current_time = frame_num/fps
        next_event_time = time_and_event[event_idx, 0]
        if current_time >= next_event_time:
            event_idx += 1
            if event_idx >= len(time_and_event):
                break
            next_event_time = time_and_event[event_idx, 0]
        # current_event = time_and_event[event_idx, 1]
    
        if time_and_event[event_idx, 1] == 'start':
            crr_status = 'background'
        elif time_and_event[event_idx, 1] == 'end':
            crr_status = time_and_event[event_idx-1, 1]
        else:
            crr_status = time_and_event[event_idx, 1]
        
        cap.set(cv2.CAP_PROP_POS_FRAMES, frame_num)
#         file_name_list = []
        file_name_list = ""
        for i in range(CFG.NUM_IMAGES):
            successed, img = cap.read()
            if not successed:
                break
            save_frame = frame_num + i
            out_file = f'{result_dir}/{video_id}_{save_frame:06}.jpg'
            if not save_frame in saved_img_list:
                cv2.imwrite(out_file, img)
                saved_img_list.append(save_frame)
            file_name = f"{video_id}_{save_frame:06}.jpg"
            if len(file_name_list) == 0:
                file_name_list = out_file
            else:
                file_name_list += "," + out_file
#             file_name_list.append([file_name])

        if i == CFG.NUM_IMAGES - 1:
            saved_frame_list.append([video_id, frame_num, file_name_list, crr_status])
        
        if crr_status == "back_ground":
            frame_num += int(fps*CFG.SAVE_IMAGE_INTERVAL)
        else:
            frame_num += 1
    
    return saved_frame_list

In [14]:
saved_df = pd.DataFrame()

for split in video_id_split:
    video_ids = video_id_split[split]
    print(video_ids[0])
    for video_id in video_ids:            
        saved_list = extract_training_images([video_id, split])
        if len(saved_df) > 0:
            tmp = pd.DataFrame(saved_list, columns=["video_id", "frame", "file_name", "event"])
            saved_df = pd.concat([saved_df, tmp])
        else:
            saved_df = pd.DataFrame(saved_list, columns=["video_id", "frame", "file_name", "event"])
display(saved_df)
saved_df.to_csv("/workdir/work/output/3dcnn_train_images.csv", index=False)
print('done')

3c993bd2_0
total frame=89750.0, 1042 events
total frame=88760.0, 966 events
1606b0e6_0
total frame=85915.0, 1000 events
total frame=85138.0, 1249 events


Unnamed: 0,video_id,frame,file_name,event
0,3c993bd2_0,0,/workdir/work/output/3dcnn_traindata/3c993bd2_...,background
1,3c993bd2_0,1,/workdir/work/output/3dcnn_traindata/3c993bd2_...,background
2,3c993bd2_0,2,/workdir/work/output/3dcnn_traindata/3c993bd2_...,background
3,3c993bd2_0,3,/workdir/work/output/3dcnn_traindata/3c993bd2_...,background
4,3c993bd2_0,4,/workdir/work/output/3dcnn_traindata/3c993bd2_...,background
...,...,...,...,...
84689,1606b0e6_1,84689,/workdir/work/output/3dcnn_traindata/1606b0e6_...,play
84690,1606b0e6_1,84690,/workdir/work/output/3dcnn_traindata/1606b0e6_...,play
84691,1606b0e6_1,84691,/workdir/work/output/3dcnn_traindata/1606b0e6_...,play
84692,1606b0e6_1,84692,/workdir/work/output/3dcnn_traindata/1606b0e6_...,play


done


In [15]:
saved_df["event"].value_counts()

background    221634
play           81774
challenge      15001
throwin         3440
Name: event, dtype: int64

In [16]:
saved_df["file_name"].iloc[0]

'/workdir/work/output/3dcnn_traindata/3c993bd2_0_000000.jpg,/workdir/work/output/3dcnn_traindata/3c993bd2_0_000001.jpg,/workdir/work/output/3dcnn_traindata/3c993bd2_0_000002.jpg,/workdir/work/output/3dcnn_traindata/3c993bd2_0_000003.jpg,/workdir/work/output/3dcnn_traindata/3c993bd2_0_000004.jpg,/workdir/work/output/3dcnn_traindata/3c993bd2_0_000005.jpg,/workdir/work/output/3dcnn_traindata/3c993bd2_0_000006.jpg,/workdir/work/output/3dcnn_traindata/3c993bd2_0_000007.jpg'