# 导入第三方库

In [None]:
import os
import cv2
import ast
import json
import subprocess
from glob import glob
from tqdm.notebook import tqdm
from pprint import pprint
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from IPython.display import Video

### 参数设置

In [None]:
# Root directory of the competition input data (train.csv and train_images/
# are read from below this path).
INPUT_PATH = '../input/tensorflow-great-barrier-reef'
HEIGHT = 720 # image height; default frame height used by make_video
WIDTH  = 1280 # image width; default frame width used by make_video

# 输入数据

In [None]:
# Load the training table and show it together with its column summary.
df_train = pd.read_csv(INPUT_PATH + '/train.csv')
display(df_train)
# DataFrame.info() prints the summary itself and returns None, so wrapping it
# in print() would only add a stray "None" line to the output.
df_train.info()

In [None]:
# Count, per video, how many frames have / do not have annotations.
# At this point 'annotations' still holds the raw CSV strings, so a frame
# without any annotation is exactly the string "[]".
for video_id in df_train['video_id'].unique():
    video_annotations = df_train.loc[df_train['video_id'] == video_id, 'annotations']
    is_empty = video_annotations == "[]"
    print(f'video_id: {video_id}')
    # "w" = with annotations -> rows that are NOT "[]"
    print(f'w   annotations:  {(~is_empty).sum()}')
    # "w/o" = without annotations -> rows that ARE "[]"
    print(f'w/o annotations:  {is_empty.sum()}\n')

In [None]:
# Parse the 'annotations' column from its string form into Python lists.
df_train['annotations'] = df_train['annotations'].map(ast.literal_eval)

# Build the image path of every frame: <root>/train_images/video_<id>/<frame>.jpg
video_dirs = INPUT_PATH + '/train_images/video_' + df_train['video_id'].astype(str)
frame_files = df_train['video_frame'].astype(str) + ".jpg"
df_train['image_path'] = video_dirs + '/' + frame_files

# Number of bounding boxes annotated on each frame.
df_train['num_bboxes'] = df_train['annotations'].map(len)
display(df_train)

In [None]:
# Find the largest per-frame bbox count and the rows that reach it.
max_num_bboxes = df_train['num_bboxes'].max()
has_max_bboxes = df_train['num_bboxes'] == max_num_bboxes
indexes = df_train.index[has_max_bboxes].values
print(f'Maximum number of bboxes in an image: {max_num_bboxes}')
display(df_train.iloc[indexes])

In [None]:
# indexes[0] and indexes[1] are consecutive frames, so keep indexes[0] and
# indexes[2] as the two sample frames instead.
indexes = [indexes[0], indexes[2]]

# 范例图片

In [None]:
def get_bboxes(annotations):
    """
    Convert annotations to corner-format bounding boxes.

    annotations: list of annotations, each providing 'x', 'y', 'width'
                 and 'height'
    return: [] when there are no annotations, otherwise an int32 array with
            one row per box as [x_min, y_min, x_max, y_max]
    """
    if len(annotations)==0:
        return []
    frame = pd.DataFrame(annotations, columns=['x', 'y', 'width', 'height'])
    # Take an explicit copy: the array exposed by a DataFrame can be read-only
    # (pandas Copy-on-Write), and the conversion below writes in place.
    boxes = frame.astype(np.int32).to_numpy().copy()
    # [x_min, y_min, w, h] -> [x_min, y_min, x_max, y_max]
    boxes[:, 2] += boxes[:, 0]
    boxes[:, 3] += boxes[:, 1]
    return boxes

def plot_img_and_bbox(img_path, annotations):
    """
    Show an image with its bounding boxes drawn and numbered.

    img_path: path to the image file
    annotations: list of annotations for this image (may be empty)
    """
    img = cv2.imread(img_path)
    # OpenCV loads images as BGR; matplotlib expects RGB
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    fig, ax = plt.subplots(1, 1, figsize=(16,10))
    if len(annotations)>0:
        bboxes = get_bboxes(annotations)
        for i, box in enumerate(bboxes):
            # plain ints so OpenCV receives ordinary Python coordinates
            x_min, y_min, x_max, y_max = (int(v) for v in box)
            # put bbox on image
            cv2.rectangle(img,
                          (x_min, y_min),
                          (x_max, y_max),
                          color = (255, 0, 0),
                          thickness = 2)
            # numbering (1-based), placed just above the box
            ax.text(x_min, y_min-5, str(i+1), color='red')

    ax.set_axis_off()
    ax.imshow(img)


def zoom_bbox(img_path, annotations):
    """
    Show a zoomed-in crop of every bounding box in a grid, 6 crops per row.

    img_path: path to the image file
    annotations: list of annotations for this image; nothing is plotted
                 when it is empty
    """
    img = cv2.imread(img_path)
    # OpenCV loads images as BGR; matplotlib expects RGB
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    bboxes = get_bboxes(annotations)
    if len(bboxes) == 0:
        # no boxes -> no grid (a 0-row subplot grid is invalid)
        return

    col = 6
    # true division before ceil so that a partial last row is kept
    row = int(np.ceil(len(bboxes) / col))
    # squeeze=False keeps `ax` two-dimensional even when there is one row
    fig, ax = plt.subplots(row, col, figsize=(16,9), squeeze=False)
    for cnt, cell in enumerate(ax.flat):
        cell.set_axis_off()
        if cnt >= len(bboxes):
            # unused cell in the last row: leave it blank
            continue
        bbox = bboxes[cnt]
        sliced_img = img[bbox[1]:bbox[3], bbox[0]:bbox[2]]
        cell.imshow(sliced_img)
        cell.set_title(cnt+1, color='red')
    plt.show()

In [None]:
# Visualise the selected sample frames: for each one, show the full image
# with its boxes, then a grid of zoomed-in crops of those boxes.
samples = df_train.iloc[indexes].copy()
for idx, row in samples.iterrows():
    img_path    = row['image_path']
    annotations = row['annotations']
    print('image_id:', row['image_id'])
    # plot image with bboxes
    plot_img_and_bbox(img_path, annotations)
    # plot zoom of bboxes
    zoom_bbox(img_path, annotations)

# 制作视频
围绕 bbox 数量最多的图像，各生成一段 300 帧的视频。


In [None]:
def get_img_with_annotations(img_path, annotations):
    """
    Load an image and draw its bounding boxes plus a caption on it.

    img_path: image path of the form .../video_<video_id>/<frame_id>.jpg
    annotations: list of annotations for this image (may be empty)
    return: the image as loaded by OpenCV (BGR, not converted to RGB) with
            the boxes and an 'image_id / #bbox' caption drawn in red
    """
    img = cv2.imread(img_path)

    # Rebuild the image id "<video_id>-<frame_id>" from the path components.
    path_parts = img_path.split('/')
    video_id = path_parts[-2].split('_')[-1]
    frame_id = path_parts[-1].split('.')[0]
    img_id = video_id + '-' + frame_id

    red_bgr = (0, 0, 255)  # red in OpenCV's BGR channel order

    if len(annotations)>0:
        for box in get_bboxes(annotations):
            top_left = (box[0], box[1])
            bottom_right = (box[2], box[3])
            cv2.rectangle(img, top_left, bottom_right, color=red_bgr, thickness=2)

    # caption: image id and number of boxes
    caption = f'image_id: {img_id}, #bbox: {len(annotations)}'
    cv2.putText(img,
                caption,
                org=(30, 50),
                color=red_bgr,
                fontFace=cv2.FONT_HERSHEY_SIMPLEX,
                fontScale=1.0,
                thickness=3)

    return img

def make_video(df, video_id, start_frame, end_frame, fps=15, width=WIDTH, height=HEIGHT):
    '''
    Render a range of annotated frames of one video to an H.264 mp4 file.

    df          : DataFrame with 'video_id', 'video_frame', 'image_path'
                  and 'annotations' columns
    video_id    : 0, 1, or 2
    start_frame : video_frame at start of video (included)
    end_frame   : video_frame at end of video (excluded)
    fps         : frames per second of the output video
    width       : frame width passed to the video writer
    height      : frame height passed to the video writer
    return      : path to video
    raises      : IndexError if start_frame or end_frame does not exist
                  for video_id
    '''
    video_path = f'video_{video_id}_{start_frame}_to_{end_frame}.mp4' # video after encode
    tmp_path = 'tmp_' + video_path # video before encode (removed after encode)

    # Resolve the frame range BEFORE opening the writer, so that a missing
    # frame fails without leaving an open writer or a stray temp file.
    df = df[df['video_id']==video_id].reset_index(drop=True)
    start_idx = df[df['video_frame']==start_frame].index[0]
    end_idx   = df[df['video_frame']==end_frame].index[0]
    df = df.iloc[start_idx:end_idx]

    try:
        video = cv2.VideoWriter(tmp_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, (width, height))
        try:
            for _, row in tqdm(df.iterrows(), total=len(df)):
                frame = get_img_with_annotations(row['image_path'], row['annotations'])
                video.write(frame)
        finally:
            # always finalise the container, even if a frame failed
            video.release()

        # remove a previous output so ffmpeg does not stop on an existing file
        if os.path.exists(video_path):
            os.remove(video_path)

        # encode by ffmpeg command
        subprocess.run(
            ['ffmpeg',
             '-i', tmp_path,
             '-loglevel', 'quiet',
             '-crf', '18',
             '-preset', 'veryfast',
             '-vcodec', 'libx264',
             video_path]
        )
    finally:
        # the intermediate file is never useful after this call, success or not
        if os.path.exists(tmp_path):
            os.remove(tmp_path)

    return video_path

In [None]:
# Build one video per sample frame, spanning from 100 frames before it to
# 200 frames after it.
video_paths = []
for idx in indexes:
    video_id     = df_train.at[idx, 'video_id']
    center_frame = df_train.at[idx, 'video_frame']
    start_frame  = center_frame - 100 # peek before 100 frames
    end_frame    = center_frame + 200 # peek after 200 frames
    print(f'video_id: {video_id}, video_frame: {start_frame} to {end_frame}')
    print('Create video ...')
    video_path = make_video(
        df_train, video_id=video_id, start_frame=start_frame, end_frame=end_frame
    )
    video_paths.append(video_path)

### 第一个视频

In [None]:
# Display the first generated video at 70% of the original frame size.
Video(video_paths[0], width=WIDTH*0.7, height=HEIGHT*0.7)

<span style="font-size: 120%;">The image changes very little between id=1-9071 and id=1-9072 (at around 3 seconds into this video), yet the number of bboxes jumps from 4 to 7 as shown below, so some starfish are evidently not annotated in id=1-9071. </span>

In [None]:
# Show the two frames side by side to compare their annotations.
img_ids = ['1-9071', '1-9072']
fig, ax = plt.subplots(1, 2, figsize=(20,10))
for i, img_id in enumerate(img_ids):
    # Exact equality: str.contains() does regex/substring matching and would
    # also match longer ids that merely contain this one (e.g. '1-90710').
    is_target = df_train['image_id'] == img_id
    img_path    = df_train.loc[is_target, 'image_path'].values[0]
    annotations = df_train.loc[is_target, 'annotations'].values[0]
    img = get_img_with_annotations(img_path, annotations)
    # the annotated image is BGR (OpenCV); matplotlib expects RGB
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    ax[i].imshow(img)
    ax[i].set_axis_off()
plt.show()

### 第二个视频

In [None]:
# Display the second generated video at 70% of the original frame size.
Video(video_paths[1], width=WIDTH*0.7, height=HEIGHT*0.7)

类似地，在id=2-5715和5721之间，bbox的数量从5个更改为8个

In [None]:
# Show the two frames side by side to compare their annotations.
img_ids = ['2-5715', '2-5721']
fig, ax = plt.subplots(1, 2, figsize=(20,10))
for i, img_id in enumerate(img_ids):
    # Exact equality: str.contains() does regex/substring matching and would
    # also match longer ids that merely contain this one (e.g. '2-57150').
    is_target = df_train['image_id'] == img_id
    img_path    = df_train.loc[is_target, 'image_path'].values[0]
    annotations = df_train.loc[is_target, 'annotations'].values[0]
    img = get_img_with_annotations(img_path, annotations)
    # the annotated image is BGR (OpenCV); matplotlib expects RGB
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    ax[i].imshow(img)
    ax[i].set_axis_off()
plt.show()