# Generate videos from COTS dataset frames
I hope you find this notebook useful!

Special thanks to **CASFRANCO**; much of this code comes from his notebook:
* https://www.kaggle.com/casfranco/eda-let-s-understand-the-data-protect-the-reef

In [None]:
import ast
import os
import cv2
import pandas as pd
import numpy as np
from tqdm.notebook import tqdm
tqdm.pandas()

In [None]:
# Root folder of the competition dataset; train.csv and the train_images/
# folder are both resolved relative to it.
TRAIN_PATH = '/kaggle/input/tensorflow-great-barrier-reef'

# Load the training metadata table.
df_train = pd.read_csv(
    filepath_or_buffer=os.path.join(TRAIN_PATH, 'train.csv'),
)

In [None]:
def get_bbox(annots):
    """Turn a list of annotation dicts into a list of plain value lists.

    Each dict contributes one list holding its values in the dict's own
    iteration order (keys are discarded).

    Args:
        annots: iterable of dicts, one per annotation.

    Returns:
        A new list with one ``list`` of values per annotation; empty when
        ``annots`` is empty.
    """
    bboxes = []
    for annot in annots:
        bboxes.append(list(annot.values()))
    return bboxes

def get_path(row):
    """Attach the location of the frame's JPEG file to a metadata row.

    Reads ``row.video_id`` and ``row.video_frame``, stores the resulting path
    (rooted at the global ``TRAIN_PATH``) under ``row['image_path']`` and
    returns that same, modified row.
    """
    image_path = f'{TRAIN_PATH}/train_images/video_{row.video_id}/{row.video_frame}.jpg'
    row['image_path'] = image_path
    return row

In [None]:
def draw_yolox_predictions(img, bboxes, color=(45,45,252)):
    """Draw one rectangle per bounding box onto ``img`` and return it.

    Args:
        img: image passed straight to ``cv2.rectangle`` (drawn on in place).
        bboxes: sequence of boxes; the first four entries of each box are
            used as ``x0, y0, x1, y1`` corner coordinates.
        color: colour tuple forwarded to ``cv2.rectangle``.

    Returns:
        The same ``img`` object that was passed in.
    """
    for box in bboxes:
        # Coordinates are truncated to ints before being handed to OpenCV.
        top_left = (int(box[0]), int(box[1]))
        bottom_right = (int(box[2]), int(box[3]))
        cv2.rectangle(img, top_left, bottom_right, color, 2)
    return img

def xywh2xyxy(bboxes):
    """Convert boxes from ``[x, y, width, height]`` to ``[x0, y0, x1, y1]``.

    The input is left untouched: every box is copied before conversion, so
    calling this repeatedly on the same stored boxes always gives the same
    result instead of adding the origin to the far corner again each time.

    Args:
        bboxes: iterable of indexable boxes whose first four entries are
            ``x, y, width, height``. Any further entries are carried over
            unchanged.

    Returns:
        A new list of new lists, one per input box, with entries 2 and 3
        replaced by ``x + width`` and ``y + height``.
    """
    output = []

    for box in bboxes:
        converted = list(box)  # copy so the caller's box is not modified
        converted[2] = box[0] + box[2]  # x1
        converted[3] = box[1] + box[3]  # y1
        output.append(converted)

    return output

In [None]:
# Parse the raw annotation strings into Python objects. Values that are no
# longer strings are left as they are, so this cell can be re-run on an
# already processed df_train without raising.
df_train['annotations'] = df_train['annotations'].progress_apply(
    lambda x: ast.literal_eval(x) if isinstance(x, str) else x
)

# Number of bounding boxes per row (no rows are filtered out here). Counting
# the parsed annotations replaces counting the letter 'x' in the raw string.
df_train["num_bbox"] = df_train['annotations'].apply(len)

# Bounding boxes as plain lists of values, one list per annotation
df_train['bboxes'] = df_train.annotations.progress_apply(get_bbox)

# Path of images
df_train = df_train.progress_apply(get_path, axis=1)

In [None]:
# Split the metadata by video and print a short summary of each one.
# NOTE(review): the video ids 0..2 are hard-coded here.
videos_df = []
for video_id in range(3):
    video_id_df = df_train[df_train.video_id == video_id]
    # len() is the number of rows for this video; it replaces `.count()[0]`,
    # which indexed a label-indexed Series by position.
    print("Frames on video " + str(video_id) + ": " + str(len(video_id_df)))
    print("Qty of Bboxes on video " + str(video_id) + ": " + str(video_id_df['num_bbox'].sum()))
    videos_df.append(video_id_df)

In [None]:
IMAGES_PATHS = "/kaggle/input/tensorflow-great-barrier-reef/train_images/"
%cd /kaggle/working

videos_qty = df_train['video_id'].unique().tolist()

for video_id in videos_qty:
    #Choose a video and get its df
    video_df = df_train[df_train.video_id==video_id]
    
    print("Exporting video " + str(video_id) + "...")
    out = cv2.VideoWriter('video_' + str(video_id) + '.mp4',cv2.VideoWriter_fourcc(*'MP4V'), 15, (1280,720))
    
    #Get all the sequences of that video
    video_sequences = video_df['sequence'].unique().tolist()
    
    for video_sequence in video_sequences:
        #Choose a sequence and go thru each of the video frames
        sequence_frames = df_train[df_train.sequence==video_sequence]['video_frame'].tolist()
        
        print("Writing sequence: " + str(video_sequence) + " to video " + str(video_id))
        for video_frame in tqdm(sequence_frames):
            #use that video frame to load the image
            filename = IMAGES_PATHS + 'video_' + str(video_id) + '/' + str(video_frame) +'.jpg'
            img = cv2.imread(filename)
            
            #Draw annotations to img
            img_row = df_train[df_train.image_path==filename]
            bboxes = img_row['bboxes'].values[0]
            bboxes = xywh2xyxy(bboxes)

            img = draw_yolox_predictions(img, bboxes)

            height, width, layers = img.shape
            size = (width,height)
            out.write(img)
            
    out.release()