# Surfrider Video processing notebook

This notebook aims at producing a semi-synthetic dataset for training models.

Make sure you have ffmpeg on your computer, for instance with:

`sudo apt-get install ffmpeg`

Then install the python package:

`pip install ffmpeg-python`

## Splitting a video into frames

(code from the mot repository)

In [None]:
import ffmpeg
import os
%matplotlib inline
import json
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns; sns.set()

In [None]:
def split_video(input_path, output_folder, fps=1.5, resolution=(1024, 768)):
    """Splits a video into frames

    The video is rescaled to *resolution*, resampled at *fps* frames per
    second, and the frames are written as JPEG files into *output_folder*
    using the ffmpeg file pattern ``frame_%4d.jpeg``.

    Arguments:

    - *input_path*: string of video full path
    - *output_folder*: folder to store images; it is created (together with
      any missing parent folder) when it does not exist yet
    - *fps*: float for number of frames per second
    - *resolution*: integer tuple for resolution, as (width, height)

    """
    # makedirs(exist_ok=True) copes with nested folders and with the folder
    # appearing between a check and the creation, unlike isdir() + mkdir().
    os.makedirs(output_folder, exist_ok=True)

    width, height = resolution[0], resolution[1]
    frame_pattern = os.path.join(output_folder, "frame_%4d.jpeg")
    (
        ffmpeg.input(input_path)
        .filter("scale", width=str(width), height=str(height))
        .filter("fps", fps=fps, round="up")
        .trim(start_frame=0)
        .output(frame_pattern, format="image2", vcodec="mjpeg")
        .run()
    )


def read_folder(input_path):
    """Return the paths of all entries of *input_path*, sorted by name.

    Each returned path is *input_path* joined with the entry name.
    """
    # for now, read directly from images in folder ; later from json outputs
    paths = []
    for entry_name in sorted(os.listdir(input_path)):
        paths.append(os.path.join(input_path, entry_name))
    return paths

In [None]:
# Source video and the folder that receives its extracted frames
input_path = 'real-dataset-1/b3e4d8d1-cdbc-4539-92f2-7845df756ee4_48.8068478_1.3645054.mp4'
output_folder = os.path.join(".", "real-dataset-1-images")

# Extract 12 frames per second, rescaled to 960x540
split_video(input_path, output_folder, fps=12, resolution=(960, 540))

# Read the paths of the extracted frames, sorted by file name
video_images = read_folder(output_folder)

## Get annotations and images from TACO

To do so, clone the TACO repository and download the images

In [None]:
# Location of the cloned TACO repository data and of its annotation file
dataset_path = '../TACO/data'
anns_file_path = dataset_path + '/' + 'annotations.json'

# Read annotations (JSON text is UTF-8, do not depend on the platform default)
with open(anns_file_path, 'r', encoding='utf-8') as f:
    dataset = json.load(f)

categories = dataset['categories']
anns = dataset['annotations']
imgs = dataset['images']
nr_cats = len(categories)
nr_annotations = len(anns)
nr_images = len(imgs)

# Load categories and super categories
cat_names = []
super_cat_names = []
super_cat_ids = {}
super_cat_last_name = ''
nr_super_cats = 0
for cat_it in categories:
    cat_names.append(cat_it['name'])
    super_cat_name = cat_it['supercategory']
    # Register a super category the first time it is seen. Testing membership
    # (instead of comparing with the previous name only) keeps the count and
    # the ids correct even if a super category's entries are not contiguous.
    if super_cat_name not in super_cat_ids:
        super_cat_names.append(super_cat_name)
        super_cat_ids[super_cat_name] = nr_super_cats
        nr_super_cats += 1
    # Still tracked so the variable ends up holding the last super category name
    super_cat_last_name = super_cat_name

print('Number of super categories:', nr_super_cats)
print('Number of categories:', nr_cats)
print('Number of annotations:', nr_annotations)
print('Number of images:', nr_images)

In [None]:
# Map TACO category ids to our own categories:
# "fragment" (ids_plastic), "other" (ids_other) and "bottle" (ids_bottle)

ids_plastic = [36, 37, 38, 39, 40, 41]
ids_other = [10, 11, 12, 43, 44, 45, 46, 47, 51, 53]
ids_bottle = [4, 5]

### Crop an image with points

Points should be a numpy array of shape (nb_points, 2)

In [None]:
def crop_img(img, pts):
    """Cut the polygon described by *pts* out of *img*.

    Arguments:

    - *img*: 3-channel image array, indexed as ``img[row, column]``
    - *pts*: array of shape (nb_points, 2) with the (x, y) polygon vertices

    Returns a 4-channel copy of the polygon's bounding box: the three colour
    channels are kept in their input order and zeroed outside the polygon,
    and the fourth channel holds the polygon mask.
    """
    # OpenCV contour functions work on 32-bit integer points
    pts = np.asarray(pts, dtype=np.int32)

    ## (1) crop the bounding box
    x, y, w, h = cv2.boundingRect(pts)
    croped = img[y:y + h, x:x + w].copy()

    ## (2) make mask, with the points moved to the crop's coordinates
    pts = pts - pts.min(axis=0)
    mask = np.zeros(croped.shape[:2], np.uint8)
    cv2.drawContours(mask, [pts], -1, (255, 255, 255), -1, cv2.LINE_AA)

    ## (3) do bit-op
    dst = cv2.bitwise_and(croped, croped, mask=mask)

    ## (4) add the alpha channel
    rgba = cv2.cvtColor(dst, cv2.COLOR_RGB2RGBA)
    rgba[:, :, 3] = mask
    return rgba

## Get a random annotation & image from TACO

In [None]:
# For each of our labels, the positions in `anns` of the matching TACO annotations
dict_label_to_ann_ids = {label: [] for label in ("bottle", "fragment", "other")}
for idx, ann in enumerate(anns):
    if ann["category_id"] in ids_bottle:
        dict_label_to_ann_ids["bottle"].append(idx)
    elif ann["category_id"] in ids_plastic:
        dict_label_to_ann_ids["fragment"].append(idx)
    elif ann["category_id"] in ids_other:
        dict_label_to_ann_ids["other"].append(idx)

In [None]:
import cv2
import random

def get_random_trash(label):
    """Pick a random TACO object with the given label and cut it out.

    Arguments:

    - *label*: key of ``dict_label_to_ann_ids``

    A random annotation of that label is chosen, then one of its
    segmentation polygons at random; the polygon is cut out of the
    annotation's image with ``crop_img`` and the result is returned.

    Raises ``IOError`` when the annotation's image cannot be read.
    """
    list_idx = dict_label_to_ann_ids[label]
    idx = random.choice(list_idx)
    ann = anns[idx]
    img_id = ann['image_id']
    # NOTE(review): the image id is used as a position in `imgs` - this
    # assumes ids match list positions; confirm against the dataset.
    img_path = os.path.join(dataset_path, imgs[img_id]['file_name'])

    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread does not raise on a missing/unreadable file, it returns None
        raise IOError("Could not read image: {}".format(img_path))

    seg = random.choice(ann['segmentation'])
    # seg is a flat list: even positions go to the first column, odd ones to
    # the second; int32 is the point type OpenCV contour functions work on.
    pts = np.array(list(zip(seg[::2], seg[1::2]))).astype(np.int32)
    return crop_img(img, pts)

trash_img = get_random_trash(label="bottle")

In [None]:
# The crop was loaded with cv2.imread, so its channels are ordered B, G, R (+ alpha);
# matplotlib expects R, G, B, A, so reorder them before displaying.
plt.imshow(cv2.cvtColor(trash_img, cv2.COLOR_BGRA2RGBA));

### Overlay a transparent image onto the background image

In [None]:
def overlay_transparent(background_img, img_to_overlay_t, x, y, overlay_size=None):
    """Paste a 4-channel image onto a copy of a background image.

    Arguments:

    - *background_img*: 3-channel background image; it is not modified
    - *img_to_overlay_t*: 4-channel image; its fourth channel is used as mask
    - *x*, *y*: column and row of the background where the overlay's top-left
      corner goes; they may be negative or lie beyond the background borders
    - *overlay_size*: optional (width, height) the overlay is resized to first

    Returns a new image of the same shape as the background. Where the
    median-filtered mask is non-zero the overlay colours replace the
    background. The overlay is clipped at the borders, and when it lies
    entirely outside the background an unmodified copy is returned.
    """
    bg_img = background_img.copy()
    background_height, background_width, _ = bg_img.shape

    if overlay_size is not None:
        overlay_width, overlay_height = overlay_size
    else:
        overlay_height, overlay_width = img_to_overlay_t.shape[:2]

    # Part of the overlay that falls inside the background, in overlay coordinates
    left, top = max(0, -x), max(0, -y)
    right = min(overlay_width, background_width - x)
    bottom = min(overlay_height, background_height - y)
    if right <= left or bottom <= top:
        # The overlay does not intersect the background: nothing to draw
        return bg_img

    if overlay_size is not None:
        img_to_overlay_t = cv2.resize(img_to_overlay_t, overlay_size)

    # Extract the alpha mask of the 4-channel image, keep the colour channels
    b, g, r, a = cv2.split(img_to_overlay_t)
    overlay_color = cv2.merge((b, g, r))

    # Apply some simple filtering to remove edge noise (on the whole mask,
    # before clipping, so the clipped borders are filtered like the rest)
    mask = cv2.medianBlur(a, 5)

    # Border conditions: keep only the visible part of the overlay
    mask = mask[top:bottom, left:right]
    overlay_color = overlay_color[top:bottom, left:right]
    x, y = max(x, 0), max(y, 0)
    h, w = bottom - top, right - left

    roi = bg_img[y:y + h, x:x + w]

    # Black-out the area behind the overlay in our original ROI
    img1_bg = cv2.bitwise_and(roi, roi, mask=cv2.bitwise_not(mask))

    # Keep only the masked pixels of the overlay
    img2_fg = cv2.bitwise_and(overlay_color, overlay_color, mask=mask)

    # Update the background copy with the combined ROI
    bg_img[y:y + h, x:x + w] = cv2.add(img1_bg, img2_fg)

    return bg_img

In [None]:
# Resize the cropped trash to a width of 100 pixels, keeping its aspect ratio.
# (`outp` is not defined anywhere in this notebook; the crop to paste is `trash_img`.)
output = cv2.resize(trash_img, (100, int(100 * trash_img.shape[0] / trash_img.shape[1])))

video_image = cv2.imread(video_images[0])
rows, cols, channels = output.shape

# x = -50 puts the left part of the overlay outside the frame, exercising the clipping
output_image = overlay_transparent(video_image, output, -50, 200)
output_image.shape

In [None]:
# Convert the composited frame from BGR to RGB channel order before handing it to matplotlib
im_rgb = cv2.cvtColor(output_image, cv2.COLOR_BGR2RGB)
plt.imshow(im_rgb);

In [None]:
# This class represents an object and its location within the video
# Todo add blending, data augmentation, etc...

class OverlayTrash:
    """An image pasted on the video, moving in a straight line over time.

    Arguments:

    - *trash_image*: image array; only its ``shape`` is read here
    - *init_loc*, *final_loc*: (x, y) positions at *init_frame* and *final_frame*
    - *size*: target width; the height follows the image's aspect ratio
    - *init_frame*, *final_frame*: first and last frame indices of the motion
    """

    def __init__(self, trash_image, init_loc, final_loc,
                 size, init_frame, final_frame):

        self.trash_image = trash_image
        self.init_loc, self.final_loc = init_loc, final_loc
        self.init_frame, self.final_frame = init_frame, final_frame
        self.span_x = (final_loc[0] - init_loc[0])
        self.span_y = (final_loc[1] - init_loc[1])
        self.length = final_frame - init_frame
        # (width, height); the height is kept at 1 or more so that a very wide
        # image never yields an empty resize target
        height = int(size * trash_image.shape[0] / trash_image.shape[1])
        self.reshape_size = (size, max(1, height))
        self.size = size

    def get_position(self, frame_idx):
        """Return the (x, y) integer position linearly interpolated at *frame_idx*."""
        if self.length == 0:
            # Single-frame object: stay at init_loc instead of dividing by zero
            alpha = 0.0
        else:
            alpha = (frame_idx - self.init_frame) / self.length
        x = int(self.init_loc[0] + self.span_x * alpha)
        y = int(self.init_loc[1] + self.span_y * alpha)
        return (x, y)

In [None]:
def add_trash(video_images, trash_list, output_folder):
    """Overlay moving trash objects on video frames and save the results.

    Arguments:

    - *video_images*: list of frame image paths; the position in the list is
      the frame index
    - *trash_list*: objects providing ``init_frame``, ``final_frame``,
      ``trash_image``, ``reshape_size`` and ``get_position(frame_idx)``
    - *output_folder*: folder where each frame is written under its original
      file name; it is created when missing

    Raises ``IOError`` when a frame cannot be read or written.
    """
    # cv2.imwrite does not create folders and only reports failure through
    # its return value, so make sure the destination exists first.
    os.makedirs(output_folder, exist_ok=True)

    for frame_idx, frame in enumerate(video_images):
        video_image = cv2.imread(frame)
        if video_image is None:
            # cv2.imread returns None instead of raising on unreadable files
            raise IOError("Could not read frame: {}".format(frame))

        for trash in trash_list:
            # Only draw the object during its own frame interval (inclusive)
            if trash.init_frame <= frame_idx <= trash.final_frame:
                x, y = trash.get_position(frame_idx)
                video_image = overlay_transparent(video_image, trash.trash_image,
                                                  x, y, trash.reshape_size)

        out_path = os.path.join(output_folder, os.path.basename(frame))
        if not cv2.imwrite(out_path, video_image):
            raise IOError("Could not write frame: {}".format(out_path))

In [None]:
# Folder that receives the frames with the overlaid trash
output_folder2 = os.path.join(".", "real-dataset-1-images-out")
os.makedirs(output_folder2, exist_ok=True)

# One object, 70 pixels wide, moving from (950, 200) at frame 20 to (-20, 400) at frame 50
trash = OverlayTrash(
    trash_image=output,
    init_loc=(950, 200),
    final_loc=(-20, 400),
    size=70,
    init_frame=20,
    final_frame=50,
)

In [None]:
# Overlay the object on the first 100 frames and write them to the output folder
add_trash(
    video_images=video_images[:100],
    trash_list=[trash],
    output_folder=output_folder2,
)

In [None]:
# Show the file name (without its folder) of the first extracted frame
os.path.basename(video_images[0])

### Todo: save synthetic labels 

- Trash type
- which frames they appear in (first, middle, last)
- positions in each frame ?

In [None]:
# Example of a synthetic label: the trash class and the frames it appears in
# (presumably first, middle and last frame, as listed in the todo - to be confirmed)
labels = [
    {
        "class": "bottle",
        "times": [624, 644, 653],
    },
]

### (optional) Generate a new video from output frames

In [None]:
# Assemble every JPEG of the output folder (matched with a glob pattern, read at
# 12 frames per second) into a single video file.
ffmpeg.run(
    ffmpeg.output(
        ffmpeg.input(os.path.join(output_folder2, "*.jpeg"), pattern_type='glob', framerate=12),
        '1-added_trash.mp4',
    )
)