In [1]:
import os
from glob import glob
import numpy as np
import json

import cv2
from tqdm.notebook import tqdm

import ffmpeg_utils
from augment_script import process_video, process_images, create_folder
from augment import Augmentations
from reader import VideoEffectReader, ImageEffectReader
from writer import COCO_writer
from bbox_utils import get_scale_ratio, resize_by_max_side

### Preprocess effects

In [2]:
e_type = 'fire'
e_type = 'smoke'
e_type = 'real_fire_smoke'

def get_effect_paths(e_type):
    assert e_type in ['fire', 'smoke', 'real_fire_smoke'], 'Unsupported'
    raw_png_effects_path  = f'effects/raw_{e_type}_images'
    prep_png_effects_path = f'effects/prep_{e_type}_images'
    raw_mov_effects_path  = f'effects/raw_{e_type}_vid'
    prep_mov_effects_path = f'effects/prep_{e_type}_vid'
    return (raw_png_effects_path, prep_png_effects_path,
            raw_mov_effects_path, prep_mov_effects_path)

(raw_png_effects_path, prep_png_effects_path,
 raw_mov_effects_path, prep_mov_effects_path) = get_effect_paths(e_type)


vid_out_path = 'output/out.mp4'
annot_out_path = 'output/annotations/instances_default.json'

# Create folders
# for path in [raw_png_effects_path, prep_png_effects_path,
#              raw_mov_effects_path, prep_mov_effects_path,
#              os.path.split(vid_out_path)[0],
#              os.path.split(annot_out_path)[0]]:
#     os.makedirs(path, exist_ok=True)

In [3]:
# Rename and trim empty pixels
raw_png_effects_path = 'effects/non_funny'
prep_png_effects_path = 'effects/prep_non_funny'
e_type = 'non-funny'
# for i, effect_path in enumerate(glob(os.path.join(raw_png_effects_path, '*.png'))):
#     e_img = cv2.imread(effect_path, cv2.IMREAD_UNCHANGED)
#     # Trim empty pixels
#     y, x = e_img[:, :, 3].nonzero()
#     minx, miny = np.min(x), np.min(y)
#     maxx, maxy = np.max(x), np.max(y)
#     e_img = e_img[miny:maxy, minx:maxx]
#     # Resize to 512px
#     scale_ratio = get_scale_ratio(e_img, 512)
#     e_img = resize_by_max_side(e_img, scale_ratio)
#     os.makedirs(prep_png_effects_path, exist_ok=True)
#     cv2.imwrite(os.path.join(prep_png_effects_path, f'{e_type}-{i}.png'), e_img)

In [4]:
# Rename videos
# for i, mov_path in enumerate(glob(os.path.join(raw_mov_effects_path, '*.mov'))):
#     print(mov_path)
#     path = os.path.split(mov_path)[0]
#     os.rename(mov_path, os.path.join(path, f'{e_type}-{i}.mov'))

In [5]:
# Resize and extract alpha videos
rewrite = False
# for input_path in glob(os.path.join(raw_mov_effects_path, '*.mov')):
#     input_path = os.path.abspath(input_path)
#     filename = os.path.splitext(os.path.split(input_path)[1])[0]
#     out_path = os.path.join(prep_mov_effects_path, filename + '.webm')
#     out_path = os.path.abspath(out_path)
#     ffmpeg_utils.convert_mov2webm(input_path, out_path, y=rewrite)
#     ffmpeg_utils.extract_alpha(out_path, y=rewrite)

In [6]:
# e_paths = get_effect_paths('fire')
e_paths = get_effect_paths('real_fire_smoke')
prep_e_png_fire_path, prep_e_mov_fire_path = e_paths[1], e_paths[3]
e_paths = get_effect_paths('smoke')
prep_e_png_smoke_path, prep_e_mov_smoke_path = e_paths[1], e_paths[3]

In [4]:
e_png_fire = glob(os.path.join(prep_e_png_fire_path, '*.png'))
e_png_fire = glob(os.path.join('effects/animals/retrieved', '*.png'))
e_mov_fire = glob(os.path.join(prep_e_mov_fire_path, '*.webm'))
e_png_smoke = glob(os.path.join(prep_e_png_smoke_path, '*.png'))
e_mov_smoke = glob(os.path.join(prep_e_mov_smoke_path, '*.webm'))
e_png_fire[:15], e_mov_fire[:15], e_png_smoke[:15], e_mov_smoke[:15]

NameError: name 'prep_e_png_fire_path' is not defined

### Augment

In [2]:
source_videos = glob('source_videos/*')
source_videos

['source_videos\\2020-06-23_16-40-40.mp4',
 'source_videos\\stream_OV1_2020-08-01_09_58_11.ts.mp4',
 'source_videos\\stream_OV1_2020-08-01_09_58_50.ts.mp4',
 'source_videos\\ufa']

In [3]:
# COCO_writer
coco_writer = COCO_writer([
    {
        'name': 'Fire',
        'supercategory': '',
        'id': 2,
    },
    {
        'name': 'Smoke',
        'supercategory': '',
        'id': 4,
    },
])

e_readers = [
#     VideoEffectReader(e_mov_fire[1:2]),
    ImageEffectReader(e_png_fire, annot_type='coco', preload=False),
]


# Augmentations
fire_augment = Augmentations(
    e_readers,
    config_path='augment_config.yaml',
    mov_min_size= 300,
    mov_max_size= 900,
    do_resize=True,
    do_flip=True,
    do_rotate=True,
    do_brightness=True,
    do_gamma=True,
    do_blur=True,
    blur_radius=11,
    contour_radius=10,
    debug_level=2,
    min_n_objects=10,
    max_n_objects=10,
    use_alpha=True,
    min_transparency=100,
    max_transparency=100,
    ck_start=1,
    ck_range=10,
    min_duration=1,
    max_duration=2,
)

# smoke_augment = Augmentations(
#     e_png_smoke,
#     e_mov_smoke,
#     config_path='augment_config.yaml',
#     do_resize=True,
#     do_flip=True,
#     do_rotate=False,
#     do_contrast=False,
#     debug_level=2,
#     ck_start=2,
#     ck_range=15,
#     max_n_objects=3,
#     use_alpha=True
# )

augmentations = [
    fire_augment,
#     smoke_augment
]

# process_video(source_videos[0], augmentations, vid_out_path, coco_writer, show_debug=True)

# # Write annotations.
# os.makedirs(os.path.split(annot_out_path)[0], exist_ok=True)
# coco_writer.write_result(annot_out_path)

NameError: name 'e_png_fire' is not defined

In [16]:
process_video(source_videos[0], augmentations, vid_out_path, coco_writer, show_debug=True, write_debug=False)

  1%|▉                                                                           | 184/15004 [01:36<2:09:26,  1.91it/s]


In [83]:
# Fix for `id=0` for all annotations
for path in glob('new_fire/annotations/smoke.json'):
    print(path)
    with open(path) as in_file:
        j = json.load(in_file)
    for i, annot in enumerate(j['annotations']):
        annot['id'] = i
        j['annotations'][i] = annot
    
    with open(path, 'w') as out_file:
        out_file.write(json.dumps(j))

new_fire/annotations/smoke.json


In [30]:
from pytorchcv.model_provider import get_model as ptcv_get_model
import torch

net = ptcv_get_model("squeezenet_v1_1", pretrained=True).cuda()
# net = ptcv_get_model("mobilenetv3_large_w1", pretrained=True).cuda()
# net = ptcv_get_model("efficientnet_b4b", pretrained=True).cuda()
# net = ptcv_get_model("squeezenet_v1_0", pretrained=True)
# net = ptcv_get_model("shufflenet_g1_w1", pretrained=True)

In [38]:
from bbox_utils import convert_xywh_xyxy, convert_xyxy_xywh, blur_contour
import math

from pycocotools.coco import COCO
# animals_coco = COCO('effects/animals/animals.json')
writer = COCO_writer(animals_coco.dataset['categories'])

create_folder('effects/animals/retrieved', clean_out=True)

is_exit = False
pbar = tqdm(total=len(animals_coco.imgs))
for img_id, img_info in animals_coco.imgs.items():
    read_img_name = os.path.split(img_info['image'])[1]
    path = 'effects/animals/animals/' + read_img_name
    image = cv2.imread(path)
    ann_ids = animals_coco.getAnnIds(imgIds=img_id, iscrowd=None)
    for obj in animals_coco.loadAnns(ann_ids):
        if obj['area'] < 3000:
            continue
        if len(obj['segmentation']) > 1:
            continue
        mask = np.zeros((*image.shape[:2], 1), dtype=np.uint8)
        segments = []
        for segment in obj['segmentation'][:1]:
            segment = np.array(segment).reshape(-1, 1, 2).astype(np.int32)
            segments.append(segment)
        segments = np.array(segments).reshape(-1, 1, 2)
        cv2.fillPoly(mask, [segments], (255))
        img = image.copy()
        img = np.concatenate((img, mask), -1)
        img = blur_contour(img, blur_radius=21, contour_radius=15, blur_image=False)
        
        y, x = img[:, :, 3].nonzero()
        minx, miny = np.min(x), np.min(y)
        maxx, maxy = np.max(x), np.max(y)
        img = img[miny:maxy, minx:maxx]
        out_img = img.copy()
        img, mask = img[:, :, :3].copy(), img[:, :, 3:].copy()
        
        # -------------------- Classification
        mean_rgb = (0.485, 0.456, 0.406)
        std_rgb = (0.229, 0.224, 0.225)
#         x = image
        x = img #(img * (mask / 255)).astype(np.uint8)
        x = cv2.cvtColor(x, cv2.COLOR_BGR2RGB)
        x = cv2.resize(x, (224, 224))
        # Convert image to a float tensor and normalize it:
        model_image = x.copy()
        x = x.astype(np.float32)
        x = x / 255.0
        x = (x - np.array(mean_rgb)) / np.array(std_rgb)
        x = x.transpose(2, 0, 1)
        x = np.expand_dims(x, axis=0)
        x = torch.Tensor(x)
        y = net(x.cuda()).detach().cpu()
        y = torch.nn.Softmax(dim=-1)(y)
        predicted = torch.argsort(y[0], descending=True)[:5]
        predicted = [yi.item() for yi in predicted[predicted > 0.05]]
        if not good_classes.intersection(predicted):
            continue
        new_img_id, filename = writer.add_frame(*img.shape[:2], file_ext='png')
        track_id = new_img_id
        category_id = writer.get_cat_id(animals_coco.cats[obj['category_id']]['name'])
        cv2.imwrite(f'effects/animals/retrieved/{filename}', out_img)
        pbar.set_description(f'Processed {new_img_id}')
        
        if len(segments.shape) > 3:
            print('OH, NO!')
            print(segments.shape)
        segments = np.array(segments).reshape(-1, 1, 2) - (minx, miny)
        h, w = img.shape[:2]
        segments[:, :, 0] = np.clip(segments[:, :, 0], 0, w - 1)
        segments[:, :, 1] = np.clip(segments[:, :, 1], 0, h - 1)
        bbox = cv2.boundingRect(segments.astype(np.int32))
        writer.add_annotation(new_img_id, bbox, track_id, category_id, segments)
        if new_img_id % 500 == 0:
            writer.write_result('effects/animals/retrieved.json')
#         bbox = convert_xywh_xyxy(bbox, *img.shape[:2][::-1])
#         cv2.rectangle(img, tuple(bbox[:2]), tuple(bbox[2:4]), (22, 48, 163), 2)
#         for segment in segments:
#             cv2.drawContours(img, segments, -1, (0, 0, 255), 4)
#         cv2.imshow('orig_image', image)
#         cv2.imshow('model_image', model_image)
#         cv2.imshow('img', (img * (mask / 255)).astype(np.uint8))
#         cv2.imshow('mask', mask)
#         key = cv2.waitKey(0) & 0xFF
#         if key == ord('q'):
#             print('Exit!')
#             is_exit = True
#             break
#     if is_exit:
#         break
    pbar.update(1)
cv2.destroyAllWindows()
writer.write_result('effects/animals/retrieved.json')

HBox(children=(FloatProgress(value=0.0, max=19267.0), HTML(value='')))

In [281]:
for idx in (y[0] > 0.01).nonzero():
    idx = idx.item()
#     idx = (y[0] == yi).nonzero().item()
    print(classes[str(idx)])

flatworm, platyhelminth
nematode, nematode worm, roundworm
Italian greyhound
Labrador retriever
Doberman, Doberman pinscher
miniature pinscher
Great Dane
dalmatian, coach dog, carriage dog
buckle
chain
corkscrew, bottle screw
cornet, horn, trumpet, trump
electric guitar
flute, transverse flute
microphone, mike
sax, saxophone
chocolate sauce, chocolate syrup
cup


In [195]:
effect_path = 'effects/prep_real_fire_smoke_vid/fire-3'
cap = cv2.VideoCapture(f'{effect_path}.webm')
alpha_cap = cv2.VideoCapture(f'{effect_path}_alpha.mp4')
while True:
    ret, e_frame = cap.read()
    a_ret, e_alpha = alpha_cap.read()
    init_e_alpha = e_alpha.copy()
    if not ret or not a_ret:
        print(ret, a_ret)
        break
#     rgb_sum = np.expand_dims(np.sum(e_alpha, axis=2), -1)
#     e_alpha = np.clip(rgb_sum, 0, 255, dtype=np.uint8)
    e_alpha = e_alpha[:, :, :1]
    print(e_alpha.shape)
    e_mask = np.ones(e_alpha.shape, dtype=np.uint8) * 255
    e_frame = np.concatenate((e_frame, e_alpha), axis=2)
    # Blur
    blur_radius = 11
    contour_radius = 5
    image, alpha = e_frame[:, :, :3].copy(), e_frame[:, :, 3:].copy()

    contours, _ = cv2.findContours(alpha.copy(), cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
    print(contours[0].shape, contours[0].dtype)
    cv2.boundingRect(contours[0])
    
    b_rad = (blur_radius, blur_radius)
    blurred_img = image#cv2.GaussianBlur(image, b_rad, 0)
    alpha[:, :, 0] = cv2.GaussianBlur(alpha, b_rad, 0)

    mask = np.zeros((*image.shape[:2], 1), np.ubyte)
    cv2.drawContours(mask, contours, -1, (1), contour_radius)
    output = np.where(mask, blurred_img, image)
    print((alpha / 255).max(), np.median(alpha / 255))
    print(output.max())
    output[...] = output * (alpha / 255)
    print(output.max())
    cv2.imshow('alpha', init_e_alpha)
    cv2.imshow('mask', mask * 255)
    cv2.imshow('output', output)
    cv2.imshow('e_frame', e_frame)
    
    cv2.waitKey(0)
cv2.destroyAllWindows()

(500, 720, 1)
(6, 1, 2) int32
0.9921568627450981 0.0
255
253
(500, 720, 1)
(10, 1, 2) int32
0.9921568627450981 0.0
255
253
(500, 720, 1)
(10, 1, 2) int32
0.9921568627450981 0.0
255
253
(500, 720, 1)
(9, 1, 2) int32
0.9921568627450981 0.0
255
253
(500, 720, 1)
(6, 1, 2) int32
0.9921568627450981 0.0
255
253
(500, 720, 1)
(9, 1, 2) int32
0.9921568627450981 0.0
255
253
(500, 720, 1)
(6, 1, 2) int32
0.9921568627450981 0.0
255
253
(500, 720, 1)
(6, 1, 2) int32
0.9921568627450981 0.0
255
253
(500, 720, 1)
(6, 1, 2) int32
0.9921568627450981 0.0
255
253
(500, 720, 1)
(6, 1, 2) int32
0.9921568627450981 0.0
255
253
(500, 720, 1)
(6, 1, 2) int32
0.9921568627450981 0.0
255
253
(500, 720, 1)
(13, 1, 2) int32
0.9921568627450981 0.0
255
253
(500, 720, 1)
(21, 1, 2) int32
0.9921568627450981 0.0
255
253
(500, 720, 1)
(4, 1, 2) int32
0.9921568627450981 0.0
255
253
(500, 720, 1)
(8, 1, 2) int32
0.9921568627450981 0.0
255
253
(500, 720, 1)
(18, 1, 2) int32
0.9921568627450981 0.0
255
253
(500, 720, 1)
(20, 

KeyboardInterrupt: 

In [196]:
cv2.destroyAllWindows()

In [None]:
%load_ext line_profiler
%load_ext memory_profiler

In [None]:
from augment import Augmentations
from augment_script import _process_image
from bbox_utils import (convert_xywh_xyxy, convert_xyxy_xywh,
                        resize, rotate, flip, gamma_correction,
                        blur_contour)

augment = Augmentations.augment
merge_images = Augmentations.merge_images

In [None]:
%lprun -f blur_contour process_video(source_videos[0], augmentations, vid_out_path, coco_writer, show_debug=False)

In [None]:
images = glob(os.path.join('ufa/images', f'*.jpg'))
out_path = 'output/test'
show_debug, write_debug = False, False
%lprun -f process_images process_images(images, augmentations, out_path, coco_writer, show_debug, write_debug, n_workers=None)

In [44]:
a = np.random.normal(loc=0.8, scale=0.05, size=10000)
b = np.random.normal(loc=0, scale=2.5, size=10000)
print(a.min(), a.max(), a.mean())
print(b.min(), b.max(), b.mean())

0.5922650026558175 0.9899783176964153 0.8003302973044949
-9.670270866233837 8.323729912185293 -0.022608726770813894


In [32]:
cv2.destroyAllWindows()

### Manually adding effects

In [2]:
import os
from glob import glob
import numpy as np
import json
from itertools import cycle

import cv2
from tqdm.notebook import tqdm

import ffmpeg_utils
from augment_script import process_video, process_images, create_folder, draw_debug, VideoEffectReader, get_coco_writer
from augment import Effect, Augmentations, AugmentationConfig
from reader import VideoEffectReader, ImageEffectReader
from writer import COCO_writer
from bbox_utils import (convert_xywh_xyxy, convert_xyxy_xywh,
                        blur_contour, resize, rotate, flip,
                        get_scale_ratio, resize_by_max_side,
                        gamma_correction)

In [3]:
def make_effect(reader_id, idx, track_id, angle=0, gain=1, bias=0, gamma=1, size=800, duration=None, offset=None):
    duration = duration if duration is not None else 30 * 3
    offset = offset if offset is not None else (700, 700)
    return Effect(
    reader_id = reader_id,
    idx = idx,
    track_id = track_id,
    size = size,
    offset = offset,
    angle = angle,
    is_flip = False,
    transparency = 100 / 100, 
    gain = gain,
    bias = bias,
    gamma = gamma,
    duration = duration,
    cur_dur = 0,
)

In [47]:
class aug_new:
    
    def __init__(self):
        self.gains = [g for g in np.linspace(1, 1.6, 3)]
        self.biases = [b for b in np.linspace(0, 20, 2)]
        self.gammas = [g for g in np.linspace(0.4, 1.5, 3)]
        
        self.sizes = [s for s in np.linspace(50, 300, 4)]        
        self.sizes = [20,30, 40, 50, 70, 100, 120, 150, 200]
        
        self.n_line = 4
        self.y_pos = 500
        self.x_poses = cycle([int(x_pos) for x_pos in np.linspace(150, 1550, self.n_line)])
        self.x_poses = cycle([700])
        self.idxs = cycle([*range(84)])
    
    def get_iter(self):
        reader_id, idx = 0, 0
        effects = []
        for bias in self.biases:
            for gain in self.gains:
                for gamma in self.gammas:
                    for size in self.sizes:
                        track_id = 0
                        effects.append(make_effect(reader_id, next(self.idxs), track_id,
                                                   gain=gain, bias=bias, gamma=gamma,
                                                   offset=(next(self.x_poses), self.y_pos),
                                                   size=size, duration=5))
                        if len(effects) >= self.n_line:
                            yield effects
                            effects = []
        if len(effects):
            yield effects

In [48]:
len([*aug_new().get_iter()])

450

In [56]:
def check_box_sizes(effect_path, size):
    e_reader = VideoEffectReader([effect_path], use_alpha=True)
    e_info = make_effect(0, 0, 0, size=size)
    min_sizes, max_sizes = [], []
    while(True):
        frame, segments, e_cats = e_reader.get_frame(e_info, read_annot=True)
        e_info.cur_dur += 1
        segments = np.array([np.array(segment, dtype=np.float32) for segment in segments], dtype=object) if segments else None
        if frame is None:
            break
        if segments is not None:
            frame, _, segments = resize(frame, e_info.size, segments=segments)
            for poly in segments:
                bbox = cv2.boundingRect(poly.astype(np.int32))
                min_sizes.append(min(bbox[2:]))
                max_sizes.append(max(bbox[2:]))
    return max_sizes, min_sizes

In [60]:
size = 200
needed_size = 160
max_sizes, min_sizes = check_box_sizes('effects/vdb_fire/gasoline_1.webm', size)
needed_size * size / max(min_sizes)

loading annotations into memory...
Done (t=0.00s)
creating index...
index created!


432.43243243243245

In [54]:
in_video_path = 'source_videos/2020-06-23_16-40-40.mp4'
# in_video_path = 'source_videos/stream_OV1_2020-08-01_09_58_11.ts.mp4'
# in_video_path = 'source_videos/stream_OV1_2020-08-01_09_58_50.ts.mp4'
out_path = 'out/out.mp4'
write_debug = False
writer = get_coco_writer()

cfg = AugmentationConfig()
cfg.__dict__.update({
    'do_resize': True,
    'do_flip': False,
    'do_rotate': False,
    'do_brightness': True,
    'do_gamma': True,
    'do_blur': True,
    'min_bbox_size': 40,
})

# e_reader = VideoEffectReader(['effects/vdb_fire/gasoline_3.webm'])
# e_reader = VideoEffectReader(['effects/vdb_fire/gasoline_1.webm'], use_alpha=True)
e_reader = ImageEffectReader(glob('effects/random_objects/*'), annot_type=None)

# Augmentations
augment = Augmentations(
    [e_reader],
    configs=[cfg],
    min_n_objects=1,
    max_n_objects=1,
    debug_level=0,
    gen_prob=1,
    next_gen_prob=0,
)

in_stream = cv2.VideoCapture(in_video_path)
is_exit = False

# Create writer
frame_width = int(in_stream.get(cv2.CAP_PROP_FRAME_WIDTH))
frame_height = int(in_stream.get(cv2.CAP_PROP_FRAME_HEIGHT))
frame_rate = int(in_stream.get(cv2.CAP_PROP_FPS))
total_frames = int(in_stream.get(cv2.CAP_PROP_FRAME_COUNT))
fourcc = cv2.VideoWriter_fourcc(*'MPEG')
out_stream = cv2.VideoWriter(out_path, fourcc, frame_rate, (frame_width, frame_height))

pbar = tqdm(total=total_frames, desc=f'Processing {in_video_path}')
effects_iter = aug_new().get_iter()
counter = 0
try:
    while in_stream.isOpened():
        _, image = in_stream.read()
        if image is None:
            tqdm.write("No image in the stream, stopping.")
            break
        
        if len(augment.objects) == 0:
            effects = next(effects_iter)
            for effect in effects:
                augment.add_effect(effect)
#             augment.add_effect(make_effect(0, 0, 0, 0, 1, 0, 1, offset=(1920//2, 1000), size=150))
        
        frame, debug_frame = augment.augment(image, writer=writer, frame_num=0)
        if debug_frame is None:
            debug_frame = frame
        
        out_stream.write(debug_frame if write_debug else frame)
        pbar.update(1)
        counter += 1
        if draw_debug(debug_frame):
            break
except StopIteration:
    tqdm.write('Stop Iteration.')
except KeyboardInterrupt:
    tqdm.write('Exited.')
    is_exit = True
finally:
    # Close streams.
    pbar.close()
    tqdm.write('Closing streams...')
    in_stream.release()
    out_stream.release()
    cv2.destroyAllWindows()

HBox(children=(FloatProgress(value=0.0, description='Processing source_videos/2020-06-23_16-40-40.mp4', max=15…


Closing streams...


In [13]:
in_stream.release()
out_stream.release()
cv2.destroyAllWindows()

### Random things

In [1]:
import os
from glob import glob
import numpy as np
import json
from itertools import cycle

import cv2
from tqdm.notebook import tqdm

import ffmpeg_utils
from augment_script import process_video, process_images, create_folder, draw_debug, VideoEffectReader, get_coco_writer
from augment import Effect, Augmentations, AugmentationConfig
from reader import VideoEffectReader, ImageEffectReader
from writer import COCO_writer
from bbox_utils import (convert_xywh_xyxy, convert_xyxy_xywh,
                        blur_contour, resize, rotate, flip,
                        get_scale_ratio, resize_by_max_side,
                        gamma_correction)


def transform_effect(e_image, e_info, segments, cfg):
    # Transparency
    e_image[:, :, 3:] = e_image[:, :, 3:] * e_info.transparency

    # Flip image
    if cfg.do_flip and e_info.is_flip:
        e_image, _, segments = flip(e_image, segments=segments)

    # Contrast & Brightness
    if cfg.do_brightness:
        e_image[:, :, :3] = cv2.convertScaleAbs(
            e_image[:, :, :3], alpha=e_info.gain, beta=e_info.bias)

    # Gamma correction
    if cfg.do_gamma:
        e_image[:, :, :3] = gamma_correction(
            e_image[:, :, :3], e_info.gamma)

    # Rotate image
    if cfg.do_rotate and e_info.angle:
        e_image, _, segments = rotate(e_image, e_info.angle, segments=segments)

    if cfg.do_blur:
        e_image = blur_contour(e_image,
            cfg.blur_radius, cfg.contour_radius, blur_image=False)

    # Resize image
    
    print(e_image.shape)
    if cfg.do_resize:
        e_image, _, segments = resize(e_image, e_info.size, segments=segments)
    
    offsetx = np.random.uniform(-30, 30)
    offsety = np.random.uniform(-200, -120)
    offsetx = e_image.shape[1] * (offsetx / 100)
    offsety = e_image.shape[0] * (offsety / 100)
    
    e_image = add_shadow(e_image, (offsetx, offsety))
    return e_image, segments

In [290]:
def warpImage(img, shadow_off, obj_off, old_shape, start_pos=0):
    h, w = old_shape
    ow, oh = obj_off
    offx, offy = shadow_off
    print('start_pos', start_pos)
    pts1 = np.float32([[ow, oh],           [ow+w-1, oh],           [ow, oh+h-1-start_pos], [ow+w-1, oh+h-1-start_pos]])
    pts2 = np.float32([[ow-offx, oh-offy], [ow+w-1-offx, oh-offy], [ow, oh+h-1-start_pos], [ow+w-1, oh+h-1-start_pos]])
    print(pts1)
    print(pts2)
    M = cv2.getPerspectiveTransform(pts1, pts2)
    dst = cv2.warpPerspective(img, M, img.shape[:2][::-1])
    return dst


def trim_empty(image, segments=None, tolerance=0):
    # img is 2D or 3D image data
    mask = image > tolerance
    if image.ndim == 3:
        mask = mask.all(2)
    m, n = mask.shape
    mask0, mask1 = mask.any(0), mask.any(1)
    col_start, col_end = mask0.argmax(), n - mask0[::-1].argmax()
    row_start, row_end = mask1.argmax(), m - mask1[::-1].argmax()
    return image[row_start:row_end,
               col_start:col_end]

def add_shadow(e_image, offset, blur_radius=51, max_shadow_opacity=200):
    h, w = e_image.shape[:2]
    offset = np.array(offset, dtype=np.int32)
    print('e_image', e_image.shape)
    print('offset', offset)
    
    if offset[1] < -h:
        new_h = -offset[1]
        start_pos = int(e_image.shape[0] * 0.05)
    else:
        new_h = h + (offset[1] if offset[1] > 0 else 0)
        start_pos = 0
    new_w = w + abs(offset[0])
    new_image = np.zeros((new_h, new_w, 4), dtype=np.uint8)
    print('new_image', new_image.shape)
    
    if offset[0] > 0:
        if offset[1] < 0:
            obj_off = [offset[0], 0]
        else:
            obj_off = [offset[0], offset[1]]
    else:
        if offset[1] <= 0:
            obj_off = [0, 0]
        else:
            obj_off = [0, offset[1]]
    
    new_image[obj_off[1]:h+obj_off[1], obj_off[0]:w+obj_off[0]] = e_image
    
#     offset[1] *= new_h / h
    
    blur_padding = blur_radius // 4
    shadow = np.zeros((blur_padding * 2 + new_image.shape[0],
                       blur_padding * 2 + new_image.shape[1],
                       new_image.shape[2] - 3), dtype=np.uint8)
    shadow[blur_padding:-blur_padding, blur_padding:-blur_padding] = new_image[:, :, 3:]
    shadow = (shadow / 255) * max_shadow_opacity
    
    shadow = warpImage(shadow, offset, obj_off, (h, w), start_pos)
    shadow = cv2.GaussianBlur(shadow, (blur_radius, blur_radius), 0)
    if len(shadow.shape) == 2:
        shadow = np.expand_dims(shadow, -1)
    shadow = shadow[blur_padding:-blur_padding, blur_padding:-blur_padding]
    
    mask = new_image[..., 3:] / 255
    new_image[..., :3] = new_image[..., :3] * mask
    new_image[..., 3:] = new_image[..., 3:] + shadow * (1 - mask)
    
    
#     oh, ow = obj_off
#     ow = 0
#     offx, offy = offset
#     pts1 = np.float32([[ow, oh],           [ow+w-1, oh],           [ow, oh+h-1-start_pos], [ow+w-1, oh+h-1-start_pos]])
#     for pnt in pts1:
#         print(pnt)
#         pnt = (int(pnt[0]), int(pnt[1]))
#         cv2.circle(new_image, pnt, 10, (0, 255, 0, 255), -1)
        
#     new_image = trim_empty(new_image)
    cv2.imshow('shadow', shadow * (1 - mask))
    return new_image

In [291]:
img = e_image.copy()
img = resize(img, 400)[0]
h, w = img.shape[:2]
print(img.shape)
offx = int(w * 1)
offy = int(h * -1)

start_pos = int(img.shape[0] * 0.05)
# for pnt in [ [0, h-1-start_pos], [w-1, h-1-start_pos]]:
#     print(pnt)
#     pnt = (int(pnt[0]), int(pnt[1]))
#     cv2.circle(img, pnt, 10, (0, 255, 0, 255), -1)

dst = add_shadow(img, (offx, offy))
cv2.imshow('test', get_img_with_white_bg(dst))
cv2.waitKey(1)

(318, 400, 4)
e_image (318, 400, 4)
offset [ 400 -318]
new_image (318, 800, 4)
start_pos 0
[[400.   0.]
 [799.   0.]
 [400. 317.]
 [799. 317.]]
[[  0. 318.]
 [399. 318.]
 [400. 317.]
 [799. 317.]]
obj_off [400, 0]


-1

In [4]:
e_info = Effect(
    reader_id = 0,
    idx = 1,
    track_id = 1,
    size = 400,
    offset = (0, 0),
    angle = 0,
    is_flip = False,
    transparency = 100 / 100, 
    shadow_off = (30, 30),
    shadow_trans = 0.8,
    gain = 1,
    bias = 0,
    gamma = 1,
    duration = 0,
    cur_dur = 0,
)
cfg = AugmentationConfig()
cfg.__dict__.update({
    'do_resize': True,
    'do_flip': False,
    'do_rotate': False,
    'do_brightness': True,
    'do_gamma': True,
    'do_blur': False,
    'min_bbox_size': 40,
})

def get_img_with_white_bg(image):
    alpha = image[:, :, 3:].copy()
    image = (image[:, :, :3] * (alpha / 255)).astype(np.uint8)
    image += (np.ones(image.shape) * (255 - alpha)).astype(np.uint8)
    return image

it = list(glob('effects/animals/coco_ded_retrieved/*.png'))
np.random.shuffle(it)
for im_path in it:
    e_image = cv2.imread(im_path, -1)
    cv2.imshow('image', get_img_with_white_bg(resize(e_image, 400)[0]))
    e_image_with_shadow, segments = transform_effect(e_image, e_info, None, cfg)
    cv2.imshow('with_shadow', get_img_with_white_bg(e_image_with_shadow))
    if cv2.waitKey(0) & 0xFF == ord('q'):
        break

(106, 133, 4)
e_image (318, 400, 4)
offset [  43 -440]
new_image (440, 443, 4)
start_pos 5
[[  0.   0.]
 [399.   0.]
 [  0. 312.]
 [399. 312.]] [[-43. 440.]
 [356. 440.]
 [  0. 312.]
 [399. 312.]]


In [292]:
cv2.destroyAllWindows()

#### TODO


##### General
* <s>move everything to config file</s>
* написать loop чтобы обработать все видосы

##### Preparation
* <s>remove clear pixels</s>
* remove clear pixels from video

##### Augmentation
* <s>cover all image</s>
* <s>change sizes</s>
* <s>flip</s>
* <s>add animations</s>
* <s>proper resizing</s>
* <s>make an offset point a down center point of the effect image.</s>
* <s>fix merging with video - fix: Color keying</s>
* <s>поиграться с color keying'ом, чтобы более плавные переходы в нём были</s>
* <s>change angle</s>
* <s>contrast and brightness</s>
* <s>gamma correction</s>
* <s>find new effects, add smoke</s>
* warp perspective

##### Annotation
* <s>add dynamic bboxes for videos</s>
* <s>fix bboxes for some videos</s>
* <s>annotate all videos</s>
* <s>annotation for images</s>
* <s>scaling bboxes</s>
* <s>rotating bboxes</s>
* <s>Write to COCO</s>
* <s>Get class from annotation, not by fixing it for Augmentation object</s>
* разметить эффекты не боксом, а полигоном, чтобы можно было нормально поворачивать эффект и при этом иметь нормальный бокс



## Working with videos

#### Scale down with keeping alpha channel
```shell
ffmpeg -i in.mov -filter:v scale=720:-1 -c:v qtrle out.mov
```

#### `png` files to `.mov` with alpha
```shell
ffmpeg -framerate 25 -f image2 -i frames/embergen_gasoline_explosion_a_0.vdb%03d.png -c:v libvpx -pix_fmt yuva420p output.webm
```

#### `.mov` to `.webm` keeping  alpha channel

```shell
ffmpeg -i in.mov -c:v libvpx -pix_fmt yuva420p -auto-alt-ref 0 out.webm
```

#### Extract alpha channel from `.webm`

```shell
ffmpeg -vcodec libvpx -i in.webm -vf alphaextract -y out.mp4
```


#### Extract alpha channel from `.mov`

```shell
ffmpeg -i in.mov -vf alphaextract,format=yuv420p out.mp4
```

#### Compress resulting video
```shell
ffmpeg -i out.mp4 -vcodec h264 -b:v 1000k -acodec mp2 compressed_out.mp4 -y
```

#### Get first `n` frames
```shell
ffmpeg -i in.mp4 -frames:v 249 -c copy out.mp4
```

#### Cut video from `ss` till `t`
```shell
ffmpeg -ss 00:00:8.25 -i in.mp4 -t 00:00:8.25 out.mp4                                                               
```

#### Merge video with alpha channel in `.mov` file
```shell
ffmpeg -i in.mp4 -i alpha_in.mp4 -filter_complex [0][1]alphamerge -c:v qtrle out.mov
```