In [1]:
import cv2
import matplotlib.pyplot as plt
import numpy as np
import json
import os
from pycocotools.coco import COCO
plt.rcParams["figure.figsize"] = (15,15)

In [3]:
def mdnet2list(video_fin, video_results, frames, offset=-1):
    '''
    video_fin: mdnet input folder for a single video 
    
    results: list of filenames w/ video results 
    
    frames: list of all the frames 
    
    output: list of dictionaries  
    '''
    
    output = [{'img': frame, 'bbox': []} for frame in sorted(os.listdir(frames)) if frame.endswith('.jpg')]
    frame_num = int(output[-1]['img'].split('.jpg')[0].split('-')[-1])
    assert(frame_num == len(output))
#     print(output[0])
    
    for result in video_results: 
        
        with open(result) as f:
            print(result)
            res_contts = json.load(f)
        
        
        fn = result.split('/')[-1]
        input_fn = f'{video_fin}/{fn}'
        with open(input_fn) as f:
            i_contts = json.load(f)
        
        tmp = i_contts['img_list'][0].split('/')[-1]
        img_i = int(tmp.split('.jpg')[0].split('-')[-1]) + offset
#         print('img_i', img_i,tmp)
        
        for bbox in res_contts['res']:
            if img_i < frame_num:
                output[img_i]['bbox'].append(bbox)
                img_i +=1
            else:
                print('dropped', img_i, bbox)
#             except Exception:
#                 print(img_i, len(output))
        
    
    return output
    

In [4]:
def mdnet2dict_all(fin_folder = 'mdnet_input', results_fldr='results', frame_fldr='shark_frames'):
    '''
    fin_folder: folder containing all mdnet input (for figuring out where it starts out)
    
    results: results folder containing all the results
    
    '''
    
    # get all video folders 
    fin_fldrs = [fldr for fldr in os.listdir(fin_folder) if os.path.isdir(fin_folder)]
#     fin_fldrs = ['20210910_SUPInteraction']
    fldr2bbox = {}
    # get list of corresponding video results in results folder
    # expecting it to have it be the same 
    for fldr in fin_fldrs:
        video_fin = f'{fin_folder}/{fldr}'
        
        video_results = [f'{results_fldr}/{f}' for f in os.listdir(results_fldr) if f.startswith(fldr) and f.endswith('.json')]
        
        frames = f'{frame_fldr}/{fldr}'
        if len(video_results) >0:
            fldr2bbox[fldr] = mdnet2list(video_fin, video_results, frames)
    return fldr2bbox

In [5]:
bboxes = mdnet2dict_all()

results/2020731_JWSSeaLIonSUP_shark_0.json
results/2020731_JWSSeaLIonSUP_1_0.json
results/2020731_JWSSeaLIonSUP_human_1.json
results/20210121_SharkNearTwoSurfers_shark_0.json
results/20210121_SharkNearTwoSurfers_human_0.json
results/20210121_SharkNearTwoSurfers_1_1.json
results/20210121_SharkNearTwoSurfers_1_0.json
results/20210121_SUPNexttoShark_human_0.json
results/20210121_SUPNexttoShark_1_0.json
results/20210121_SUPNexttoShark_shark_0.json
results/20200805_OneSharkSUPFollowsKids_1_0.json
results/20200805_OneSharkSUPFollowsKids_shark_0.json
results/20200805_OneSharkSUPSurfers_1_6.json
results/20200805_OneSharkSUPSurfers_human_5.json
results/20200805_OneSharkSUPSurfers_1_7.json
results/20200805_OneSharkSUPSurfers_1_11.json
results/20200805_OneSharkSUPSurfers_human_3.json
results/20200805_OneSharkSUPSurfers_1_0.json
results/20200805_OneSharkSUPSurfers_1_1.json
results/20200805_OneSharkSUPSurfers_human_2.json
results/20200805_OneSharkSUPSurfers_1_10.json
results/20200805_OneSharkSUPSur

In [6]:
def generate_results_boxes(frames_fldr, bboxes_l, output_folder='test', box_thickness=10, draw_text=True, 
                            fontScale = 4, color = (36,255,12), # neon green
                            font = cv2.FONT_HERSHEY_SIMPLEX, text_thickness = 10, show_img=None):
    for frame in bboxes_l:
#         print(frame)
        fn = f"{frames_fldr}/{frame['img']}"
        bboxes = frame['bbox']
        image = cv2.cvtColor(cv2.imread(fn), cv2.COLOR_BGR2RGB)
        
        # adding annotations 
        for box in bboxes:
            rects = np.array(box).astype(int)
            s = rects[:2]
            e = rects[:2] + rects[2:]
            image = cv2.rectangle(image, s, e, color, box_thickness)
            
        if draw_text:
            org = (50,150)
            image = cv2.putText(image, frames_fldr, org, font, 
                   fontScale, color, text_thickness, cv2.LINE_AA)
        
        out_fn = f"{output_folder}/{frame['img']}"
        if len(bboxes) >0 and not cv2.imwrite(out_fn, image):
            print(out_fn)
        if show

In [7]:
for k, v in bboxes.items():
    frames_fldr = f'shark_frames/{k}'
    print(frames_fldr)
    generate_results_boxes(frames_fldr, v)

shark_frames/2020731_JWSSeaLIonSUP
shark_frames/20210121_SharkNearTwoSurfers
shark_frames/20210121_SUPNexttoShark
shark_frames/20200805_OneSharkSUPFollowsKids
shark_frames/20200805_OneSharkSUPSurfers


In [8]:
ls test | wc -l

    4479


In [9]:


!ffmpeg -framerate 30 -pattern_type glob -i 'test/*.jpg' -c:v libx264 -pix_fmt yuv420p result.mp4


ffmpeg version 5.0.1 Copyright (c) 2000-2022 the FFmpeg developers
  built with Apple clang version 13.0.0 (clang-1300.0.29.30)
  configuration: --prefix=/usr/local/Cellar/ffmpeg/5.0.1 --enable-shared --enable-pthreads --enable-version3 --cc=clang --host-cflags= --host-ldflags= --enable-ffplay --enable-gnutls --enable-gpl --enable-libaom --enable-libbluray --enable-libdav1d --enable-libmp3lame --enable-libopus --enable-librav1e --enable-librist --enable-librubberband --enable-libsnappy --enable-libsrt --enable-libtesseract --enable-libtheora --enable-libvidstab --enable-libvmaf --enable-libvorbis --enable-libvpx --enable-libwebp --enable-libx264 --enable-libx265 --enable-libxml2 --enable-libxvid --enable-lzma --enable-libfontconfig --enable-libfreetype --enable-frei0r --enable-libass --enable-libopencore-amrnb --enable-libopencore-amrwb --enable-libopenjpeg --enable-libspeex --enable-libsoxr --enable-libzmq --enable-libzimg --disable-libjack --disable-indev=jack --enable-videotoolbox
 