 This file extracts the frames that have an annotated bounding box from each video and creates a new JSON
 label file for each extracted frame.

### inputs files:
1. data_interim_annotate_video.xlsx: provides all the information that goes into the label files except the bounding boxes
2. video.json: exported from Supervisely; contains the bounding box information for each frame
3. original video file
4. species_list.xlsx: provided by Saving Nature; lists the class, order, family, and genus of each species

### outputs:
- Folder 1 contains:
    label.json: contains all relevant information from data_interim_annotate_video.xlsx plus the Supervisely JSON
- Folder 2 contains:
    frame.jpg: an individual frame from the video that has a bounding box annotation


## RUN ONLY ONCE

In [1]:
# uncomment below to install objectpath module in the anaconda environment
#! python3 -m pip install objectpath

In [32]:
# import libraries
import cv2
import os
import json
import pandas as pd
import re
import objectpath
import numpy as np
import difflib

In [33]:
########### define universe variable ###########
# Project root directory used to build the paths below.
rootdir = "/Users/xinwen/Desktop/Capstone"
video_summary = "data_interim_annotate_video.xlsx"

# a list contains species, scientific name and common name
df = pd.read_excel(rootdir+"/species_list.xlsx",usecols="C:F,H,J,K")
# drop_duplicates() returns a new frame instead of modifying df in place,
# so the result must be assigned back for the de-duplication to take effect.
df = df.drop_duplicates()
# Candidate names for the fuzzy species lookup. Missing cells are replaced by
# " " (presumably so that difflib only ever compares strings).
sp_list = [
    name if pd.notnull(name) else " "
    for column in ('Species', 'Scientific Name', 'Common Name')
    for name in df[column]
]


In [72]:
# create the new label and new image folders
# exist_ok=True makes this cell safe to re-run and guarantees that both
# folders exist even if only one of them was created by an earlier run.
for folder in ('new_label', 'new_image'):
    os.makedirs(f'{rootdir}/{folder}', exist_ok=True)

In [73]:
def metadata_by_row(summary,video,data):
    ''' Copy one video's metadata row from the summary sheet into data.

        input: summary: video summary excel sheet (file name inside rootdir)
               video: video file name, matched against the file_name_new column
               data: dictionary holds info for json creation (modified in place)
        output: data with all necessary metadata from summary excel
        raises: IndexError if no row of the sheet has file_name_new == video
    '''
    sheet = pd.read_excel(rootdir+'/'+summary,usecols="A:E,H:Q,T:V")
    matches = sheet.loc[sheet.file_name_new == video]
    if matches.empty:
        # Same exception family as the positional lookup below would raise,
        # but with a message that names the missing video.
        raise IndexError(f"no row with file_name_new == {video!r} in {summary}")

    record = matches.iloc[0]
    for column in matches.columns:
        value = record[column]
        # numpy scalars are not JSON serializable: store them as plain ints,
        # and store missing numeric cells (NaN) as an empty string.
        if isinstance(value, (np.floating, np.integer)):
            data[f'{column}'] = "" if np.isnan(value) else int(value)
        else:
            data[f'{column}'] = value
    return data


def frame_numbers(path):
    '''Collect the frame indices stored in a supervisely annotation file.

       input: path: supervisely json file
       output: a list of frame number
    '''
    with open(path) as handle:
        annotation = json.load(handle)
    tree = objectpath.Tree(annotation)
    # '$.frames.index' selects the "index" field of the entries under "frames"
    return list(tree.execute('$.frames.index'))

    
def _species_title(objects, object_key):
    '''Return the classTitle of the first object holding object_key as one of its values.'''
    for candidate in objects:
        if object_key in candidate.values():
            return candidate['classTitle']
    raise IndexError(f"no object matches objectKey {object_key!r}")


def _taxonomy(sp_name):
    '''Return the first four species_list columns of the closest match to sp_name.

       The columns are presumably Class, Order, Family and Genus (they are the
       keys used for the blank fallback) -- confirm against species_list.xlsx.
    '''
    close = difflib.get_close_matches(sp_name,sp_list)
    if not close:
        # hard coding when cannot find the species
        return {'Class': "", 'Order': "", 'Family': "", 'Genus': ""}
    best_row = df[df.isin([close[0]]).any(axis=1)].iloc[0,:4]
    return dict(zip(best_row.index,best_row))


def _describe_figure(figure, objects):
    '''Build the label entry (species, taxonomy, bounding box) for one figure.'''
    sp_name = _species_title(objects, figure['objectKey'])
    # split a CamelCase class title into separate words
    sp_name = re.sub(r"(\w)([A-Z])", r"\1 \2", sp_name)
    obj = {'species':sp_name}

    # insert the class, order,family and genus from species_list.xlsx
    obj.update(_taxonomy(sp_name))

    # insert bounding box infos; the geometry entries are flattened into obj
    for key,value in figure.items():
        if key == "geometry":
            obj.update(value)
        else:
            obj[key] = value
    return obj


def create_new_json(summary,video):
    '''Write one label json per annotated frame of a video.

       input: summary: video summary excel sheet created by JY
              video: video file name
       output: create new json file for each annotated frame, saved as
               new_label/<video without its last 4 characters>_<frame index>.json
    '''
    # read in the supervisely json file
    with open(f"{rootdir}/labels/{video}.json") as label_file:
        data = json.load(label_file)
    label_info = objectpath.Tree(data)

    objects = list(label_info.execute('$.objects')) # list of species
    frames = list(label_info.execute('$.frames'))  # list of frames

    # The summary row depends only on the video, so read the sheet once
    # instead of once per frame (and not at all when there are no frames).
    video_meta = metadata_by_row(summary,video,{}) if frames else {}

    # create a new json for every frame
    for frame in frames:
        each_obj = [_describe_figure(figure, objects) for figure in frame['figures']]

        dataNew = {
            'description': data['description'],
            'tags': data['tags'],
            'size': data['size'],
            # always present, so a frame without figures gets an empty list
            'objects': each_obj,
            'num_animal': len(each_obj),
        }
        dataNew.update(video_meta)

        with open(rootdir+f"/new_label/{video[:-4]}_{frame['index']}.json",'w') as outfile:
            json.dump(dataNew,outfile,indent = 4)


In [78]:
def cropFrame(video,frame_list,output_dir=None):
    """ Save the requested frames of a video as jpg images.

        Input: video: video file name
               frame_list: frame_num from frame_numbers(path)
               output_dir: optional folder for the images. When omitted, each
                           image is written as '<video>_<frame>.jpg', i.e. next
                           to the video. When given, it is written as
                           '<output_dir>/<video name without extension>_<frame>.jpg'.
        Output: crop frames according to frame_list for each video
    """
    if output_dir is None:
        prefix = video
    else:
        stem = os.path.splitext(os.path.basename(video))[0]
        prefix = os.path.join(output_dir, stem)

    vidcap = cv2.VideoCapture(video)
    try:
        for i in frame_list:
            # seek to frame i (CAP_PROP_POS_FRAMES is property id 1)
            vidcap.set(cv2.CAP_PROP_POS_FRAMES,i)
            hasFrames,image = vidcap.read()
            if not hasFrames:
                # report the miss instead of skipping silently, otherwise a
                # label can end up without its image and nobody notices
                print(f"could not read frame {i} from {video}")
                continue
            print("sucess!!!!")
            cv2.imwrite(f'{prefix}_{i}.jpg', image)
    finally:
        # always give the video file handle back, even if a frame fails
        vidcap.release()

            
def generate_image_from_video():
    """ Run the whole extraction for every .mp4 file name found in rootdir.

        output: for each video in the folder
                    1.get the corresponding frame number and bounding box
                    2.extract the frames from the video
                    3.get all infos of the video from video_summary xlsx
                    4.create new json label for each extracted frames
    """
    # NOTE(review): the names are listed from rootdir itself while the videos
    # are opened from rootdir/videos -- confirm both hold the same files.
    mp4_names = (name for name in os.listdir(rootdir) if name.endswith(".mp4"))
    for mp4_name in mp4_names:
        label_path = f"{rootdir}/labels/{mp4_name}.json"
        video_path = f"{rootdir}/videos/{mp4_name}"

        annotated_frames = frame_numbers(label_path)  # 1
        cropFrame(video_path, annotated_frames)  # 2
        create_new_json(video_summary, mp4_name)  # 4
            


In [79]:
# crop frames
# Runs the full pipeline: for every .mp4 name listed in rootdir, save the
# annotated frames as images and write one json label per annotated frame.
generate_image_from_video() 

0050_brazilAMLD_CT01_Ghost_01_08110020.mp4
0000_brazilAMLD_SSAMLD2_2_Guerlinguetus_01_2019_10_14_08_52_18.mp4
sucess!!!!
sucess!!!!
