In [23]:
from moviepy.video.io.VideoFileClip import VideoFileClip
import pandas as pd
import json
import copy

In [36]:
def split_video(video_path,out_path, start_timestamp, end_timestamp, out_name):
    # Load the video clip
    video_clip = VideoFileClip(video_path)

    # Convert the start and end timestamps to seconds
    start_time = convert_timestamp_to_seconds(start_timestamp)
    end_time = convert_timestamp_to_seconds(end_timestamp)

    # Extract the video segment between the start and end times
    clip_segment = video_clip.subclip(start_time, end_time)

    # Write the output video segment to a new file
    output_filename = out_path + out_name + '.mp4'
    clip_segment.write_videofile(output_filename)

def convert_timestamp_to_seconds(timestamp):
    # Split the timestamp string into minutes, seconds, and milliseconds
    minutes, seconds = timestamp.split(":")
    # seconds, milliseconds = seconds.split(".")

    # Convert the minutes, seconds, and milliseconds to integers
    minutes = int(minutes)
    seconds = float(seconds)
    # milliseconds = int(milliseconds + "0" * (3 - len(milliseconds)))

    # Calculate the total number of seconds and return the result
    # total_seconds = minutes * 60 + seconds + milliseconds / 1000
    total_seconds = minutes * 60 + seconds
    return total_seconds


In [None]:
# start = '00:10.0'
# end = '00:20.0'
#
# out_path = 'C:/Users/Snov/Documents/MBZUAI_Local/splitter_output/'
# in_path = 'C:/Users/Snov/Documents/MBZUAI_Local/sample.mp4'
#
# split_video(in_path, out_path, start, end)

In [None]:
# Dict template
dict_temp = {"id":"74225",
            "label":"spinning cube that quickly stops spinning",
            "template":"Spinning [something] that quickly stops spinning",
            "placeholders":["cube"]}

In [37]:
def get_template(label, nouns):
    template = copy.deepcopy(label)
    for noun in nouns:
        template = template.replace(noun, '[something]')
    return template

In [38]:
def get_attributes(narration_id, label, nouns, template):
    dict_template = {"id":narration_id,
                "label":label,
                "template":template,
                "placeholders":nouns}
    return dict_template

In [75]:
def generate_json(annotations_list, output_json_name, path = ''):
    json_str = json.dumps(annotations_list)
    indented_str = json_str.replace('{', '\n{')

    with open(path + output_json_name, "w") as f:
        f.write(indented_str)


In [40]:
def convert_video(EPIC_dataframe, video_name, path):
    filtered_df = EPIC_dataframe[EPIC_dataframe['video_id'] == video_name]
    in_path = path + 'input/' + video_name + '.mp4'
    out_path = path + 'output/'

    video_annotations = []

    for index, row in filtered_df.iterrows():
        start = row['start_timestamp']
        end = row['stop_timestamp']
        verb = row['verb']
        nouns = eval(row['all_nouns'])
        narration_id = row['narration_id']
        label = row['narration']
        placeholders = nouns

        # creating the template
        template = get_template(label, nouns)

        print("id: {},\nlabel: {},\ntemplate: {},\nplaceholders: {},\ntart: {},\nend {}\n".format(narration_id, label, template, placeholders, start, end))

        # split the video according to the timestamps
        # split_video(in_path, out_path, start, end, narration_id)

        #get the attributes same as Something-Something dataset
        video_annotations.append(get_attributes(narration_id, label, nouns, template))

    return video_annotations



In [20]:

df = pd.read_csv('EPIC_100_train.csv')
df.head(10)

Unnamed: 0,narration_id,participant_id,video_id,narration_timestamp,start_timestamp,stop_timestamp,start_frame,stop_frame,narration,verb,verb_class,noun,noun_class,all_nouns,all_noun_classes
0,P01_01_0,P01,P01_01,00:01.1,00:00.1,00:03.4,8,202,open door,open,3,door,3,['door'],[3]
1,P01_01_1,P01,P01_01,00:02.6,00:04.4,00:06.2,262,370,turn on light,turn-on,6,light,114,['light'],[114]
2,P01_01_10,P01,P01_01,00:23.3,00:25.0,00:26.2,1498,1572,open drawer,open,3,drawer,8,['drawer'],[8]
3,P01_01_100,P01,P01_01,07:57.9,07:59.8,08:00.9,28785,28852,take cup,take,0,cup,13,['cup'],[13]
4,P01_01_101,P01,P01_01,08:00.0,08:01.5,08:02.2,28888,28932,open cupboard,open,3,cupboard,3,['cupboard'],[3]
5,P01_01_102,P01,P01_01,08:01.2,08:02.1,08:03.0,28927,28980,put cup into cupboard,put-into,5,cup,13,"['cup', 'cupboard']","[13, 3]"
6,P01_01_103,P01,P01_01,08:03.9,08:05.2,08:07.2,29113,29232,take container and lid,take,0,container,21,"['container', 'lid']","[21, 6]"
7,P01_01_104,P01,P01_01,08:07.6,08:08.4,08:09.1,29302,29347,put container on top of counter,put-on,1,container,21,"['container', 'top:counter']","[21, 42]"
8,P01_01_105,P01,P01_01,08:09.9,08:12.0,08:12.7,29520,29564,open container,open,3,container,21,['container'],[21]
9,P01_01_106,P01,P01_01,08:12.9,08:13.2,08:14.4,29593,29662,put container inside container,put-inside,5,container,21,"['container', 'container']","[21, 21]"


In [41]:
annotations_list = []
epic_df = pd.read_csv('EPIC_100_train.csv')
video_names = ['P01_04']
path = 'C:/Users/Snov/Documents/MBZUAI_Local/splitter'

for video in video_names:
    annotations_list.extend(convert_video(epic_df, video,path))

print(annotations_list)

id: P01_04_0,
label: take cup,
template: take [something],
placeholders: ['cup'],
tart: 00:00.1,
end 00:03.0

id: P01_04_1,
label: put down cup,
template: put down [something],
placeholders: ['cup'],
tart: 00:02.9,
end 00:05.1

id: P01_04_10,
label: fold tablecloth,
template: fold [something],
placeholders: ['tablecloth'],
tart: 00:30.7,
end 00:35.0

id: P01_04_11,
label: pull down tablecloth,
template: pull down [something],
placeholders: ['tablecloth'],
tart: 00:37.3,
end 00:38.8

id: P01_04_12,
label: take washing up liquid,
template: take washing up liquid,
placeholders: ['liquid:washing:up'],
tart: 00:41.4,
end 00:42.1

id: P01_04_13,
label: pour washing up liquid into cup,
template: pour washing up liquid into [something],
placeholders: ['liquid:washing:up', 'cup'],
tart: 00:42.2,
end 00:44.5

id: P01_04_14,
label: open tap,
template: open [something],
placeholders: ['tap'],
tart: 00:45.3,
end 00:46.8

id: P01_04_15,
label: wash cup,
template: wash [something],
placeholders: ['cu

In [76]:
generate_json(annotations_list, 'Train.json')

In [None]:
# out_path = 'C:/Users/Snov/Documents/MBZUAI_Local/splitter_output/'
# in_path = 'C:/Users/Snov/Documents/MBZUAI_Local/' + video_name + '.mp4'
#
# for index, row in filtered_df.iterrows():
#     start = row['start_timestamp']
#     end = row['stop_timestamp']
#     verb = row['verb']
#     noun = row['noun']
#
#     out_name = verb + '_' + noun
#     print(start, end)
#     split_video(in_path, out_path, start, end, out_name)