In [None]:
import os
import json
import pandas as pd
from glob import glob
# from tqdm import tqdm
# tqdm for notebooks
from tqdm import tqdm_notebook as tqdm
import random

# create folder for each dataset first    

In [None]:
def save_json(content, save_path):
    """Serialize ``content`` to JSON and write it to ``save_path``.

    The parent directory of ``save_path`` is created when it does not exist.
    A bare file name (no directory part) is written to the current working
    directory.

    Args:
        content: Object accepted by ``json.dumps``.
        save_path: Destination file path.
    """
    parent_dir = os.path.dirname(save_path)
    # dirname is '' for a bare file name, and os.makedirs('') raises.
    if parent_dir:
        # exist_ok avoids failing if the folder appears between check and creation
        os.makedirs(parent_dir, exist_ok=True)
    with open(save_path, 'w') as f:
        f.write(json.dumps(content))
def load_jsonl(filename):
    """Read a JSON Lines file and return one parsed object per non-empty line.

    Args:
        filename: Path of the ``.jsonl`` file.

    Returns:
        list with the decoded JSON value of every line, in file order.
        Lines containing only whitespace (e.g. a trailing blank line) are
        skipped instead of raising a decode error.
    """
    with open(filename, "r") as f:
        # json.loads tolerates the trailing newline, so no explicit strip is needed
        return [json.loads(line) for line in f if line.strip()]
def load_json(filename):
    """Open ``filename`` for reading and return its decoded JSON content."""
    with open(filename, "r") as handle:
        parsed = json.load(handle)
    return parsed

# QVHighlights (QVH)

In [None]:
# Cluster location of the annotations, kept for reference:
# ann_root = '/pfss/mlde/workspaces/mlde_wsp_Rohrbach/data/annotations/QVHighlights'
ann_root = "../../data/annotations"
# One release file per split: highlight_<split>_release.jsonl
train_path, val_path, test_path = (
    f"{ann_root}/highlight_{split}_release.jsonl"
    for split in ("train", "val", "test")
)

In [None]:
# Parse each split file into a list of annotation dicts.
train, val, test = [
    load_jsonl(split_path) for split_path in (train_path, val_path, test_path)
]

In [None]:
def process_QVH(data, relative_time=False, save_float=False, is_test=False):
    """Convert QVHighlights annotations into the flat sample format that gets saved.

    Args:
        data: Iterable of dicts with the keys ``vid``, ``qid``, ``query`` and
            ``duration``; ``relevant_windows`` (a list of ``[start, end]``
            pairs) is additionally read unless ``is_test`` is set.
        relative_time: Express window boundaries relative to ``duration``
            instead of passing them through unchanged.
        save_float: Only used together with ``relative_time``: keep fractions
            rounded to two decimals instead of integer percentages (0-100).
        is_test: Ignore ``relevant_windows`` and emit the dummy window
            ``[[0, 150]]``.

    Returns:
        list of dicts with the keys ``video``, ``qid``, ``query``,
        ``duration`` and ``relevant_windows``.
    """

    def _to_relative(value, duration):
        fraction = value / duration
        if save_float:
            return round(fraction, 2)
        # Scale first, then round: int(round(x, 2) * 100) truncates, e.g.
        # 0.29 * 100 == 28.999999999999996 would become 28 instead of 29.
        return int(round(fraction * 100))

    out = []
    for d in data:
        duration = d['duration']

        if is_test:
            windows = [[0, 150]]  # dummy value
        elif relative_time:
            windows = [
                [_to_relative(window[0], duration), _to_relative(window[1], duration)]
                for window in d['relevant_windows']
            ]
        else:
            # absolute times are passed through untouched
            windows = d['relevant_windows']

        out.append({
            'video': d['vid'],
            'qid': 'QVHighlight_' + str(d['qid']),
            'query': d['query'],
            'duration': duration,
            'relevant_windows': windows,
        })

    return out

In [None]:
# Output format switches; they also select the file names used when saving.
save_float = False
relative_time = False

# Train and val carry real windows; the test split only gets the dummy window.
qvh_options = dict(relative_time=relative_time, save_float=save_float)
new_train = process_QVH(train, **qvh_options)
new_val = process_QVH(val, **qvh_options)
new_test = process_QVH(test, is_test=True, **qvh_options)

In [None]:
# save data
# The file-name suffix encodes the chosen format: "_relative" and/or "_float".
suffix = ('_relative' if relative_time else '') + ('_float' if save_float else '')
save_json(new_train, ann_root + '/lavis/train' + suffix + '.json')
save_json(new_val, ann_root + '/lavis/val' + suffix + '.json')
# the test file is marked as dummy because its windows are placeholders
save_json(new_test, ann_root + '/lavis/test' + suffix + '_dummy.json')

# Charades

In [None]:
# Read the original Charades metadata CSVs (train and test) from the Charades_original folder
charades_csv_dir = '/pfss/mlde/workspaces/mlde_wsp_Rohrbach/data/annotations/Charades/Charades_original'
train_df, test_df = (
    pd.read_csv(f'{charades_csv_dir}/Charades_v1_{split}.csv', delimiter=',')
    for split in ('train', 'test')
)

In [None]:
# All distinct video ids of the original train split
shuffled_ids = train_df["id"].unique()
print(len(shuffled_ids))

# Fixed seed so the same videos are held out on every run
random.seed(42)
random.shuffle(shuffled_ids)

# The first 800 shuffled ids become the validation set, the rest stays in train
n_val = 800
val_ids, train_ids = shuffled_ids[:n_val], shuffled_ids[n_val:]

len(val_ids), len(train_ids)

In [None]:
# Charades-STA annotation folder and its two text files
ann_root = '/pfss/mlde/workspaces/mlde_wsp_Rohrbach/data/annotations/Charades/Charades_STA'
train_path, test_path = (f'{ann_root}/{split}.txt' for split in ('train', 'test'))

In [None]:
def process_charades_STA(data_path, df, video_ids=None, relative_time=False, save_float=False):
    """Parse a Charades-STA annotation file and attach metadata from ``df``.

    Every non-empty line of ``data_path`` has the form
    ``"<id> <start> <end>##<query>"``.

    Args:
        data_path: Path of the Charades-STA text file.
        df: DataFrame with an ``id`` column. Columns are otherwise read by
            position: index 7 is taken as the ';'-separated object list and
            index 10 as the video duration.
        video_ids: Optional collection of ids to keep; ``None`` keeps all.
        relative_time: Express the window relative to the video duration.
        save_float: With ``relative_time``, keep fractions rounded to two
            decimals instead of integer percentages (0-100). Without it, keep
            float seconds instead of seconds rounded to the nearest integer.

    Returns:
        list of dicts with the keys ``id``, ``query``, ``window`` (a list
        holding one ``[start, end]`` pair), ``duration`` and ``objects``.

    Raises:
        KeyError: If an annotated id has no row in ``df``.
    """
    with open(data_path) as f:
        lines = [line.strip() for line in f]

    # A set gives O(1) membership tests instead of scanning the id collection per line.
    keep_ids = None if video_ids is None else set(video_ids)

    # Index the metadata once (first row per id) instead of filtering df for every line.
    meta_by_id = {}
    for row_id, row_values in zip(df["id"], df.values):
        meta_by_id.setdefault(row_id, row_values)

    out = []
    for line in lines:
        if not line:
            # tolerate blank lines, e.g. at the end of the file
            continue

        # format "id start end##query"; split once so a '##' inside the query survives
        head, query = line.split('##', 1)
        fields = head.split(' ')  # -> [id, start, end]
        video_id = fields[0]

        if keep_ids is not None and video_id not in keep_ids:
            continue

        if video_id not in meta_by_id:
            raise KeyError('no metadata row for id: {}'.format(video_id))
        values = meta_by_id[video_id]
        duration = values[10]

        start = float(fields[1])
        end = float(fields[2])
        if end > duration:
            # clamp annotations that run past the end of the video
            end = duration

        if relative_time:
            start = start / duration
            end = end / duration

            if save_float:
                window = [round(start, 2), round(end, 2)]
                assert window[0] >= 0 and window[1] <= 1
            else:
                # Round to the nearest percent. Plain int(x * 100) truncates,
                # e.g. 0.29 * 100 == 28.999999999999996 would become 28.
                window = [int(round(start * 100)), int(round(end * 100))]
                assert window[0] >= 0 and window[1] <= 100
        else:
            if save_float:
                window = [float(start), float(end)]
            else:
                # round to the nearest full second
                window = [round(float(start)), round(float(end))]

        objects = values[7]
        if isinstance(objects, str):
            objects = objects.split(';')
        else:
            # a missing cell is not a string (pandas yields NaN), so there is nothing to split
            print('no objects: ', objects, ' for id: ', video_id)
            objects = []

        out.append(
            {
                'id': video_id,
                'query': query,
                'window': [window],
                'duration': duration,
                'objects': objects
            }
        )

    return out

In [None]:
# Output format switches; they also select the file names used when saving.
save_float = True
relative_time = False

# Custom split: val is carved out of the original train annotations via val_ids.
# NOTE(review): relative_time is only forwarded for train; val/test always keep
# absolute times - confirm this is intended.
train = process_charades_STA(train_path, train_df, train_ids, relative_time=relative_time, save_float=save_float)
val = process_charades_STA(train_path, train_df, val_ids, save_float=save_float)
test = process_charades_STA(test_path, test_df, save_float=save_float)

# Rename the parsed fields to the keys used in the saved annotation files.
new_train, new_val, new_test = (
    [
        {
            'video': qa['id'],
            'qid': 'Charades-STA_' + str(qa['id']),
            'query': qa['query'],
            'duration': qa['duration'],
            'relevant_windows': qa['window'],
            'objects': qa['objects'],
        }
        for qa in split
    ]
    for split in (train, val, test)
)

len(new_train), len(new_val), len(new_test)

In [None]:
# save data
# The file-name suffix encodes the chosen format: "_relative" and/or "_float".
suffix = ('_relative' if relative_time else '') + ('_float' if save_float else '')
# train/val of the custom split carry a "new_" prefix; test keeps its plain name
save_json(new_train, ann_root + '/lavis/new_train' + suffix + '.json')
save_json(new_val, ann_root + '/lavis/new_val' + suffix + '.json')
save_json(new_test, ann_root + '/lavis/test' + suffix + '.json')

In [None]:
# For processing without the custom data split, i.e. having only the original train and test split

save_float = True
relative_time = False

# video_ids=None keeps every annotation of the original train file
train = process_charades_STA(train_path, train_df, None, relative_time=relative_time, save_float=save_float)
test = process_charades_STA(test_path, test_df, save_float=save_float)

# There is no validation split in this variant; the list stays empty.
new_val = []
# Rename the parsed fields to the keys used in the saved annotation files.
new_train, new_test = (
    [
        {
            'video': qa['id'],
            'qid': 'Charades-STA_' + str(qa['id']),
            'query': qa['query'],
            'duration': qa['duration'],
            'relevant_windows': qa['window'],
            'objects': qa['objects'],
        }
        for qa in split
    ]
    for split in (train, test)
)

len(new_train), len(new_test)

In [None]:
# save data
# The file-name suffix encodes the chosen format: "_relative" and/or "_float".
suffix = ('_relative' if relative_time else '') + ('_float' if save_float else '')
save_json(new_train, ann_root + '/lavis/train' + suffix + '.json')
save_json(new_test, ann_root + '/lavis/test' + suffix + '.json')

# TACoS

In [None]:
import os
from glob import glob
# import VideoFileClip
from moviepy.editor import VideoFileClip

In [None]:
# Raw data root; "videos" and "TACoS" are the avi and mp4 folders used below.
base_path = "/pfss/mlde/workspaces/mlde_wsp_Rohrbach/data/raw/"
directory_avi, directory_mp4 = (
    os.path.join(base_path, folder) for folder in ("videos", "TACoS")
)

In [None]:
# convert avi to mp4 

def convert_avi_to_mp4(filename, avi_path, target_path):
    """Convert one .avi file to .mp4 by invoking ffmpeg.

    Args:
        filename: Name of the .avi file inside ``avi_path``.
        avi_path: Directory containing the source file.
        target_path: Directory receiving the converted file; the output name
            is ``filename`` with '.avi' replaced by '.mp4'.

    ffmpeg's exit status is not checked, so a failed conversion does not raise.

    Raises:
        FileNotFoundError: If the ``ffmpeg`` executable cannot be found.
    """
    import subprocess  # local import so the function does not depend on another cell

    source = os.path.join(avi_path, filename)
    destination = os.path.join(target_path, filename.replace('.avi', '.mp4'))
    # An argument list (no shell) keeps paths with spaces or shell
    # metacharacters from being split or interpreted.
    subprocess.run(['ffmpeg', '-i', source, destination])

# File names (without directory) of every .avi in the source folder
avi_files = [
    os.path.basename(avi_full_path)
    for avi_full_path in glob(os.path.join(directory_avi, "*.avi"))
]

# Convert each file unless its .mp4 counterpart is already present
for avi_file in avi_files:
    mp4_counterpart = os.path.join(directory_mp4, avi_file.replace('.avi', '.mp4'))
    if os.path.exists(mp4_counterpart):
        continue
    convert_avi_to_mp4(avi_file, directory_avi, directory_mp4)

In [None]:
# downscale resolution to 224x224
# The downscaled copies go into a "res_224" sub-folder of the mp4 directory.
target_path = os.path.join(directory_mp4, "res_224")

def downscale_resolution(filename, source_path, target_path):
    """Write a 224x224 rescaled copy of a video by invoking ffmpeg.

    Args:
        filename: Name of the video inside ``source_path``; the output keeps
            the same name.
        source_path: Directory containing the source video.
        target_path: Directory receiving the rescaled video.

    ffmpeg's exit status is not checked, so a failed run does not raise.

    Raises:
        FileNotFoundError: If the ``ffmpeg`` executable cannot be found.
    """
    import subprocess  # local import so the function does not depend on another cell

    source = os.path.join(source_path, filename)
    destination = os.path.join(target_path, filename)
    # An argument list (no shell) keeps paths with spaces or shell
    # metacharacters from being split or interpreted.
    subprocess.run(['ffmpeg', '-i', source, '-vf', 'scale=224:224', destination])

# File names (without directory) of every .mp4 in the mp4 folder
mp4_files = [
    os.path.basename(mp4_full_path)
    for mp4_full_path in glob(os.path.join(directory_mp4, "*.mp4"))
]

# Downscale each file unless a 224x224 copy is already present
for mp4_file in mp4_files:
    if os.path.exists(os.path.join(target_path, mp4_file)):
        continue
    downscale_resolution(mp4_file, directory_mp4, target_path)

In [None]:
# TACoS annotation folder with one .jsonl file per split
ann_root = "/pfss/mlde/workspaces/mlde_wsp_Rohrbach/data/annotations/TACoS"
train_path, val_path, test_path = (
    os.path.join(ann_root, split + ".jsonl") for split in ("train", "val", "test")
)


# folder holding the converted TACoS videos
video_path = "/pfss/mlde/workspaces/mlde_wsp_Rohrbach/data/raw/TACoS"

In [None]:
# Parse the three TACoS split files
train, val, test = [
    load_jsonl(split_path) for split_path in (train_path, val_path, test_path)
]

# number of annotations per split
len(train), len(val), len(test)

In [None]:
# Output format switches; they also select the file names used when saving.
save_float = False
relative_time = True

# Train split: the single window is converted according to both switches.
new_train = []
for qa in train:
    assert len(qa['relevant_windows']) == 1
    start, end = qa['relevant_windows'][0]

    if relative_time:
        # convert to relative time (uses the unrounded duration)
        start = start / qa['duration']
        end = end / qa['duration']

        if save_float:
            window = [round(start, 2), round(end, 2)]
            assert window[0] >= 0 and window[1] <= 1
        else:
            # Scale first, then round: int(round(x, 2) * 100) truncates, e.g.
            # 0.29 * 100 == 28.999999999999996 would become 28 instead of 29.
            window = [int(round(start * 100)), int(round(end * 100))]
            assert window[0] >= 0 and window[1] <= 100
    elif save_float:
        window = [float(start), float(end)]
    else:
        # round to the nearest full second
        window = [round(float(start)), round(float(end))]

    new_train.append({
        'video': qa['vid'],
        'qid': 'TACoS_' + str(qa['qid']),
        'query': qa['query'],
        # round duration to 2 decimal places
        'duration': round(qa['duration'], 2),
        'relevant_windows': [window],
    })

# Val and test splits: windows always stay in absolute time; only save_float
# decides between 2-decimal floats and whole seconds.
new_val = []
new_test = []
for split, converted in ((val, new_val), (test, new_test)):
    for qa in split:
        assert len(qa['relevant_windows']) == 1
        boundaries = qa['relevant_windows'][0]
        if save_float:
            window = [float(round(x, 2)) for x in boundaries]
        else:
            window = [int(round(x)) for x in boundaries]

        converted.append({
            'video': qa['vid'],
            'qid': 'TACoS_' + str(qa['qid']),
            'query': qa['query'],
            # round duration to 2 decimal places
            'duration': round(qa['duration'], 2),
            'relevant_windows': [window],
        })

len(new_train), len(new_val), len(new_test)

In [None]:
# save data
# The file-name suffix encodes the chosen format: "_relative" and/or "_float".
suffix = ('_relative' if relative_time else '') + ('_float' if save_float else '')
save_json(new_train, ann_root + '/lavis/train' + suffix + '.json')
save_json(new_val, ann_root + '/lavis/val' + suffix + '.json')
save_json(new_test, ann_root + '/lavis/test' + suffix + '.json')

# ActivityNet Captions

In [None]:
# ActivityNet Captions annotations; val_1.json serves as val and val_2.json as test
ann_root = "/pfss/mlde/workspaces/mlde_wsp_Rohrbach/data/annotations/ActivityNet"
train_path, val_path, test_path = (
    os.path.join(ann_root, file_name)
    for file_name in ("train.json", "val_1.json", "val_2.json")
)


In [None]:
# Each file is loaded as a single JSON document.
train, val, test = [
    load_json(split_path) for split_path in (train_path, val_path, test_path)
]

In [None]:
def process_activitynet(data, relative_time=False, save_float=False):
    """Flatten ActivityNet Captions annotations into one sample per sentence.

    Args:
        data: Mapping of video id to a dict with the keys ``duration``,
            ``sentences`` and ``timestamps`` (``[start, end]`` pairs, one per
            sentence).
        relative_time: Express windows relative to the video duration.
        save_float: With ``relative_time``, keep fractions rounded to two
            decimals instead of integer percentages (0-100). Without it, keep
            float values instead of values rounded to the nearest integer.

    Returns:
        list of dicts with the keys ``video``, ``qid``, ``query``,
        ``duration`` and ``relevant_windows`` (a list with one window).

    Raises:
        AssertionError: With ``relative_time``, if a converted window starts
            below 0 or ends beyond the full duration.
    """
    out = []

    for video_id, sample in data.items():
        duration = sample['duration']
        sentences = sample['sentences']

        for j, (start, end) in enumerate(sample['timestamps']):
            if relative_time:
                # convert to relative time
                start = start / duration
                end = end / duration

                if save_float:
                    window = [round(start, 2), round(end, 2)]
                    assert window[0] >= 0 and window[1] <= 1
                else:
                    # Scale first, then round: int(round(x, 2) * 100) truncates,
                    # e.g. 0.29 * 100 == 28.999999999999996 would become 28.
                    window = [int(round(start * 100)), int(round(end * 100))]
                    assert window[0] >= 0 and window[1] <= 100
            elif save_float:
                window = [float(start), float(end)]
            else:
                # round to the nearest integer
                window = [round(float(start)), round(float(end))]

            out.append({
                'video': video_id,
                # the sentence index keeps qids unique within a video
                'qid': f'ActivityNet_{video_id}_{j}',
                'query': sentences[j],
                'duration': duration,
                'relevant_windows': [window]
            })

    return out

In [None]:
# Output format switches; they also select the file names used when saving.
save_float = False
relative_time = False

# NOTE(review): relative_time is only forwarded for train; val/test always keep
# absolute times - confirm this is intended.
new_train = process_activitynet(train, relative_time=relative_time, save_float=save_float)
new_val, new_test = (
    process_activitynet(split, save_float=save_float) for split in (val, test)
)

len(new_train), len(new_val), len(new_test)

In [None]:
# save data
# The file-name suffix encodes the chosen format: "_relative" and/or "_float".
suffix = ('_relative' if relative_time else '') + ('_float' if save_float else '')
save_json(new_train, ann_root + '/lavis/train' + suffix + '.json')
save_json(new_val, ann_root + '/lavis/val' + suffix + '.json')
save_json(new_test, ann_root + '/lavis/test' + suffix + '.json')

# ActivityNet TAL (temporal action localization)

In [None]:
# Single annotation file; its "database" entry is what gets processed below
ann_root = "/pfss/mlde/workspaces/mlde_wsp_Rohrbach/data/annotations/ActivityNet_TAL"
data = load_json(f'{ann_root}/anet_tal.json')

In [None]:
# Query templates: every medium combined with every phrase, ordered medium by
# medium. The remaining '{}' placeholder is filled with the action label later.
templates_all = [
    'a {} of {}.'.format(medium, phrase)
    for medium in ('photo', 'video', 'example', 'demonstration')
    for phrase in (
        '{}',
        'a person {}',
        'a person using {}',
        'a person doing {}',
        'a person during {}',
        'a person performing {}',
        'a person practicing {}',
    )
]

# Same phrases as above but without the "photo" medium; ordered medium by
# medium. The remaining '{}' placeholder is filled with the action label later.
templates_video = [
    'a {} of {}.'.format(medium, phrase)
    for medium in ('video', 'example', 'demonstration')
    for phrase in (
        '{}',
        'a person {}',
        'a person using {}',
        'a person doing {}',
        'a person during {}',
        'a person performing {}',
        'a person practicing {}',
    )
]

In [None]:
def process_activitynet_tal(data, relative_time=False, save_float=False, templates=None):
    """Build one query sample per annotated video from ActivityNet TAL data.

    All windows of a video are collected into a single sample whose query is
    a randomly chosen template filled with the lower-cased action label.

    Args:
        data: Mapping of video id (without the "v_" prefix) to a dict with the
            keys ``duration``, ``subset`` and ``annotations``; every
            annotation provides ``segment`` (``[start, end]``) and ``label``.
        relative_time: Express windows relative to the video duration.
        save_float: With ``relative_time``, keep fractions rounded to two
            decimals instead of integer percentages (0-100). Without it, keep
            float values instead of values rounded to the nearest integer.
        templates: Optional sequence of format strings with one ``{}``
            placeholder; defaults to the module-level ``templates_video``.

    Returns:
        Tuple ``(train, val, test)`` of sample lists, filled according to the
        ``subset`` values 'training', 'validation' and 'testing'. Videos with
        any other subset value or without annotations are left out.

    Raises:
        AssertionError: With ``relative_time``, if a converted window starts
            below 0 or ends beyond the full duration.
    """
    if templates is None:
        templates = templates_video

    samples_by_split = {'training': [], 'validation': [], 'testing': []}

    for video_id, sample in data.items():
        video_id = "v_" + video_id
        duration = sample['duration']
        annotations = sample['annotations']

        if not annotations:
            # Without annotations there is no label to build a query from.
            # Previously the label of the preceding video was silently reused
            # (or a NameError was raised for the very first video).
            continue

        windows = []
        for ann in annotations:
            start = ann['segment'][0]
            end = ann['segment'][1]

            if relative_time:
                # convert to relative time
                start = start / duration
                end = end / duration

                if save_float:
                    window = [round(start, 2), round(end, 2)]
                    assert window[0] >= 0 and window[1] <= 1
                else:
                    # Scale first, then round: int(round(x, 2) * 100) truncates,
                    # e.g. 0.29 * 100 == 28.999999999999996 would become 28.
                    window = [int(round(start * 100)), int(round(end * 100))]
                    assert window[0] >= 0 and window[1] <= 100
            elif save_float:
                window = [float(start), float(end)]
            else:
                # round to the nearest integer
                window = [round(float(start)), round(float(end))]

            windows.append(window)

        # The label of the last annotation is used; the original comment
        # states that all annotations of a video share the same label.
        label = annotations[-1]['label']

        # get one example of the template and insert the label in lower case
        query = random.choice(templates).format(label.lower())

        new_sample = {
            'video': video_id,
            'qid': f'ActivityNet_{video_id}_0',
            'query': query,
            'duration': duration,
            'relevant_windows': windows
        }

        bucket = samples_by_split.get(sample['subset'])
        if bucket is not None:
            bucket.append(new_sample)

    return samples_by_split['training'], samples_by_split['validation'], samples_by_split['testing']

In [None]:
# Output format switches; they also select the file names used when saving.
save_float = False
relative_time = False

# The function returns the three splits in the order train, val, test.
tal_splits = process_activitynet_tal(data["database"], relative_time, save_float)
train, val, test = tal_splits

tuple(len(split) for split in tal_splits)

In [None]:
# save data
# The file-name suffix encodes the chosen format: "_relative" and/or "_float".
suffix = ('_relative' if relative_time else '') + ('_float' if save_float else '')
save_json(train, ann_root + '/lavis/train' + suffix + '.json')
save_json(val, ann_root + '/lavis/val' + suffix + '.json')
save_json(test, ann_root + '/lavis/test' + suffix + '.json')