In [1]:
import csv
import json
import statistics

import numpy as np


In [2]:
# Load the EPIC-100 per-video metadata. Each row becomes a dict of strings
# keyed by the CSV header. The context manager closes the file handle as
# soon as the rows have been materialised.
with open("EPIC_100_video_info.csv", newline="") as video_info_file:
    video_info = list(csv.DictReader(video_info_file))

# Keep only the videos strictly longer than 15 minutes (900 seconds).
videos_longer_than_15_minutes = [v for v in video_info if float(v["duration"]) > 900]

# Parse the durations once; they are reused for the mean and the median below.
long_video_durations = [float(v["duration"]) for v in videos_longer_than_15_minutes]

## Print count, min, max, mean and median of the long-video durations.
## Min/Max print the whole metadata row of the shortest/longest video.
print(f"Total number of videos: {len(video_info)}")
print(f"Number of videos longer than 15 minutes: {len(videos_longer_than_15_minutes)}")
print(f"Min: {min(videos_longer_than_15_minutes, key=lambda x: float(x['duration']))}")
print(f"Max: {max(videos_longer_than_15_minutes, key=lambda x: float(x['duration']))}")
print(f"Mean: {np.mean(long_video_durations)}")
print(f"Median: {statistics.median(long_video_durations)}")


Total number of videos: 700
Number of videos longer than 15 minutes: 117
Min: {'video_id': 'P24_07', 'duration': '911.477233', 'fps': '59.9400599400599', 'resolution': '1920x1080'}
Max: {'video_id': 'P01_109', 'duration': '3708.04', 'fps': '50.0', 'resolution': '1920x1080'}
Mean: 1613.6855396752135
Median: 1444.278483


In [3]:
import pandas as pd
# Video id set by this cell (a later cell assigns a different one).
video_id = "P24_07"

# Low-level narration CSVs: the train and validation splits.
narration_low_level_files = ["EPIC_100_train.csv", "EPIC_100_validation.csv"]

# Read every split into its own DataFrame, then stack them row-wise.
narration_low_level = [pd.read_csv(csv_path) for csv_path in narration_low_level_files]
narration_low_level_df = pd.concat(narration_low_level)


In [4]:
# Sentence-level retrieval annotation CSVs: the train and test splits.
narration_sentences_files = [
    "retrieval_annotations/EPIC_100_retrieval_train_sentence.csv",
    "retrieval_annotations/EPIC_100_retrieval_test_sentence.csv",
]

# One DataFrame per file, in the order listed above.
narration_sentences = list(map(pd.read_csv, narration_sentences_files))

# Stack the per-file frames into a single table (original row indices are kept).
narrations_sentences_df = pd.concat(narration_sentences)

In [None]:

video_id = "P37_101"

## Get video info for this video
# Filter into a new name rather than rebinding `video_info`: the full metadata
# table stays available, so this cell can be re-run with a different video_id.
selected_video_info = [v for v in video_info if v["video_id"] == video_id]
print(video_id, selected_video_info)

## Get visor annotations
# The annotation file is JSON, so it is parsed with the json module instead of
# pd.read_csv. The path is derived from video_id so it follows the selection
# above. NOTE(review): assumes the file lives under the "train" split, as it
# does for P37_101 -- confirm for other videos.
with open(f"visor_annotations/train/{video_id}.json") as visor_file:
    visor_annotations = json.load(visor_file)

# Match on "<video_id>_" (with the trailing underscore) so that e.g. "P01_10"
# does not also select narrations belonging to "P01_101".
video_prefix = f"{video_id}_"
belongs_to_video = narrations_sentences_df["narration_id"].str.startswith(video_prefix, na=False)

# Order the narrations by the integer after the last underscore of the id.
narrations_sentences_filtered = narrations_sentences_df[belongs_to_video].sort_values(
    by="narration_id",
    key=lambda x: x.str.split("_").str[-1].astype(int)
)

# Build the narration_id -> timestamps lookup once instead of re-scanning the
# whole low-level table for every narration. Only the first row per id is kept,
# which mirrors taking the first match.
wanted_ids = narrations_sentences_filtered["narration_id"]
timestamps_by_id = (
    narration_low_level_df[narration_low_level_df["narration_id"].isin(wanted_ids)]
    .drop_duplicates(subset="narration_id", keep="first")
    .set_index("narration_id")[["start_timestamp", "stop_timestamp"]]
)

for _, row in narrations_sentences_filtered.iterrows():
    narration_id = row["narration_id"]
    narration_phrase = row["narration"]
    nouns = row["nouns"]
    # Timestamps come from the low-level annotations; a narration without a
    # matching low-level row is reported with None for both timestamps.
    if narration_id in timestamps_by_id.index:
        start_timestamp = timestamps_by_id.at[narration_id, "start_timestamp"]
        stop_timestamp = timestamps_by_id.at[narration_id, "stop_timestamp"]
    else:
        start_timestamp = None
        stop_timestamp = None
    print(f"narration_id: {narration_id}")
    print(f"narration: {narration_phrase}")
    print(f"nouns: {nouns}")
    print(f"start_timestamp: {start_timestamp}")
    print(f"stop_timestamp: {stop_timestamp}")
    print("------")



In [6]:
import os

# Every JSON file in "active_objects" corresponds to one annotated video.
files = [f for f in os.listdir("active_objects") if f.endswith(".json")]

# Derive the video id from each filename: take the part before the first ".",
# split it on "_" and drop the first two tokens.
video_ids_visor = ["_".join(f.split(".")[0].split("_")[2:]) for f in files]
# Set copy for constant-time membership tests in the filter below.
video_ids_visor_lookup = set(video_ids_visor)

# Re-read the full EPIC-100 metadata table; the context manager closes the
# file handle once the rows are loaded.
with open("EPIC_100_video_info.csv", newline="") as video_info_file:
    video_info_ek100 = list(csv.DictReader(video_info_file))
print(f"  - Number of videos in EPIC-100: {len(video_info_ek100)}  -")

# Restrict the metadata to videos with an active-objects file, then to those
# strictly longer than 15 minutes (900 seconds).
video_info_visor = [v for v in video_info_ek100 if v["video_id"] in video_ids_visor_lookup]
visor_videos_longer_than_15_minutes = [v for v in video_info_visor if float(v["duration"]) > 900]

# Parse durations once; each list feeds the statistics printed below.
visor_durations = [float(v["duration"]) for v in video_info_visor]
visor_long_durations = [float(v["duration"]) for v in visor_videos_longer_than_15_minutes]

## Print count, min, max, mean and median of video durations.
## Min/Max print the whole metadata row of the shortest/longest long video.
print("VISOR ONLY STATS")
print(f"Number of videos in VISOR: {len(video_info_visor)}")
print(f"Overall median duration: {statistics.median(visor_durations)}")
print(f"Number of videos longer than 15 minutes: {len(visor_videos_longer_than_15_minutes)}")
print(f"Min: {min(visor_videos_longer_than_15_minutes, key=lambda x: float(x['duration']))}")
print(f"Max: {max(visor_videos_longer_than_15_minutes, key=lambda x: float(x['duration']))}")
print(f"Mean for videos longer than 15 minutes: {np.mean(visor_long_durations)}")
print(f"Median for videos longer than 15 minutes: {statistics.median(visor_long_durations)}")


  - Number of videos in EPIC-100: 700  -
VISOR ONLY STATS
Number of videos in VISOR: 158
Overall median duration: 431.6435
Number of videos longer than 15 minutes: 43
Min: {'video_id': 'P02_109', 'duration': '912.58', 'fps': '50.0', 'resolution': '1920x1080'}
Max: {'video_id': 'P01_09', 'duration': '3571.06815', 'fps': '59.9400599400599', 'resolution': '1920x1080'}
Mean for videos longer than 15 minutes: 1699.820027139535
Median for videos longer than 15 minutes: 1600.901617


In [9]:
# One line per long VISOR video: its id and duration, separated by "<tab>|<tab>".
for long_video in visor_videos_longer_than_15_minutes:
    print(long_video["video_id"], long_video["duration"], sep="\t|\t")

P01_01	|	1652.152817
P01_05	|	1271.988033
P01_09	|	3571.06815
P01_14	|	1352.585217
P02_03	|	1252.184917
P02_09	|	2199.849967
P02_12	|	1315.333
P02_109	|	912.58
P02_130	|	1083.46
P03_04	|	1670.754733
P04_02	|	1437.621833
P04_05	|	2068.700617
P04_109	|	980.54
P04_121	|	1693.22
P05_08	|	1280.596633
P06_05	|	1444.278483
P06_07	|	1061.928183
P06_09	|	1344.779083
P06_101	|	1859.1
P08_21	|	1406.772683
P10_04	|	3359.8755
P12_02	|	1853.95275
P12_03	|	1465.86505
P12_04	|	1912.2776829999998
P12_101	|	2045.86
P22_01	|	1091.292517
P22_07	|	2180.6124170000003
P22_117	|	971.82
P23_02	|	1668.233883
P23_05	|	1168.901717
P24_05	|	1600.901617
P24_08	|	1512.88035
P24_09	|	1968.6006170000003
P25_107	|	2212.96
P27_101	|	1360.2
P27_105	|	977.76
P28_103	|	1825.16
P30_05	|	3261.992717
P30_107	|	2994.16
P30_111	|	1840.26
P35_105	|	2148.26
P35_109	|	1129.48
P37_101	|	1681.46
