### Imports

In [None]:
import os
import pickle
import random
from packages.dataset import *
from packages.video_utils import H264Extractor, VideoHandler, Gop
from packages.constants import GOP_SIZE, FRAME_WIDTH, FRAME_HEIGHT, DATASET_ROOT

### Constants

In [None]:
# Text file passed to get_videos_structure_from_url_file(), which reads it
# line by line and parses each line as a VISION dataset URL.
FILENAME = 'datasets/VISION_files.txt'
# DATASET_ROOT is imported from packages.constants; the line below is a
# disabled local override.
# DATASET_ROOT = 'datasets/VISION'
# Fail early: downloaded videos are stored under DATASET_ROOT.
if not os.path.exists(DATASET_ROOT):
    raise Exception('Dataset root does not exist!')

# The current working directory is used as the project root.
PROJECT_PATH = os.getcwd()

# Location of the h264dec_ext_info binary inside the project tree. In this
# section it is only referenced by the commented-out H264Extractor call.
BIN_PATH = os.path.abspath(os.path.join(PROJECT_PATH, 'h264-extractor', 'bin'))
H264_BIN_PATH = os.path.join(BIN_PATH, 'h264dec_ext_info')

### Utility functions

In [None]:
# URL example
# https://lesc.dinfo.unifi.it/VISION/dataset/D01_Samsung_GalaxyS3Mini/videos/flatWA/D01_V_flatWA_panrot_0001.mp4

def parse_url(url):
    """Split a VISION dataset URL into its four path components.

    The part of ``url`` that follows the
    ``https://lesc.dinfo.unifi.it/VISION/dataset/`` prefix is split on ``/``
    and its first four segments are returned.

    Returns:
        A ``(device, media_type, property, name)`` tuple of strings.

    Raises:
        IndexError: if the prefix is missing from ``url`` or fewer than four
            segments follow it.
    """
    dataset_prefix = 'https://lesc.dinfo.unifi.it/VISION/dataset/'
    relative_path = url.split(dataset_prefix)[1]
    segments = relative_path.split('/')

    # Index explicitly (instead of unpacking) so that a short path raises
    # IndexError and extra trailing segments are ignored.
    return segments[0], segments[1], segments[2], segments[3]

def download(url, path):
    """Download ``url`` into the directory ``path`` with ``wget``.

    The directory is created first if it does not exist. ``wget`` is invoked
    with an argument list (no shell), so spaces or shell metacharacters in
    ``url`` or ``path`` cannot break or alter the command.

    The exit status of ``wget`` is ignored, so a failed download does not
    raise.

    Raises:
        FileExistsError: if ``path`` exists but is not a directory.
        FileNotFoundError: if the ``wget`` executable cannot be found.
    """
    # Local import so this function stays self-contained in the notebook cell.
    import subprocess

    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(path, exist_ok=True)
    subprocess.run(['wget', '-P', path, url], check=False)

def get_videos_structure_from_url_file(filename):
    """Read a file of VISION URLs and return the entries that are mp4 videos.

    Each non-blank line of ``filename`` is stripped of surrounding whitespace
    and parsed with ``parse_url``. Blank lines (for example a trailing empty
    line) are skipped instead of being handed to ``parse_url``.

    Returns:
        A list of ``(url, device, media_type, property, name)`` tuples, in
        file order, for every URL whose media type is ``'videos'`` and whose
        file name ends with ``.mp4``.

    Raises:
        IndexError: propagated from ``parse_url`` when a non-blank line is
            not a well-formed VISION dataset URL.
    """
    videos = []
    with open(filename) as f:
        for line in f:
            url = line.strip()
            if not url:
                continue

            device, media_type, prop, name = parse_url(url)

            if media_type == 'videos' and name.endswith('.mp4'):
                videos.append((url, device, media_type, prop, name))

    return videos



### Generate GOP dataset

In [None]:
def build_gop_dataset(videos, devices=(), media_types=(), properties=(), max_gops=0, shuffle=True):
    """Download the selected VISION videos that are not on disk yet.

    NOTE: the H264 extraction / GOP pickling steps are currently commented
    out below, so at the moment this function only filters the entries and
    downloads the missing mp4 files.

    Args:
        videos: iterable of ``(url, device, media_type, property, name)``
            tuples. It is copied, never modified in place.
        devices: if non-empty, only entries whose device is in it are kept.
        media_types: if non-empty, only entries whose media type is in it
            are kept.
        properties: if non-empty, only entries whose property is in it are
            kept.
        max_gops: maximum number of entries to process; ``0`` (or any
            non-positive value) means no limit.
        shuffle: when true, the entries are visited in random order.

    Empty or ``None`` filters mean "no filtering" for that field.
    """
    # Work on a copy so that shuffling never reorders the caller's list.
    videos = list(videos)
    if shuffle:
        random.shuffle(videos)

    processed = 0
    for url, device, media_type, prop, name in videos:
        if devices and device not in devices:
            continue

        if media_types and media_type not in media_types:
            continue

        if properties and prop not in properties:
            continue

        # Only entries that passed the filters count towards the limit.
        if max_gops > 0 and processed >= max_gops:
            break

        mp4_save_path = os.path.join(DATASET_ROOT, device, media_type, prop)
        mp4_filename = os.path.join(mp4_save_path, name)
        if not os.path.exists(mp4_filename):
            download(url, mp4_save_path)

        # --- Disabled GOP extraction pipeline (kept for reference) ---
        # extractor = H264Extractor(H264_BIN_PATH, os.path.join(PROJECT_PATH, '.vision-cache'))

        # h264_filename = extractor.convert_to_h264(mp4_filename)
        # yuv_filename, coded_data_filename = extractor.extract_yuv_and_codes(h264_filename)

        # video_handler = VideoHandler(mp4_filename, h264_filename, yuv_filename, coded_data_filename)

        # gop = Gop(video_handler, GOP_SIZE, FRAME_WIDTH, FRAME_HEIGHT)

        # os.remove(h264_filename)
        # os.remove(yuv_filename)
        # os.remove(coded_data_filename)

        # if os.path.exists(h264_filename) or os.path.exists(yuv_filename) or os.path.exists(coded_data_filename):
        #     raise Exception('Error removing files')

        # gop_save_path = os.path.join(DATASET_ROOT + '_GOPS', device, media_type, prop)

        # if not os.path.exists(gop_save_path):
        #     os.makedirs(gop_save_path)
        # pickle.dump(gop, open(os.path.join(gop_save_path, gop.video_handler.name) + f'_{GOP_SIZE}_{FRAME_WIDTH}x{FRAME_HEIGHT}.pkl', 'wb'))

        processed += 1

In [None]:
# Collect every mp4 video entry listed in the URL file.
videos = get_videos_structure_from_url_file(FILENAME)

# max_gops=0 disables the processing limit; shuffle=False keeps file order.
build_gop_dataset(videos, shuffle=False, max_gops=0)

### Open GOP dataset

In [None]:
gop_dataset_root = '/media/thomas/TN_SSD/h4vdm_datasets/VISION_GOPS'

# Walk <root>/<device>/<media_type>/<property>/<file> and print a short
# summary of every pickled GOP found.
# NOTE(security): pickle.load can execute arbitrary code embedded in the file;
# only open GOP files that were produced locally.
devices = os.listdir(gop_dataset_root)
for device in devices:
    device_dir = os.path.join(gop_dataset_root, device)
    media_types = os.listdir(device_dir)

    for media_type in media_types:
        media_type_dir = os.path.join(device_dir, media_type)
        properties = os.listdir(media_type_dir)

        # 'prop' rather than 'property': rebinding the builtin at module level
        # would break any later use of @property in the notebook.
        for prop in properties:
            property_dir = os.path.join(media_type_dir, prop)
            files = os.listdir(property_dir)

            for file in files:
                # Context manager so the file handle is closed after loading.
                with open(os.path.join(property_dir, file), 'rb') as gop_file:
                    read_gop = pickle.load(gop_file)
                print(f'Processing {read_gop.video_handler.name} with {read_gop.length} frames')
                # Build a list: printing a bare generator expression only
                # shows the generator object, not the shapes.
                print([inter_frame.shape for inter_frame in read_gop.inter_frames])
                print(read_gop.intra_frame.shape)

    # Debug shortcut: only the first device directory is inspected.
    break
