In [1]:
files_to_use = ['Tennis_1920x1080_24fps_8bit_420_Motion_QP47_SFB_1.avi',
                'Tennis_1920x1080_24fps_8bit_420_Motion_QP32_BT_1.avi',
                'DanceKiss_1920x1080_25fps_8bit_420_Dark_QP47_FB_4.avi',
                'DanceKiss_1920x1080_25fps_8bit_420_Dark_QP32_SB_4.avi',
                'Kimono1_1920x1080_24fps_8bit_420_graininess_QP47_B_4.avi',
                'Kimono1_1920x1080_24fps_8bit_420_graininess_QP32_FB_1.avi',
                'OldTownCross_1920x1080_25fps_8bit_420_graininess_QP47_SB_4.avi',
                'OldTownCross_1920x1080_25fps_8bit_420_graininess_QP32_SBT_2.avi',
                'BirdsInCage_1920x1080_30fps_8bit_420_Pristine_QP47_SFB_3.avi',
                'BirdsInCage_1920x1080_30fps_8bit_420_Pristine_QP32_FBT_1.avi',
                'ElFuente1_1920x1080_30fps_8bit_420_aliasing_QP47_SFB_1.avi',
                'ElFuente1_1920x1080_30fps_8bit_420_aliasing_QP32_FB_4.avi',
                'ElFuente2_1920x1080_30fps_8bit_420_graininess_QP47_SFB_3.avi',
                'ElFuente2_1920x1080_30fps_8bit_420_graininess_QP32_S_2.avi',
                'BQTerrace_1920x1080_30fps_8bit_420_aliasing_QP47_FB_3.avi',
                'BQTerrace_1920x1080_30fps_8bit_420_aliasing_QP32_SF_4.avi',
                'CrowdRun_1920x1080_25fps_8bit_420_aliasing_QP47_SFT_4.avi',
                'CrowdRun_1920x1080_25fps_8bit_420_aliasing_QP32_SF_1.avi',
                'Seeking_1920x1080_25fps_8bit_420_graininess_QP47_SF_2.avi',
                'Seeking_1920x1080_25fps_8bit_420_graininess_QP32_SFT_1.avi',
                'riverbed_1920x1080_25fps_8bit_420_banding_QP47_SFBT_2.avi',
                'riverbed_1920x1080_25fps_8bit_420_banding_QP32_S_3.avi',
                'station_1920x1080_30fps_8bit_420_graininess_QP47_SBT_2.avi',
                'station_1920x1080_30fps_8bit_420_graininess_QP32_SB_1.avi',
                'shields_1280x720_50fps_8bit_420_graininess_QP47_SBT_3.avi',
                'shields_1280x720_50fps_8bit_420_graininess_QP32_SFBT_2.avi']

In [2]:
from pathlib import Path

In [3]:
dataset_path = Path('/Volumes/SSD/BVIArtefact')

parts = ['part1', 'part2']

# file paths of all files in files_to_use in part1 and part2
file_paths = []
for part in parts:
    file_path = dataset_path / part
    all_files = list(file_path.glob('*.avi'))
    for file in all_files:
        if file.name in files_to_use:
            file_paths.append(file)    

In [5]:
len(file_paths)

26

In [6]:
# copy files to a new folder
import shutil

new_folder = Path('/Volumes/SSD/BVIArtefact/subset_for_patching')
new_folder.mkdir(exist_ok=True)
for file in file_paths:
    shutil.copy(file, new_folder)

In [7]:
# copy labels of files in file from /Volumes/SSD/BVIArtefact/processed_labels.json to /Volumes/SSD/BVIArtefact/subset_for_patching
import json

with open(dataset_path / 'processed_labels.json', 'r') as f:
    labels = json.load(f)
    
new_labels = {}
for file in file_paths:
    new_labels[file.name] = labels[file.name]
    
with open(new_folder / 'labels.json', 'w') as f:
    json.dump(new_labels, f)

In [10]:
import os
import random

# Paths (Assuming the script is in the same directory as the dataset)
dataset_dir = '/Volumes/SSD/subsets/subset_for_patching'
labels_file = os.path.join(dataset_dir, 'labels.json')

# Load the labels
with open(labels_file, 'r') as f:
    labels = json.load(f)

# Split ratios
train_ratio = 0.7
val_ratio = 0.15
test_ratio = 0.15

# Ensure the output directories exist
train_dir = os.path.join(dataset_dir, 'train')
val_dir = os.path.join(dataset_dir, 'val')
test_dir = os.path.join(dataset_dir, 'test')

os.makedirs(train_dir, exist_ok=True)
os.makedirs(val_dir, exist_ok=True)
os.makedirs(test_dir, exist_ok=True)

# Get list of all video files
video_files = [f for f in os.listdir(dataset_dir) if f.endswith('.avi')]

# Shuffle the dataset
random.shuffle(video_files)

# Calculate the split indices
train_idx = int(len(video_files) * train_ratio)
val_idx = train_idx + int(len(video_files) * val_ratio)

# Split the files
train_files = video_files[:train_idx]
val_files = video_files[train_idx:val_idx]
test_files = video_files[val_idx:]

# Helper function to move files and save labels
def move_files_and_save_labels(files, destination_dir, label_dict):
    dest_labels = {}
    for file in files:
        # Skip hidden files or files not present in the label_dict
        if file not in label_dict:
            print(f"Skipping {file} as it is not found in labels.json")
            continue
        src_path = os.path.join(dataset_dir, file)
        dest_path = os.path.join(destination_dir, file)
        shutil.move(src_path, dest_path)
        dest_labels[file] = label_dict[file]
    
    # Save the labels file
    labels_file_path = os.path.join(destination_dir, 'labels.json')
    with open(labels_file_path, 'w') as f:
        json.dump(dest_labels, f, indent=4)

# Move the files and save the corresponding labels
move_files_and_save_labels(train_files, train_dir, labels)
move_files_and_save_labels(val_files, val_dir, labels)
move_files_and_save_labels(test_files, test_dir, labels)

print("Dataset has been reorganized successfully!")

Skipping ._Kimono1_1920x1080_24fps_8bit_420_graininess_QP32_FB_1.avi as it is not found in labels.json
Skipping ._ElFuente1_1920x1080_30fps_8bit_420_aliasing_QP32_FB_4.avi as it is not found in labels.json
Skipping ._BQTerrace_1920x1080_30fps_8bit_420_aliasing_QP32_SF_4.avi as it is not found in labels.json
Skipping ._Seeking_1920x1080_25fps_8bit_420_graininess_QP47_SF_2.avi as it is not found in labels.json
Skipping ._BirdsInCage_1920x1080_30fps_8bit_420_Pristine_QP32_FBT_1.avi as it is not found in labels.json
Skipping ._riverbed_1920x1080_25fps_8bit_420_banding_QP32_S_3.avi as it is not found in labels.json
Skipping ._station_1920x1080_30fps_8bit_420_graininess_QP32_SB_1.avi as it is not found in labels.json
Skipping ._shields_1280x720_50fps_8bit_420_graininess_QP32_SFBT_2.avi as it is not found in labels.json
Skipping ._DanceKiss_1920x1080_25fps_8bit_420_Dark_QP32_SB_4.avi as it is not found in labels.json
Skipping ._DanceKiss_1920x1080_25fps_8bit_420_Dark_QP47_FB_4.avi as it is no