In [48]:
import random

def split_train_test(train_file, test_file, test_ratio=0.1, seed=None):
    """Randomly carve a test split out of an annotation list.

    Every non-blank line of ``train_file`` is expected to look like
    ``<filename> <integer label>``. A random ``test_ratio`` fraction of the
    lines is written to ``test_file``; the remaining lines are written back
    to ``train_file``, overwriting it in place. Both outputs are sorted by
    the integer label.

    Args:
        train_file: Path of the annotation list to split (rewritten in place).
        test_file: Path that receives the held-out lines (created or
            overwritten).
        test_ratio: Fraction of the lines, within ``[0, 1]``, that goes to the
            test list. The resulting count is truncated towards zero.
        seed: Optional seed for a private random generator, making the split
            reproducible. ``None`` (default) uses the module-level ``random``
            state, as before.

    Raises:
        ValueError: If ``test_ratio`` is outside ``[0, 1]`` or a label field
            is not an integer.
        IndexError: If a non-blank line has no second (label) field.

    Note:
        Because ``train_file`` is overwritten, the held-out entries exist only
        in ``test_file`` afterwards. Merge them back into the train list
        before creating another split, otherwise they are lost from it.
    """
    if not 0 <= test_ratio <= 1:
        raise ValueError(f"test_ratio must be within [0, 1], got {test_ratio!r}")

    # Read the annotation list. Blank lines are dropped and every record gets
    # exactly one trailing newline, so a final line that lacks one cannot be
    # glued to its neighbour once the order changes.
    with open(train_file, 'r') as f:
        lines = [line.rstrip('\r\n') + '\n' for line in f if line.strip()]

    # Shuffle the lines randomly
    if seed is None:
        random.shuffle(lines)
    else:
        random.Random(seed).shuffle(lines)

    # Calculate the number of test samples
    num_test_samples = int(len(lines) * test_ratio)

    def label_of(line):
        # Second whitespace-separated field is the integer class label.
        return int(line.split()[1])

    # Split and sort BEFORE opening anything for writing: opening with 'w'
    # truncates the file, so a malformed line must fail here, while the
    # original train list is still intact on disk.
    test_lines = sorted(lines[:num_test_samples], key=label_of)
    train_lines = sorted(lines[num_test_samples:], key=label_of)

    # Write the held-out lines to the test list
    with open(test_file, 'w') as f:
        f.writelines(test_lines)

    # Write the remaining lines back over the original train list
    with open(train_file, 'w') as f:
        f.writelines(train_lines)

In [49]:
import os
import shutil

def move_files(train_dir, test_dir, test_file, verbose=True):
    """Move every file listed in ``test_file`` from ``train_dir`` to ``test_dir``.

    The list is read line by line; the first whitespace-separated field of
    each line is taken as the file name (the format is assumed to be
    ``filename label``). Entries whose source file does not exist are
    reported and skipped.

    Args:
        train_dir: Directory the files are moved out of.
        test_dir: Directory the files are moved into; created if missing.
        test_file: Path of the list naming the files to move.
        verbose: When True (default) print one line per moved file. Missing
            files and the final summary are printed regardless.
    """
    # Create the destination directory if it doesn't exist
    os.makedirs(test_dir, exist_ok=True)

    with open(test_file, 'r') as file:
        for line in file:
            fields = line.split()
            if not fields:
                # Blank line (e.g. a trailing newline at the end of the list):
                # there is no file name to extract, so skip it instead of
                # failing with IndexError.
                continue
            filename = fields[0]

            # Define the source and destination paths
            src_path = os.path.join(train_dir, filename)
            dest_path = os.path.join(test_dir, filename)

            # Move the file from the source directory to the destination
            if os.path.isfile(src_path):
                # If the entry contains a sub-folder, make sure it exists on
                # the destination side; for flat names this is just test_dir.
                os.makedirs(os.path.dirname(dest_path), exist_ok=True)
                shutil.move(src_path, dest_path)
                if verbose:
                    print(f"Moved: {filename} from {src_path} to {dest_path}")
            else:
                print(f"File not found: {filename}")

    print(f"Files moved to {test_dir}.")
    
def return_files(train_dir, test_dir, test_file):
    """Move all test videos back to train so that a new split can be made.

    Delegates to ``move_files`` with the roles of the two directories
    swapped: files named in ``test_file`` are taken from ``test_dir`` and put
    into ``train_dir``.

    NOTE(review): only the files themselves are moved. The list files are not
    modified here, so entries that ``split_train_test`` removed from the train
    list are not restored by this call.
    """
    source_dir, destination_dir = test_dir, train_dir
    move_files(source_dir, destination_dir, test_file)

In [50]:
# Annotation lists: one "filename label" entry per line.
train_file, test_file = (
    "./data/k700-2020/updated_splits/train.txt",
    "./data/k700-2020/updated_splits/test.txt",
)

# Directories holding the video files of each split.
train_dir, test_dir = (
    "./data/k700-2020/updated_splits/train",
    "./data/k700-2020/updated_splits/test",
)

In [51]:
# Create a fresh test list: a random 10% of the entries in train_file go to
# test_file, and train_file itself is rewritten with the remaining entries.
split_train_test(train_file=train_file, test_file=test_file, test_ratio=0.1)

In [52]:
# Move the videos named in the new test list from the train folder to the
# test folder.
move_files(train_dir=train_dir, test_dir=test_dir, test_file=test_file)

Moved: KBqXxXL-6jo_000425_000435.mp4 from ./data/k700-2020/updated_splits/train/KBqXxXL-6jo_000425_000435.mp4 to ./data/k700-2020/updated_splits/test/KBqXxXL-6jo_000425_000435.mp4
Moved: fHreXo2NfVk_000004_000014.mp4 from ./data/k700-2020/updated_splits/train/fHreXo2NfVk_000004_000014.mp4 to ./data/k700-2020/updated_splits/test/fHreXo2NfVk_000004_000014.mp4
Moved: _5XIY2kCNZI_000085_000095.mp4 from ./data/k700-2020/updated_splits/train/_5XIY2kCNZI_000085_000095.mp4 to ./data/k700-2020/updated_splits/test/_5XIY2kCNZI_000085_000095.mp4
Moved: Svw3VCFSh4Y_000085_000095.mp4 from ./data/k700-2020/updated_splits/train/Svw3VCFSh4Y_000085_000095.mp4 to ./data/k700-2020/updated_splits/test/Svw3VCFSh4Y_000085_000095.mp4
Moved: pUYln16PYJA_000000_000010.mp4 from ./data/k700-2020/updated_splits/train/pUYln16PYJA_000000_000010.mp4 to ./data/k700-2020/updated_splits/test/pUYln16PYJA_000000_000010.mp4
Moved: VYKkuWx7HQ8_000038_000048.mp4 from ./data/k700-2020/updated_splits/train/VYKkuWx7HQ8_000038_0

Moved: i5-0PmJQe2E_000029_000039.mp4 from ./data/k700-2020/updated_splits/train/i5-0PmJQe2E_000029_000039.mp4 to ./data/k700-2020/updated_splits/test/i5-0PmJQe2E_000029_000039.mp4
Moved: 9dyOMEVd9GM_000005_000015.mp4 from ./data/k700-2020/updated_splits/train/9dyOMEVd9GM_000005_000015.mp4 to ./data/k700-2020/updated_splits/test/9dyOMEVd9GM_000005_000015.mp4
Moved: AoU1OSY3qEc_000073_000083.mp4 from ./data/k700-2020/updated_splits/train/AoU1OSY3qEc_000073_000083.mp4 to ./data/k700-2020/updated_splits/test/AoU1OSY3qEc_000073_000083.mp4
Moved: PfTLArcL3m8_000015_000025.mp4 from ./data/k700-2020/updated_splits/train/PfTLArcL3m8_000015_000025.mp4 to ./data/k700-2020/updated_splits/test/PfTLArcL3m8_000015_000025.mp4
Moved: tb9AyhPAK48_000210_000220.mp4 from ./data/k700-2020/updated_splits/train/tb9AyhPAK48_000210_000220.mp4 to ./data/k700-2020/updated_splits/test/tb9AyhPAK48_000210_000220.mp4
Moved: 81ZQ9AdiqYM_000065_000075.mp4 from ./data/k700-2020/updated_splits/train/81ZQ9AdiqYM_000065_0

Moved: ucpOxJOBJFk_000233_000243.mp4 from ./data/k700-2020/updated_splits/train/ucpOxJOBJFk_000233_000243.mp4 to ./data/k700-2020/updated_splits/test/ucpOxJOBJFk_000233_000243.mp4
Moved: _ohRI2gFV1w_000004_000014.mp4 from ./data/k700-2020/updated_splits/train/_ohRI2gFV1w_000004_000014.mp4 to ./data/k700-2020/updated_splits/test/_ohRI2gFV1w_000004_000014.mp4
Moved: O-5Ycq2j1OI_000057_000067.mp4 from ./data/k700-2020/updated_splits/train/O-5Ycq2j1OI_000057_000067.mp4 to ./data/k700-2020/updated_splits/test/O-5Ycq2j1OI_000057_000067.mp4
Moved: yIgXrc3RptI_000157_000167.mp4 from ./data/k700-2020/updated_splits/train/yIgXrc3RptI_000157_000167.mp4 to ./data/k700-2020/updated_splits/test/yIgXrc3RptI_000157_000167.mp4
Moved: ph_te7VKwxQ_000250_000260.mp4 from ./data/k700-2020/updated_splits/train/ph_te7VKwxQ_000250_000260.mp4 to ./data/k700-2020/updated_splits/test/ph_te7VKwxQ_000250_000260.mp4
Moved: bwjX8l97NTg_000040_000050.mp4 from ./data/k700-2020/updated_splits/train/bwjX8l97NTg_000040_0

In [None]:
# Undo the current split on disk: move the videos named in the test list back
# into the train folder. Run this before creating a new test split.
return_files(train_dir=train_dir, test_dir=test_dir, test_file=test_file)