In [75]:
import json
import os
import cv2
import shutil
import glob
import random

In [76]:
# First create a folder for my dataset called "camera_shot_data", with sub-folders train, valid, test.
def create_dataset_folders(dataset_name):
    """Create the ``train``, ``valid`` and ``test`` folders under ``dataset_name``.

    Args:
        dataset_name: Path of the dataset root folder. It is created as well
            when it does not exist yet.

    Folders that already exist are left untouched.
    """
    for split_name in ('train', 'valid', 'test'):
        os.makedirs(os.path.join(dataset_name, split_name), exist_ok=True)

# Usage: create camera_shot_data/ with its train, valid and test sub-folders.
# The path is relative, so the folders land in the current working directory.
create_dataset_folders("camera_shot_data")

In [77]:
# Parse the raw split annotations (v1_split_trailer.json) into `data`.
with open('v1_split_trailer.json') as annotations_file:
    data = json.loads(annotations_file.read())

In [78]:
# Transform the JSON structure: flatten every example into a single
# {"clip_name", "label", "value"} record, built from the first clip listed
# under that example and that clip's "scale" annotation.
#
# `data` is rewritten in place, so records that are already flat are skipped.
# Without this guard a second run of the cell raises a TypeError, because the
# flattened values are plain strings/numbers instead of per-clip dicts.
FLAT_KEYS = {"clip_name", "label", "value"}

for split in data:
    for example, clips in data[split].items():
        if set(clips) == FLAT_KEYS:
            continue  # already transformed by an earlier run of this cell

        # Only the first clip of each example is kept.
        clip_name, annotations = next(iter(clips.items()))
        scale = annotations["scale"]

        # Replacing the value of an existing key is safe while iterating.
        data[split][example] = {
            "clip_name": 'shot_' + clip_name + '.mp4',
            "label": scale['label'],
            "value": scale['value'],
        }

# Save the transformed JSON
with open('camera_shot_split.json', 'w') as f:
    json.dump(data, f, indent=4)

In [79]:
# Now build a dictionary of all classes, mapping each numeric class value to
# its label, e.g. class_dict = {0: 'ECS', 1: 'CS', 2: 'MS', 3: 'FS', 4: 'LS'}.
# Keys appear in the order the values are first met while scanning the
# examples, and the first label seen for a value is the one that is kept.
# 'ECS': Extreme close-up shot
# 'CS': Close-up shot
# 'MS': Medium shot
# 'FS': Full shot
# 'LS': Long shot
class_dict = {}
for split_examples in data.values():
    for record in split_examples.values():
        class_dict.setdefault(record['value'], record['label'])
print(class_dict)

{1: 'CS', 2: 'MS', 0: 'ECS', 3: 'FS', 4: 'LS'}


In [80]:
# Now, for the labels in class_dict, create matching folders inside the train, valid, and test subfolders of camera_shot_data.
# I.e. I want to create the folders ECS, CS, MS, FS, LS in train, valid, test.
def create_class_folders(class_labels, dataset_name="camera_shot_data"):
    """Create one folder per class label inside every split folder.

    Args:
        class_labels: Either a dict whose values are the label names (such as
            a ``{value: label}`` mapping) or an iterable of label names.
        dataset_name: Dataset root that holds the ``train``, ``valid`` and
            ``test`` folders. Defaults to ``"camera_shot_data"``.

    Missing parent folders are created and existing folders are left as is.
    """
    # Take the label names from the argument itself instead of from a
    # notebook-level variable, so the function works for whatever is passed in.
    if isinstance(class_labels, dict):
        label_names = list(class_labels.values())
    else:
        label_names = list(class_labels)

    for subfolder in ('train', 'valid', 'test'):
        for label_name in label_names:
            os.makedirs(os.path.join(dataset_name, subfolder, label_name), exist_ok=True)
# Create one folder per label found in class_dict inside each split folder.
create_class_folders(class_dict)

In [81]:
# Now that we have the folders, it's time to put the images into them.
# But first, we need to turn each video file into a single image: take the
# clip named by the clip_name field in our camera_shot_split.json file,
# then extract the middle frame of that clip and save it as a jpg file.
# Let's define functions to do this.

# First build the path to the clip whose frame I need to extract.
def get_clip_path(path_to_example, clip_name):
    """Return the path of ``clip_name`` inside the folder ``path_to_example``.

    Args:
        path_to_example: Folder that contains the clips of one example.
        clip_name: File name of the clip.

    Returns:
        The two parts joined with ``os.path.join``.
    """
    clip_path = os.path.join(path_to_example, clip_name)
    return clip_path

# Now get the middle frame of the clip.
def get_middle_frame(clip_path):
    """Return the middle frame of the video stored at ``clip_path``.

    Args:
        clip_path: Path of the video file, opened with ``cv2.VideoCapture``.

    Returns:
        The frame read after seeking to ``frame_count // 2``, or ``None`` when
        the video cannot be opened or no frame can be read at that position.
    """
    # Open the video file
    cap = cv2.VideoCapture(clip_path)
    try:
        # Stop right away when the file could not be opened, instead of
        # seeking and reading on a capture that is known to be unusable.
        if not cap.isOpened():
            print(f"Error opening video file: {clip_path}")
            return None

        # The frame count is returned as a float; turn it into a
        # non-negative integer index before seeking.
        total_frames = cap.get(cv2.CAP_PROP_FRAME_COUNT)
        try:
            middle_index = max(int(total_frames) // 2, 0)
        except (ValueError, OverflowError):
            # NaN / infinite frame count: fall back to the start of the clip.
            middle_index = 0

        # Set the position of the capture to the middle frame and read it
        cap.set(cv2.CAP_PROP_POS_FRAMES, middle_index)
        ret, frame = cap.read()
    finally:
        # Release the VideoCapture object on every path, including errors.
        cap.release()

    # ret is True only when a frame was read correctly
    return frame if ret else None

# Now save the image data as a jpg file.
def save_image(image_data, filename):
    """Write ``image_data`` to ``filename`` with ``cv2.imwrite``.

    Args:
        image_data: Image to save, e.g. a frame returned by get_middle_frame.
        filename: Destination path of the image file.

    Returns:
        ``True`` when ``cv2.imwrite`` reports success, ``False`` otherwise.
        A failure is also printed, because ``cv2.imwrite`` signals it only
        through its return value and it would otherwise go unnoticed.
    """
    written = bool(cv2.imwrite(filename, image_data))
    if not written:
        print(f"Could not write image file: {filename}")
    return written

In [88]:
# Finally, traverse the entire dataset and save the middle frame of each clip as a jpg file
# in the folder that matches its split and class label. The camera_shot_split.json file
# provides the clip_name and the class label of every example.

# Load the JSON file
with open('camera_shot_split.json') as f:
    data = json.load(f)

# Root folder that holds one sub-folder of clips per example. Set the
# TRAILER_DATASET_PATH environment variable to point at another location.
DATASET_PATH = os.environ.get('TRAILER_DATASET_PATH', '/Users/thomaslim/trailer')

skipped_clips = 0
for split in data:
    for example in data[split]:
        clip_name = data[split][example]['clip_name']
        clip_path = get_clip_path(os.path.join(DATASET_PATH, example), clip_name)
        image_data = get_middle_frame(clip_path)
        if image_data is None:
            # The clip is missing or unreadable; keep going with the rest.
            skipped_clips += 1
            continue

        output_dir = os.path.join('camera_shot_data', split, data[split][example]['label'])
        # Make sure the target folder exists: writing into a missing folder
        # fails without raising, which would silently drop the image.
        os.makedirs(output_dir, exist_ok=True)

        # Prefix the file name with the example id. Different examples can
        # share a clip name, and with the clip name alone their images would
        # overwrite each other inside the same label folder.
        image_name = example + '_' + os.path.splitext(clip_name)[0] + '.jpg'

        # Save the image data as a jpg image file
        save_image(image_data, os.path.join(output_dir, image_name))

if skipped_clips:
    print(f"{skipped_clips} clip(s) could not be read and were skipped")

In [90]:
# Since the dataset split is in terms of only train and test, I need to split the train into train and valid.
# Basically, for each class label folder in train, move 20% of the files into valid.
# Note: running this cell again moves a further 20% of whatever is left in train.

VALID_FRACTION = 0.2  # share of each class moved from train to valid
SPLIT_SEED = 42       # fixed seed so the same files are picked on every fresh run

# A private generator keeps the split reproducible without touching the
# global state of the random module.
split_rng = random.Random(SPLIT_SEED)

# First get the list of all class label folders in train. Entries that are not
# directories (stray files in the train folder) are ignored, and the list is
# sorted because os.listdir returns entries in arbitrary order.
train_path = os.path.join('camera_shot_data', 'train')
class_labels = sorted(
    entry for entry in os.listdir(train_path)
    if os.path.isdir(os.path.join(train_path, entry))
)

# Now for each class label folder in train, move 20% of the files into valid.
for label in class_labels:
    # Get all the files in the class label folder, in a stable order so that
    # the seeded shuffle below always starts from the same sequence
    files = sorted(glob.glob(os.path.join(train_path, label, '*')))

    # Shuffle the files
    split_rng.shuffle(files)

    # Calculate the number of files to move
    num_to_move = int(len(files) * VALID_FRACTION)

    # Get the files to move
    files_to_move = files[:num_to_move]

    # Create the corresponding folder in 'valid' if it doesn't exist
    valid_folder = os.path.join('camera_shot_data', 'valid', label)
    os.makedirs(valid_folder, exist_ok=True)

    # Move the files
    for file in files_to_move:
        destination = os.path.join(valid_folder, os.path.basename(file))
        # shutil.move into a directory raises when a file with the same name
        # is already there (e.g. left over from a previous run of the
        # pipeline), so replace the stale copy explicitly.
        if os.path.isfile(destination):
            os.remove(destination)
        shutil.move(file, destination)

In [92]:
# Count the training files that remain across all class label folders.
num_train = sum(
    len(os.listdir(os.path.join(train_path, class_label)))
    for class_label in class_labels
)

In [93]:
# Display the number of training files counted above.
num_train

211