# Prepare data for sprite sheet generation (E1, E2) models



In [1]:
import os
import glob
import shutil

from PIL import Image

import common_functions as core

FRAMES_DATA_PATH = core.get_path(core.E1.raw_data_dir)
print(FRAMES_DATA_PATH)

WALK_FRAME_COUNT = 8
DIRECTIONS = ["back", "front", "left", "right"]

INPUT_DATA = core.get_path(core.E1.prepared_data_dir, f"input")
OUTPUT_DATA = core.get_path(core.E1.prepared_data_dir, f"output")
COMBINED_DATA = core.get_path(core.E1.prepared_data_dir, f"combined")

E2_PREPARED_DATA = core.get_path(core.E2.prepared_data_dir)

# Folder names to store train, test, validation data
split_folders = ("train", "test", "val")

IMAGE_IDS = set()
raw_images = glob.glob(f"{FRAMES_DATA_PATH}/*.png")
for _img in raw_images:
    _img_base = os.path.basename(_img)
    _image_id = int(_img_base.split("_")[0])
    IMAGE_IDS.add(_image_id)
IMAGE_IDS = sorted(IMAGE_IDS)
IMAGE_COUNT = len(IMAGE_IDS)

---------------- MPS GPU Detected --------------------
/Users/tashvit/Documents/GitHub/mmpixagen/datasets/unprepared_data/input_e/walk/


In [2]:
def get_image_pairs(image_id, raw_data_path, frame_count = WALK_FRAME_COUNT, directions = DIRECTIONS):
    """
    Function to get input/output image pairs for given image_id
    :param image_id: input image id
    :param raw_data_path: where the raw images are stored
    :param frame_count: frames per a walking sprite sheet
    :param directions: walking directions
    :returns: image pairs
    """
    frames = []
    for direction in directions:
        for frame in range(1, frame_count):
            prev_image_path = os.path.join(raw_data_path, f"{image_id:04d}_{direction}_{frame - 1}.png")
            image_path = os.path.join(raw_data_path, f"{image_id:04d}_{direction}_{frame}.png")
            frames.append((prev_image_path, image_path))
    return frames

def prepare_data(raw_data_path, model_inp_dir = INPUT_DATA, model_out_dir = OUTPUT_DATA, split = (0.8, 0.9, 1.0), image_count = IMAGE_COUNT, image_ids = IMAGE_IDS):
    """
    Function to prepare data folders
    :param raw_data_path: original images
    :param model_inp_dir: folder for storing current frame of animation
    :param model_out_dir: folder for storing next frame of animation -> target images (ground truth images)
    :param split: split points for train, test, validation
    :param image_count: number of images to prepare
    :param image_ids: image ids
    """
    for base_folder in [model_inp_dir, model_out_dir]:
        for split_folder in split_folders:
            os.makedirs(os.path.join(base_folder, split_folder), exist_ok=True)
    # Get split point for train data (0 -> train_end)
    train_end = int(image_count * split[0])
    # Get split point for test data (train_end -> test_end)
    # Validation data is from test_end -> end
    test_end = int(image_count * split[1])
    counter = 0
    for image_id in image_ids:
        # Select subdirectory for file based on index in for loop
        subdir = "train"
        if train_end < image_id < test_end:
            subdir = "test"
        elif image_id >= test_end:
            subdir = "val"
        pairs = get_image_pairs(image_id, raw_data_path)
        for input_image, output_image in pairs:
            shutil.copyfile(input_image, os.path.join(model_inp_dir, subdir, f"{counter:05d}.png"))
            shutil.copyfile(output_image, os.path.join(model_out_dir, subdir, f"{counter:05d}.png"))
            counter += 1
            
def prepare_data_e2(raw_data_path, sprites_data_path, split = 0.8, image_count = IMAGE_COUNT, image_ids = IMAGE_IDS):
    split_folders_e2 = ["train", "test"]
    for split_folder in split_folders_e2:
        os.makedirs(os.path.join(sprites_data_path, split_folder), exist_ok=True)
    test_split = int(image_count * split)
    for idx, image_id in enumerate(image_ids):
        folder = "train"
        if idx >= test_split:
            folder = "test"
        frame = 0
        image_folder = f"{idx:04d}"
        target_folder = os.path.join(sprites_data_path, folder, image_folder)
        os.makedirs(target_folder, exist_ok=True)
        for direction in DIRECTIONS:
            for image_num in range(8):
                image = f"{image_id:04d}_{direction}_{image_num}.png"
                image_path = os.path.join(raw_data_path, image)
                # Convert image to 256x256 as required by fomm model
                image = Image.open(image_path).convert('RGB').resize((256, 256), resample=Image.Resampling.NEAREST)
                image.save(os.path.join(target_folder, f"{frame:02d}.png"))
                frame += 1

In [3]:
prepare_data(FRAMES_DATA_PATH)
prepare_data_e2(FRAMES_DATA_PATH, E2_PREPARED_DATA)

# Combined input, output data folders for model B
os.makedirs(COMBINED_DATA, exist_ok=True)
core.create_combined_images(f"--fold_A {INPUT_DATA} --fold_B {OUTPUT_DATA} --fold_AB {COMBINED_DATA} --no_multiprocessing")

----------------------------------------------------------
python /Users/tashvit/Documents/GitHub/mmpixagen/thirdparty/pix2pix/datasets/combine_A_and_B.py --fold_A /Users/tashvit/Documents/GitHub/mmpixagen/datasets/model_e_data/input --fold_B /Users/tashvit/Documents/GitHub/mmpixagen/datasets/model_e_data/output --fold_AB /Users/tashvit/Documents/GitHub/mmpixagen/datasets/model_e_data/combined --no_multiprocessing
[fold_A] =  /Users/tashvit/Documents/GitHub/mmpixagen/datasets/model_e_data/input
[fold_B] =  /Users/tashvit/Documents/GitHub/mmpixagen/datasets/model_e_data/output
[fold_AB] =  /Users/tashvit/Documents/GitHub/mmpixagen/datasets/model_e_data/combined
[num_imgs] =  1000000
[use_AB] =  False
[no_multiprocessing] =  True
split = test, use 1344/1344 images
split = test, number of images = 1344
split = train, use 6720/6720 images
split = train, number of images = 6720
split = val, use 28224/28224 images
split = val, number of images = 28224
