# Imports

In [1]:
import os
from tqdm.auto import tqdm
import numpy as np

# Constants

In [None]:
# Tiling grid: the number of tiles cut along axis 1 (X) and axis 2 (Y) of each
# source array, i.e. NUM_PATCHES_X * NUM_PATCHES_Y thumbnails per input file.
NUM_PATCHES_X = 8
NUM_PATCHES_Y = 8

# Input folders scanned for full-size '.npz' files.
TRAIN_SRC_FOLDER = 'data/processed/train'
TEST_SRC_FOLDER = 'data/processed/test'

# Output folders that receive the per-tile thumbnail '.npz' files.
TRAIN_DEST_FOLDER = 'data/thumbnails/train'
TEST_DEST_FOLDER = 'data/thumbnails/test'

# Train

In [None]:
# Cut every processed training file into a NUM_PATCHES_X x NUM_PATCHES_Y grid
# of tiles and write each tile to TRAIN_DEST_FOLDER as its own '.npz' file.

# np.savez does not create missing directories; create the output folder up
# front so the cell works on a fresh checkout (and is safe to re-run).
os.makedirs(TRAIN_DEST_FOLDER, exist_ok=True)

files = [f for f in os.listdir(TRAIN_SRC_FOLDER) if f.endswith('.npz')]
for f in tqdm(files):
    src_path = os.path.join(TRAIN_SRC_FOLDER, f)
    with np.load(src_path) as data:
        X = data['X']
        y = data['y']

    # Strip only the final extension. Splitting on the first '.' would map
    # 'a.v1.npz' and 'a.v2.npz' to the same stem and overwrite thumbnails.
    stem = os.path.splitext(f)[0]

    # Tile extent along axes 1 and 2 of X. Floor division means trailing
    # rows/columns that do not fill a whole tile are dropped.
    patch_size_x = X.shape[1] // NUM_PATCHES_X
    patch_size_y = X.shape[2] // NUM_PATCHES_Y

    X_patches = []
    y_patches = []

    for i in range(NUM_PATCHES_X):
        for j in range(NUM_PATCHES_Y):
            start_x = i * patch_size_x
            end_x = start_x + patch_size_x
            start_y = j * patch_size_y
            end_y = start_y + patch_size_y

            # X keeps its whole leading axis; y is sliced on its first two axes.
            # NOTE(review): assumes y's first two axes line up with axes 1 and 2
            # of X (e.g. a per-pixel label map) — confirm against the data.
            X_patch = X[:, start_x:end_x, start_y:end_y]
            y_patch = y[start_x:end_x, start_y:end_y]

            X_patches.append(X_patch)
            y_patches.append(y_patch)

    # Thumbnails are numbered from 1 in the order the tiles were collected
    # (outer loop over the X grid index, inner loop over the Y grid index).
    for patch_number, (X_patch, y_patch) in enumerate(zip(X_patches, y_patches), start=1):
        filename = f"{stem}_thumbnail_{patch_number}.npz"
        dest_path = os.path.join(TRAIN_DEST_FOLDER, filename)
        np.savez(dest_path, X=X_patch, y=y_patch)

# Test

In [None]:
# Cut every processed test file into a NUM_PATCHES_X x NUM_PATCHES_Y grid of
# tiles and write each tile to TEST_DEST_FOLDER as its own '.npz' file.

# np.savez does not create missing directories; create the output folder up
# front so the cell works on a fresh checkout (and is safe to re-run).
os.makedirs(TEST_DEST_FOLDER, exist_ok=True)

files = [f for f in os.listdir(TEST_SRC_FOLDER) if f.endswith('.npz')]
for f in tqdm(files):
    src_path = os.path.join(TEST_SRC_FOLDER, f)
    with np.load(src_path) as data:
        X = data['X']
        y = data['y']

    # Strip only the final extension. Splitting on the first '.' would map
    # 'a.v1.npz' and 'a.v2.npz' to the same stem and overwrite thumbnails.
    stem = os.path.splitext(f)[0]

    # Tile extent along axes 1 and 2 of X. Floor division means trailing
    # rows/columns that do not fill a whole tile are dropped.
    patch_size_x = X.shape[1] // NUM_PATCHES_X
    patch_size_y = X.shape[2] // NUM_PATCHES_Y

    X_patches = []
    y_patches = []

    for i in range(NUM_PATCHES_X):
        for j in range(NUM_PATCHES_Y):
            start_x = i * patch_size_x
            end_x = start_x + patch_size_x
            start_y = j * patch_size_y
            end_y = start_y + patch_size_y

            # X keeps its whole leading axis; y is sliced on its first two axes.
            # NOTE(review): assumes y's first two axes line up with axes 1 and 2
            # of X (e.g. a per-pixel label map) — confirm against the data.
            X_patch = X[:, start_x:end_x, start_y:end_y]
            y_patch = y[start_x:end_x, start_y:end_y]

            X_patches.append(X_patch)
            y_patches.append(y_patch)

    # Thumbnails are numbered from 1 in the order the tiles were collected
    # (outer loop over the X grid index, inner loop over the Y grid index).
    for patch_number, (X_patch, y_patch) in enumerate(zip(X_patches, y_patches), start=1):
        filename = f"{stem}_thumbnail_{patch_number}.npz"
        dest_path = os.path.join(TEST_DEST_FOLDER, filename)
        np.savez(dest_path, X=X_patch, y=y_patch)