<h2>IMPORTS</h2>

In [None]:
import hashlib
import os
import random
import warnings
from argparse import ArgumentParser
from glob import glob

import numpy as np
import torch
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader, ConcatDataset
from torchvision import datasets, transforms

warnings.filterwarnings('ignore')

<h2>SUPPORTING FUNCTIONS</h2>

In [None]:
def _save_images_to_folder(dataset, transform, path, split_name, idx, format_='.png'):
    """Transform each sample of ``dataset`` and save it as ``path/split_name/<label>/<idx><format_>``.

    Args:
        dataset: Iterable of samples where ``sample[0]`` is the image and
            ``sample[1]`` is its label.
        transform: Callable applied to every image; the object it returns
            must provide a ``save(file_path)`` method.
        path: Root output directory.
        split_name: Name of the sub-directory for this split.
        idx: Number used as the file name of the first saved image; it is
            incremented by one for every image written.
        format_: File extension appended to the running index.

    Returns:
        The index that follows the last one used, so that consecutive calls
        can keep numbering files without collisions.
    """
    split_dir = os.path.join(path, split_name)
    for sample in dataset:
        image = transform(sample[0])
        label_dir = os.path.join(split_dir, str(sample[1]))
        # exist_ok avoids the check-then-create race of a separate exists() test.
        os.makedirs(label_dir, exist_ok=True)
        image.save(os.path.join(label_dir, str(idx) + format_))
        idx += 1
    return idx

<h2>ARGUMENTS</h2>

In [None]:
# ---- Paths ----
# NOTE(review): MNIST_DIR and MNIST_SCALE_DIR are not referenced by the code
# visible in this notebook; they are kept in case other cells rely on them.
MNIST_DIR = 'datasets/'
MNIST_SCALE_DIR = 'datasets/'

# `source` is passed as `root=` to the MNIST loader; `dest` is the root
# directory under which the generated images are written.
source = 'datasets/'
dest = 'datasets/'

# Forwarded as `download=` to the MNIST loader.
download = True

# ---- Scale range ----
min_scale, max_scale = 0.3, 1.0

# Lower scale bounds the generation loop iterates over.
scales = [0.3, 0.5, 0.7]

# ---- Seeds: one set of datasets is produced per seed ----
seeds = list(range(6))

# NOTE(review): BUF_SIZE is not referenced by the code visible in this notebook.
BUF_SIZE = 64 * 1024

# ---- Number of samples per split ----
MNIST_TRAIN_SIZE, MNIST_VAL_SIZE, MNIST_TEST_SIZE = 100, 20, 50

# Random affine transform with 0 degrees of rotation and a scaling factor
# range of (min_scale, max_scale).
transform = transforms.RandomAffine(0, scale=(min_scale, max_scale))

<h2>DOWNLOAD MNIST AND GENERATE SCALED DATASETS</h2>

In [None]:
# Generate one rescaled MNIST dataset per (seed, lower scale bound) pair.
#
# MNIST itself and its labels do not depend on the seed or on the scale, so
# they are loaded and scanned once instead of once per combination.
dataset_train = datasets.MNIST(root=source, train=True, download=download)
dataset_test = datasets.MNIST(root=source, train=False, download=download)
concat_dataset = ConcatDataset([dataset_train, dataset_test])
all_labels = [el[1] for el in concat_dataset]
train_val_size = MNIST_TRAIN_SIZE + MNIST_VAL_SIZE

for seed in seeds:

    # A dedicated loop variable keeps the module-level `min_scale` untouched.
    for lower_scale in scales:

        print('Seed: ', seed)
        print('min_scale: ', lower_scale)

        # Seed every RNG that can be involved: train_test_split falls back to
        # numpy's global state, and recent torchvision versions sample
        # transform parameters from torch's global RNG (older ones used
        # Python's `random`).
        np.random.seed(seed)
        random.seed(seed)
        torch.manual_seed(seed)

        # Build the transform for THIS scale range. Reusing a single transform
        # created before the loop would rescale every dataset with the same
        # range, regardless of the range encoded in the output directory name.
        scale_transform = transforms.RandomAffine(0, scale=(lower_scale, max_scale))

        # First carve out (train + val) and test, stratified by label ...
        train_val, test = train_test_split(concat_dataset, train_size=train_val_size,
                                           test_size=MNIST_TEST_SIZE, stratify=all_labels)

        # ... then split (train + val) into train and val, again stratified.
        train_val_labels = [el[1] for el in train_val]
        train, val = train_test_split(train_val, train_size=MNIST_TRAIN_SIZE,
                                      test_size=MNIST_VAL_SIZE, stratify=train_val_labels)

        dataset_path = os.path.join(dest, 'MNIST_scale', "seed_{}".format(seed))
        dataset_path = os.path.join(dataset_path, "scale_{}_{}".format(lower_scale, max_scale))
        print('OUTPUT: {}'.format(dataset_path))

        # The running index is threaded through the three calls so that file
        # names are unique across the train, test and val splits.
        idx = _save_images_to_folder(train, scale_transform, dataset_path, 'train', 0, '.png')
        idx = _save_images_to_folder(test, scale_transform, dataset_path, 'test', idx, '.png')
        idx = _save_images_to_folder(val, scale_transform, dataset_path, 'val', idx, '.png')