In [20]:
import gzip
import numpy as np

def extract_images(path, num_images=None):
    """Extract the images into a 3D uint8 numpy array [index, y, x].

    The file is expected to be a gzip-compressed IDX3 image file: a 16-byte
    header made of four big-endian uint32 values (magic number 2051, image
    count, rows, columns) followed by one unsigned byte per pixel.

    Args:
        path: Path to the gzip-compressed IDX image file.
        num_images: Number of images the file holds. When omitted (None),
            the count stored in the file header is used instead.

    Returns:
        A uint8 array of shape (num_images, rows, cols), where rows and cols
        are read from the header (28 x 28 for MNIST). The array is a
        read-only view over the decompressed bytes; copy it before mutating.

    Raises:
        ValueError: If the magic number is not 2051, or if the pixel data
            cannot be reshaped to (num_images, rows, cols).
    """
    # Get the data.
    with gzip.open(path, 'rb') as f:
        raw = f.read()

    # The header is four big-endian 32-bit unsigned integers.
    magic, count, rows, cols = (
        int(v) for v in np.frombuffer(raw, dtype='>u4', count=4)
    )
    if magic != 2051:
        raise ValueError(
            'Invalid magic number %d in IDX image file: %s' % (magic, path)
        )

    if num_images is None:
        num_images = count

    # Everything after the 16-byte header is pixel data, one byte per pixel.
    pixels = np.frombuffer(raw, dtype=np.uint8, offset=16)
    return pixels.reshape(num_images, rows, cols)

def extract_labels(path):
    """Read a gzip-compressed label file into a flat uint8 numpy array.

    The first 8 bytes of the decompressed stream are skipped; every byte
    after that becomes one element of the returned 1D array.
    """
    header_size = 8

    # Decompress the whole file up front, then view the payload as bytes.
    with gzip.open(path, mode='rb') as stream:
        payload = stream.read()

    return np.frombuffer(payload, dtype=np.uint8, offset=header_size)

# Training split: 60000 images plus the matching label file.
train_images, train_labels = (
    extract_images('../data/train-images-idx3-ubyte.gz', 60000),
    extract_labels('../data/train-labels-idx1-ubyte.gz'),
)

# Test split: 10000 images plus the matching label file.
test_images, test_labels = (
    extract_images('../data/t10k-images-idx3-ubyte.gz', 10000),
    extract_labels('../data/t10k-labels-idx1-ubyte.gz'),
)

In [30]:
import os
import cv2
import random

# Root folder under which the per-split image folders are written.
save_folder = '../data/mnist'

# A dedicated, seeded generator keeps the train/validation split identical on
# every Restart & Run All without touching the global `random` state.
split_rng = random.Random(0)

# Derive the population size from the labels instead of hard-coding 60000, so
# the training sample and its complement are always taken over the same range.
num_samples = len(train_labels)
train_indices = split_rng.sample(range(num_samples), 50000)

# Every index that was not sampled for training goes to validation.
val_indices = set(range(num_samples)).difference(train_indices)

In [32]:
from tqdm import tqdm

# Write every training image to <save_folder>/train/<label>/<index>.png.
for i in tqdm(train_indices):
    image, label = train_images[i], train_labels[i]
    label_folder = os.path.join(save_folder, 'train', str(label))
    # exist_ok=True already makes this a no-op for an existing folder, so no
    # separate os.path.exists() check is needed.
    os.makedirs(label_folder, exist_ok=True)

    image_path = os.path.join(label_folder, str(i) + '.png')
    # cv2.imwrite signals failure through its boolean return value; without
    # this check a failed write would silently drop the image.
    if not cv2.imwrite(image_path, image):
        raise IOError('Failed to write image: ' + image_path)

100%|██████████| 50000/50000 [00:03<00:00, 15348.37it/s]


In [33]:
# Write every validation image to <save_folder>/val/<label>/<index>.png.
# The validation samples are taken from the training arrays.
for i in tqdm(val_indices):
    image, label = train_images[i], train_labels[i]
    label_folder = os.path.join(save_folder, 'val', str(label))
    # exist_ok=True already makes this a no-op for an existing folder, so no
    # separate os.path.exists() check is needed.
    os.makedirs(label_folder, exist_ok=True)

    image_path = os.path.join(label_folder, str(i) + '.png')
    # cv2.imwrite signals failure through its boolean return value; without
    # this check a failed write would silently drop the image.
    if not cv2.imwrite(image_path, image):
        raise IOError('Failed to write image: ' + image_path)

100%|██████████| 10000/10000 [00:00<00:00, 16257.29it/s]


In [36]:
# Write every test image to <save_folder>/test/<label>/<index>.png.
# Iterate over the labels actually loaded rather than a hard-coded 10000.
for i in tqdm(range(len(test_labels))):
    image, label = test_images[i], test_labels[i]
    label_folder = os.path.join(save_folder, 'test', str(label))
    # exist_ok=True already makes this a no-op for an existing folder, so no
    # separate os.path.exists() check is needed.
    os.makedirs(label_folder, exist_ok=True)

    image_path = os.path.join(label_folder, str(i) + '.png')
    # cv2.imwrite signals failure through its boolean return value; without
    # this check a failed write would silently drop the image.
    if not cv2.imwrite(image_path, image):
        raise IOError('Failed to write image: ' + image_path)

100%|██████████| 10000/10000 [00:00<00:00, 18274.03it/s]


usage: ipykernel_launcher.py [-h] [--batch-size N] [--test-batch-size N]
                             [--epochs N] [--lr LR] [--gamma M] [--no-cuda]
                             [--dry-run] [--seed S] [--log-interval N]
                             [--save-model]
ipykernel_launcher.py: error: unrecognized arguments: --ip=127.0.0.1 --stdin=9003 --control=9001 --hb=9000 --Session.signature_scheme="hmac-sha256" --Session.key=b"a720d07c-e4e8-4e74-b3d9-373221998bd7" --shell=9002 --transport="tcp" --iopub=9004 --f=/tmp/tmp-188109JcTtrW7SOdmT.json


SystemExit: 2

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)
