# Dataset Saving

- UCI ML hand-written digits datasets
    - data object type: gray image
    - number of images: 1797
    - image size: 8 * 8
    - classes: 0, ..., 9

In [None]:
import os
import cv2 as cv
import numpy as np
from sklearn.datasets import load_digits

# Load scikit-learn's bundled copy of the UCI hand-written digits dataset.
digits = load_digits()

# The `/ 16 * 255` rescaling stretches the dataset's 0..16 intensity scale to
# the 0..255 range of an 8-bit grayscale image before casting to uint8.
X_raw = (digits.images.astype(float) / 16 * 255).astype(np.uint8)
y = digits.target

dataset_name = 'UCI_handwritten_digits'
# exist_ok avoids the check-then-create race and keeps the cell re-runnable.
os.makedirs(dataset_name, exist_ok=True)

# One PNG per sample, named by its index in the dataset.
for i, img in enumerate(X_raw):
    out_path = f'{dataset_name}/{i}.png'
    # cv.imwrite reports failure by returning False instead of raising, so
    # check it explicitly rather than silently producing a partial export.
    if not cv.imwrite(out_path, img):
        raise OSError(f'could not write image file: {out_path}')

- Cifar-10
    - data object type: color image
    - number of images: 50000 (train) + 10000 (test)
    - image size: 32 * 32
    - classes:
        - 0 = airplane
        - 1 = automobile
        - 2 = bird
        - 3 = cat
        - 4 = deer
        - 5 = dog
        - 6 = frog
        - 7 = horse
        - 8 = ship
        - 9 = truck

In [None]:
import os
import cv2 as cv
import tensorflow_datasets as tfds

# Download (if necessary) and load CIFAR-10, then materialise both splits as
# lists of example dicts holding NumPy values.
cifar10 = tfds.load('cifar10', download=True)
cifar10_np = tfds.as_numpy(cifar10)  # convert once, reuse for both splits
train = [*cifar10_np['train']]
test = [*cifar10_np['test']]

dataset_name = 'Cifar-10'

# Write every example of each split to '<dataset_name>/<split>/<id>.png'.
for split_name, examples in (('train', train), ('test', test)):
    split_dir = f'{dataset_name}/{split_name}'
    # Creates the dataset directory as well; exist_ok keeps re-runs harmless.
    os.makedirs(split_dir, exist_ok=True)

    for example in examples:
        title = example['id'].decode()
        # TFDS decodes images in RGB channel order, whereas cv.imwrite
        # interprets 3-channel arrays as BGR. Convert first, otherwise the
        # saved files have their red and blue channels swapped.
        img = cv.cvtColor(example['image'], cv.COLOR_RGB2BGR)
        out_path = f'{split_dir}/{title}.png'
        # cv.imwrite returns False on failure instead of raising.
        if not cv.imwrite(out_path, img):
            raise OSError(f'could not write image file: {out_path}')

- imagenette
    - data object type: color image
    - number of images: 12894 (train) + 500 (validation)
    - image size: >= 160 * 160 (shortest side is 160)
    - classes:
        - n01440764 = 'tench'
        - n02102040 = 'English springer'
        - n02979186 = 'cassette player'
        - n03000684 = 'chain saw'
        - n03028079 = 'church'
        - n03394916 = 'French horn'
        - n03417042 = 'garbage truck'
        - n03425413 = 'gas pump'
        - n03445777 = 'golf ball'
        - n03888257 = 'parachute'

In [18]:
import os
import cv2 as cv
import tensorflow_datasets as tfds

# Download (if necessary) and load the 160px imagenette variant, then
# materialise both splits as lists of example dicts holding NumPy values.
imagenette = tfds.load('imagenette/160px', download=True)
imagenette_np = tfds.as_numpy(imagenette)  # convert once, reuse for both splits
train = [*imagenette_np['train']]
validation = [*imagenette_np['validation']]

dataset_name = 'imagenette'

# Write every example of each split to
# '<dataset_name>/<split>/<index>-<label>.png'.
for split_name, examples in (('train', train), ('validation', validation)):
    split_dir = f'{dataset_name}/{split_name}'
    # Creates the dataset directory as well; exist_ok keeps re-runs harmless.
    os.makedirs(split_dir, exist_ok=True)

    for i, example in enumerate(examples):
        label = example['label']
        # TFDS decodes images in RGB channel order, whereas cv.imwrite
        # interprets 3-channel arrays as BGR. Convert first, otherwise the
        # saved files have their red and blue channels swapped.
        img = cv.cvtColor(example['image'], cv.COLOR_RGB2BGR)
        out_path = f'{split_dir}/{i}-{label}.png'
        # cv.imwrite returns False on failure instead of raising.
        if not cv.imwrite(out_path, img):
            raise OSError(f'could not write image file: {out_path}')