### Extract images from the CIFAR-10 dataset

In [1]:
import mxnet as mx
import numpy as np
import pickle
import cv2

In [2]:
def extractImagesAndLabels(path, file):
    """Load one pickled CIFAR batch and return its images and labels.

    Args:
        path: Directory prefix; it is concatenated directly with ``file``,
            so it must already end with a path separator.
        file: Name of the batch file inside ``path``.

    Returns:
        A tuple ``(img_flat, lab_flat)`` of MXNet NDArrays: the images
        reshaped to ``(N, 3, 32, 32)`` and the ``N`` labels.
    """
    # NOTE: pickle.load can execute arbitrary code; only use this on batch
    # files obtained from a trusted source.
    with open(path + file, 'rb') as f:
        dict_ = pickle.load(f, encoding='bytes')

    # -1 lets numpy infer the number of images, so batches that do not hold
    # exactly 10000 rows are handled as well.
    images = np.reshape(dict_[b'data'], (-1, 3, 32, 32))
    labels = dict_[b'labels']

    img_flat = mx.nd.array(images)
    lab_flat = mx.nd.array(labels)
    return img_flat, lab_flat

def extractCategories(path, file):
    """Return the ``b'label_names'`` entry of a pickled CIFAR meta file.

    Args:
        path: Directory prefix; it is concatenated directly with ``file``,
            so it must already end with a path separator.
        file: Name of the meta file inside ``path``.

    Returns:
        The object stored under the ``b'label_names'`` key. Because the
        pickle is loaded with ``encoding='bytes'``, strings pickled by
        Python 2 come back as ``bytes``.
    """
    # NOTE: pickle.load can execute arbitrary code; only use this on files
    # obtained from a trusted source.
    with open(path + file, 'rb') as f:
        dict_ = pickle.load(f, encoding='bytes')
    return dict_[b'label_names']

def saveCifarImage(array, path, file):
    """Write a single CIFAR image to ``path + file + ".png"``.

    Args:
        array: Image exposing ``asnumpy()`` (the callers in this file pass
            MXNet NDArrays), laid out channel-first as 3x32x32 in RGB order.
        path: Directory prefix, concatenated directly with ``file``.
        file: File name without the ``.png`` extension.

    Returns:
        Whatever ``cv2.imwrite`` returns for the write.
    """
    # channel-first (3x32x32) -> channel-last (32x32x3), the layout cv2 expects
    hwc_rgb = array.asnumpy().transpose(1, 2, 0)
    # cv2 works with BGR channel order, so swap from RGB before writing
    hwc_bgr = cv2.cvtColor(hwc_rgb, cv2.COLOR_RGB2BGR)
    target = path + file + ".png"
    return cv2.imwrite(target, hwc_bgr)

In [3]:
path = '../data/cifar-10-batches-py/'
prefix = 'data_batch_'

imgs_train, labels_train = [], []

# The training data is read from five batch files: data_batch_1 .. data_batch_5.
for batch_no in range(1, 6):
    imgs_train_flat, labs_train_flat = extractImagesAndLabels(path, f'{prefix}{batch_no}')
    # extend() iterates the returned arrays, appending one entry per element
    imgs_train.extend(imgs_train_flat)
    labels_train.extend(labs_train_flat)

# The testing data comes from the single 'test_batch' file.
imgs_test_flat, labs_test_flat = extractImagesAndLabels(path, 'test_batch')
imgs_test = list(imgs_test_flat)
labels_test = list(labs_test_flat)

In [4]:
# Sanity check: each split must hold exactly one label per extracted image.
assert len(labels_train) == len(imgs_train)
assert len(labels_test) == len(imgs_test)

In [5]:
### delete all regular files in a folder before image-saving starts
def refresh_folder(path_to_folder):
    """Empty *path_to_folder* of regular files, creating it if missing.

    Only regular files directly inside the folder are removed; entries for
    which ``os.path.isfile`` is false (e.g. sub-directories) are left
    untouched.

    Args:
        path_to_folder: Folder to clean. It is created (including parents)
            when it does not exist yet, so a first run does not fail.

    Returns:
        None. A file that cannot be removed is reported with ``print`` and
        skipped, so one failure does not abort the clean-up.
    """
    import os

    # Make sure the folder exists so listing it cannot fail on a fresh
    # checkout and later writes into it have a target directory.
    os.makedirs(path_to_folder, exist_ok=True)

    for file in os.listdir(path_to_folder):
        file_path = os.path.join(path_to_folder, file)
        if not os.path.isfile(file_path):
            continue
        try:
            os.unlink(file_path)
        except OSError as e:
            # best-effort: report the problem and keep going
            print(e)

In [6]:
# Clear out the training output folders before new files are written.
for _folder in ("../data/training/img/", "../data/training/label/"):
    refresh_folder(_folder)

In [7]:
# Class names, indexed by the numeric label stored with each image.
categories = extractCategories("../data/cifar-10-batches-py/", "batches.meta")
batches_train = []

# Save every training image as img-<index>.png and record its class name.
for i, (img, label) in enumerate(zip(imgs_train, labels_train)):
    # saveCifarImage passes on cv2.imwrite's result; stop on a failed write
    # instead of producing a labels file that points at missing images.
    if not saveCifarImage(img, "../data/training/img/", f"img-{i}"):
        raise OSError(f"could not write ../data/training/img/img-{i}.png")
    # each label is a one-element array; turn it into a plain int index
    batches_train.append(categories[int(label.asnumpy()[0])])

# One "<file name>:<class name>" line per image, in image order.
# NOTE(review): the class names were unpickled with encoding='bytes', so they
# are presumably bytes objects and get written as b'...' here. The format is
# kept as-is because readers of labels.txt may rely on it - confirm.
with open('../data/training/label/labels.txt', 'w') as f:
    f.writelines(f'img-{i}.png:{name}\n' for i, name in enumerate(batches_train))

In [8]:
# Clear out the testing output folders before new files are written.
for _folder in ("../data/testing/img/", "../data/testing/label/"):
    refresh_folder(_folder)

In [9]:
batches_test = []

# Save every testing image as img-<index>.png and record its class name.
for i, (img, label) in enumerate(zip(imgs_test, labels_test)):
    # saveCifarImage passes on cv2.imwrite's result; stop on a failed write
    # instead of producing a labels file that points at missing images.
    if not saveCifarImage(img, "../data/testing/img/", f"img-{i}"):
        raise OSError(f"could not write ../data/testing/img/img-{i}.png")
    # each label is a one-element array; turn it into a plain int index
    batches_test.append(categories[int(label.asnumpy()[0])])

# One "<file name>:<class name>" line per image, in image order.
# NOTE(review): the class names were unpickled with encoding='bytes', so they
# are presumably bytes objects and get written as b'...' here. The format is
# kept as-is because readers of labels.txt may rely on it - confirm.
with open('../data/testing/label/labels.txt', 'w') as f:
    f.writelines(f'img-{i}.png:{name}\n' for i, name in enumerate(batches_test))