# Generate Augmented MNIST
##### Sean Wade

In [1]:
from keras.preprocessing.image import ImageDataGenerator
from keras.utils import np_utils
from keras.datasets import mnist
import os
import numpy as np

Using TensorFlow backend.


In [2]:
(X_train, y_train), (X_test, y_test) = mnist.load_data()
img_rows, img_cols = 28, 28
y_train = np_utils.to_categorical(y_train,10)
X_train = X_train.reshape(X_train.shape[0], img_rows, img_cols, 1)
X_test = X_test.reshape(X_test.shape[0], img_rows, img_cols, 1)
X_train = X_train.astype('float32')
X_test = X_test.astype('float32')
X_train /= 255
X_test /= 255

In [18]:
# This will do preprocessing and realtime data augmentation:
datagen = ImageDataGenerator(
    featurewise_center=False,  # set input mean to 0 over the dataset
    samplewise_center=False,  # set each sample mean to 0
    featurewise_std_normalization=False,  # divide inputs by std of the dataset
    samplewise_std_normalization=False,  # divide each input by its std
    zca_whitening=False,  # apply ZCA whitening
    rotation_range=20,  # randomly rotate images in the range (degrees, 0 to 180)
    width_shift_range=0.1,  # randomly shift images horizontally (fraction of total width)
    height_shift_range=0.1,  # randomly shift images vertically (fraction of total height)
    horizontal_flip=False,  # randomly flip images
    vertical_flip=False)  # randomly flip images

# Prepare the generator
datagen.fit(X_train)

In [23]:
data, labels = np.array([]), np.array([])


In [23]:
i = 0
for batch in datagen.flow(X_train, y_train, batch_size=100):
    
    x_batch, y_batch = batch
    
    data = np.concatenate((data, x_batch.reshape(100, 784)))
    labels = np.concatenate((labels, y_batch))

    i += 1
    if i > 7000:
        break  # otherwise the generator would loop indefinitely

In [27]:
data.shape

(700132, 784)

In [26]:
np.save('aug_mnist_data2', data)

In [49]:
data = x_batch.reshape(32, 784)

ValueError: cannot reshape array of size 78400 into shape (32,784)

In [19]:
i = 0
for batch in datagen.flow(X_train, y_train, batch_size=32,
                          save_to_dir='aug_data', save_prefix='mnist', save_format='jpeg'):
    data, labels = batch
    i += 1
    if i > 2:
        break  # otherwise the generator would loop indefinitely

In [12]:
directory = './aug_data/'
for filename in os.listdir(directory):
    if filename.endswith(".jpeg"):
        print os.path.join(directory, filename)

./aug_data/mnist_705_5087.jpeg
./aug_data/mnist_1001_3299.jpeg
./aug_data/mnist_1629_9009.jpeg
./aug_data/mnist_1172_679.jpeg
./aug_data/mnist_1702_7173.jpeg
./aug_data/mnist_357_2620.jpeg
./aug_data/mnist_437_5405.jpeg
./aug_data/mnist_330_7686.jpeg
./aug_data/mnist_778_5658.jpeg
./aug_data/mnist_753_4715.jpeg
./aug_data/mnist_1840_7960.jpeg
./aug_data/mnist_965_8571.jpeg
./aug_data/mnist_1294_1140.jpeg
./aug_data/mnist_485_8916.jpeg
./aug_data/mnist_288_4209.jpeg
./aug_data/mnist_550_2331.jpeg
./aug_data/mnist_703_1171.jpeg
./aug_data/mnist_1610_3810.jpeg
./aug_data/mnist_347_645.jpeg
./aug_data/mnist_720_6975.jpeg
./aug_data/mnist_1170_4133.jpeg
./aug_data/mnist_423_5506.jpeg
./aug_data/mnist_1613_7647.jpeg
./aug_data/mnist_1543_8885.jpeg
./aug_data/mnist_1445_9418.jpeg
./aug_data/mnist_670_3725.jpeg
./aug_data/mnist_1827_9099.jpeg
./aug_data/mnist_567_5254.jpeg
./aug_data/mnist_1057_3702.jpeg
./aug_data/mnist_1125_1297.jpeg
./aug_data/mnist_1493_3134.jpeg
./aug_data/mnist_1458_9490