### Create Rotated MNIST
Rotated MNIST data set creator.
- For each selected digit class, randomly sample n instances.
- Rotate every instance k times, each time by a random angle between 0 and 360 degrees.

In [2]:
import os
import numpy as np
import scipy as sp
import cv2
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
from collections import defaultdict
import matplotlib.pyplot as plt

# Load MNIST from 'data/' with one-hot labels (one_hot=True); sample_images()
# reads this module-level `mnist` object.
mnist = input_data.read_data_sets('data/', one_hot=True)

Extracting data/train-images-idx3-ubyte.gz
Extracting data/train-labels-idx1-ubyte.gz
Extracting data/t10k-images-idx3-ubyte.gz
Extracting data/t10k-labels-idx1-ubyte.gz


In [3]:
def rotate(img, num_rot=10, show_imgs=False):
    """ Produces randomly rotated, flattened copies of a 2-D image.

    Every copy is rotated about the point (cols/2, rows/2) by an angle drawn
    independently as np.random.random() * 360, with scale 1 and the output
    canvas kept at the input size.
    Args:
        img:        2-D image array to be rotated
        num_rot:    number of rotated copies to generate
        show_imgs:  if True, display every rotated copy with matplotlib
    Returns:
        rotated_imgs: list holding num_rot flattened rotated images"""

    height, width = img.shape
    source = img.copy()
    centre = (width/2, height/2)
    canvas = (width, height)

    rotated_imgs = []
    for _ in range(num_rot):
        # one fresh random angle per copy; angles are not evenly spaced
        angle = 360. * np.random.random()
        affine = cv2.getRotationMatrix2D(centre, angle, 1)
        turned = cv2.warpAffine(source, affine, canvas)
        rotated_imgs.append(turned.flatten())

        if not show_imgs:
            continue
        plt.imshow(turned, interpolation='none', cmap='gray')
        plt.show()

    return rotated_imgs

In [4]:
def sample_images(classes=(0, 1, 2, 3, 4, 5, 6, 7, 8, 9), num_unique_samples=1, num_rot_samples=100):
    """ Sample images from MNIST classes, and create rotated dataset.

    Reads the training split of the module-level `mnist` object (one-hot
    labels, decoded here with argmax) and calls `rotate` on every sampled
    digit.
    Args:
        classes:             iterable of digit classes to sample from
        num_unique_samples:  number of distinct digits drawn per class
                             (without replacement)
        num_rot_samples:     number of random rotations per drawn digit
    Returns:
        to_save_imgs:    array of shape (N, 784) with the flattened rotated
                         images, N = len(classes) * num_unique_samples *
                         num_rot_samples
        to_save_labels:  int32 array of shape (N,) with the class of each row"""

    to_save_imgs = []
    to_save_labels = []

    for c in classes:
        # keep only the training images whose one-hot label decodes to c
        imgs = mnist.train.images[np.argmax(mnist.train.labels, -1) == c]
        sample = np.random.choice(np.shape(imgs)[0], num_unique_samples, replace=False)

        for s in sample:
            img = np.reshape(imgs[s, :], (28, 28))
            to_save_imgs.append(rotate(img, num_rot_samples))
            to_save_labels.append(np.full(num_rot_samples, c, dtype=np.int32))
        print('\r rotated: %d' % (c), end='')

    print ('\r--->done creating rotated data set', end='')

    to_save_imgs = np.reshape(to_save_imgs, (-1, 784))
    # explicit dtype keeps the labels integer even when nothing was sampled
    to_save_labels = np.asarray(to_save_labels, dtype=np.int32).flatten()
    return to_save_imgs, to_save_labels

In [5]:
def split(imgs, labels, val_ratio=0.05, test_ratio=0.1, classes=None):
    """Splits the total dataset in a class-balanced train, val, and test set
    Args:
        imgs:        array with one image per row
        labels:      1-D array with the class label of every row in imgs
        val_ratio:   ratio of validation set
        test_ratio:  ratio of test set
        classes:     class labels to balance over; defaults to the distinct
                     values found in labels
    Returns:
        train images, train labels, val images, val labels,
        test images, test labels
    Raises:
        ValueError:  if there are no classes to balance over"""

    labels = np.asarray(labels)
    if classes is None:
        # derive the classes from the data instead of relying on a global
        classes = np.unique(labels)
    num_classes = len(classes)
    if num_classes == 0:
        raise ValueError('split needs at least one class to balance over')

    val_size = int(imgs.shape[0] * val_ratio)
    test_size = int(imgs.shape[0] * test_ratio)
    held_out_per_class = (val_size + test_size) // num_classes
    val_per_class = val_size // num_classes

    val_parts = []
    test_parts = []
    for l in classes:
        # choice without replacement returns the picks in random order, so
        # cutting the result in two yields random, disjoint val and test sets
        held_out = np.random.choice(np.where(labels == l)[0],
                                    held_out_per_class,
                                    replace=False)
        val_parts.append(held_out[:val_per_class])
        test_parts.append(held_out[val_per_class:])

    val_idx = np.concatenate(val_parts)
    test_idx = np.concatenate(test_parts)
    # everything that was not held out for val/test is training data
    train_idx = np.setdiff1d(np.arange(labels.shape[0]),
                             np.concatenate([val_idx, test_idx]))

    return imgs[train_idx], labels[train_idx], \
           imgs[val_idx], labels[val_idx], \
           imgs[test_idx], labels[test_idx]
    

def save_data(names, data, pre_name='rot'):
    """Saves arrays as .npy files under ./data/<pre_name>/
    Args:
        names:     file name suffixes; each file is written as
                   'rmnist.' + name
        data:      arrays to save, paired with names by position
        pre_name:  name of the sub-folder of 'data/' to write into"""
    pre_name = 'data/' + pre_name
    # makedirs also creates a missing 'data/' parent and tolerates an
    # already existing target folder
    os.makedirs(pre_name, exist_ok=True)

    for n, d in zip(names, data):
        n = ('./%s/rmnist.' % pre_name) + n
        np.save(n, d)

    print ('\n saved data ')

#### Actually call routine to create rotated data set

In [6]:
# Build the rotated data set; it holds
# len(classes) * num_unique_samples * num_rot_samples images.
classes = [3]
num_unique_samples = 1
num_rot_samples = 10000
imgs, labels = sample_images(
    classes=classes,
    num_unique_samples=num_unique_samples,
    num_rot_samples=num_rot_samples,
)

# Partition into train, validation, and test sets.
train_im, train_l, val_im, val_l, test_im, test_l = split(imgs, labels)

# The output folder name encodes the configuration: <classes>c<samples>s<rotations>r.
class_tag = ''.join(str(c) for c in classes)
pre_name = '%sc%ds%dr' % (class_tag, num_unique_samples, num_rot_samples)

# digits/labels file pair for every subset, in the same order as `data`.
names = ['%s.%s.npy' % (kind, subset)
         for subset in ('train', 'validation', 'test')
         for kind in ('digits', 'labels')]
data = [train_im, train_l, val_im, val_l, test_im, test_l]
save_data(names, data, pre_name)

 rotated: 3--->done creating rotated data set
 saved data 


#### Rotated sanity check:

In [None]:
% matplotlib inline

# Show ten samples spaced num_rot_samples // 10 rows apart, starting at a
# random row of imgs.
digit = np.random.randint(0, len(labels))
step = num_rot_samples // 10
fig, axs = plt.subplots(2, 5)
axs = axs.flatten()
for j in range(10):
    # wrap around so a start near the end of imgs cannot index out of bounds
    row = (digit + j * step) % len(imgs)
    im = np.reshape(imgs[row], (28, 28))
    axs[j].axis('off')
    axs[j].imshow(im, interpolation='none', cmap='gray_r')

plt.show()