Performs data augmentation by mirroring (flipping), shifting (left, right, down, top), and differentiating (right, down).

In [92]:
import numpy as np
import pandas as pd
import scipy
from numpy import linalg
import cvxopt
from cvxopt import solvers, matrix
from scipy.spatial.distance import pdist, cdist, squareform

import matplotlib.pyplot as plt
%matplotlib inline
from PIL import Image


In [93]:
%%time
# Load the training and test images. The CSV files are read without a header
# row and only their first 3072 columns are kept.
df_X_train = pd.read_csv('Xtr.csv', header=None, usecols=np.arange(3072))
df_X_test = pd.read_csv('Xte.csv', header=None, usecols=np.arange(3072))
# Training labels; this file is read with its header row.
df_y_train = pd.read_csv('Ytr.csv')


# Convert everything to float NumPy arrays for the numerical code below.
X_train = np.array(df_X_train, dtype=float)
X_test = np.array(df_X_test, dtype=float)
# The labels are taken from the 'Prediction' column.
y_train = np.array(df_y_train['Prediction'], dtype=float)

CPU times: user 4.69 s, sys: 108 ms, total: 4.8 s
Wall time: 4.8 s


In [94]:
# Class balance check: print, for every label value, the fraction of training
# samples carrying it. The training set is balanced, so a one-vs-all (OVA)
# scheme can be used.
classes = np.unique(y_train)
for class_ in classes:
    print(class_, np.mean(class_ == y_train))

0.0 0.1
1.0 0.1
2.0 0.1
3.0 0.1
4.0 0.1
5.0 0.1
6.0 0.1
7.0 0.1
8.0 0.1
9.0 0.1


In [95]:
def rgb2gray(rgb, reshape = True):
    """Convert one flattened RGB image to grayscale.

    Parameters
    ----------
    rgb : array-like with 3072 entries
        Image stored channel by channel: entries [0, 1024) are red,
        [1024, 2048) are green and [2048, 3072) are blue.
    reshape : bool, default True
        When truthy the result is returned as a (32, 32) array, otherwise as
        a flat array of 1024 values.

    Returns
    -------
    numpy.ndarray
        Weighted sum 0.299 * R + 0.587 * G + 0.114 * B for every pixel.

    Raises
    ------
    ValueError
        If `rgb` does not contain exactly 3072 values.
    """
    rgb = np.asarray(rgb).ravel()
    if rgb.size != 3072:
        raise ValueError(
            "rgb must contain 3072 values (3 channels of 32x32 pixels), "
            "got %d" % rgb.size
        )

    # One row per pixel, one column per channel.
    colors = np.c_[rgb[:1024], rgb[1024:2048], rgb[2048:]]
    # Same weights as MATLAB: 0.2989 * R + 0.5870 * G + 0.1140 * B
    gray = np.dot(colors, [0.299, 0.587, 0.114])

    # Plain truthiness so that NumPy booleans and 0/1 flags behave like
    # True/False (an identity test against True would reject them).
    if reshape:
        gray = gray.reshape(32, 32)
    return gray


In [96]:
def plot(X, gray=True, lim=True):
    """Show the rows of X as grayscale images, one figure per image.

    Parameters
    ----------
    X : 2-D array
        One flattened RGB image per row, in the layout `rgb2gray` expects.
    gray : bool, default True
        Accepted for backward compatibility; currently unused, images are
        always rendered in grayscale.
    lim : bool, default True
        When truthy at most 13 images are shown, so that a large X does not
        open thousands of figures. Pass False to show every row.
    """
    n, _ = X.shape
    # 13 keeps the historical cut-off (indices 0..12 were displayed).
    n_shown = min(n, 13) if lim else n
    for kk in range(n_shown):
        plt.imshow(rgb2gray(X[kk]), cmap='gray')
        plt.show()

### Data augmentation

* We perform 4 `rot90` rotations and a transpose each time, so this will lead to 20K images

In [97]:
def flipping(X, y):
    """Augment an image set with the left-right mirror of every image.

    Parameters
    ----------
    X : array of shape (n, 3072)
        One flattened image per row, stored channel by channel
        (1024 red, 1024 green, 1024 blue values, each a 32x32 plane).
    y : array of length n
        Label of every row of X.

    Returns
    -------
    X_aug : array of shape (2 * n, 3072)
        The rows of X followed by their mirrored versions.
    y_aug : array of length 2 * n
        The labels repeated once, so they stay aligned with X_aug.
    """
    X = np.asarray(X)
    y = np.asarray(y)
    n = X.shape[0]

    # View each row as (channel, row, column) and reverse the column axis.
    # This equals rot90(transpose(plane), k=3) applied to every plane, and
    # each channel stays in its own 1024-wide slot.
    mirrored = X.reshape(n, 3, 32, 32)[:, :, :, ::-1].reshape(n, -1)

    return np.concatenate([X, mirrored]), np.concatenate([y, y])

In [98]:
def shift_right(X):
    """Shift every image one pixel to the right.

    X holds one flattened image per row (three consecutive 32x32 planes).
    In each plane every column moves one step right and the first column is
    kept as it was, so it appears twice. Returns a new array shaped like X.
    """
    n_samples, _ = X.shape

    # (sample, channel, row, column) view of the flattened layout.
    planes = X.reshape(n_samples, 3, 32, 32)
    # First column repeated, then columns 0..30.
    moved = np.concatenate([planes[..., :1], planes[..., :-1]], axis=-1)

    return moved.reshape(n_samples, -1)

def shift_left(X):
    """Shift every image one pixel to the left.

    X holds one flattened image per row (three consecutive 32x32 planes).
    In each plane every column moves one step left and the last column is
    kept as it was, so it appears twice. Returns a new array shaped like X.
    """
    n_samples, _ = X.shape

    # (sample, channel, row, column) view of the flattened layout.
    planes = X.reshape(n_samples, 3, 32, 32)
    # Columns 1..31, then the last column repeated.
    moved = np.concatenate([planes[..., 1:], planes[..., -1:]], axis=-1)

    return moved.reshape(n_samples, -1)

def shift_top(X):
    """Shift every image one pixel towards the top.

    X holds one flattened image per row (three consecutive 32x32 planes).
    In each plane every row moves one step up and the last row is kept as it
    was, so it appears twice. Returns a new array shaped like X.
    """
    n_samples, _ = X.shape

    # (sample, channel, row, column) view of the flattened layout.
    planes = X.reshape(n_samples, 3, 32, 32)
    # Rows 1..31, then the last row repeated.
    moved = np.concatenate([planes[:, :, 1:, :], planes[:, :, -1:, :]], axis=2)

    return moved.reshape(n_samples, -1)

def shift_down(X):
    """Shift every image one pixel towards the bottom.

    X holds one flattened image per row (three consecutive 32x32 planes).
    In each plane every row moves one step down and the first row is kept as
    it was, so it appears twice. Returns a new array shaped like X.
    """
    n_samples, _ = X.shape

    # (sample, channel, row, column) view of the flattened layout.
    planes = X.reshape(n_samples, 3, 32, 32)
    # First row repeated, then rows 0..30.
    moved = np.concatenate([planes[:, :, :1, :], planes[:, :, :-1, :]], axis=2)

    return moved.reshape(n_samples, -1)

In [99]:
def differentiate_right(X):
    """Replace every pixel by its difference with the pixel on its left.

    X holds one flattened image per row (three consecutive 32x32 planes).
    In each plane column j (j >= 1) becomes column j minus column j - 1,
    while column 0 keeps its original values. Returns a new array shaped
    like X.
    """
    n_samples, _ = X.shape

    # (sample, channel, row, column) view of the flattened layout.
    planes = X.reshape(n_samples, 3, 32, 32)
    # Original first column followed by the 31 horizontal differences.
    derived = np.concatenate(
        [planes[..., :1], np.diff(planes, axis=-1)], axis=-1
    )

    return derived.reshape(n_samples, -1)

In [100]:
def differentiate_down(X):
    """Replace every pixel by its difference with the pixel above it.

    X holds one flattened image per row (three consecutive 32x32 planes).
    In each plane row i (i >= 1) becomes row i minus row i - 1, while row 0
    keeps its original values. Returns a new array shaped like X.
    """
    n_samples, _ = X.shape

    # (sample, channel, row, column) view of the flattened layout.
    planes = X.reshape(n_samples, 3, 32, 32)
    # Original first row followed by the 31 vertical differences.
    derived = np.concatenate(
        [planes[:, :, :1, :], np.diff(planes, axis=2)], axis=2
    )

    return derived.reshape(n_samples, -1)

In [101]:
# Horizontal and vertical pixel-difference versions of the training images.
diff_right = differentiate_right(X_train)
diff_down = differentiate_down(X_train)

In [102]:
# One-pixel shifts of the training images in each of the four directions.
r_shifted = shift_right(X_train)
l_shifted = shift_left(X_train)
t_shifted = shift_top(X_train)
d_shifted = shift_down(X_train)

In [103]:
# Stack the six augmented copies row-wise. The unmodified X_train is not part
# of this stack.
X_res = np.r_[diff_right, diff_down, r_shifted, l_shifted, t_shifted, d_shifted]
# Repeat the labels once per augmented copy so they line up with X_res.
y_res = np.tile(y_train, 6)

In [None]:
# Save the augmented images and their labels to disk (np.save appends the
# .npy extension to these file names).
np.save('X_train_rgb', X_res)
np.save('y_train_rgb', y_res)