In [1]:
% load_ext autoreload
% autoreload 2
# Plot configurations
% matplotlib inline
from __future__ import print_function
import tensorflow as tf
import numpy as np
from cifar_utils import load_data
from matplotlib import pyplot as plt


  from ._conv import register_converters as _register_converters


In [3]:
# Load the raw CIFAR-10 data.
X_train, y_train = load_data(mode='train')

mask = np.arange(X_train.shape[0])
np.random.shuffle(mask)
X_train = X_train[mask]
y_train = y_train[mask]


./data/cifar-10-python.tar.gz already exists. Begin extracting...


In [4]:
################## global contrast normalization ######################
lamda = 10
epsilon = 1e-7

#Because of memory issue, the pre-process of test data has to be after training data
mean_image = np.mean(X_train, axis=0)
X_train = X_train.astype(np.float32) - mean_image.astype(np.float32)
contrast = np.sqrt(lamda + np.mean(X_train**2,axis=0))
X_train = X_train / np.maximum(contrast, epsilon)

################## ZCA whitening #########################
temp = []
principal_components = []
for c in range(3):
    X = X_train[:,c*1024:(c+1)*1024]
    cov = np.dot(X.T, X) / (X.shape[0]-1)
    u, s, _ = np.linalg.svd(cov)
    principal_components.append( np.dot(np.dot(u, np.diag(1. / np.sqrt(s + 10e-7))), u.T) )

    # Apply ZCA whitening
    whitex = np.dot(X, principal_components[c])
    temp.append(whitex)

X_train = np.append(temp[0],temp[1],axis=1)
X_train = np.append(X_train,temp[2],axis=1)
X_train = X_train.reshape([50000,3,32,32]).transpose((0,2,3,1))

In [5]:
num_training = 45000
num_validation = 5000

X_val = X_train[-num_validation:, :]
y_val = y_train[-num_validation:]

X_train = X_train[:num_training, :]
y_train = y_train[:num_training]

X_test = []
y_test = []

X_test, y_test = load_data(mode='test')
X_test = X_test.astype(np.float32) - mean_image
contrast = np.sqrt(lamda + np.mean(X_test**2,axis=0))
X_test = X_test / np.maximum(contrast, epsilon)

for c in range(3): 
    X = X_test[:,c*1024:(c+1)*1024]
    whitex = np.dot(X, principal_components[c])
    temp[c] = whitex

X_test = np.append(temp[0],temp[1],axis=1)
X_test = np.append(X_test,temp[2],axis=1)
X_test = X_test.reshape([10000,3,32,32]).transpose((0,2,3,1))

print('Train data shape: ', X_train.shape)
print('Train labels shape: ', y_train.shape)
print('Validation data shape: ', X_val.shape)
print('Validation labels shape: ', y_val.shape)
#print('Test data shape: ', X_test.shape)
#print('Test labels shape: ', y_test.shape)

./data/cifar-10-python.tar.gz already exists. Begin extracting...
Train data shape:  (45000, 32, 32, 3)
Train labels shape:  (45000,)
Validation data shape:  (5000, 32, 32, 3)
Validation labels shape:  (5000,)


In [6]:
del temp
del whitex
del principal_components
del u
del s
del X

In [7]:
# No Regularizer
from binary import training
tf.reset_default_graph()
training(X_train, y_train, X_val, y_val, X_test, y_test,
                     is_binary = False, 
                     is_stochastic = False, 
                     conv_featmap = [128, 128, 256, 256, 512, 512],
                     fc_units = [1024, 1024],
                     conv_kernel_size = [3, 3, 3, 3, 3, 3],
         pooling_size = [2, 2, 2],
         lr_start = 0.001,
         lr_end = 0.0001, 
         epoch = 20,
         batch_size = 50,
         is_drop_out = False,
         verbose = True,
         pre_trained_model = None)

number of batches for training: 900
epoch 1 
average train loss: 0.552013544374042 ,  average accuracy : 61.08444444444444%
update learning rate:  [0.00089125097]
validation accuracy : 65.53999999999999%
* Best accuracy: 65.53999999999999%
epoch 2 
average train loss: 0.458395932647917 ,  average accuracy : 78.05777777777777%
update learning rate:  [0.0007943282]
validation accuracy : 70.94%
* Best accuracy: 70.94%
epoch 3 
average train loss: 0.43521072917514375 ,  average accuracy : 83.25555555555556%
update learning rate:  [0.0007079457]
validation accuracy : 72.36%
* Best accuracy: 72.36%
epoch 4 
average train loss: 0.4198002501659923 ,  average accuracy : 87.16222222222223%
update learning rate:  [0.00063095725]
validation accuracy : 76.28%
* Best accuracy: 76.28%
epoch 5 
average train loss: 0.4059996331400341 ,  average accuracy : 90.64666666666666%
update learning rate:  [0.00056234124]
validation accuracy : 77.98%
* Best accuracy: 77.98%
epoch 6 
average train loss: 0.3973089

In [8]:
# BinaryConnect det.
from binary import training
tf.reset_default_graph()
training(X_train, y_train, X_val, y_val, X_test, y_test,
         is_binary = True, 
         is_stochastic = False, 
         conv_featmap = [128, 128, 256, 256, 512, 512],
         fc_units = [1024, 1024],
         conv_kernel_size = [3, 3, 3, 3, 3, 3],
         pooling_size = [2, 2, 2],
         lr_start = 0.1,
         lr_end = 0.001, 
         epoch = 20,
         batch_size = 50,
         is_drop_out = False,
         verbose = True,
         pre_trained_model = None)

number of batches for training: 900
epoch 1 
average train loss: 0.6522929443915685 ,  average accuracy : 44.34222222222222%
update learning rate:  [0.07943282]
validation accuracy : 51.86%
* Best accuracy: 51.86%
epoch 2 
average train loss: 0.5146370652980274 ,  average accuracy : 66.07777777777778%
update learning rate:  [0.06309573]
validation accuracy : 44.34%
epoch 3 
average train loss: 0.49400254315800135 ,  average accuracy : 70.50222222222223%
update learning rate:  [0.050118722]
validation accuracy : 47.8%
epoch 4 
average train loss: 0.4813077500462532 ,  average accuracy : 72.88222222222223%
update learning rate:  [0.039810713]
validation accuracy : 60.02%
* Best accuracy: 60.02%
epoch 5 
average train loss: 0.4675484471850925 ,  average accuracy : 76.15777777777778%
update learning rate:  [0.03162277]
validation accuracy : 63.42%
* Best accuracy: 63.42%
epoch 6 
average train loss: 0.4523362907767296 ,  average accuracy : 79.14222222222222%
update learning rate:  [0.02511

In [9]:
# BinaryConnect stoch.
from binary import training
tf.reset_default_graph()
training(X_train, y_train, X_val, y_val, X_test, y_test,
         is_binary = True, 
         is_stochastic = True, 
         conv_featmap = [128, 128, 256, 256, 512, 512],
         fc_units = [1024, 1024],
         conv_kernel_size = [3, 3, 3, 3, 3, 3],
         pooling_size = [2, 2, 2],
         lr_start = 0.1,
         lr_end = 0.0001, 
         epoch = 50,
         batch_size = 50,
         is_drop_out = False,
         verbose = True,
         pre_trained_model = None)

number of batches for training: 900
epoch 1 
average train loss: 0.7394289011425442 ,  average accuracy : 34.05111111111111%
update learning rate:  [0.087096356]
validation accuracy : 46.72%
* Best accuracy: 46.72%
epoch 2 
average train loss: 0.5385691745744812 ,  average accuracy : 62.315555555555555%
update learning rate:  [0.07585775]
validation accuracy : 51.14%
* Best accuracy: 51.14%
epoch 3 
average train loss: 0.5167757494913208 ,  average accuracy : 66.14444444444445%
update learning rate:  [0.066069335]
validation accuracy : 46.58%
epoch 4 
average train loss: 0.5145139608449406 ,  average accuracy : 66.45111111111112%
update learning rate:  [0.057543986]
validation accuracy : 55.7%
* Best accuracy: 55.7%
epoch 5 
average train loss: 0.5089431831902927 ,  average accuracy : 67.69777777777779%
update learning rate:  [0.050118715]
validation accuracy : 47.32%
epoch 6 
average train loss: 0.5042793378565047 ,  average accuracy : 68.69777777777777%
update learning rate:  [0.0436

epoch 49 
average train loss: 0.3796365750498242 ,  average accuracy : 98.28444444444445%
update learning rate:  [0.00011481525]
validation accuracy : 84.6%
epoch 50 
average train loss: 0.37955375307136113 ,  average accuracy : 98.29333333333334%
update learning rate:  [9.99999e-05]
validation accuracy : 84.53999999999999%
test accuracy: 84.83%
Traning ends. The best valid accuracy is 84.8%. Model named cifar10_1549236134.
