# Experiment 2
*Question*: How does generalization error (error on the validation set) change as we reduce the number of unique training examples, for both the subspace-constrained and non-subspace-constrained methods?
*Hypothesis*: The subspace-constrained method will have lower generalization error than the non-subspace-constrained method as the number of unique training examples decreases.
*Todo*: use subspace dimension from experiment 1

In [1]:
from __future__ import print_function
import numpy as np
import matplotlib.pyplot as plt
from cs231n.classifiers.cnn import *
from cs231n.data_utils import get_CIFAR10_data
from cs231n.gradient_check import eval_numerical_gradient_array, eval_numerical_gradient
from cs231n.layers import *
from cs231n.fast_layers import *
from cs231n.solver import Solver
from copy import deepcopy
from IPython.core.debugger import set_trace

# Notebook-wide matplotlib defaults: figure size, no pixel interpolation,
# and a grayscale colormap for images.
plt.rcParams.update({
    'figure.figsize': (10.0, 8.0),
    'image.interpolation': 'nearest',
    'image.cmap': 'gray',
})

def rel_error(x, y):
  """Return the largest element-wise relative error between x and y.

  The relative error is |x - y| / (|x| + |y|), with the denominator
  floored at 1e-8 so that entries where both inputs are zero do not
  divide by zero.
  """
  abs_diff = np.abs(x - y)
  scale = np.maximum(1e-8, np.abs(x) + np.abs(y))
  return np.max(abs_diff / scale)

# Fetch the preprocessed CIFAR10 splits and report the shape of each array.
alldata = get_CIFAR10_data()
for split_name in alldata:
  print('%s: ' % split_name, alldata[split_name].shape)

X_train:  (49000, 3, 32, 32)
y_train:  (49000,)
X_val:  (1000, 3, 32, 32)
y_val:  (1000,)
X_test:  (1000, 3, 32, 32)
y_test:  (1000,)


In [2]:
# Sweep over the fraction of unique training examples. For each fraction the
# training set keeps its full size: the first `ntrain_unique` examples are
# used as-is and the remainder is filled with duplicates sampled (with
# replacement) from those unique examples. A standard and a subspace-
# constrained network are then trained on the same data and compared.
ntrain_total = alldata['X_train'].shape[0]
pct_train_sweep = (1, 0.8, 0.6, 0.4, 0.2)
# Dimension passed to the subspace-constrained training run.
reduced_dim = 24
# One row per entry of pct_train_sweep.
# Column 0: standard model; column 1: subspace-constrained model.
results_train_accuracy = np.zeros((len(pct_train_sweep), 2))
results_test_accuracy = np.zeros((len(pct_train_sweep), 2))


def _make_solver(model, data):
    """Build a Solver with the hyperparameters shared by both models."""
    return Solver(model, data,
                  num_epochs=2, batch_size=50,
                  update_rule='adam',
                  optim_config={
                      'learning_rate': 1e-4,
                  },
                  verbose=True, print_every=20)


for i, pct_train in enumerate(pct_train_sweep):
    # -------------------------
    # --- generate data set ---
    # -------------------------
    # int(...) because round() returns a float on Python 2, and the count
    # is used below as an array length / sample size.
    ntrain_unique = int(round(pct_train * ntrain_total))
    ntrain_dupl = ntrain_total - ntrain_unique
    ind_unique = np.arange(0, ntrain_unique)
    ind_dupl = np.random.choice(ind_unique, size=ntrain_dupl)
    ind_train = np.concatenate((ind_unique, ind_dupl))
    print('----- trial %d: %d percent of data (%d unique examples, %d duplicate examples) -----' % (i, pct_train*100, ntrain_unique, ntrain_dupl))
    data_abbrev = {
        # Indexing with an integer array already yields a fresh copy, so the
        # full training set does not need to be deep-copied first.
        'X_train': alldata['X_train'][ind_train],
        'y_train': alldata['y_train'][ind_train],
        'X_val':   deepcopy(alldata['X_val']),
        'y_val':   deepcopy(alldata['y_val']),
        'X_test':  deepcopy(alldata['X_test']),
        'y_test':  deepcopy(alldata['y_test']),
    }
    # --------------------------------------
    # --- train and report test accuracy ---
    # --------------------------------------
    standardModel = ThreeLayerConvNet(weight_scale=0.001, hidden_dim=600, reg=0.001)
    subspaceModel = ThreeLayerConvNet(weight_scale=0.001, hidden_dim=600, reg=0.001)
    standardSolver = _make_solver(standardModel, data_abbrev)
    subspaceSolver = _make_solver(subspaceModel, data_abbrev)

    print('=== TRAINING STANDARD MODEL FOR TRIAL %d ===' % i)
    standardSolver.train()
    results_train_accuracy[i, 0] = standardSolver.train_acc_history[-1]
    results_test_accuracy[i, 0] = standardSolver.check_accuracy(alldata['X_test'], alldata['y_test'])
    # Report the standard model's own column (0) for both train and test.
    print('final accuracy for %d percent of data, standard model: %.4f (train), %.4f (test)' % (pct_train*100, results_train_accuracy[i, 0], results_test_accuracy[i, 0]))

    print('=== TRAINING SUBSPACE MODEL FOR TRIAL %d ===' % i)
    subspaceSolver.train(dim=reduced_dim)
    results_train_accuracy[i, 1] = subspaceSolver.train_acc_history[-1]
    results_test_accuracy[i, 1] = subspaceSolver.check_accuracy(alldata['X_test'], alldata['y_test'])
    # Report the subspace model's own column (1) for both train and test.
    print('final accuracy for %d percent of data, subspace model: %.4f (train), %.4f (test)' % (pct_train*100, results_train_accuracy[i, 1], results_test_accuracy[i, 1]))

----- trial 0: 100 percent of data (49000 unique examples, 0 duplicate examples) -----
=== TRAINING STANDARD MODEL FOR TRIAL 0 ===
(Iteration 1 / 1960) loss: 2.304995
(Epoch 0 / 2) train acc: 0.162000; val_acc: 0.175000
(Iteration 21 / 1960) loss: 2.157250
(Iteration 41 / 1960) loss: 1.967369
(Iteration 61 / 1960) loss: 1.870945
(Iteration 81 / 1960) loss: 1.716645
(Iteration 101 / 1960) loss: 1.815589
(Iteration 121 / 1960) loss: 1.715465
(Iteration 141 / 1960) loss: 1.500106
(Iteration 161 / 1960) loss: 1.874528
(Iteration 181 / 1960) loss: 1.899476
(Iteration 201 / 1960) loss: 1.594301
(Iteration 221 / 1960) loss: 1.683984
(Iteration 241 / 1960) loss: 1.429353
(Iteration 261 / 1960) loss: 1.581854
(Iteration 281 / 1960) loss: 1.497817
(Iteration 301 / 1960) loss: 1.701874
(Iteration 321 / 1960) loss: 1.597978
(Iteration 341 / 1960) loss: 1.287961
(Iteration 361 / 1960) loss: 1.658388
(Iteration 381 / 1960) loss: 1.421290
(Iteration 401 / 1960) loss: 1.529719
(Iteration 421 / 1960) l

=== TRAINING STANDARD MODEL FOR TRIAL 1 ===
(Iteration 1 / 1960) loss: 2.304943
(Epoch 0 / 2) train acc: 0.151000; val_acc: 0.158000
(Iteration 21 / 1960) loss: 2.117028
(Iteration 41 / 1960) loss: 1.832338
(Iteration 61 / 1960) loss: 1.805229
(Iteration 81 / 1960) loss: 2.039425
(Iteration 101 / 1960) loss: 1.548470
(Iteration 121 / 1960) loss: 1.731984
(Iteration 141 / 1960) loss: 1.800997
(Iteration 161 / 1960) loss: 1.344682
(Iteration 181 / 1960) loss: 1.206739
(Iteration 201 / 1960) loss: 1.771347
(Iteration 221 / 1960) loss: 1.590330
(Iteration 241 / 1960) loss: 1.683910
(Iteration 261 / 1960) loss: 1.568958
(Iteration 281 / 1960) loss: 1.462315
(Iteration 301 / 1960) loss: 1.626151
(Iteration 321 / 1960) loss: 1.304370
(Iteration 341 / 1960) loss: 1.684724
(Iteration 361 / 1960) loss: 1.423770
(Iteration 381 / 1960) loss: 1.623692
(Iteration 401 / 1960) loss: 1.715405
(Iteration 421 / 1960) loss: 1.630377
(Iteration 441 / 1960) loss: 1.511309
(Iteration 461 / 1960) loss: 1.4490

=== TRAINING STANDARD MODEL FOR TRIAL 2 ===
(Iteration 1 / 1960) loss: 2.304949
(Epoch 0 / 2) train acc: 0.120000; val_acc: 0.124000
(Iteration 21 / 1960) loss: 2.049667
(Iteration 41 / 1960) loss: 1.983096
(Iteration 61 / 1960) loss: 1.834232
(Iteration 81 / 1960) loss: 1.993941
(Iteration 101 / 1960) loss: 1.456182
(Iteration 121 / 1960) loss: 1.850919
(Iteration 141 / 1960) loss: 1.316464
(Iteration 161 / 1960) loss: 1.665854
(Iteration 181 / 1960) loss: 1.553799
(Iteration 201 / 1960) loss: 1.434514
(Iteration 221 / 1960) loss: 1.340702
(Iteration 241 / 1960) loss: 1.555840
(Iteration 261 / 1960) loss: 1.695310
(Iteration 281 / 1960) loss: 1.654336
(Iteration 301 / 1960) loss: 1.675273
(Iteration 321 / 1960) loss: 1.497057
(Iteration 341 / 1960) loss: 1.548534
(Iteration 361 / 1960) loss: 1.335467
(Iteration 381 / 1960) loss: 1.540241
(Iteration 401 / 1960) loss: 1.310434
(Iteration 421 / 1960) loss: 1.303692
(Iteration 441 / 1960) loss: 1.328130
(Iteration 461 / 1960) loss: 1.2261

=== TRAINING STANDARD MODEL FOR TRIAL 3 ===
(Iteration 1 / 1960) loss: 2.304651
(Epoch 0 / 2) train acc: 0.176000; val_acc: 0.148000
(Iteration 21 / 1960) loss: 2.137957
(Iteration 41 / 1960) loss: 1.799657
(Iteration 61 / 1960) loss: 2.065138
(Iteration 81 / 1960) loss: 1.663573
(Iteration 101 / 1960) loss: 1.501495
(Iteration 121 / 1960) loss: 1.724843
(Iteration 141 / 1960) loss: 1.941786
(Iteration 161 / 1960) loss: 1.628663
(Iteration 181 / 1960) loss: 1.535586
(Iteration 201 / 1960) loss: 1.636595
(Iteration 221 / 1960) loss: 1.558481
(Iteration 241 / 1960) loss: 1.738307
(Iteration 261 / 1960) loss: 1.363172
(Iteration 281 / 1960) loss: 1.202737
(Iteration 301 / 1960) loss: 1.517403
(Iteration 321 / 1960) loss: 1.296008
(Iteration 341 / 1960) loss: 1.602054
(Iteration 361 / 1960) loss: 1.394601
(Iteration 381 / 1960) loss: 1.271911
(Iteration 401 / 1960) loss: 1.099083
(Iteration 421 / 1960) loss: 1.292488
(Iteration 441 / 1960) loss: 1.402308
(Iteration 461 / 1960) loss: 1.5159

=== TRAINING STANDARD MODEL FOR TRIAL 4 ===
(Iteration 1 / 1960) loss: 2.305070
(Epoch 0 / 2) train acc: 0.185000; val_acc: 0.167000
(Iteration 21 / 1960) loss: 2.180034
(Iteration 41 / 1960) loss: 1.956345
(Iteration 61 / 1960) loss: 1.936855
(Iteration 81 / 1960) loss: 1.659124
(Iteration 101 / 1960) loss: 1.657723
(Iteration 121 / 1960) loss: 1.629823
(Iteration 141 / 1960) loss: 1.465144
(Iteration 161 / 1960) loss: 1.739698
(Iteration 181 / 1960) loss: 1.372691
(Iteration 201 / 1960) loss: 1.452380
(Iteration 221 / 1960) loss: 1.336134
(Iteration 241 / 1960) loss: 1.469004
(Iteration 261 / 1960) loss: 1.384135
(Iteration 281 / 1960) loss: 1.280915
(Iteration 301 / 1960) loss: 1.172459
(Iteration 321 / 1960) loss: 1.383738
(Iteration 341 / 1960) loss: 1.382612
(Iteration 361 / 1960) loss: 1.187923
(Iteration 381 / 1960) loss: 1.547441
(Iteration 401 / 1960) loss: 1.355714
(Iteration 421 / 1960) loss: 1.397818
(Iteration 441 / 1960) loss: 1.235271
(Iteration 461 / 1960) loss: 1.0068

In [None]:
# results from 02/26/2018
# One row per entry of pct_train_sweep (1, 0.8, 0.6, 0.4, 0.2).
# Column 0: unconstrained network; column 1: subspace-constrained network.
# Stored as arrays (not nested tuples) so the column slices below work.
results_train_accuracy = np.array(((0.632, 0.658), (0.692, 0.699), (0.715, 0.701), (0.796, 0.773), (0.919, 0.915)))
results_test_accuracy  = np.array(((0.611, 0.588), (0.587, 0.609), (0.597, 0.584), (0.591, 0.589), (0.565, 0.557)))

print('training accuracy:')
print(results_train_accuracy)
print('test accuracy:')
print(results_test_accuracy)

# (subplot position, accuracy table, title, legend location) for each panel.
panels = (
    (1, results_train_accuracy, 'Training accuracy', 'upper right'),
    (2, results_test_accuracy, 'Test accuracy', 'upper left'),
)
for position, accuracy, title, legend_loc in panels:
    plt.subplot(2, 1, position)
    plt.plot(pct_train_sweep, accuracy[:, 0], '-o')
    plt.plot(pct_train_sweep, accuracy[:, 1], '-o')
    plt.legend(['Unconstrained network', 'Subspace constrained network'], loc=legend_loc)
    plt.xlabel('portion training samples unique')
    plt.ylabel('accuracy')
    plt.title(title)
    plt.xlim(0.15, 1.05)
    plt.ylim(0, 1)
plt.show()