# Experiment 2
*Question*: How does the generalization error (error on the validation set) change as we reduce the number of unique training examples, for both the subspace-constrained and the non-subspace-constrained methods?
*Hypothesis*: The subspace-constrained method will have lower generalization error than the non-subspace-constrained method as the number of unique training examples decreases.
*Todo*: use subspace dimension from experiment 1

In [None]:
from __future__ import print_function
import numpy as np
import matplotlib.pyplot as plt
from cs231n.classifiers.cnn import *
from cs231n.data_utils import get_CIFAR10_data
from cs231n.gradient_check import eval_numerical_gradient_array, eval_numerical_gradient
from cs231n.layers import *
from cs231n.fast_layers import *
from cs231n.solver import Solver
from copy import deepcopy

# Notebook-wide matplotlib defaults: figure size, and nearest-neighbour
# interpolation with a gray colormap for image display.
plt.rcParams.update({
    'figure.figsize': (10.0, 8.0),
    'image.interpolation': 'nearest',
    'image.cmap': 'gray',
})

def rel_error(x, y):
  """Return the largest element-wise relative error between x and y.

  Each element's error is |x - y| / max(1e-8, |x| + |y|); the 1e-8 floor
  keeps the division defined when both entries are zero.
  """
  abs_diff = np.abs(x - y)
  scale = np.maximum(1e-8, np.abs(x) + np.abs(y))
  return np.max(abs_diff / scale)

# Fetch the preprocessed CIFAR10 data and report the shape of every array in it.
alldata = get_CIFAR10_data()
for split_name, split_array in alldata.items():
  print('%s: ' % split_name, split_array.shape)

X_train:  (49000, 3, 32, 32)
y_train:  (49000,)
X_val:  (1000, 3, 32, 32)
y_val:  (1000,)
X_test:  (1000, 3, 32, 32)
y_test:  (1000,)


In [None]:
# Sweep over the fraction of unique training examples. For each fraction the
# training set is kept at its full size by padding it with randomly chosen
# duplicates of the unique examples, so only the amount of distinct data varies.

# Set to True to also train the unconstrained baseline (column 0 of the results
# arrays). It is off by default, so only the subspace model is trained and
# column 0 stays at zero.
TRAIN_STANDARD_MODEL = False

ambient_dim = 48 # TODO 49 makes value of loss function blow up (or 48)
reduced_dim = 24

ntrain_total = alldata['X_train'].shape[0]
pct_train_sweep = (1, 0.8, 0.6, 0.4, 0.2)
# One row per sweep entry; column 0 = standard model, column 1 = subspace model.
results_train_accuracy = np.zeros((len(pct_train_sweep), 2))
results_test_accuracy = np.zeros((len(pct_train_sweep), 2))


def _make_solver(model, data):
    """Build a Solver for `model` on `data` with the settings shared by both methods."""
    return Solver(model, data,
                  num_epochs=2, batch_size=50,
                  update_rule='adam',
                  optim_config={
                      'learning_rate': 1e-4,
                  },
                  verbose=True, print_every=20)


for (i, pct_train) in enumerate(pct_train_sweep):
    # -------------------------
    # --- generate data set ---
    # -------------------------
    # int() keeps the count usable as an array size and index bound even on
    # interpreters where round() returns a float (Python 2).
    ntrain_unique = int(round(pct_train * ntrain_total))
    ntrain_dupl = ntrain_total - ntrain_unique
    ind_unique = np.arange(0, ntrain_unique)
    # Duplicates are drawn (with replacement) from the unique examples only.
    ind_dupl = np.random.choice(ind_unique, size=ntrain_dupl)
    ind_train = np.concatenate((ind_unique, ind_dupl))
    print('----- trial %d: %d percent of data (%d unique examples, %d duplicate examples) -----' % (i, pct_train*100, ntrain_unique, ntrain_dupl))
    data_abbrev = {
        # Indexing with an integer array already yields a fresh copy, so the
        # full training set does not need to be deep-copied first.
        'X_train': alldata['X_train'][ind_train, :, :, :],
        'y_train': alldata['y_train'][ind_train],
        'X_val':   deepcopy(alldata['X_val']),
        'y_val':   deepcopy(alldata['y_val']),
        'X_test':  deepcopy(alldata['X_test']),
        'y_test':  deepcopy(alldata['y_test'])
    }
    # --------------------------------------
    # --- train and report test accuracy ---
    # --------------------------------------
    # NOTE(review): `dim` is passed straight to Solver.train; presumably it is
    # the dimension of the subspace the updates are constrained to -- confirm
    # against cs231n.solver.
    if TRAIN_STANDARD_MODEL:
        standardModel = ThreeLayerConvNet(weight_scale=0.001, hidden_dim=600, reg=0.001)
        standardSolver = _make_solver(standardModel, data_abbrev)
        print('=== TRAINING STANDARD MODEL FOR TRIAL %d ===' % i)
        standardSolver.train(dim=ambient_dim)
        results_train_accuracy[i, 0] = standardSolver.train_acc_history[-1]
        results_test_accuracy[i, 0] = standardSolver.check_accuracy(alldata['X_test'], alldata['y_test'])
        print('final accuracy for %d percent of data, standard model: %.4f (train), %.4f (test)' % (pct_train*100, results_train_accuracy[i, 0], results_test_accuracy[i, 0]))

    subspaceModel = ThreeLayerConvNet(weight_scale=0.001, hidden_dim=600, reg=0.001)
    subspaceSolver = _make_solver(subspaceModel, data_abbrev)
    print('=== TRAINING SUBSPACE MODEL FOR TRIAL %d ===' % i)
    subspaceSolver.train(dim=reduced_dim)
    results_train_accuracy[i, 1] = subspaceSolver.train_acc_history[-1]
    results_test_accuracy[i, 1] = subspaceSolver.check_accuracy(alldata['X_test'], alldata['y_test'])
    # Report column 1 (subspace model) for both metrics.
    print('final accuracy for %d percent of data, subspace model: %.4f (train), %.4f (test)' % (pct_train*100, results_train_accuracy[i, 1], results_test_accuracy[i, 1]))

----- trial 0: 100 percent of data (49000 unique examples, 0 duplicate examples) -----
=== TRAINING SUBSPACE MODEL FOR TRIAL 0 ===
(Iteration 1 / 1960) loss: 2.304941
(Epoch 0 / 2) train acc: 0.149000; val_acc: 0.174000
(Iteration 21 / 1960) loss: 2.122976
(Iteration 41 / 1960) loss: 2.067955
(Iteration 61 / 1960) loss: 1.823377
(Iteration 81 / 1960) loss: 1.810794
(Iteration 101 / 1960) loss: 1.716472
(Iteration 121 / 1960) loss: 1.569197
(Iteration 141 / 1960) loss: 1.584783
(Iteration 161 / 1960) loss: 1.734140
(Iteration 181 / 1960) loss: 1.655026
(Iteration 201 / 1960) loss: 1.843356
(Iteration 221 / 1960) loss: 1.566664
(Iteration 241 / 1960) loss: 1.495931
(Iteration 261 / 1960) loss: 1.196536
(Iteration 281 / 1960) loss: 1.814743
(Iteration 301 / 1960) loss: 1.527810
(Iteration 321 / 1960) loss: 1.303619
(Iteration 341 / 1960) loss: 1.530358
(Iteration 361 / 1960) loss: 1.156364
(Iteration 381 / 1960) loss: 1.369577
(Iteration 401 / 1960) loss: 1.502101
(Iteration 421 / 1960) l

final accuracy for 80 percent of data, subspace model: 0.0000 (train), 0.5990 (test)
----- trial 2: 60 percent of data (29400 unique examples, 19600 duplicate examples) -----
=== TRAINING SUBSPACE MODEL FOR TRIAL 2 ===
(Iteration 1 / 1960) loss: 2.304823
(Epoch 0 / 2) train acc: 0.161000; val_acc: 0.172000
(Iteration 21 / 1960) loss: 2.150688
(Iteration 41 / 1960) loss: 1.913817
(Iteration 61 / 1960) loss: 1.730101
(Iteration 81 / 1960) loss: 2.106959
(Iteration 101 / 1960) loss: 1.609115
(Iteration 121 / 1960) loss: 1.680619
(Iteration 141 / 1960) loss: 1.521596
(Iteration 161 / 1960) loss: 1.473819
(Iteration 181 / 1960) loss: 1.644291
(Iteration 201 / 1960) loss: 1.432734
(Iteration 221 / 1960) loss: 1.500182
(Iteration 241 / 1960) loss: 1.781468
(Iteration 261 / 1960) loss: 1.655789
(Iteration 281 / 1960) loss: 1.560181
(Iteration 301 / 1960) loss: 1.538825
(Iteration 321 / 1960) loss: 1.660952
(Iteration 341 / 1960) loss: 1.469213
(Iteration 361 / 1960) loss: 1.428372
(Iteration 3

In [None]:
# Plot final training and test accuracy against the fraction of unique
# training examples, one curve per model.
print(results_train_accuracy)

# Column 0 of each results array belongs to the standard model and column 1 to
# the subspace model; column 0 stays at zero unless the standard-model training
# is enabled in the experiment cell.
model_labels = ['standard', 'subspace']
panels = (('Training accuracy', results_train_accuracy),
          ('Test accuracy', results_test_accuracy))
for panel, (title, accuracy) in enumerate(panels, start=1):
    plt.subplot(len(panels), 1, panel)
    for column in range(accuracy.shape[1]):
        # Use the swept fractions as x values so the axis matches its label
        # (the rows are otherwise plotted against their index 0..n-1).
        plt.plot(pct_train_sweep, accuracy[:, column], '-o')
    plt.legend(model_labels, loc='upper left')
    # The total training set size is constant; only the unique fraction varies.
    plt.xlabel('fraction of unique training examples')
    plt.ylabel('accuracy')
    plt.title(title)
plt.show()