In [1]:
import numpy as np
import matplotlib.pyplot as plt

from keras import regularizers, optimizers
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten

%matplotlib inline
# Notebook-wide plotting defaults: larger figures, no pixel smoothing when
# showing images, and a grayscale colormap.
plt.rcParams.update({
    'figure.figsize': (10.0, 8.0),
    'image.interpolation': 'nearest',
    'image.cmap': 'gray',
})

Using TensorFlow backend.


In [2]:
from cs231n.data_utils import load_CIFAR10
from sklearn.decomposition import PCA

def get_CIFAR10_data(num_training=49000, num_validation=1000, num_test=1000, is_pca=False):
    """
    Load CIFAR-10 from disk and return flattened, mean-centred
    train / validation / test splits.

    Parameters
    ----------
    num_training : int
        Number of leading rows of the loaded training set used for training.
    num_validation : int
        Number of rows immediately following the training rows that are held
        out for validation.
    num_test : int
        Number of leading rows of the loaded test set to keep.
    is_pca : bool
        When true, every split is projected onto principal components that
        are fitted on the training split only. At most 32*32 components are
        kept (fewer if the training split is smaller than that).

    Returns
    -------
    tuple
        ``(X_train, y_train, X_val, y_val, X_test, y_test)``. Each ``X`` is a
        2-D array with one row per sample.

    Raises
    ------
    IndexError
        If a requested split reaches past the end of the loaded arrays.
    """
    # Load the raw CIFAR-10 data
    cifar10_dir = 'cs231n/datasets/cifar-10-batches-py'
    X_train, y_train, X_test, y_test = load_CIFAR10(cifar10_dir)

    # Subsample the data. Integer-array indexing copies the rows and raises
    # IndexError when a split does not fit in the loaded arrays.
    train_idx = np.arange(num_training)
    val_idx = np.arange(num_training, num_training + num_validation)
    test_idx = np.arange(num_test)

    X_val, y_val = X_train[val_idx], y_train[val_idx]
    X_train, y_train = X_train[train_idx], y_train[train_idx]
    X_test, y_test = X_test[test_idx], y_test[test_idx]

    # Reshape data to rows
    X_train = X_train.reshape(num_training, -1)
    X_val = X_val.reshape(num_validation, -1)
    X_test = X_test.reshape(num_test, -1)

    # Apply PCA. The projection is learned from the training split only and
    # then re-used for validation and test, so all three splits share one
    # feature basis and no held-out data influences the fit.
    if is_pca:
        n_components = min(32 * 32, *X_train.shape)
        pca = PCA(n_components=n_components)
        X_train = pca.fit_transform(X_train)
        X_val = pca.transform(X_val)
        X_test = pca.transform(X_test)

    # Normalize the data: subtract the mean training sample from every split
    mean_image = np.mean(X_train, axis=0)

    # NOTE(review): the original comment here says "Range should be [-1, 1]",
    # but 2 * x - 1 only maps [0, 1] onto [-1, 1]. Applied to mean-centred
    # values it does not bound the range in general. The arithmetic is kept
    # unchanged; confirm the intended scaling.
    X_train = 2 * (X_train - mean_image) - 1
    X_val = 2 * (X_val - mean_image) - 1
    X_test = 2 * (X_test - mean_image) - 1

    return X_train, y_train, X_val, y_val, X_test, y_test


# Build the six arrays once (PCA disabled) and report the shape of each.
X_train, y_train, X_val, y_val, X_test, y_test = get_CIFAR10_data(is_pca=False)
print(
    *(
        '{} {}'.format(caption, array.shape)
        for caption, array in (
            ('Train data shape: ', X_train),
            ('Train labels shape: ', y_train),
            ('Validation data shape: ', X_val),
            ('Validation labels shape: ', y_val),
            ('Test data shape: ', X_test),
            ('Test labels shape: ', y_test),
        )
    ),
    sep='\n'
)

Train data shape:  (49000, 3072)
Train labels shape:  (49000,)
Validation data shape:  (1000, 3072)
Validation labels shape:  (1000,)
Test data shape:  (1000, 3072)
Test labels shape:  (1000,)


In [3]:
from keras.utils.np_utils import to_categorical

# One-hot encode the integer labels for the categorical cross-entropy loss.
# The width is fixed at 10 (the CIFAR-10 class count, matching the 10-unit
# softmax output layer) instead of being inferred per split: with
# num_classes=None a split that happens to lack the highest label would get
# fewer columns than the network expects.
# This cell rebinds y_train / y_val, so run it once per data load.
y_train = to_categorical(y_train, num_classes=10)
y_val = to_categorical(y_val, num_classes=10)


In [8]:
# --- Hyper-parameter grid (one value per axis at the moment) ---------------
# An earlier setting, learning_rates = [1.8e-3], is noted here for reference.
learning_rates, regularization_strengths, hidden_sizes = [1e-2], [1e-1], [400]

# --- Network dimensions ----------------------------------------------------
num_classes = 10
input_size = 3 * 32 * 32

# --- Search bookkeeping ----------------------------------------------------
best_net, best_val = None, -1  # best model so far and its score
results = dict()

def generate_arrays(X, y, batch_size):
    """
    Endless mini-batch generator.

    On every step, `batch_size` row indices are drawn at random with
    replacement from the first axis of `X` (via the global NumPy RNG), and the
    matching rows of `X` and `y` are yielded as an `(X_batch, y_batch)` tuple.
    The generator never terminates; the consumer decides how many batches to
    pull.
    """
    sample_count = X.shape[0]
    while True:
        picked = np.random.choice(sample_count, batch_size, replace=True)
        yield X[picked], y[picked]

# Grid search: train one network per (learning rate, L2 strength, hidden size)
# combination, score it on the validation split, and keep the best model.
for learning_rate in learning_rates:
    for regularization_strength in regularization_strengths:
        for hidden_size in hidden_sizes:
            # Two-layer fully connected net: ReLU hidden layer -> softmax output,
            # with L2 weight decay on both kernels.
            model = Sequential()
            model.add(Dense(hidden_size, activation='relu', kernel_initializer='uniform',
                            kernel_regularizer=regularizers.l2(regularization_strength),
                            input_shape=(input_size,)))

            model.add(Dense(num_classes, activation='softmax',
                            kernel_regularizer=regularizers.l2(regularization_strength),))

            optimizer = optimizers.SGD(lr=learning_rate, decay=1e-6, momentum=0.9, nesterov=False)

            model.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=['accuracy'])

            # Each "epoch" is 30 randomly sampled batches of 200 rows, not a
            # full pass over the training set.
            loss_hist = model.fit_generator(generate_arrays(X_train, y_train, batch_size=200),
                                            validation_data=(X_val, y_val),
                                            steps_per_epoch=30,
                                            epochs=100)

            # Record this configuration's validation accuracy and remember the
            # best model. Without this, best_net / best_val / results are
            # never populated and only the last trained model survives.
            val_loss, val_acc = model.evaluate(X_val, y_val, verbose=0)
            results[(learning_rate, regularization_strength, hidden_size)] = val_acc
            if val_acc > best_val:
                best_val = val_acc
                best_net = model

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100


Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 79/100
Epoch 80/100
Epoch 81/100
Epoch 82/100
Epoch 83/100
Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100
