In [1]:
#Author:  SYH & SXK
#DATE:    2018.9.8
#TASK:    NN with SA
#Team:    No.0201


In [2]:
#Setup

import numpy as np
import matplotlib.pyplot as plt

from __future__ import print_function

%matplotlib inline

random_seed = 0


In [3]:
from head.data_utils import load_CIFAR10


#Load Data

def get_CIFAR10_data(num_training=49000, num_validation=1000, num_test=1000,
                     cifar10_dir='datasets/cifar-10-batches-py'):
    """
    Load CIFAR-10 through load_CIFAR10 and prepare train/val/test splits.

    The first `num_training` training samples become the training split, the
    next `num_validation` training samples become the validation split, and the
    first `num_test` test samples become the test split. The per-feature mean
    of the training split is subtracted from all three splits, and every sample
    is flattened into a single row.

    Inputs:
    - num_training: number of training samples to keep.
    - num_validation: number of validation samples, taken right after the
      training samples.
    - num_test: number of test samples to keep.
    - cifar10_dir: directory passed to load_CIFAR10.

    Returns:
    - (X_train, y_train, X_val, y_val, X_test, y_test), where each X has shape
      (num_samples, features_per_sample).

    Raises:
    - IndexError: if more samples are requested than load_CIFAR10 returned.
    """
    X_train_all, y_train_all, X_test_all, y_test_all = load_CIFAR10(cifar10_dir)

    # Fail early with a clear message; slicing alone would silently truncate.
    num_train_needed = num_training + num_validation
    if num_train_needed > len(X_train_all):
        raise IndexError(
            'requested %d training+validation samples but only %d are available'
            % (num_train_needed, len(X_train_all)))
    if num_test > len(X_test_all):
        raise IndexError(
            'requested %d test samples but only %d are available'
            % (num_test, len(X_test_all)))

    # Subsample the data (contiguous ranges, so plain slices are enough)
    X_train = X_train_all[:num_training]
    y_train = y_train_all[:num_training]
    X_val = X_train_all[num_training:num_train_needed]
    y_val = y_train_all[num_training:num_train_needed]
    X_test = X_test_all[:num_test]
    y_test = y_test_all[:num_test]

    # Normalize the data: subtract the mean image.
    # Out-of-place subtraction allocates fresh arrays, so the loaded data is
    # not mutated and integer pixel arrays are promoted to float instead of
    # raising a casting error as an in-place `-=` would.
    mean_image = np.mean(X_train, axis=0)
    X_train = X_train - mean_image
    X_val = X_val - mean_image
    X_test = X_test - mean_image

    # Reshape data to rows. The feature count is computed explicitly because
    # reshape(0, -1) cannot infer it when a split is empty.
    feature_dim = int(np.prod(X_train.shape[1:]))
    X_train = X_train.reshape(num_training, feature_dim)
    X_val = X_val.reshape(num_validation, feature_dim)
    X_test = X_test.reshape(num_test, feature_dim)

    return X_train, y_train, X_val, y_val, X_test, y_test


# Load the prepared splits with the default sizes.
X_train, y_train, X_val, y_val, X_test, y_test = get_CIFAR10_data()

# Print the shape of every split as a quick sanity check.
for caption, array in (
        ('Train data shape: ', X_train),
        ('Train labels shape: ', y_train),
        ('Validation data shape: ', X_val),
        ('Validation labels shape: ', y_val),
        ('Test data shape: ', X_test),
        ('Test labels shape: ', y_test)):
    print(caption, array.shape)

Train data shape:  (49000, 3072)
Train labels shape:  (49000,)
Validation data shape:  (1000, 3072)
Validation labels shape:  (1000,)
Test data shape:  (1000, 3072)
Test labels shape:  (1000,)


In [None]:
# Network settings, passed to TwoLayerNet as
# (input_size, hidden_size, num_classes) in the training cells.

num_classes = 10
hidden_size = 50
input_size = 3 * 32 * 32   # 3072, the row length of the flattened data above


In [None]:
# Train with SGD (train_bp)
# Test set accuracy around 50%

from head.neural_net import TwoLayerNet

# Reseed NumPy's global RNG before building and training the net.
np.random.seed(random_seed)

# Training hyperparams
reg = 0.1
learning_rate = 5e-4
batch_size = 236

net_bp = TwoLayerNet(input_size, hidden_size, num_classes)
stats_bp = net_bp.train_bp(
    X_train, y_train, X_val, y_val,
    num_iters=3000,
    batch_size=batch_size,
    learning_rate=learning_rate,
    learning_rate_decay=0.95,
    reg=reg,
    verbose=True)

# Fraction of test samples whose predicted label equals the true label.
test_acc = np.mean(net_bp.predict(X_test) == y_test)
print('Test accuracy: ', test_acc)

iteration 0 / 3000: loss 2.302747
iteration 100 / 3000: loss 2.151115
iteration 200 / 3000: loss 1.956375
iteration 300 / 3000: loss 1.795100
iteration 400 / 3000: loss 1.650096
iteration 500 / 3000: loss 1.788248
iteration 600 / 3000: loss 1.632312
iteration 700 / 3000: loss 1.725397
iteration 800 / 3000: loss 1.546313
iteration 900 / 3000: loss 1.617557
iteration 1000 / 3000: loss 1.605412
iteration 1100 / 3000: loss 1.411051
iteration 1200 / 3000: loss 1.647157


In [None]:
# Train with standard SA (train_sa)

# This cell is meant to demonstrate that standard SA does not work in high
# dimensions.

from head.neural_net import TwoLayerNet

# Reseed NumPy's global RNG before building and training the net.
np.random.seed(random_seed)

# Training hyperparameters
reg = 0.1
step_len = 5e-4
batch_size = 200

net_sa = TwoLayerNet(input_size, hidden_size, num_classes)
stats_sa = net_sa.train_sa(
    X_train, y_train, X_val, y_val,
    num_iters=3000,
    batch_size=batch_size,
    step_len=step_len,
    reg=reg,
    verbose=True)

# Fraction of test samples whose predicted label equals the true label.
test_acc = np.mean(net_sa.predict(X_test) == y_test)
print('Test accuracy: ', test_acc)

In [None]:
# Train with the new SA variant (train_test)

from head.neural_net import TwoLayerNet

# Reseed NumPy's global RNG before building and training the net.
np.random.seed(random_seed)

# Training hyperparams
reg = 0.1
step_len = 5e-4
batch_size = 236

net_test = TwoLayerNet(input_size, hidden_size, num_classes)
stats_test = net_test.train_test(
    X_train, y_train, X_val, y_val,
    num_iters=3000,
    batch_size=batch_size,
    step_len=step_len,
    sigma=0.1,
    reg=reg,
    verbose=True)

# Fraction of test samples whose predicted label equals the true label.
test_acc = np.mean(net_test.predict(X_test) == y_test)
print('Test accuracy: ', test_acc)

In [None]:
# Plot both loss histories in two stacked panels: BP on top, SA below.
fig, (ax_bp, ax_sa) = plt.subplots(2, 1)

ax_bp.plot(stats_bp['loss_history'])
ax_bp.set_title('Loss history')
ax_bp.set_xlabel('Iteration')
ax_bp.set_ylabel('BP Loss')

ax_sa.plot(stats_test['loss_history'])
ax_sa.set_xlabel('Iteration')
ax_sa.set_ylabel('SA Loss')

plt.show()

In [None]:
# Plot both loss histories on the same axes for direct comparison.
# Each curve is labelled and a legend is drawn so the two runs can be told
# apart; the labels match the 'BP Loss' / 'SA Loss' axes of the previous figure.

plt.plot(stats_bp['loss_history'], label='BP')
plt.plot(stats_test['loss_history'], label='SA')
plt.title('Loss history')
plt.xlabel('Iteration')
plt.ylabel('Loss')
plt.legend()
plt.show()