In [1]:
from __future__ import division, print_function, absolute_import
import numpy as np
from sklearn.metrics import mean_squared_error, accuracy_score
from queue import Queue
from mltools.dataManager import DataManager
from mltools.models import baseLearner
from mltools.nnet.layers import *
from mltools.nnet.optim import adam, sgd
from mltools.utils import one_hot

In [2]:
from matplotlib import pyplot as plt
import seaborn as sns
%matplotlib inline

## Neural Net

In [30]:
class NeuralNet():
    
    def __init__(self, batch_size=32, hidden_dim=100, epochs=100, learning_rate=.001, reg=0, early_stop=10):
        self.BATCH_SIZE = batch_size
        self.HIDDEN_DIM = hidden_dim
        self.EPOCHS = epochs
        self.REG = reg
        self.LR = learning_rate
        self.NUM_CLASSES = None
        self.EARLY_STOP = early_stop
    
    
    def init_weights(self, verbose=True):
        # Initialize weights to train
        params = {}
        params['W1'] = np.random.normal(scale=.1, size=(self.INPUT_DIM, self.HIDDEN_DIM))
        params['W2'] = np.random.normal(scale=.1, size=(self.HIDDEN_DIM, self.NUM_CLASSES))
        #params['W1'] = np.ones((self.INPUT_DIM, self.HIDDEN_DIM))
        #params['W2'] = np.ones((self.HIDDEN_DIM, self.NUM_CLASSES))
        params['b1'] = np.ones(self.HIDDEN_DIM)
        params['b2'] = np.ones(self.NUM_CLASSES)
        self.params = params
        if verbose:
            print('\nInitialized Paramaters:')
            for name, tensor in params.iteritems():
                print('%s: ' % name, tensor.shape)
                
    def train(self, x_train, y_train, optim='sgd', num_classes=None, verbose=True):
        
        if num_classes is None:
            num_classes = int(np.max(y_train) + 1)
            self.NUM_CLASSES = num_classes
        temp = DataManager()
        temp.set_data(x_train, y_train)
        x_train, x_val, y_train_num, y_val_num = temp.test_train_split(.9)
        y_train = one_hot(y_train_num, num_class=num_classes)
        y_val = one_hot(y_val_num, num_class=num_classes)
        
            
        self.INPUT_DIM = x_train.shape[1]
        self.init_weights(False)
        params = self.params
        
        # Trianing
        num_training = x_train.shape[0]
        iters_per_epoch = int(max(num_training / self.BATCH_SIZE, 1))
        num_iterations = int(self.EPOCHS * iters_per_epoch)

        # For logging results
        self.loss_hist_train = []
        self.loss_hist_val = []
        self.acc_hist_train = []
        self.acc_hist_val = []
        self.num_hist_train = []
        self.num_hist_val = []
        
        if verbose: print('\nStarting Training:')
        for epoch in range(self.EPOCHS):
            # Shuffle the data
            shuffle_idx = np.random.permutation(range(x_train.shape[0]))
            x_train = x_train[shuffle_idx]
            y_train = y_train[shuffle_idx]
            y_train_num = y_train_num[shuffle_idx]

            for i in range(iters_per_epoch):
                # Make minibatch
                batch_start = self.BATCH_SIZE*i
                batch_end = self.BATCH_SIZE*(i+1)
                x_batch = x_train[batch_start:batch_end]
                y_batch = y_train[batch_start:batch_end]
                y_batch_num = y_train_num[batch_start:batch_end]

                # Forward Pass
                h1, linear_cache_1 = linear_forward(x_batch, params['W1'], params['b1'])
                a1, relu_cache_1= relu_forward(h1)
                h2, linear_cache_2 = linear_forward(a1, params['W2'], params['b2'])
                a2, relu_cache_2 = relu_forward(h2)

                # Loss
                probs, loss, dx = softmax_loss(a2, y_batch_num)
                # Plus regularization
                loss += .5 * self.REG * np.sum(params['W1']**2) + .5 * self.REG * np.sum(params['W2']**2)
                self.loss_hist_train.append(loss)

                # Backwards Pass
                grads = {}
                da = relu_backward(dx, relu_cache_2)
                dx_2, grads['W2'], grads['b2'] = linear_backward(da, linear_cache_2)
                da_2 = relu_backward(dx_2, relu_cache_1)
                dx, grads['W1'], grads['b1'] = linear_backward(da_2, linear_cache_1)

                # Regularization (optional)
                grads['W2'] += self.REG * params['W2']
                grads['W1'] += self.REG * params['W1']

                # Parameter update
                for p, w in params.iteritems():
                    dw = grads[p]
                    if optim == 'sgd':
                        next_w, next_config = sgd(w, dw, config={'learning_rate':self.LR})
                    else:
                        # Optimization using momentum
                        next_w, next_config = adam(w, dw, config={'learning_rate':self.LR})
                    # Update weights
                    params[p] = next_w
    
            # Calculate the accuracy
            probs = np.exp(a2 - np.max(a2, axis=1, keepdims=True))
            probs /= np.sum(probs, axis=1, keepdims=True)
            y_pred = np.argmax(probs, axis=1)
            train_acc = np.mean(y_pred == y_batch_num)
            num_correct = np.sum(y_pred == y_batch_num)
            self.acc_hist_train.append(train_acc)
            self.num_hist_train.append(num_correct)
            if verbose: print("[{}] loss: {}, Acc: {}".format(epoch, loss, train_acc))
                
            # calc accuaracy on validation
            probs = self.predict(x_val, hard=False)
            #mse = mean_squared_error(probs, y_val)
            y_pred = np.argmax(probs, axis=1)
            train_acc = np.mean(y_pred == y_val_num)
            num_correct = np.sum(y_pred == y_val_num)
            self.acc_hist_val.append(train_acc)
            self.num_hist_val.append(num_correct)
            
            # Early stopping
            past_acc = self.acc_hist_val[-5:]
            if np.array_equal(past_acc[::-1], np.sort(past_acc)):
                print("STOP")
                print(past_acc)

            
        self.params = params  
        if verbose: print('\n---Completed Training---')
            
    def predict(self, x_test,hard=True):
        params = self.params
        h1, _ = linear_forward(x_test, params['W1'], params['b1'])
        a1, _ = relu_forward(h1)
        h2, _ = linear_forward(a1, params['W2'], params['b2'])
        a2, _ = relu_forward(h2)
        probs = np.exp(a2 - np.max(a2, axis=1, keepdims=True))
        probs /= np.sum(probs, axis=1, keepdims=True)
        if hard:
            return np.argmax(probs, axis=1)
        return probs
            
    def accuracy(self, x_test, y_test):
        probs = self.predict(x_test)
        y_pred = np.argmax(probs, axis=1)
        return np.mean(y_pred == y_test)
        
    def mse(self, x_test, y_test):
        probs = self.predict(x_test)
        return mean_squared_error(probs, y_test)       
    
    def save_model(self, output):
        pickle.dump(self.params, open(output, 'wb'))
        
    def load_model(in_file):
        self.params = pickle.load(open(in_file, 'rb'))
        self.HIDDEN_DIM = self.params['W1'].shape[1]

## Part 2

In [31]:
# Load data
data = DataManager('datasets/iris.arff')
data.normalize()
x_train, x_test, y_train_num, y_test_num = data.test_train_split(.75)
y_train = one_hot(y_train_num, num_class=3)
y_test = one_hot(y_test_num, num_class=3)

In [32]:
net = NeuralNet(learning_rate=.1, hidden_dim=8, batch_size=50)
net.train(x_train, y_train_num, optim='sgd', verbose=True)
y_pred = net.predict(x_test, hard=True)
print(np.mean(y_pred == y_test_num))


Starting Training:
[0] loss: 1.10923097281, Acc: 0.38
STOP
[0.25]
[1] loss: 1.09717144106, Acc: 0.5
STOP
[0.25, 0.25]
[2] loss: 1.10083028487, Acc: 0.3
STOP
[0.25, 0.25, 0.25]
[3] loss: 1.09090181523, Acc: 0.3
STOP
[0.25, 0.25, 0.25, 0.25]
[4] loss: 1.03022420512, Acc: 0.42
STOP
[0.25, 0.25, 0.25, 0.25, 0.25]
[5] loss: 1.01607414939, Acc: 0.4
STOP
[0.25, 0.25, 0.25, 0.25, 0.25]
[6] loss: 0.999512343641, Acc: 0.38
STOP
[0.25, 0.25, 0.25, 0.25, 0.25]
[7] loss: 1.01328402281, Acc: 0.28
[8] loss: 0.894164610704, Acc: 0.64
[9] loss: 0.860351546525, Acc: 0.54
[10] loss: 0.75958962933, Acc: 0.76
[11] loss: 0.794779238343, Acc: 0.56
STOP
[0.41666666666666669, 0.41666666666666669, 0.41666666666666669, 0.41666666666666669, 0.41666666666666669]
[12] loss: 0.706168233716, Acc: 0.62
[13] loss: 0.659664803787, Acc: 0.66
[14] loss: 0.637467119484, Acc: 0.86
[15] loss: 0.621111697625, Acc: 0.9
[16] loss: 0.560051835415, Acc: 0.96
[17] loss: 0.642498813696, Acc: 0.92
[18] loss: 0.559049138993, Acc: 0.

In [None]:
net.predict(x_test, y_test)

## Part 3

In [None]:
# Load data
data = DataManager('datasets/vowel.arff')
data.normalize()
x_train, x_test, y_train_num, y_test_num = data.test_train_split(.75)
y_train = one_hot(y_train_num, num_class=11)
y_test = one_hot(y_test_num, num_class=11)

In [None]:
data.labels

In [None]:
net = NeuralNet(learning_rate=.01)
net.train(x_train, y_train_num, optim='sgd', verbose=False)
y_pred = net.predict(x_test)
test_acc = np.mean(y_pred == y_test_num)
print(test_acc)

In [None]:
lr_results = []
for lr in [.001, .01, .1, 1, 10]:
    acc_list = []
    for i in range(5):
        net = NeuralNet(learning_rate=lr)
        net.train(x_train, y_train_num, optim='sgd', verbose=False)
        y_pred = net.predict(x_test)
        test_acc = np.mean(y_pred == y_test_num)
        acc_list.append(test_acc)
    avg = np.mean(acc_list)
    lr_results.append(avg)

In [None]:
lr_results

In [None]:
plt.plot(lr_results)

## Problem 4

In [None]:
# Load data
data = DataManager('datasets/vowel.arff')
data.normalize()
x_train, x_test, y_train_num, y_test_num = data.test_train_split(.75)
y_train = one_hot(y_train_num, num_class=11)
y_test = one_hot(y_test_num, num_class=11)

In [None]:
results = []
for hd in [10*x for x in range(10)]:
    acc_list = []
    for i in range(5):
        net = NeuralNet(learning_rate=lr, hidden_dim=hd)
        net.train(x_train, y_train_num, optim='sgd', verbose=False)
        y_pred = net.predict(x_test)
        test_acc = np.mean(y_pred == y_test_num)
        acc_list.append(test_acc)
        
    avg = np.mean(acc_list)
    results.append(avg)

In [None]:
plt.plot(results)

In [None]:
results