In [1]:

import numpy as np
import os
import sklearn
import random 
from sklearn import datasets
from sklearn import linear_model

import os
import pandas as pd
import numpy as np
from PIL import Image

import plotly.offline as py
py.init_notebook_mode(connected=True)
import plotly.graph_objs as go
import plotly.tools as tls
import seaborn as sns
import matplotlib.image as mpimg
import matplotlib.pyplot as plt
import matplotlib

%matplotlib inline

import time

In [2]:
def init_weights(num_input_features, num_hidden_units, num_output_units):
    """initialize weights uniformly randomly with small values"""
    w1 = np.random.uniform(-1.0, 1.0, size=num_hidden_units*(num_input_features + 1)
                          ).reshape(num_hidden_units, num_input_features + 1)
    w2 = np.random.uniform(-1.0, 1.0, size=num_output_units*(num_hidden_units+1)).reshape(num_output_units, num_hidden_units+ 1)
    return w1, w2
# w1, w2 = init_weights(784, 30, 10)
# print(w1.shape) # expect 
# print(w2.shape) # expect

In [3]:
 def encode_labels(y):
        """ Encode labels into a one-hot representation
            Params:
            y: array of num_samples, contains the target class labels for each training example.
            For example, y = [2, 1, 3, 3] -> 4 training samples, and the ith sample has label y[i]
            k: number of output labels
            returns: onehot, a matrix of labels by samples. For each column, the ith index will be
            "hot", or 1, to represent that index being the label.
        """
        onehot = np.zeros((len(np.unique(y)), y.shape[0]))
#         print('onehot >>> ', onehot.shape, 'y.shape[0] >>> ', y.shape[0], 'y distinct', set(y))
        for i in range(y.shape[0]):
            onehot[y[i], i] = 1.0
        return onehot

In [4]:
def softmax(v):
        """Calculates the softmax function that outputs a vector of values that sum to one.
            We take max(softmax(v)) to be the predicted label. The output of the softmax function
            is also used to calculate the cross-entropy loss
        """
        logC = -np.max(v)
        return np.exp(v + logC)/np.sum(np.exp(v + logC), axis = 0)
    
        """The formula is mathematically equivalent to standard softmax function. However, since exponentials can be large
            and unstable to compute the ratio, individual values are subtracted from the max value 
        """

In [5]:
def tanh(z, deriv=False):
        """ Compute the tanh function or its derivative.
        """
        return np.tanh(z) if not deriv else 1 - np.square(np.tanh(z))

In [6]:
""" Relu function has been working better in recent times since it is a constant slope increasing function unlike
    sigmoid or tanh where the slope becomes close to 0 in the extreme values"""
def relu(z, deriv = False):
        if not deriv:
            relud = z
            relud[relud < 0] = 0
            return relud
        deriv = z
        deriv[deriv <= 0] = 0
        deriv[deriv > 0] = 1
        return deriv

In [7]:
def sigm(z, deriv = False):
    
    sig = 1/(1+np.exp(-z))
    return sig if not deriv else sig*(1-sig)

In [8]:
def add_bias_unit(X, column=True):
        """Adds a bias unit to our inputs"""
        if column:
            bias_added = np.ones((X.shape[0], X.shape[1] + 1))
            bias_added[:, 1:] = X
        else:
            bias_added = np.ones((X.shape[0] + 1, X.shape[1]))
            bias_added[1:, :] = X

        return bias_added

In [9]:
def forward(X, w1, w2, activation, do_dropout = True):
        """ Compute feedforward step
            Params:
            X: matrix of num_samples by num_features, input layer with samples and features
            w1: matrix of weights from input layer to hidden layer. Dimensionality of 
            num_hidden_units by num_features + 1 (bias)
            w2: matrix of weights from hidden layer to output layer. Dimensionality of 
            num_output_units (equal to num class labels) by num_hidden units + 1 (bias)
            dropout: If true, randomly set half of the activations to zero to prevent overfitting.
        """
        #the activation of the input layer is simply the input matrix plus bias unit, added for each sample.
        z1 = add_bias_unit(X)
#         if self.dropout and do_dropout: a1 = self.compute_dropout(a1)
        #the input of the hidden layer is obtained by applying our weights to our inputs. 
#             We essentially take a linear combination of our inputs
        z2 = w1.dot(z1.T)
        #applies the tanh function to obtain the input mapped to a distrubution of values between -1 and 1
        if activation == 'tanh':
            z3 = tanh(z2)
        elif activation == 'sigmoid':
            z3 = sigm(z2)
        elif activation == 'relu':
            z3 = relu(z2)
        #add a bias unit to activation of the hidden layer.
        z3 = add_bias_unit(z3, column=False)
#         if self.dropout and do_dropout: a2 = self.compute_dropout(a2)
        # compute input of output layer in exactly the same manner.
        z4 = w2.dot(z3)
        # the activation of our output layer is just the softmax function.
        z5 = softmax(z4)
        return z1, z2, z3, z4, z5

In [10]:
def get_cost(y_enc, output, w1, w2):
        """ Compute the cost function.
            Params:
            y_enc: array of num_labels x num_samples. class labels one-hot encoded
            output: matrix of output_units x samples - activation of output layer from feedforward
            w1: weight matrix of input to hidden layer
            w2: weight matrix of hidden to output layer
            """
        cost = - np.sum(y_enc*np.log(output))
        # add the L2 regularization by taking the L2-norm of the weights and multiplying it with our constant.
        l2_term = (l2/2.0) * (np.sum(np.square(w1[:, 1:])) + np.sum(np.square(w2[:, 1:])))
        cost = cost + l2_term
        return cost/y_enc.shape[1] #Average Cost

In [11]:
def backprop(z1, z3, z5, z2, y_enc, w1, w2, activation):
        """ Computes the gradient using backpropagation
            Params:
            a1: array of n_samples by features+1 - activation of input layer (just input plus bias)
            a2: activation of hidden layer
            a3: activation of output layer
            z2: input of hidden layer
            y_enc: onehot encoded class labels
            w1: weight matrix of input layer to hidden layer
            w2: weight matrix of hidden to output layer
            returns: grad1, grad2: gradient of weight matrix w1, gradient of weight matrix w2
        """
        #backpropagate our error
        sigma3 = z5 - y_enc
        z2 = add_bias_unit(z2, column=False)
        if activation == 'tanh':
            sigma2 = w2.T.dot(sigma3) * tanh(z2, deriv=True)
        elif activation == 'sigmoid':
            sigma2 = w2.T.dot(sigma3) * sigm(z2, deriv=True)
        elif activation == 'relu':
            sigma2 = w2.T.dot(sigma3) * relu(z2, deriv=True)
        #get rid of the bias row
        sigma2 = sigma2[1:, :]
        grad1 = sigma2.dot(z1)
        grad2 = sigma3.dot(z3.T)
         # add the regularization term
        grad1[:, 1:]+= (w1[:, 1:]*l2) # derivative of .5*l2*w1^2
        grad2[:, 1:]+= (w2[:, 1:]*l2) # derivative of .5*l2*w2^2
        return grad1, grad2

In [12]:
def predict_nn(X, w1, w2, activation, dropout = False):
        """Generate a set of predicted labels for the input dataset"""
        z1, z2, z3, z4, z5 = forward(X, w1, w2, activation, do_dropout = dropout)
        #z5 is of dimension output units x num_samples. each row is an array representing the likelihood that the sample belongs to the class label given by the index...
        #ex: first row of z5 = [0.98, 0.78, 0.36]. This means our network has 3 output units = 3 class labels. And this instance most likely belongs to the class given by the label 0.
        y_pred = np.argmax(z5, axis = 0)
        return y_pred

In [13]:
def accuracy(X_train, y_train, w1, w2, activation):
        """Calculate the training accuracy. Requires passing through the entire dataset."""
#         print('w1 >>> ', w1, 'w2 >>> ', w2)
        y_train_pred = predict_nn(X = X_train, w1 = w1, w2 = w2, activation = activation)
        diffs = y_train_pred - y_train
        count = 0.
        for i in range(y_train.shape[0]):
            if diffs[i] != 0:
                count+=1
        return 100 - count*100/y_train.shape[0]

In [33]:
def build_model(X_data, y_data, X_test, y_test, n_output, n_features, n_hidden, l2, epochs, learning_rate, momentum_const, 
                decay_rate, minibatch_size, optimizer, activation, check_gradients, nesterov, print_progress):
   
    y_enc = encode_labels(y_data)
    
    #     Intialize Weights
#     w1, w2 = init_weights(n_features, n_hidden, n_output)
    w1, w2 = wt1, wt2

    """ Learn weights from training data
        Params:
        X: matrix of samples x features. Input layer
        y: target class labels of the training instances (ex: y = [1, 3, 4, 4, 3])
        print_progress: True if you want to see the loss and training accuracy, but it is expensive.
    """
    
    accs = []
    
    # PREVIOUS GRADIENTS
    prev_grad_w1 = np.zeros(w1.shape)
    prev_grad_w2 = np.zeros(w2.shape)
    # PREVIOUS WEIGHTS(MOMENTUM)
    prev_w1 = np.zeros(w1.shape)
    prev_w2 = np.zeros(w2.shape)
    prev2_w1 = np.zeros(w1.shape)
    prev2_w2 = np.zeros(w2.shape)
    print("fitting")
    costs = []
    grad_1_li, grad_2_li = [], [] # used to keep list of gradients which can be used to measure and differentiate between learning speed of input -> hidden and hidden -> output layer weights

    #     supported_optimizers = ['Gradient Descent', 'Momentum', 'Nesterov', 'Adam', 'Adagrad', 'Adadelta', 'RMSProp']
    supported_optimizers = ['Gradient Descent', 'Momentum']
    if optimizer not in supported_optimizers:
        print("Error: unsupported optimizer requested.")
        print("Available optimizers: {}".format(supported_optimizers))
    #         exit()

    #     supported_activations = ['relu', 'tanh', 'sigmoid', 'maxout', 'elu']
    supported_activations = ['relu', 'tanh', 'sigmoid']
    if activation not in supported_activations:
        print("Error: unsupported activation requested.")
        print("Available activations: {}".format(supported_activations))

    #pass through the dataset
    for i in range(epochs):
        previous_accuracies = []
        learning_rate /= (1 + decay_rate*i)
        mini = np.array_split(range(y_data.shape[0]), minibatch_size)
        grads_w1, grads_w2 = [], [] # needed if we want to remember averages of gradients across time

        for idx in mini:
            #feed feedforward
            z1, z2, z3, z4, z5 = forward(X_data[idx], w1, w2, activation)

            cost = get_cost(y_enc = y_enc[:, idx], output = z5, w1 = w1, w2 = w2)
            costs.append(cost)

            #compute gradient via backpropagation

            grad1, grad2 = backprop(z1=z1, z3=z3, z5=z5, z2=z2, y_enc=y_enc[:, idx], w1=w1, w2=w2, activation=activation)
            grad_1_li.append(grad1)
            grad_2_li.append(grad2)

            # update parameters, multiplying by learning rate + momentum constants
            # w1_update, w2_update = self.momentum_optimizer(self.learning_rate, grad1, grad2)
            w1_update, w2_update = learning_rate*grad1, learning_rate*grad2

            prev2_w1, prev2_w2 = prev_w1, prev_w2
            prev_w1, prev_w2 = w1, w2
            if nesterov:
                print('NOT AVAILABLE NOW')
                # v_prev = v # back this up
                # v = mu * v - learning_rate * dx # velocity update stays the same
                # x += -mu * v_prev + (1 + mu) * v # position update changes form
                # psuedocode from http://cs231n.github.io/neural-networks-3/#sgd
    #                 v1 = momentum_const * prev_grad_w1 - w1_update
    #                 v2 = momentum_const * prev_grad_w2 - w2_update
    #                 w1 += -self.momentum_const * prev_grad_w1 + (1 + self.momentum_const) * v1
    #                 w2 += -self.momentum_const * prev_grad_w2 + (1 + self.momentum_const) * v2
            else:
                # gradient update: w += -alpha * gradient.
                # use momentum - add in previous gradient mutliplied by a momentum hyperparameter.
                w1 += -((1-momentum_const)*w1_update + (momentum_const*(prev2_w1 - prev_w1)))
                w2 += -((1-momentum_const)*w2_update + (momentum_const*(prev2_w2 - prev_w2)))

            acc_train = accuracy(X_data, y_data, w1, w2, activation)
            acc_test = accuracy(X_test, y_test, w1, w2, activation)
            
            print('Epoch: ', i)
            print('Training Acc: ', acc_train, '\nTest Acc: ', acc_test)
            accs.append([acc_train, acc_test])
#         if print_progress and (i+1) % 50==0:
#             print("Epoch: {}".format(i + 1))
#             print("Loss: {}".format(cost))
    #             if self.check_gradients:
    #                 print("Gradient Error: {}".format(w1_grad_error))
#             grad_1_mag, grad_2_mag = np.linalg.norm(grad_1_li), np.linalg.norm(grad_2_li)
#             print("grad1 mag: {}, grad2 mang: {}".format(grad_1_mag, grad_2_mag))
    #         print('w1 >>> ', w1, 'w2 >>> ', w2)
#             acc = accuracy(X_data, Y_data, w1, w2)
#             previous_accuracies.append(acc)
    #             if early_stop is not None and len(previous_accuracies) > 3:
    #                 if abs(previous_accuracies[-1] - previous_accuracies[-2]) < self.early_stop and abs(previous_accuracies[-1] - previous_accuracies[-3]) < self.early_stop:
    #                     print("Early stopping, accuracy has stayed roughly constant over last 100 iterations.")
    #                     break


#             print("Training Accuracy: {}".format(acc))

    # # Assign new parameters to the model
#     model = { 'W1': w1[:, :2], 'b1': w1[:, 2], 'W2': w2[:, :2], 'b2': w2[:, 2]}
    model = { 'W1': w1, 'W2': w2, 'Acc': accs}
        
    return [model]


In [34]:
# # t0 = time.time()

# def kfold_nn(X_copy, y_copy, n_output, n_features, n_hidden, l2, epochs, learning_rate, momentum_const, 
#                       decay_rate, minibatch_size, optimizer, activation, check_gradients, nesterov, print_progress, 
#                       k = 5):

#     train_cv_acc = []
#     test_cv_acc = []
#     # cv_acc = []
    
#     X_folds = np.array_split(X_copy, k)
#     y_folds = np.array_split(y_copy, k)

#     for cnt in range(len(X_folds)):
#     # for cnt in range(1):
#     #     cnt = 2
#         X_train = list(X_folds)
#         X_test = X_train.pop(cnt)
#         X_train = np.concatenate(X_train)

#         y_train = list(y_folds)
#         y_test = y_train.pop(cnt)
#         y_train = np.concatenate(y_train)
        
#         model_train = build_model(X_train, y_train, n_output, n_features, n_hidden, l2, epochs, learning_rate, momentum_const, decay_rate, minibatch_size, 
#                    optimizer, activation, check_gradients, nesterov, print_progress)

#         acc_train = accuracy(X_train, y_train, model_train[0]['W1'], model_train[0]['W2'], activation)
#         acc_test = accuracy(X_test, y_test, model_train[0]['W1'], model_train[0]['W2'], activation)
        
#         print('k = ', cnt+1)
#         print('Training Accuracy: ', acc_train)
#         print('\tTest Accuracy: ', acc_test)

#         train_cv_acc.append(acc_train)
#         test_cv_acc.append(acc_test)


#     return np.mean(train_cv_acc), np.std(train_cv_acc), np.mean(test_cv_acc), np.std(test_cv_acc)

    
# # t1 = time.time()

# # print('Time Taken >> ', t1-t0)

In [35]:
def kfold_nn(X_copy, y_copy, X_test, y_test, n_output, n_features, n_hidden, l2, epochs, learning_rate, momentum_const, 
                      decay_rate, minibatch_size, optimizer, activation, check_gradients, nesterov, print_progress, 
                      k = 5):

#     X_train, X_test, y_train, y_test = train_test_split(X_copy, y_copy, test_size = 0.25)

#     for cnt in range(len(X_folds)):
    for cnt in range(1):
        cnt = 2
#         X_train = list(X_folds)
#         X_test = X_train.pop(cnt)
#         X_train = np.concatenate(X_train)

#         y_train = list(y_folds)
#         y_test = y_train.pop(cnt)
#         y_train = np.concatenate(y_train)
        
#         model_train = build_model(X_train, y_train, X_test, y_test, n_output, n_features, n_hidden, l2, epochs, learning_rate, momentum_const, decay_rate, minibatch_size, 
#                    optimizer, activation, check_gradients, nesterov, print_progress)
        model_train = build_model(X_copy, y_copy, X_test, y_test, n_output, n_features, n_hidden, l2, epochs, learning_rate, momentum_const, decay_rate, minibatch_size, 
                   optimizer, activation, check_gradients, nesterov, print_progress)

#         acc_train = accuracy(X_train, y_train, model_train[0]['W1'], model_train[0]['W2'], activation)
#         acc_test = accuracy(X_test, y_test, model_train[0]['W1'], model_train[0]['W2'], activation)
        
#         print('k = ', cnt+1)
#         print('Training Accuracy: ', acc_train)
#         print('\tTest Accuracy: ', acc_test)

#         train_cv_acc.append(acc_train)
#         test_cv_acc.append(acc_test)

    return model_train[0]['W1'], model_train[0]['W2'], model_train[0]['Acc']

In [17]:
# Change the Current Path
os.chdir('/Users/santanupaul/Documents/Personal/Masters in Analytics/UConn/Study Related/Python/Project/fer2013')

!pwd

/Users/santanupaul/Documents/Personal/Masters in Analytics/UConn/Study Related/Python/Project/fer2013


In [18]:
# Import the pixel matrix 
df = pd.read_csv('fer2013.csv')

df.head()

Unnamed: 0,emotion,pixels,Usage
0,0,70 80 82 72 58 58 60 63 54 58 60 48 89 115 121...,Training
1,0,151 150 147 155 148 133 111 140 170 174 182 15...,Training
2,2,231 212 156 164 174 138 161 173 182 200 106 38...,Training
3,4,24 32 36 30 32 23 19 20 30 41 21 22 32 34 21 1...,Training
4,6,4 0 0 0 0 0 0 0 0 0 0 0 3 15 23 28 48 50 58 84...,Training


In [19]:
print(df.shape)
df1 = df[(df['emotion'] == 3) | (df['emotion'] == 4)] # 3 happy, and 4 Sad
df1.emotion = df1.emotion - 3
print(df1.shape)

(35887, 3)
(15066, 3)




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy



In [20]:
# Split the pixel columns to form different fields for each pixel
df1 = pd.concat([df1[['emotion']], df1['pixels'].str.split(" ", expand = True)], axis = 1)
df1.head()

X = df1.iloc[:, 1:].values #Store as numpy array
Y = df1.iloc[:, 0].values

X = X.astype(int)

In [21]:
# Standardization: Although data is in the same scale (values from 1 to 255) mean = 0 and std = 1 is recommended for most
# machine learning algorithms
from sklearn.preprocessing import StandardScaler
X_std = StandardScaler().fit_transform(X)



Data with input dtype int64 was converted to float64 by StandardScaler.



In [22]:
# Import the 3 dimensionality reduction methods
from sklearn.decomposition import PCA
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
from sklearn.cross_validation import train_test_split
from sklearn.preprocessing import StandardScaler
X_std = StandardScaler().fit_transform(X)

# X_std.shape
X_temp, X_tst, Y_temp, Y_tst = train_test_split(X_std, Y, test_size = 0.2, random_state = 51)
X_trn, X_val, Y_trn, Y_val = train_test_split(X_temp, Y_temp, test_size = 0.2, random_state = 51)
lda = LDA(n_components=1)
# # Taking in as second argument the Target as labels
X_LDA_2D = lda.fit_transform(X_trn, Y_trn)
X_LDA_val = lda.transform(X_val)
X_LDA_tst = lda.transform(X_tst)
X_LDA_2D.shape # Getting only one linear discriminant since number of classes = 2


This module was deprecated in version 0.18 in favor of the model_selection module into which all the refactored classes and functions are moved. Also note that the interface of the new CV iterators are different from that of this module. This module will be removed in 0.20.


Data with input dtype int64 was converted to float64 by StandardScaler.



(9641, 1)

In [39]:
t0 = time.time()

""" Feedforward neural network with a single hidden layer
    Params:
    n_output: int: number of output units, equal to num class labels
    n_features: int: number of features in the input dataset
    n_hidden: int: (default 30): num hidden units
    l2: float(default: 0.0) - lambda value for L2 regularization
    epochs: int (default = 500) - passes over training set
    learning_rate: float (default: 0.001) - learning reate
    momentum_const: float (default: 0.0) - momentum constant - multiplied with gradient of previous pass through set
    decay_rate: float (default 0.0) - shrinks learning rate after each epoch
    minibatch_size: int (default: 1) - divides training data into batches for efficiency
"""
n_output = len(set(Y_trn))
n_features = X_LDA_2D.shape[1]
n_hidden_min = 50
n_hidden_num = 1
l2 = 1000
epochs = 500
learning_rate = 0.000001
momentum_const = 0
decay_rate = 0
minibatch_size = 1
optimizer = 'Gradient Descent'
activation = 'tanh'
check_gradients = True
nesterov = False
print_progress = True

hidden_cv = [[0 for x in range(5)] for y in range(n_hidden_num)]

for hid in range(n_hidden_num):
    
    n_hidden = hid+n_hidden_min
    print('Number of Nodes in Hidden Layer = ', n_hidden)
    
    hidden_cv[hid][0] = n_hidden
    
    """ k-Fold Cross Validation """
    w1, w2, Acc = kfold_nn(X_LDA_2D, Y_trn, X_LDA_val, Y_val, n_output, 
                            n_features, n_hidden, l2, epochs, learning_rate, momentum_const, 
                          decay_rate, minibatch_size, optimizer, activation, check_gradients, nesterov, print_progress, 
                          k = 1)
    
    


t1 = time.time()
print('Time Taken >> ', t1-t0)

Number of Nodes in Hidden Layer =  50
fitting
Epoch:  0
Training Acc:  81.13266258686858 
Test Acc:  65.74035669846538
Epoch:  1
Training Acc:  81.13266258686858 
Test Acc:  65.74035669846538
Epoch:  2
Training Acc:  81.13266258686858 
Test Acc:  65.74035669846538
Epoch:  3
Training Acc:  81.12229021885696 
Test Acc:  65.74035669846538
Epoch:  4
Training Acc:  81.12229021885696 
Test Acc:  65.74035669846538
Epoch:  5
Training Acc:  81.12229021885696 
Test Acc:  65.74035669846538
Epoch:  6
Training Acc:  81.12229021885696 
Test Acc:  65.74035669846538
Epoch:  7
Training Acc:  81.12229021885696 
Test Acc:  65.74035669846538
Epoch:  8
Training Acc:  81.12229021885696 
Test Acc:  65.74035669846538
Epoch:  9
Training Acc:  81.12229021885696 
Test Acc:  65.74035669846538
Epoch:  10
Training Acc:  81.12229021885696 
Test Acc:  65.74035669846538
Epoch:  11
Training Acc:  81.12229021885696 
Test Acc:  65.74035669846538
Epoch:  12
Training Acc:  81.12229021885696 
Test Acc:  65.78183326420572
Ep

Epoch:  112
Training Acc:  81.06005601078726 
Test Acc:  65.82330982994608
Epoch:  113
Training Acc:  81.06005601078726 
Test Acc:  65.82330982994608
Epoch:  114
Training Acc:  81.06005601078726 
Test Acc:  65.82330982994608
Epoch:  115
Training Acc:  81.06005601078726 
Test Acc:  65.82330982994608
Epoch:  116
Training Acc:  81.06005601078726 
Test Acc:  65.82330982994608
Epoch:  117
Training Acc:  81.06005601078726 
Test Acc:  65.82330982994608
Epoch:  118
Training Acc:  81.06005601078726 
Test Acc:  65.82330982994608
Epoch:  119
Training Acc:  81.06005601078726 
Test Acc:  65.82330982994608
Epoch:  120
Training Acc:  81.06005601078726 
Test Acc:  65.82330982994608
Epoch:  121
Training Acc:  81.06005601078726 
Test Acc:  65.82330982994608
Epoch:  122
Training Acc:  81.06005601078726 
Test Acc:  65.82330982994608
Epoch:  123
Training Acc:  81.07042837879888 
Test Acc:  65.82330982994608
Epoch:  124
Training Acc:  81.07042837879888 
Test Acc:  65.82330982994608
Epoch:  125
Training Acc:

Epoch:  222
Training Acc:  81.03931127476403 
Test Acc:  65.86478639568644
Epoch:  223
Training Acc:  81.03931127476403 
Test Acc:  65.86478639568644
Epoch:  224
Training Acc:  81.03931127476403 
Test Acc:  65.86478639568644
Epoch:  225
Training Acc:  81.03931127476403 
Test Acc:  65.86478639568644
Epoch:  226
Training Acc:  81.03931127476403 
Test Acc:  65.86478639568644
Epoch:  227
Training Acc:  81.03931127476403 
Test Acc:  65.86478639568644
Epoch:  228
Training Acc:  81.03931127476403 
Test Acc:  65.86478639568644
Epoch:  229
Training Acc:  81.03931127476403 
Test Acc:  65.86478639568644
Epoch:  230
Training Acc:  81.03931127476403 
Test Acc:  65.86478639568644
Epoch:  231
Training Acc:  81.03931127476403 
Test Acc:  65.86478639568644
Epoch:  232
Training Acc:  81.03931127476403 
Test Acc:  65.86478639568644
Epoch:  233
Training Acc:  81.03931127476403 
Test Acc:  65.86478639568644
Epoch:  234
Training Acc:  81.03931127476403 
Test Acc:  65.86478639568644
Epoch:  235
Training Acc:

Epoch:  334
Training Acc:  81.00819417072918 
Test Acc:  65.86478639568644
Epoch:  335
Training Acc:  81.00819417072918 
Test Acc:  65.86478639568644
Epoch:  336
Training Acc:  81.00819417072918 
Test Acc:  65.86478639568644
Epoch:  337
Training Acc:  81.00819417072918 
Test Acc:  65.86478639568644
Epoch:  338
Training Acc:  81.00819417072918 
Test Acc:  65.86478639568644
Epoch:  339
Training Acc:  81.00819417072918 
Test Acc:  65.86478639568644
Epoch:  340
Training Acc:  81.00819417072918 
Test Acc:  65.86478639568644
Epoch:  341
Training Acc:  81.00819417072918 
Test Acc:  65.86478639568644
Epoch:  342
Training Acc:  81.00819417072918 
Test Acc:  65.86478639568644
Epoch:  343
Training Acc:  81.00819417072918 
Test Acc:  65.86478639568644
Epoch:  344
Training Acc:  81.00819417072918 
Test Acc:  65.86478639568644
Epoch:  345
Training Acc:  81.00819417072918 
Test Acc:  65.86478639568644
Epoch:  346
Training Acc:  81.00819417072918 
Test Acc:  65.86478639568644
Epoch:  347
Training Acc:

Epoch:  446
Training Acc:  81.02893890675242 
Test Acc:  65.94773952716716
Epoch:  447
Training Acc:  81.02893890675242 
Test Acc:  65.94773952716716
Epoch:  448
Training Acc:  81.02893890675242 
Test Acc:  65.94773952716716
Epoch:  449
Training Acc:  81.02893890675242 
Test Acc:  65.94773952716716
Epoch:  450
Training Acc:  81.02893890675242 
Test Acc:  65.94773952716716
Epoch:  451
Training Acc:  81.02893890675242 
Test Acc:  65.94773952716716
Epoch:  452
Training Acc:  81.02893890675242 
Test Acc:  65.94773952716716
Epoch:  453
Training Acc:  81.02893890675242 
Test Acc:  65.94773952716716
Epoch:  454
Training Acc:  81.02893890675242 
Test Acc:  65.94773952716716
Epoch:  455
Training Acc:  81.02893890675242 
Test Acc:  65.94773952716716
Epoch:  456
Training Acc:  81.02893890675242 
Test Acc:  65.94773952716716
Epoch:  457
Training Acc:  81.02893890675242 
Test Acc:  65.94773952716716
Epoch:  458
Training Acc:  81.02893890675242 
Test Acc:  65.94773952716716
Epoch:  459
Training Acc:

In [32]:
# wt1, wt2 = w1, w2
wt1, wt2 = w1, w2

In [279]:
Acc_df = pd.DataFrame(Acc)
Acc_df.to_csv("Solution Trajectory.csv")

In [40]:
accuracy(X_LDA_tst, Y_tst, w1, w2, activation)

65.69343065693431

In [42]:
np.random.ra

array([[ 0.2156779 ,  0.915761  ,  0.36210628,  0.62009538],
       [ 0.52767063,  0.86684883,  0.14267155,  0.67198652],
       [ 0.99476113,  0.12171233,  0.42696197,  0.58546165]])