In [2]:
# Package imports
import numpy as np
from sklearn.model_selection import train_test_split

# Matplotlib is a matlab like plotting library
import matplotlib
from matplotlib import pyplot as plt
# SciKitLearn is a useful machine learning utilities library
import sklearn
# The sklearn dataset module helps generating |datasets
import sklearn.datasets
import sklearn.linear_model
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import normalize
from scipy.stats import gaussian_kde

In [3]:
# importing dataset
# import data
from DataExtraction import dataNoMass 
from DataExtraction import dataWithP2
from DataExtraction import dataWithP2E2 
from DataExtraction import dataWithMass 
from DataExtraction import p2E2 
from DataExtraction import e2P2Dec as data
from DataExtraction import labels2D as labels
#from DataExtraction import labels
data = np.array(data)
labels = np.array(labels)
# labels = np.row_stack(labels)

In [4]:
# split data 
train_data, test_data, train_labels, test_labels = train_test_split(data, labels, train_size=0.5, test_size=0.5, random_state=42)

In [5]:
# normalize test data
# train_data[:,0] = train_data[:,0] / np.linalg.norm(train_data[:,0]) # normalize column 0
# train_data[:,1] = train_data[:,1] / np.linalg.norm(train_data[:,1]) # normalize column 1
#normalize train data
# test_data[:,0] = test_data[:,0] / np.linalg.norm(test_data[:,0]) # normalize column 0
# test_data[:,1] = test_data[:,1] / np.linalg.norm(test_data[:,1]) # normalize column 1

In [6]:

avgE2 = np.mean(train_data)

In [7]:
train_data = train_data/avgE2
test_data = test_data/avgE2

In [8]:
# messing with the number of training data points
# train_data = train_data[0:9]
# train_labels = train_labels[0:9]

In [9]:
# Now we define all our functions

def softmax(z):
    #Calculate exponent term first
    exp_scores = np.exp(z)
    return exp_scores / np.sum(exp_scores, axis=1, keepdims=True)

# loss functions
def softmax_loss(y,y_hat):
    # Clipping value
    minval = 0.000000000001
    # Number of samples
    m = y.shape[0]
    # Loss formula, note that np.sum sums up the entire matrix and therefore does the job of two sums from the formula
    loss = -1/m * np.sum(y * np.log(y_hat.clip(min=minval)))
    #loss = -1/m * np.sum(y * np.log(y_hat))
    return loss

def crossEntropy_loss(y, y_hat):
    m = y.shape[0]
    if y.all() == 1:
        return -1/m * np.sum(np.log(y_hat))
    else:
        return -1/m * np.sum(np.log(1 - y_hat))

def mse_loss(y, y_hat):
    m = y.shape[0]
    return np.sum((y_hat - y)**2) / m
    
def loss_derivative(y,y_hat):
    return (y_hat-y)

def tanh_derivative(x):
    return (1 - np.power(x, 2))

# This is the forward propagation function
def forward_prop(model,a0):
    
    #Start Forward Propagation
    
    # Load parameters from model
    W1, b1, W2, b2, W3, b3 = model['W1'], model['b1'], model['W2'], model['b2'], model['W3'],model['b3']
    
    # Do the first Linear step 
    # Z1 is the input layer x times the dot product of the weights + our bias b
    z1 = a0.dot(W1) + b1
    
    # Put it through the first activation function
    a1 = np.tanh(z1)
    
    # Second linear step
    z2 = a1.dot(W2) + b2
    
    # Second activation function
    a2 = np.tanh(z2)
    
    #Third linear step
    z3 = a2.dot(W3) + b3
    
    #For the Third linear activation function we use the softmax function, either the sigmoid of softmax should be used for the last layer
    a3 = softmax(z3)
    
    #Store all results in these values
    cache = {'a0':a0,'z1':z1,'a1':a1,'z2':z2,'a2':a2,'a3':a3,'z3':z3}
    return cache

# This is the BACKWARD PROPAGATION function
def backward_prop(model,cache,y):

    # Load parameters from model
    W1, b1, W2, b2, W3, b3 = model['W1'], model['b1'], model['W2'], model['b2'],model['W3'],model['b3']
    # Load forward propagation results
    a0,a1, a2,a3 = cache['a0'],cache['a1'],cache['a2'],cache['a3']
    
    # Get number of samples
    m = y.shape[0]
    
    # Calculate loss derivative with respect to output
    dz3 = loss_derivative(y=y,y_hat=a3)

    # Calculate loss derivative with respect to second layer weights
    dW3 = 1/m*(a2.T).dot(dz3) #dW2 = 1/m*(a1.T).dot(dz2) 
    
    # Calculate loss derivative with respect to second layer bias
    db3 = 1/m*np.sum(dz3, axis=0)
    
    # Calculate loss derivative with respect to first layer
    dz2 = np.multiply(dz3.dot(W3.T) ,tanh_derivative(a2))
    
    # Calculate loss derivative with respect to first layer weights
    dW2 = 1/m*np.dot(a1.T, dz2)
    
    # Calculate loss derivative with respect to first layer bias
    db2 = 1/m*np.sum(dz2, axis=0)
    
    dz1 = np.multiply(dz2.dot(W2.T),tanh_derivative(a1))
    
    dW1 = 1/m*np.dot(a0.T,dz1)
    
    db1 = 1/m*np.sum(dz1,axis=0)
    
    # Store gradients
    grads = {'dW3':dW3, 'db3':db3, 'dW2':dW2,'db2':db2,'dW1':dW1,'db1':db1}
    return grads

#TRAINING PHASE
def initialize_parameters(input_dim,l1_dim, l2_dim, output_dim):
    # First layer weights
    W1 = 2 *np.random.randn(input_dim, l1_dim) - 1
    
    # First layer bias
    b1 = np.zeros((1, l1_dim))
    
    # Second layer weights
    W2 = 2 * np.random.randn(l1_dim, l2_dim) - 1
    
    # Second layer bias
    b2 = np.zeros((1, l2_dim))
    W3 = 2 * np.random.rand(l2_dim, output_dim) - 1
    b3 = np.zeros((1,output_dim))
    
    
    # Package and return model
    model = { 'W1': W1, 'b1': b1, 'W2': W2, 'b2': b2,'W3':W3,'b3':b3}
    return model

def update_parameters(model,grads,learning_rate):
    # Load parameters
    W1, b1, W2, b2,b3,W3 = model['W1'], model['b1'], model['W2'], model['b2'],model['b3'],model["W3"]
    
    # Update parameters
    W1 -= learning_rate * grads['dW1']
    b1 -= learning_rate * grads['db1']
    W2 -= learning_rate * grads['dW2']
    b2 -= learning_rate * grads['db2']
    W3 -= learning_rate * grads['dW3']
    b3 -= learning_rate * grads['db3']
    
    # load parameters into running lists
#     w00s.append(W1[0][0]) # modifies global list
#     w01s.append(W1[0][1]) # modifies global list
#     w02s.append(W1[0][2]) # modifies global list
#     w03s.append(W1[0][3]) # modifies global list
#     w04s.append(W1[0][4]) # modifies global list
    
#     w10s.append(W1[1][0]) # modifies global list
#     w11s.append(W1[1][1]) # modifies global list
#     w12s.append(W1[1][2]) # modifies global list
#     w13s.append(W1[1][3]) # modifies global list
#     w14s.append(W1[1][4]) # modifies global list
    
#     b0s.append(b1[0][0]) # modifies global list
#     b1s.append(b1[0][1]) # modifies global list
#     b2s.append(b1[0][2]) # modifies global list
#     b3s.append(b1[0][3]) # modifies global list
#     b4s.append(b1[0][4]) # modifies global list

    # Store and return parameters
    model = { 'W1': W1, 'b1': b1, 'W2': W2, 'b2': b2, 'W3':W3,'b3':b3}
    return model
def predict(model, x):
    # Do forward pass
    c = forward_prop(model,x)
    #get y_hat
    y_hat = c['a3']
    # plotArr.append([x, y_hat]) #added to make plot
    return y_hat
def calc_accuracy(model,x,y):
    # Get total number of examples
    m = y.shape[0]
    # Do a prediction with the model
    pred = predict(model,x)
    # Ensure prediction and truth vector y have the same shape
    pred = pred.reshape(y.shape)
    # Calculate the number of wrong examples
    error = np.sum(np.abs(pred-y))
    # Calculate accuracy
    return (m - error)/m * 100
def train(model,X_,y_,learning_rate, epochs=2001, print_loss=False):
    # Gradient descent. Loop over epochs
    for i in range(0, epochs):

        # Forward propagation
        cache = forward_prop(model,X_)
        #a1, probs = cache['a1'],cache['a2']
        # Backpropagation
        
        grads = backward_prop(model,cache,y_)
        # Gradient descent parameter update
        # Assign new parameters to the model
        model = update_parameters(model=model,grads=grads,learning_rate=learning_rate)
        # it is at this point in the training that the weights get added to the lists
    
        a3 = cache['a3']
        thisLoss = mse_loss(y_,a3) # set loss function here
#         losses.append(thisLoss) # modifies global list
        y_hat = predict(model,X_) # getting rid of this because it's wrong
        y_true = y_.argmax(axis=1)
        accur = accuracy_score(a3,train_labels)
#         train_accuracies.append(accur) # modifies global list
        
    # test accuracy
#         if i % 50 == 0:
#             placeholderVar = accuracy_score(a3, train_labels)
#             test_accuracy = accuracyOfModel(model, test_data, test_labels)
# #             test_accuracies.append(test_accuracy) # modifies global list
#             test_num.append(i)
#         #Printing loss & accuracy every 100 iterations
        if print_loss and i % 300==0:
            print('Loss after iteration',i,':',thisLoss)
            print('Train Accuracy after iteration',i,':',accur*100,'%')
            # print('Test Accuracy after iteration',i,':',test_accuracy*100,'%')
    return model

# TESTING PHASE
# test the accuracy of any model
def accuracyOfModel(_model, _testData, _testLabels):
    y_pred = predict(_model,_testData) # make predictions on test data
    y_true = _testLabels # get usable info from labels
    return accuracy_score(y_pred, y_true)

def accuracy_score(_outputNodes, _labels):
    for i in range(len(_outputNodes)-1):
        if _outputNodes[i][0]>.5:
            _outputNodes[i]=[1,0]
        else:
            _outputNodes[i]=[0,1]
    numWrong = np.count_nonzero(np.subtract(_outputNodes,_labels))/2
    return (len(_outputNodes)-numWrong)/len(_outputNodes)

    

In [10]:
# declare global list
accuracies = []
for i in range (1, 7):
    for j in range (1, 7):
        learnRate = 0.02 # set learning rate here
        np.random.seed(0)
        # This is what we return at the end
        model = initialize_parameters(2, i, j, 2)
        model = train(model,train_data,train_labels,learning_rate=learnRate,epochs=901,print_loss=True) # original learning rate is 0.01
        accuracies.append([i, j, accuracyOfModel(model, test_data, test_labels)])

Loss after iteration 0 : 0.5107894410139484
Train Accuracy after iteration 0 : 50.517053930368384 %
Loss after iteration 300 : 0.5001650535464061
Train Accuracy after iteration 300 : 49.78635179894314 %
Loss after iteration 600 : 0.5001258396845615
Train Accuracy after iteration 600 : 49.80405046648631 %
Loss after iteration 900 : 0.5000955423884395
Train Accuracy after iteration 900 : 49.82680589618467 %
Loss after iteration 0 : 0.6360231954260281
Train Accuracy after iteration 0 : 50.517053930368384 %
Loss after iteration 300 : 0.4998600873640585
Train Accuracy after iteration 300 : 49.705443604460065 %
Loss after iteration 600 : 0.49955012287536205
Train Accuracy after iteration 600 : 49.756011226011985 %
Loss after iteration 900 : 0.49935144888561306
Train Accuracy after iteration 900 : 49.78888018002073 %
Loss after iteration 0 : 0.6126370922807138
Train Accuracy after iteration 0 : 49.48041768855402 %
Loss after iteration 300 : 0.5027044191968005
Train Accuracy after iteration 30

Loss after iteration 300 : 0.49377324670707357
Train Accuracy after iteration 300 : 66.3674749058178 %
Loss after iteration 600 : 0.492955989972309
Train Accuracy after iteration 600 : 69.52289449065762 %
Loss after iteration 900 : 0.4920580004536146
Train Accuracy after iteration 900 : 70.1802735708326 %
Loss after iteration 0 : 0.5064122139411507
Train Accuracy after iteration 0 : 50.01137771484918 %
Loss after iteration 300 : 0.5005108291352471
Train Accuracy after iteration 300 : 47.92293494475487 %
Loss after iteration 600 : 0.4996929129739368
Train Accuracy after iteration 600 : 47.72319283962479 %
Loss after iteration 900 : 0.4990990826403176
Train Accuracy after iteration 900 : 48.45642335212763 %
Loss after iteration 0 : 0.5051010953420706
Train Accuracy after iteration 0 : 50.375464590023014 %
Loss after iteration 300 : 0.4939745662740688
Train Accuracy after iteration 300 : 52.782483375894415 %
Loss after iteration 600 : 0.4921228591736549
Train Accuracy after iteration 600 

In [14]:
accuracies = np.array(accuracies)

In [15]:
print(accuracies[:,2])

[0.49801522 0.4978888  0.50160552 0.50168137 0.5014791  0.50218705
 0.36573032 0.50292028 0.50752193 0.46332583 0.49973452 0.53667417
 0.48767414 0.50489242 0.50807818 0.50911481 0.49925413 0.50984804
 0.47867311 0.50018963 0.70152461 0.48595484 0.68731511 0.4941468
 0.51343835 0.50676342 0.50084701 0.43139238 0.98098657 0.50256631
 0.49667518 0.51877323 0.50618189 0.52484134 0.46721954 0.96869864]


In [None]:
vb:>¬