In [1]:
import os
from pathlib import Path

import numpy as np
import pandas as pd
# Directory holding exoTrain.csv / exoTest.csv. Set the KEPLER_DATA_DIR
# environment variable to point somewhere else; when it is unset the original
# hard-coded location is used, so existing setups keep working.
DATA_DIR = Path(os.environ.get('KEPLER_DATA_DIR', 'C:\\Users\\arthu\\Anaconda3\\Kepler'))

# `p` is the training table and `test` the held-out table; both names are used
# by the cells below.
p = pd.read_csv(DATA_DIR / 'exoTrain.csv')
test = pd.read_csv(DATA_DIR / 'exoTest.csv')

In [2]:
# Label encoding used from here on: 1 = exoplanet star, 0 = non-exoplanet star.
# (Assumes the raw LABEL column holds 2 / 1 -- inferred from the shift by one;
# TODO confirm against the dataset description.)
# The shifted labels are built as new Series instead of being written back
# into `p` / `test`, so re-running this cell cannot subtract 1 a second time.
labels = p['LABEL'] - 1
tLabels = test['LABEL'] - 1

# Column vectors of shape (n_samples, 1). Kept as np.matrix (not ndarray) so
# that later arithmetic and reductions on them stay 2-D.
yTest = np.transpose(np.asmatrix(tLabels))
Y = np.transpose(np.asmatrix(labels))

# Flux measurements: every column from position 1 up to (not including) 3198.
flux_raw = p.iloc[:, 1:3198]
test_flux_raw = test.iloc[:, 1:3198]

# Per-column max |flux|, measured on the TRAINING set only. axis=0 is spelled
# out so the result does not depend on how np.amax dispatches on a DataFrame
# (newer pandas reduces over both axes when axis is None).
flux_scale = flux_raw.abs().max(axis=0).to_numpy()
# An all-zero column would otherwise turn into NaN after the division.
flux_scale = np.where(flux_scale == 0, 1.0, flux_scale)

# Apply the same training-derived scale to both sets; the model must see test
# inputs in the same units it was trained on.
flux = flux_raw / flux_scale
testFlux = test_flux_raw / flux_scale

# Number of training samples.
size = np.shape(flux)[0]

In [3]:
def layer_size(inputs, hidden, labels):
    """Return the (input, hidden, output) unit counts for the network.

    The input and output widths are read from the second dimension of
    ``inputs`` and ``labels`` respectively; ``hidden`` is passed through
    unchanged.
    """
    feature_count = inputs.shape[1]
    output_count = labels.shape[1]
    return feature_count, hidden, output_count

In [4]:
def initialize_parameters(n_x, n_h, n_y, seed=None):
    """Create the initial weights and biases for the two-layer network.

    Parameters
    ----------
    n_x : int
        Number of input units.
    n_h : int
        Number of hidden units.
    n_y : int
        Number of output units.
    seed : int or None, optional
        When given, the weights are drawn from a private
        ``np.random.RandomState(seed)`` so the initialisation is reproducible
        and the global NumPy random state is left untouched. When ``None``
        (the default) the global ``np.random`` state is used, exactly as
        before this parameter existed.

    Returns
    -------
    dict
        ``'W1'`` of shape (n_h, n_x) and ``'W2'`` of shape (n_y, n_h), drawn
        from a standard normal distribution; ``'b1'`` of shape (n_h, 1) and
        ``'b2'`` of shape (n_y, 1), all zeros.
    """
    # Both the np.random module and a RandomState instance expose randn().
    rng = np.random if seed is None else np.random.RandomState(seed)

    # W1 is drawn before W2 so the unseeded path consumes the global stream
    # in the same order as the original implementation.
    W1 = rng.randn(n_h, n_x)
    b1 = np.zeros((n_h, 1))
    W2 = rng.randn(n_y, n_h)
    b2 = np.zeros((n_y, 1))

    parameters = {'W1': W1, 'W2': W2, 'b1': b1, 'b2': b2}
    return parameters

In [5]:
def sigmoid(z):
    """Element-wise logistic function, 1 / (1 + e^(-z))."""
    decay = np.exp(-z)
    return 1 / (decay + 1)

In [6]:
def forward_propagation(inputs, parameters):
    """Run one forward pass through the two-layer network.

    ``inputs`` is transposed before the first matrix product, so its second
    dimension must match the second dimension of ``parameters['W1']``.

    Returns a dict with the hidden pre-activation ``'Z1'``, its tanh
    activation ``'A1'``, the output pre-activation ``'Z2'`` and its sigmoid
    activation ``'A2'``.
    """
    # Hidden layer: affine map followed by tanh.
    hidden_pre = np.matmul(parameters['W1'], inputs.T) + parameters['b1']
    hidden_act = np.tanh(hidden_pre)

    # Output layer: affine map followed by the logistic function.
    output_pre = np.matmul(parameters['W2'], hidden_act) + parameters['b2']
    output_act = sigmoid(output_pre)

    return dict(A1=hidden_act, Z1=hidden_pre, A2=output_act, Z2=output_pre)

In [7]:
def compute_cost(A2, Y, parameters, lambd):
    """Binary cross-entropy cost with an L2 penalty on both weight matrices.

    Parameters
    ----------
    A2 : array-like, shape (1, m)
        Output activations of the network, one column per sample.
    Y : array-like, shape (m, 1)
        Target labels, one row per sample.
    parameters : dict
        Must contain the weight matrices ``'W1'`` and ``'W2'``.
    lambd : float
        L2 regularisation strength.

    Returns
    -------
    float
        Mean cross-entropy over the m samples in ``Y`` plus
        ``lambd / (2 * m)`` times the sum of squared weights.
    """
    W1 = parameters['W1']
    W2 = parameters['W2']

    # Work on plain ndarrays so the arithmetic is element-wise whether the
    # caller passes np.matrix or ndarray values.
    A2 = np.asarray(A2)
    targets = np.asarray(Y).T

    # Normalise by the number of samples actually passed in rather than a
    # module-level global, so the cost is correct for any dataset or batch.
    m = np.asarray(Y).shape[0]

    # Keep activations strictly inside (0, 1): a saturated 0 or 1 would give
    # log(0) = -inf and then 0 * -inf = NaN in the sum below.
    eps = np.finfo(float).eps
    A2 = np.clip(A2, eps, 1 - eps)

    cross_entropy = (-1 / m) * np.sum(
        np.multiply(np.log(A2), targets) + np.multiply(np.log(1 - A2), 1 - targets)
    )
    l2_penalty = (lambd / (2 * m)) * (np.sum(np.square(W1)) + np.sum(np.square(W2)))

    return cross_entropy + l2_penalty

In [8]:
def backward_propagation(parameters, cache, X, Y, lambd):
    """Gradients of the L2-regularised cross-entropy cost for the two-layer net.

    Parameters
    ----------
    parameters : dict
        Must contain the weight matrices ``'W1'`` (n_h, n_x) and ``'W2'``
        (n_y, n_h).
    cache : dict
        Forward-pass results; ``'A1'`` (n_h, m) and ``'A2'`` (n_y, m) are read.
    X : array-like, shape (m, n_x)
        Inputs, one row per sample.
    Y : array-like, shape (m, n_y)
        Targets, one row per sample.
    lambd : float
        L2 regularisation strength.

    Returns
    -------
    dict
        ``'dW1'``, ``'db1'``, ``'dW2'``, ``'db2'`` as 2-D ndarrays with the
        same shapes as the corresponding parameters (biases are (n, 1)).
    """
    # Convert everything to plain ndarrays up front: the maths below is then
    # independent of whether callers pass np.matrix, ndarray or DataFrame.
    W1 = np.asarray(parameters['W1'])
    W2 = np.asarray(parameters['W2'])
    A1 = np.asarray(cache['A1'])
    A2 = np.asarray(cache['A2'])
    X = np.asarray(X)
    Y = np.asarray(Y)

    # Sample count taken from the data passed in, not from a module-level
    # global, so gradients are scaled correctly for any dataset or batch.
    m = Y.shape[0]

    dZ2 = A2 - Y.T
    dW2 = (1 / m) * np.matmul(dZ2, A1.T) + (lambd / m) * W2
    # keepdims=True keeps the bias gradient as a column (n, 1). A 1-D result
    # would broadcast against the (n, 1) bias into an (n, n) array on update.
    db2 = (1 / m) * np.sum(dZ2, axis=1, keepdims=True)
    # (1 - A1^2) is the derivative of tanh evaluated at the hidden activation.
    dZ1 = np.multiply((1 - np.power(A1, 2)), np.matmul(W2.T, dZ2))
    dW1 = (1 / m) * np.matmul(dZ1, X) + (lambd / m) * W1
    db1 = (1 / m) * np.sum(dZ1, axis=1, keepdims=True)

    grads = {"dW1": dW1, "db1": db1, "dW2": dW2, "db2": db2}

    return grads

In [9]:
def update_parameters(parameters, grads, rate):
    """Take one gradient-descent step and return the new parameter dict.

    Each of ``W1``, ``b1``, ``W2`` and ``b2`` is replaced by
    ``value - rate * grads['d' + name]``. The input dicts are not modified.
    """
    names = ("W1", "b1", "W2", "b2")
    return {name: parameters[name] - rate * grads["d" + name] for name in names}

In [10]:
def nn(X, Y, n_h, iterations, rate, lambd):
    """Train the two-layer network with full-batch gradient descent.

    Runs ``iterations`` rounds of forward pass, cost evaluation, backward
    pass and parameter update on (X, Y), using ``n_h`` hidden units, learning
    rate ``rate`` and L2 strength ``lambd``. The cost is printed every 100
    iterations. Returns the final parameter dict.
    """
    n_x, _, n_y = layer_size(X, n_h, Y)
    parameters = initialize_parameters(n_x, n_h, n_y)

    for step in range(iterations):
        cache = forward_propagation(X, parameters)
        # The cost is evaluated with the parameters used for this forward
        # pass, i.e. before the update below.
        cost = compute_cost(cache['A2'], Y, parameters, lambd)
        gradients = backward_propagation(parameters, cache, X, Y, lambd)
        parameters = update_parameters(parameters, gradients, rate)

        if step % 100 == 0:
            print("Cost after iteration %i: %f" % (step, cost))

    return parameters

In [11]:
def predict(parameters, X):
    """Return the network's output activation for X, rounded with np.round."""
    return np.round(forward_propagation(X, parameters)['A2'])

In [18]:
# Hyperparameters for this run (values unchanged from the original call).
HIDDEN_UNITS = 7
ITERATIONS = 1000
LEARNING_RATE = 1
L2_LAMBDA = 10
# Weight initialisation draws from NumPy's global random state; seeding it
# makes the run repeatable after Restart & Run All.
SEED = 0

np.random.seed(SEED)
parameters = nn(flux, Y, HIDDEN_UNITS, ITERATIONS, LEARNING_RATE, L2_LAMBDA)

# predict() returns one column per sample, shape (1, m), while Y / yTest hold
# one row per sample, shape (m, 1). The labels are transposed before
# subtracting; without that the difference broadcasts to an (m, m) matrix and
# the mean is taken over every prediction/label pair instead of per sample.
predictions = predict(parameters, flux)
test_predictions = predict(parameters, testFlux)
print("train accuracy: {} %".format(100 - np.mean(np.abs(predictions - Y.T)) * 100))
print("test accuracy: {} %".format(100 - np.mean(np.abs(test_predictions - yTest.T)) * 100))

Cost after iteration 0: 22.477736
Cost after iteration 100: 14.740555
Cost after iteration 200: 9.959040
Cost after iteration 300: 6.733401
Cost after iteration 400: 4.557240
Cost after iteration 500: 3.089059
Cost after iteration 600: 2.098506
Cost after iteration 700: 1.430188
Cost after iteration 800: 0.979267
Cost after iteration 900: 0.675015
train accuracy: 99.27265578926676 %
test accuracy: 99.12280701754386 %
