In [2]:
import numpy as np
import pandas as pd

In [3]:
def sigmoid(x, derivative=False):
    z = 1/(1+np.exp(-x))
    if not derivative:
        return z
    return z * (1 - z)

In [4]:
def tanh(x, derivative=False):
    z = (np.exp(x)-np.exp(-x))/(np.exp(x)+np.exp(-x))
    if not derivative:
        return z
    return 1-z**2

### Load data from CSV file

In [16]:
df = pd.read_csv('../data/50_startups.csv')
df.head(10)

Unnamed: 0,R&D Spend,Administration,Marketing Spend,State,Profit
0,165349.2,136897.8,471784.1,New York,192261.83
1,162597.7,151377.59,443898.53,California,191792.06
2,153441.51,101145.55,407934.54,Florida,191050.39
3,144372.41,118671.85,383199.62,New York,182901.99
4,142107.34,91391.77,366168.42,Florida,166187.94
5,131876.9,99814.71,362861.36,New York,156991.12
6,134615.46,147198.87,127716.82,California,156122.51
7,130298.13,145530.06,323876.68,Florida,155752.6
8,120542.52,148718.95,311613.29,New York,152211.77
9,123334.88,108679.17,304981.62,California,149759.96


### Create numpy matrix and vector (1-D array)

In [160]:
# take 10 rows and cols. 1, 2 and 3.
X = ?
#normalize 
X = (X-X.mean())/X.std()
#X=(df-df.min())/(df.max()-df.min()) # this is min-max normalization
X[:10]

array([[-0.34430656, -0.58889656,  2.29004227],
       [-0.36796056, -0.4644172 ,  2.05031658],
       [-0.44667417, -0.89625025,  1.74114261],
       [-0.52463909, -0.74558077,  1.52850231],
       [-0.54411137, -0.9801012 ,  1.38208909],
       [-0.63206005, -0.90769117,  1.35365907],
       [-0.6085173 , -0.50034068, -0.66782328],
       [-0.64563237, -0.51468705,  1.01851694],
       [-0.72949906, -0.48727291,  0.91309145],
       [-0.70549379, -0.83148549,  0.85608055]])

In [155]:
# generate vector 'y' of shape 10 x 1 and values 
np.random.seed(3)
y = ?
y[:10]

array([[0],
       [0],
       [1],
       [1],
       [0],
       [0],
       [0],
       [1],
       [1],
       [1]])

In [167]:
# learning rate and no. of hidden layer nodes
alpha,hidden_dim = (0.6,X.shape[0]+2) 

In [168]:
weight_0 = 0.02*np.random.randn(X.shape[1], hidden_dim)
weight_1 = 0.02*np.random.randn(hidden_dim,1)

In [169]:
#iterate the process
for j in range(5000):
    ##### Forward propagation
    # sumation and activation
    a1 = np.dot(X,weight_0) # no bias
    layer_1 = sigmoid(a1)
    a2 = np.dot(layer_1,weight_1) # no bias
    layer_2 = sigmoid(a2)
    ##### Backward propagation 
    # calc. gradient (derivative) and find diff (delta)
    layer_2_delta = (layer_2 - y) * sigmoid(a2, True) #(layer_2*(1-layer_2))
    layer_1_delta = layer_2_delta.dot(weight_1.T) * sigmoid(a1, True) #(layer_1 * (1-layer_1))
    # adjust (substract) weights
    weight_1 -= (alpha * layer_1.T.dot(layer_2_delta))
    weight_0 -= (alpha * X.T.dot(layer_1_delta))
print ("Output After Training:")
# print labels or ground truth 
print(y)
# print predicted probabilities or scores 
print(layer_2)

Output After Training:
[[0]
 [0]
 [1]
 [1]
 [0]
 [0]
 [0]
 [1]
 [1]
 [1]]
[[0.00995854]
 [0.01440568]
 [0.97434351]
 [0.97347076]
 [0.0144513 ]
 [0.04231343]
 [0.01225785]
 [0.98623457]
 [0.99701183]
 [0.96790557]]


### use np.where() to calculate the predictions using probabilities
values higher than 0.98 should be returned as 1 otherwise 0

In [171]:
preds = np.where(layer_2??, 1, 0)
preds

array([[0],
       [0],
       [1],
       [1],
       [0],
       [0],
       [0],
       [1],
       [1],
       [1]])

### use roc_auc_score() to get the accuracy
1. use np.squeeze() on y and layer_2 to convert into 1D
2. use roc_auc_score(...) to get the score

In [173]:
from sklearn.metrics import roc_auc_score
import numpy as np
y_true = np.squeeze(?)
y_scores = np.squeeze(?)
roc_auc_score(y_true, y_scores)

1.0