In [524]:
import numpy as np

**Goal 1: Create perceptron**

In [525]:
def perceptron(x, W, b):
    '''
    Input: weights W, biases b, input activations x
    Output: Single hypothesis
    '''
    z = np.sum(W.T * x) + b
    return sigmoid(z)

def sigmoid(z):
    return 1 / (1 + np.exp(-z))

def deriv_sigmoid(z):
    return sigmoid(z) * (1-sigmoid(z))

In [625]:
activations = np.array([0.3, 0.1, 0.7, 8])
weights = np.random.rand(activations.shape[0])
bias = 1

perceptron(activations, weights, bias)

0.9887711125213482

**Goal 2: Get, Clean, & Normalize Data**

In [527]:
import pandas as pd
data = pd.read_csv("housepricedata.csv")
print(data.head())

   LotArea  OverallQual  OverallCond  TotalBsmtSF  FullBath  HalfBath  \
0     8450            7            5          856         2         1   
1     9600            6            8         1262         2         0   
2    11250            7            5          920         2         1   
3     9550            7            5          756         1         0   
4    14260            8            5         1145         2         1   

   BedroomAbvGr  TotRmsAbvGrd  Fireplaces  GarageArea  AboveMedianPrice  
0             3             8           0         548                 1  
1             3             6           1         460                 1  
2             3             6           1         608                 1  
3             3             7           1         642                 0  
4             4             9           1         836                 1  


In [528]:
houses = data.values # DataFrame --> Array
x = houses[:, :-1] # Input Activations
y = houses[:, -1] # Output labels (0 or 1)
print(x)

[[ 8450     7     5 ...     8     0   548]
 [ 9600     6     8 ...     6     1   460]
 [11250     7     5 ...     6     1   608]
 ...
 [ 9042     7     9 ...     9     2   252]
 [ 9717     5     6 ...     5     0   240]
 [ 9937     5     6 ...     6     0   276]]


In [529]:
from sklearn.preprocessing import MinMaxScaler
x = MinMaxScaler().fit(x).transform(x) # features between 0 and 1

In [530]:
from sklearn.model_selection import train_test_split
X_train, X_valtest, Y_train, Y_valtest = train_test_split(x, y, test_size=0.3) # 70% Train
X_val, X_test, Y_val, Y_test = train_test_split(X_valtest, Y_valtest, test_size=0.5) # 15% to test and val

In [531]:
print(X_train.shape)
print(X_val.shape)
print(X_test.shape)
print(x.shape)

(1022, 10)
(219, 10)
(219, 10)
(1460, 10)


**Goal 3: Forward Propagation**

In [586]:
#np.random.seed(0)
def forwardProp(activations, weights, biases, zArr, numLayers):
    '''
    Vectorized Implementation
    Input: First Layer activations, weights, biases, number of layers
    Output: Last layer
    '''
    # Layer i
    for i in range(numLayers-1):
        z = np.dot(weights[i], activations[i]) + biases[i]
        zArr.append(z)
        activations.append(sigmoid(z))

params = {
    "weights": [
    np.random.randn(X_train.shape[0], X_train.shape[1]) * np.sqrt(2/X_train.shape[0]), # l1
    np.random.randn(1, X_train.shape[0]) * np.sqrt(2/X_train.shape[0])  # l2
    ],
    
    "biases": np.ones(numLayers-1),    
    
    "numLayers": 3 
}

numLayers = 3
zArr = []
activations = [X_train.T]
weights = [
    np.random.randn(X_train.shape[0], X_train.shape[1]) * np.sqrt(2/X_train.shape[0]), # l1
    np.random.randn(1, X_train.shape[0]) * np.sqrt(2/X_train.shape[0])  # l2
          ]
biases = np.ones(numLayers-1) # bias in each layer except output

forwardProp(activations, weights, biases, zArr, numLayers)

In [623]:
def backwardProp(activations, weights, biases, numLayers, actual, zArr, weightDecay, learning_rate):
    # Compute Cost
    cost = np.mean(1/2 * np.linalg.norm(activations[-1]-actual) ** 2)
    for layer in range(numLayers - 1):
        for i in range(activations[layer].shape[0]):
            for j in range(activations[layer+1].shape[0]):
                cost += weights[layer][j][i] ** 2
    cost *= weightDecay / 2
    print(cost)
    
    # Output layer delta
    deltas = {}
    delta_nL = np.multiply(-(actual - activations[-1]), deriv_sigmoid(zArr[-1]))
    deltas[2] = delta_nL
    
    # Go through hidden layers (l2 --> index 1)
    for l in range(numLayers-2, 0, -1): 
        deltas[l] = np.dot(weights[l].T, deltas[l+1]) * deriv_sigmoid(zArr[l])
        deriv_W = np.dot(deltas[l+1], activations[l].T)
        deriv_b = deltas[l+1]
        
        print(deriv_W)
        weights[l] = weights[l] - learning_rate * deriv_W
        # todo: update biases
    
weightDecay = 0.5
learning_rate = 0.001
backwardProp(activations, weights, biases, numLayers, Y_train, zArr, weightDecay, learning_rate)

173776.4706329683
[[0. 0. 0. ... 0. 0. 0.]]


  # Remove the CWD from sys.path while we load stuff.


In [619]:
def runNetwork(activations, weights, biases, numLayers, actual, zArr, weightDecay, learning_rate):
    epochs = 10
    for x in range(epochs):
        forwardProp(activations, weights, biases, zArr, numLayers)
        backwardProp(activations, weights, biases, numLayers, Y_train, zArr, weightDecay, learning_rate)
    
runNetwork(activations, weights, biases, numLayers, Y_train, zArr, weightDecay, learning_rate)

  # Remove the CWD from sys.path while we load stuff.


173776.4706329683
173776.4706329683
173776.4706329683
173776.4706329683
173776.4706329683
173776.4706329683
173776.4706329683
173776.4706329683
173776.4706329683
173776.4706329683


In [620]:
print(weights)

[array([[-0.04645748, -0.11420034,  0.02332669, ..., -0.0530145 ,
        -0.01270414, -0.06781076],
       [ 0.03650687, -0.00169416,  0.0139566 , ..., -0.01795821,
        -0.03512879,  0.06931611],
       [-0.04107713, -0.03468755, -0.01679333, ...,  0.02514496,
         0.06852584,  0.00804795],
       ...,
       [ 0.04323556, -0.07361189,  0.005151  , ...,  0.03093557,
        -0.00913286, -0.00805881],
       [-0.01044397, -0.01612829, -0.07758249, ...,  0.02867128,
         0.05306512,  0.01180311],
       [ 0.08342361, -0.02342514, -0.00940452, ..., -0.00341371,
         0.05061867, -0.03601588]]), array([[-25.52077447, -26.07231193, -26.00936457, ..., -25.89717348,
        -25.99631162, -25.81178739]])]


**References**
1. http://ufldl.stanford.edu/tutorial/supervised/MultiLayerNeuralNetworks/
2. https://towardsdatascience.com/weight-initialization-techniques-in-neural-networks-26c649eb3b78