In [1]:
import pandas as pd
import numpy as np

In [2]:
df = pd.read_csv("housing/boston_fixed.csv")
df.head()

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT,MEDV
0,0.00632,18.0,2.31,0,0.538,6.575,65.2,4.09,1,296.0,15.3,396.9,4.98,24.0
1,0.02731,0.0,7.07,0,0.469,6.421,78.9,4.9671,2,242.0,17.8,396.9,9.14,21.6
2,0.02729,0.0,7.07,0,0.469,7.185,61.1,4.9671,2,242.0,17.8,392.83,4.03,34.7
3,0.03237,0.0,2.18,0,0.458,6.998,45.8,6.0622,3,222.0,18.7,394.63,2.94,33.4
4,0.06905,0.0,2.18,0,0.458,7.147,54.2,6.0622,3,222.0,18.7,396.9,5.33,36.2


In [3]:
X = df.drop("MEDV",axis=1)
y = df["MEDV"]

In [4]:
# NN

class NeuralNetwork:
    def __init__(self, layers, nodes, activations, activationFn="ReLU"):
        
        self.batchSize = 50
        
        self.layers = layers # total number of hidden layers
        
        self.nodes = nodes
        # an int array of size [0, ..., Layers + 1]
        # Nodes[0] shall represent the input size (typically 50)
        # Nodes[Layers + 1] shall represent the output size (typically 1)
        # all other Nodes represent the number of nodes (or width) in the hidden layer i
        
        self.nnodes = [nodes[0], nodes[1], nodes[2]]
        # alternative to nodes where each hidden layer of the nueral network is the same size
        
        self.activations = activations
        # activations[0] and activations[Layers + 1] are left unused
        # activations[i] values are labels indicating the activation function used in layer i
        
        self.activationFn = activationFn
        
        self.ACTIVATIONS = {'relu': inplace_relu, 
                            'tanh': inplace_softmax}
        
        self.DERIVATIVES = {'relu': inplace_relu_derivative,
                            'tanh': inplace_tanh_derivative}
        
    def squared_loss(y_true, y_pred):
        """Compute the squared loss for regression.
        Parameters
        ----------
        y_true : array-like or label indicator matrix
            Ground truth (correct) values.
        y_pred : array-like or label indicator matrix
            Predicted values, as returned by a regression estimator.
        Returns
        -------
        loss : float
            The degree to which the samples are correctly predicted.
        """
        return ((y_true - y_pred) ** 2).mean() / 2
    
    
    def d_L(self, y_pred, y_true):
        """ Compute the squared loss for regression.
        """
        return (y_pred - y_true)/self.batchSize
    
    def inplace_relu(X):
        """Compute the rectified linear unit function inplace.
        Parameters
        ----------
        X : {array-like, sparse matrix}, shape (n_samples, n_features)
            The input data.
        """
        np.maximum(X, 0, out=X)
        
    def inplace_relu_derivative(Z, delta):
        """Apply the derivative of the relu function.
        It exploits the fact that the derivative is a simple function of the output
        value from rectified linear units activation function.
        Parameters
        ----------
        Z : {array-like, sparse matrix}, shape (n_samples, n_features)
            The data which was output from the rectified linear units activation
            function during the forward pass.
        delta : {array-like}, shape (n_samples, n_features)
             The backpropagated error signal to be modified inplace.
        """
        delta[Z == 0] = 0
    
    def inplace_tanh(X):
        """Compute the hyperbolic tan function inplace.
        Parameters
        ----------
        X : {array-like, sparse matrix}, shape (n_samples, n_features)
            The input data.
        """
        np.tanh(X, out=X)
        
    def inplace_tanh_derivative(Z, delta):
        """Apply the derivative of the hyperbolic tanh function.
        It exploits the fact that the derivative is a simple function of the output
        value from hyperbolic tangent.
        Parameters
        ----------
        Z : {array-like, sparse matrix}, shape (n_samples, n_features)
            The data which was output from the hyperbolic tangent activation
            function during the forward pass.
        delta : {array-like}, shape (n_samples, n_features)
             The backpropagated error signal to be modified inplace.
        """
        delta *= (1 - Z ** 2)
    
    # x = B * M (batch of data points)
    def L(self, x, y):
        avgLoss = 0
        for i in range(self.batchSize):
            avgLoss += L2Loss(x[i], y[i])
        return avgLoss
    
    def activationFunction(self, x, a=.1):
        val = 0
        if self.activationFn == "ReLU":
            val = max(0, x)
        elif self.activationFn == "leakyReLU":
            if x > 0: val = x
            else: val = a * x
        return val
            
    def d_activationFunction(self, x, a=.1):
        val = 0
        if self.activationFn == "ReLU":
            if x > 0: val = 1
            else: val = 0
        elif self.activationFn == "leakyReLU":
            if x > 0: val = 1
            else: val = a
        return val
    
    
    # x = B * M (batch of data points)
    # apply activation function element wise to x
    def applyActivationFn(self, x):
        activationFnVec = np.vectorize(self.activationFunction)
        return activationFnVec(x)
        
    # loss over batch
    # x = B * 1 = vector of predictions
    # y = B * 1 = vector of labals
    # output L(x) has size 1
    def jacobianLossLayer(self, x, y):
        J = {}
        B = len(x)
        for i in range(B):
            J[i] = d_L(x, y, B)
        return pd.series(J)
    
    # x has size B * M
    # output σ(x) has size B * M
    
    def jacobianActivationLayer(self, x, J):
        new_x = x.applymap(d_activationFunction).copy()
        J = J * new_x
        return J
    
    def jacobianDenseLayerInput(self, x):
        pass
    
    def jacobianDenseLayerWeights(self, x):
        pass
    
    def fit(self, Xtrain, ytrain):
        df_train = Xtrain.copy()
        response = ytrain.name
        df_train[response] = ytrain
        
        df_train_batch = df_train.sample(self.batchSize) # get batch
        X = df_train_batch.drop(response, axis=1)
        X["ones"] = 1 # add ones for bias
        y = df_train_batch[response]
        
        X = X.to_numpy()
        y = y.to_numpy()
        
        M = X.shape[1]
        N = nodes[1]
        
        # weights = M * N
        w1 = np.random.normal(size=(M, N)) # initalize weights
        w1 = np.round(w1, 2)
        w1[M-1:] = 0 # initialize biases to 0
        
        h1 = X.dot(w1) # first hidden layer
        
        h1_activation = self.applyActivationFn(h1) # hidden layer
        
        w2 = np.random.normal(size=N) # initialize weights
        w2 = np.round(w2, 2)
        w2[N - 1] = 0 # initialize bias to 0
        
        z = h1.dot(w2) # output layer
        
        return z
        
        
        
        
        
        
            
    

        
    
        

In [37]:
layers = 1
nodes = [50, 4, 1]
activations = [None, "ReLU", None]

NN = NeuralNetwork(layers, nodes, activations)
print("layers:", NN.layers)
print("nodes:", NN.nodes)
print("activations:", NN.activations)
print("activationFn:", NN.activationFn)

z = NN.fit(X, y)
print("dimensitons:", h1.shape)
z[:5]

layers: 1
nodes: [50, 4, 1]
activations: [None, 'ReLU', None]
activationFn: ReLU
dimensitons: (50,)


array([ 817.07153389, -413.37169749, -215.75419819, -412.94675718,
        109.30279391])