# My Neural Network
The following code implements a neural network from scratch using only `numpy`.

## Implementation of Activation functions

In [13]:
import numpy as np

class Activation:
    def __init__(self, f, df):
        """
        Initialize Activation object. Represents the activation fn for a layer
        
        :params f: function that takes column vector and returns column vector
        :params df: corresponding derivative of f
        """
        self.f = f
        self.df = df
    
    def call(self, x):
        return self.f(x)
    
    def call_df(self ,x):
        return self.df(x)

In [153]:
def relu(x):
    """
    ReLU activation function

    :params x: np vector
    :returns: np vector
    """
    x[x < 0] = 0

    return x
def d_relu(x):
    """
    ReLU derivative
    
    :params x: np vector of shape mx1
    :returns: np matrix of shape mxm
    """
    x = relu(x)
    x[x > 0] = 1
    
    return np.diag(x.T[0])

def sigmoid(x):
    """
    Sigmoid activation function

    
    :params x: np vector
    :returns: np vector
    """    
    return 1/(1+np.exp(-x))

def d_sigmoid(x):
    """
    Sigmoid derivative
    
    :params x: np vector of shape mx1
    :returns: np matrix of shape mxm
    """
    exp = np.exp(-x)
    return np.diag((exp/(1+exp)**2).T[0])

def linear(x):
    """
    Linear activation function
    
    :params x: np vector of shape mx1
    :returns: np vector of shape mx1
    """
    return x

def d_linear(x):
    """
    Linear derivative
    
    :params x: np vector of shape mx1
    :returns: np matrix of shape mxm
    """
    m, n = x.shape
    return np.identity(m)

activation_functions = {
    "relu": Activation(relu, d_relu),
    "sigmoid": Activation(sigmoid, d_sigmoid),
    "linear": Activation(linear, d_linear)
}

## Implementation of a loss function

In [15]:
class Loss:
    def __init__(self, f, df):
        """
        Initialize Loss object. Represents the loss function for a model
        
        :params f: fn taking ans cv and prediction cv and return loss (1, 1)
        :params df: corresponding derivative of f w.r.t prediction
        """
        self.f = f
        self.df = df
    
    def call(self, g, a):
        """
        :params g: guesses of model. (n, 1)
        :params a: answers to guesses. (n, 1)
        """
        return self.f(g, a)
    
    def call_df(self, g, a, x):
        return self.df(g, a, x)

In [16]:
def hinge_loss(g, a):
    """
    Hinge loss
    
    :params g: column vector of guesses for all data pt
    :params a: column vector of answers
    """
    x = 1 - np.multiply(g, a)
    x[x < 0] = 0
    
    return np.sum(x)

def d_hinge_loss(g, a, x):
    """
    Hinge loss derivative for single data point
    
    :params g: model guess
    :params a: correct answer
    :params x: column vector for data pt of shape mx1
    :returns: column vector of shape mx1
    """
    m, n = x.shape
    if a*g > 1:
        return np.zeros((m, 1))
    else:
        return -a*x

def NLL(g, a):
    """
    Negative least likelihood
    
    :params g: column vector of guesses for all data pt
    :p
    """
    

loss_functions = {
    "hinge": Loss(hinge_loss, d_hinge_loss)
}

## Implementation of a neural network layer

In [184]:
class Layer:
    def __init__(self, w, af, output=False):
        """
        Initialize a Layer object.
        Only can perform stochastic gradient descent
        
        :params w: mxn np matrix where m is input vec length, n is no. of units
        :params af: activation function object
        :params output: boolean for whether layer is output layer or intermediate
        """
        m, n = w.shape
        
        self.weights = w
        self.activation_fn = af
        
        self.input_dim = (m, 1)
        self.output_dim = (n, 1)
        
        self.output_layer = output
    
    def call(self, x):
        """
        run input vector through layer

        :params x: column vector of size self.input_dim
        :returns: column vector of size ouput_dim
        """
        if x.shape != self.input_dim:
            raise Exception("Wrong input dimension")
        return self.activation_fn.call(self.weights.T.dot(x))
    
    def backprop_call(self, x):
        """
        run input vector through layer to produce outputs for backprop
        
        :params x: column vector of size self.input_dim
        :returns: tuple (z, a), where z is tf by units, and a is activation
        """
        if x.shape != self.input_dim:
            raise Exception("Wrong input dimension")
        z = self.weights.T.dot(x)
        a = self.activation_fn.call(z)
        
        return(z, a)
    
    def dadz(self, z):
        """
        Get dadz for backprop. Refer to the math
        
        :params z: tf from this layer units
        :returns: nxn matrix
        """
        return self.activation_fn.call_df(z)
        
    
    def dldz(self, dldz_front, w_front, z):
        """
        get dldz for backprop. Refer to the math.
        
        :params dldz_front: column vector of dldz for layer in front (k, 1)
        :params w_front: weights of layer in front (n, k)
        :params z: output of current layer tf for the given data point (n, 1)
        :returns: column vector of dldz (n, 1)
        """
        if self.output: raise Exception("Output layer")
        return activation_fn.call_df(z).dot(w_front).dot(dldz_front)
        
    
    def dldw(self, a, dldz):
        """
        get gradient of weights for backprop. Refer to the math.
        
        :params dldz: (n, 1)
        :params a: input from layer below (m, 1)
        :returns: column vector of dldw (m, n)
        """
        return a.dot(dldz.T)
    
    
    def sgd_step(self, dldw, step_size):
        """
        update weights according to given gradient and step size
        
        :params dldw: gradient of current weights w.r.t data pt (m, n)
        :params step_size: step size to take. number
        :returns: True
        """
        self.weights = self.weights - dldw*step_size
        

## Neural Network Implementation

In [269]:
class NeuralNetwork:
    def __init__(self, params):
        """
        create a neural network
        
        :params params: parameters of neural network
        """
        self.loss_fn = loss_functions[params["loss"]]
        
        def create_layer(input_dim, unit_no, af, output=False):
            #no gain
            w = np.random.normal(size=(input_dim, unit_no))
            return Layer(w, af, output)
        
        self.layers = []
        for idx, layer in enumerate(params["layers"]):
            unit_no, af = layer
            layer_obj = None
            if idx == 0:
                layer_obj = create_layer(\
                                       params["input_dim"],\
                                       unit_no,\
                                       activation_functions[af]
                                      )
            else:
                unit_no_before, _ = params["layers"][idx - 1]
                layer_obj = create_layer(\
                                         unit_no_before,\
                                         unit_no,\
                                         activation_functions[af]
                                         )
            if idx == len(params["layers"]) - 1:
                layer_obj.output_layer = True
                                         
            self.layers.append(layer_obj)
            
    def run(self, X):
        """
        Run data through the neural network
        
        :params X: mxn matrix where m is data dim and n is no of data pt
        """
        m, n = X.shape
        Y = []
        #X = np.concatenate((X, np.ones((1, n))), axis=0)
        for x in X.T:
            x = np.array([x]).T
            for layer in self.layers:
                x = layer.call(x)
            Y.append(x[0])
        return np.array(Y)
        
    def train(self, data, labels, T=1000):
        """
        Train model using stochastic gradient descent
        
        :params data: mxn matrix where m is data dim and n is no. of data pt
        :params labels: 1xn matrix for the labels
        """
        m, n = data.shape
        d_loss = self.loss_fn.call_df
        
        step_fn = lambda t: 1/(t+1)
        for t in range(T):
            i = np.random.randint(n)
            data_pt = np.array([data.T[i]]).T
            label = np.array([labels[0,i]])
            #print(label)
            layer_outputs = [(None, data_pt)]
            x = data_pt
            #Forward pass to compute layer outputs
            for layer in self.layers:
                z, a = layer.backprop_call(x)
                x = a
                layer_outputs.append((z, a))
            prev_dldz = None
            next_weights = None
            _, last_a = layer_outputs[-1]
            pred = 1
            if last_a <= 0:
                pred = -1
            #print("prediction:", pred, "actual:", label)
            x = data_pt
            for idx in range(len(self.layers)-1, -1, -1):
                z, a = layer_outputs[idx+1]
                layer = self.layers[idx]
                _, prev_a = layer_outputs[idx]
                #prev_a = np.array([np.append(prev_a.T[0], 1)]).T
                if layer.output_layer:
                    print("guess: ",a[0,0],", label: ", label[0],",data pt: ", x)
                    dlda = d_loss(a[0,0], label[0], x)
                    dadz = layer.dadz(z)
                    dldz = dadz.dot(dlda)
                    dldw = prev_a.dot(dldz.T)
                    print("dlda", dlda, "dadz", dadz, "dldw", dldw)
                    next_weights = np.copy(layer.weights)
                    layer.sgd_step(dldw, step_fn(t))
                    prev_dldz = dldz
                else:
                    dadz = layer.dadz(z)
                    dldz = prev_dldz.dot(next_weights.T).dot(dadz)
                    prev_dldz = dldz
                    dldw = prev_a.dot(dldz)
                    next_weights = np.copy(layer.weights)
                    #print("old", idx, next_weights)
                    layer.sgd_step(dldw, step_fn(t))
                    #print("new", idx, layer.weights)
                #print('old weights', next_weights)
        print("Training complete")

                    
        
        
        

## Testing Model
Test model on dataset:
```
data: [-1, -1, 1, 1, ...]
labels: [-1, -1, 1, 1, ...]
```

In [270]:
def accuracy(g, a):
    """
    metric for how many guesses are correct
    """
    score = 0
    n, _ = g.shape
    for i, x in enumerate(g):
        if x[0] == a[0, i]:
            score += 1
    return score/n

    

In [274]:
example_params = {
    "loss": "hinge",
    "layers": [(2, "relu"),(2, "relu"),(1, "linear")],
    "input_dim": 1
}

nn = NeuralNetwork(example_params)

untrained_acc = 0
for i in range(30):
    labels = np.random.rand(1, 1000)
    labels[labels > 0.5] = 1
    labels[labels <= 0.5] = -1
    g = nn.run(labels)
    g[g > 0] = 1
    g[g<=0] = -1
    predictions = g
    untrained_acc += accuracy(g, labels)
print("Untrained average accuracy:", untrained_acc/30)

Untrained average accuracy: 1.0


In [275]:
data = np.random.rand(1, 10000)
data[data > 0.5] = 1
data[data <= 0.5] = -1
nn.train(data, data, T=10)

guess:  0.14128915889491445 , label:  1.0 ,data pt:  [[1.]]
dlda [[-1.]] dadz [[1.]] dldw [[-0.0106326 ]
 [-0.07929505]]
guess:  4.340532508598916 , label:  1.0 ,data pt:  [[1.]]
dlda [[0.]] dadz [[1.]] dldw [[0.]
 [0.]]
guess:  4.340532508598916 , label:  1.0 ,data pt:  [[1.]]
dlda [[0.]] dadz [[1.]] dldw [[0.]
 [0.]]
guess:  4.340532508598916 , label:  1.0 ,data pt:  [[1.]]
dlda [[0.]] dadz [[1.]] dldw [[0.]
 [0.]]
guess:  0.0 , label:  -1.0 ,data pt:  [[-1.]]
dlda [[-1.]] dadz [[1.]] dldw [[0.]
 [0.]]
guess:  4.340532508598916 , label:  1.0 ,data pt:  [[1.]]
dlda [[0.]] dadz [[1.]] dldw [[0.]
 [0.]]
guess:  4.340532508598916 , label:  1.0 ,data pt:  [[1.]]
dlda [[0.]] dadz [[1.]] dldw [[0.]
 [0.]]
guess:  0.0 , label:  -1.0 ,data pt:  [[-1.]]
dlda [[-1.]] dadz [[1.]] dldw [[0.]
 [0.]]
guess:  4.340532508598916 , label:  1.0 ,data pt:  [[1.]]
dlda [[0.]] dadz [[1.]] dldw [[0.]
 [0.]]
guess:  0.0 , label:  -1.0 ,data pt:  [[-1.]]
dlda [[-1.]] dadz [[1.]] dldw [[0.]
 [0.]]
Training com

In [260]:
trained_acc = 0
for i in range(30):
    labels = np.random.rand(1, 10)
    labels[labels > 0.5] = 1
    labels[labels <= 0.5] = -1
    g = nn.run(labels)
    g[g > 0.5] = 1
    g[g<=0.5] = -1
    predictions = g
    trained_acc += accuracy(g, labels)
print("Trained average accuracy:", trained_acc/30)

Trained average accuracy: 0.49000000000000005
