In [1]:
import numpy as np
import matplotlib.pyplot as plt
import sys

In [2]:
class Neuron:
    """Neuron in neural network
    
    __init__(num_in,bias)
        num_in = number of features to input to neuron (creates the number of weights on the neuron)
        bias = True if a bias term should be included for w*x+b 
               False if no bias term should be included for w*x+b
               default=True
           

    forward(x)
        x = input to the neuron for w*x + b
    
    
    backward(gradient_l)
        gradient_l = the upstream gradient for backpropogation
        gradient_x = the local gradient of input x
        gradient_w = the local gradient of the neuron weights w
        
        return the local gradients * the upstream gradient
    """
    
    def __init__(self,num_in,bias=True):
        self.gradient_x = None
        self.gradient_w = None
        
        if bias:
            num_in = num_in + 1
            
        self.weights = np.random.uniform(-1,1,size=(num_in,1))
        self.x = None
        self.bias=bias
    
    def forward(self,x):
        if self.bias == True:
            x = np.concatenate((x,[[1]]),axis=1)
            
        self.x = x
        return np.matmul(x,self.weights)
                                
    def backward(self,gradient_l=1):
        if self.x is not None:
            self.gradient_x = self.weights.transpose()*gradient_l
            self.gradient_w = self.x.transpose()*gradient_l
            return self.gradient_x,self.gradient_w
        else:
            return None          

In [3]:
class ReLu:
    """ReLu Activation function
    
    forward(x)
    x = the input to sigmoid function max(0,x)
        
    backward(gradient_l)
        gradient_l = the upstream gradient for backpropogation
        self.gradient = the local gradient of the ReLu activation function
        
        return the local gradients * the upstream gradient
    """
    def __init__(self):
        self.gradient = None
        self.x = None
    
    def forward(self,x):
        self.x = x
        if x > 0:
            return x
        else:
            return 0
    
    def backward(self,gradient_l=1):
        if self.x is not None:
            if self.x > 0:
                self.gradient = gradient_l
                return gradient_l
            else:
                self.gradient = sys.float_info.epsilon
                return np.array([[sys.float_info.epsilon]])   
        else:
            return None

In [4]:
class Sigmoid:
    """Sigmoid Activation function
    
    forward(x)
        x = the input to sigmoid function f(x) = 1 / (1+exp(-x))
        
    backward(gradient_l)
        gradient_l = the upstream gradient for backpropogation
        self.gradient = the local gradient of the Sigmoid activation function
        
        return the local gradients * the upstream gradient
    """
    def __init__(self):
        self.gradient=None
        self.x = None
        
    def forward(self,x):
        self.x = x
        return 1/(1+np.exp(-x))
    
    def backward(self,gradient_l=1):
        if self.x is not None:
            self.gradient = 1/(1+np.exp(-self.x))*(1-1/(1+np.exp(-self.x)))*gradient_l
            return self.gradient
        else:
            return None

In [5]:
class MSE:
    """
    Binary Cross Entropy function
    
    forward(y_hat,y)
        y_hat = the predicted classification probability 0-1
        y = the training example actual label 0 or 1
        binary cross entropy = -(y*ln(y_hat) + (1-y)*ln(1-y_hat))
        
    backward(gradient_l)
        gradient_l = the upstream gradient for backpropogation
        self.gradient = the local gradient of the MSE
        
        return the local gradients * the upstream gradient
    """
    def __init__(self):
        self.gradient = None
        self.y = None
        self.y_hat = None
        
    def forward(self,y_hat,y):
        self.y_hat = y_hat
        self.y = y
        return (y_hat-y)**2
    
    def backward(self,gradient_l=1):
        if self.y is not None and self.y_hat is not None:
            self.gradient = -2 * (self.y-self.y_hat)
            return self.gradient
        else:
            return None
        
    def __str__(self):
        return "loss"

In [6]:
class BCE:
    """Binary Cross Entropy function
    
    forward(y_hat,y)
        y_hat = the predicted classification probability 0-1
        y = the training example actual label 0 or 1
        binary cross entropy = -(y*ln(y_hat) + (1-y)*ln(1-y_hat))
        
    backward(gradient_l)
        gradient_l = the upstream gradient for backpropogation
        self.gradient = the local gradient of the BCE 
        
        return the local gradients * the upstream gradient
    """
    def __init__(self):
        self.gradient = None
        self.y = None
        self.y_hat = None
    
    def forward(self,y_hat,y):
        self.y_hat = y_hat
        self.y = y
        return -(y*np.log(max(sys.float_info.epsilon,y_hat-sys.float_info.epsilon)) + \
                 (1-y)*np.log(1-max(sys.float_info.epsilon,y_hat-sys.float_info.epsilon)))
    
    def backward(self,gradient_l=1):
        if self.y is not None and self.y_hat is not None:
            self.gradient = -self.y*1/(max(sys.float_info.epsilon,self.y_hat)) + \
                              1/max(sys.float_info.epsilon,1-self.y_hat)*(1-self.y)
            return self.gradient
        else:
            return None
        
    def __str__(self):
        return "loss"

In [7]:
class CE:
    """Cross Entropy function
    
    forward(y_hat,y)
        y_hat = the predicted classification probability 0-1
        y = the training example actual label 0 or 1
        binary cross entropy = -y_vector * ln(y_hat_vector)
        
    backward(gradient_l)
        gradient_l = the upstream gradient for backpropogation
        self.gradient = the local gradient of the CE 
        
        return the local gradients * the upstream gradient
    """
    def __init__(self):
        self.gradient = None
        self.y = None
        self.y_hat = None
        
    def forward(self,y_hat,y):
        self.y_hat = y_hat
        self.y = y
        return -np.sum(y_hat*y)
    
    def backward(self,gradient_l=1):
        if self.y is not None and self.y_hat is not None:
            self.gradient = -self.y*1/np.maximum(sys.float_info.epsilon,self.y_hat)
            return self.gradient
        else:
            return None
        
    def __str__(self):
        return "loss"

In [8]:
class softmax:
    """Softmax function
    
    forward(o)
        o = the output of a fully connected layer for softmax normalization 
        output = exp(oi) / sum over all exp([o1,o2,...on])
        
    backward(gradient_l)
        gradient_l = the upstream gradient for backpropogation
        self.gradient = the local gradient of the softmax
        
        return the local gradients * the upstream gradient
    """
    def __init__(self,n_classes=3):
        self.gradient = None
        self.o = None
        self.p = None
        self.n_classes = 3

    def forward(self,o):
        self.o = o
        self.p = np.exp(o)/np.sum(np.exp(o))
        return self.p
    
    def backward(self,gradient_l=None):
        if self.o is not None:
            # fix size...
            if gradient_l.shape[1] > self.o.shape[1]:
                gradient_l = gradient_l[:,:self.o.shape[1]]
            
            p1 = self.p*(1-self.p)
            self.gradient = -1*np.matmul(self.p.transpose(),self.p)
            np.fill_diagonal(self.gradient,p1)
            self.gradient = np.expand_dims(np.sum(self.gradient*gradient_l,1),0)
            return self.gradient
        else:
            return None
        
    def __str__(self):
        return "softmax"

In [9]:
class neural_layer:
    """Neural Network layer
    
    __init__(input_,output_,bias=True)
        input_ = the number of features in the training example
        output_ = the number of neurons to use in the neural network layer
        bias = True if a bias term should be included for w*x+b 
               False if no bias term should be included for w*x+b
               default=True
    
    forward(x):
        x = input to the neuron for W*x + B
        
    backward(gradient_l):
        gradient_l = the upstream gradient for backpropogation
        gradients has gradient_x and gradient_w for each neuron in the neural network layer
        
        return the local gradients * the upstream gradient
    """
    def __init__(self,input_= 1,output_= 1,bias=True):
        self.output_size = output_
        self.input_size = input_
        self.neurons = [Neuron(input_,bias) for _ in range(output_)]
        
    def forward(self,x):
        outputs = [neuron.forward(x) for neuron in self.neurons]
        return np.concatenate(outputs,1)
    
    def backward(self,gradient_l=None):
        if gradient_l is None:
            return None
        
        weight_gradients = []
        input_gradient = 0
        gradient_l = gradient_l.squeeze(0)  
        
        for i,neuron in enumerate(self.neurons):
            temp_gradient = neuron.backward(gradient_l[i])
            weight_gradients.append(temp_gradient[1])
            input_gradient = input_gradient + temp_gradient[0]
            
        gradients = (input_gradient,weight_gradients)
                    
        return gradients
    
    def __str__(self):
        return "neuron"

In [10]:
class action_layer:
    """Activation layer
    
    __init__(output_,a_type="ReLu")
        output_ = the number of activation units to use in the neural network layer
        a_type = the type of activation unit you want to use: "ReLu" or "Sigmoid"
    
    forward(x):
        x = input to the activation layer
        return the activation evaluation
        
    backward(gradient_l):
        gradient_l = the upstream gradient for backpropogation
        gradients has the local gradients for each activation unit
        
        return the local gradients * the upstream gradient
    """
    def __init__(self,output_= 1,a_type="ReLu"):
        
        self.output_size = output_
        
        if a_type == "ReLu":
            self.activations = [ReLu() for _ in range(output_)]
        if a_type == "Sigmoid":
            self.activations = [Sigmoid() for _ in range(output_)]
        
    def forward(self,x):
        output = []
        x = x.squeeze(0)
        for i,activation in enumerate(self.activations):
            output.append(activation.forward(x[i]))
        return np.expand_dims(output,0)
    
    def backward(self,gradient_l=None):
        if gradient_l is None:
            return None
        
        gradients = []
        gradient_l = gradient_l.squeeze(0)     
                
        
        for i,activation in enumerate(self.activations):
            gradients.append(activation.backward(gradient_l[i]))
            
        return np.expand_dims(gradients,0)
    
    def __str__(self):
        return "activation"

In [11]:
class neural_network:
    """Neural Network
    
    __init__(model=[])
        model = a list [] of all the neural network layer objects and activation layer objects. The forward and backward execution
                is sequential in regards to the order of the list passed in to model
    
    forward(x)
        x = the input training example to the neural network which will pass through all the layers of neural network and activation
            layers to classify or regress
    
    backward(gradient_l)
        gradient_l = the upstream gradient from the loss function to the neural network

        return all the gradients calculated throughout all the layers  
        
    model_weights()
        return a list with all the model weights for each neural network layer with neurons
    """
    def __init__(self,model=[]):
        self.model = model
        self.gradients = []
        
    def forward(self,x):
        for obj in self.model:
            x = obj.forward(x)    
        return x
    
    def backward(self,gradient_l=None):
        self.gradients = []
        for obj in self.model[::-1]:
            if str(obj) == "activation":
                tup = ("activation",obj.backward(gradient_l))
                self.gradients.append(tup)
                gradient_l = tup[1]
            
            elif str(obj) == "neuron":
                tup = ("neuron",obj.backward(gradient_l))
                self.gradients.append(tup)
                
                if gradient_l is None:
                    gradient_l = None
                    continue
                
                gradient_l = tup[1][0]
                
            elif str(obj) == "softmax":
                tup = ("softmax",obj.backward(gradient_l))
                self.gradients.append(tup)
                gradient_l = tup[1]

        self.gradients = self.gradients[::-1]
        return self.gradients
    
    def get_weights(self):
        weights = []
        layer_num = 0
        for obj in self.model:
            if str(obj) == 'neuron':
                layer_weights = []
                for neuron in obj.neurons:
                    layer_weights.append(neuron.weights)
                weights.append(('layer'+str(layer_num),layer_weights))
                layer_num += 1
        return weights
        

In [12]:
class optimizer:
    
    def __init__(self,model = None,alpha=0.05):
        self.model = model
        self.loss_fn = loss_fn
        self.alpha = alpha

    def step(self):
        if model is not None:
            for obj,grad in zip(self.model.model,self.model.gradients):
                if grad[0] == 'neuron':
                    for n,grad_update in zip(obj.neurons,grad[1][1]):
                        n.weights = n.weights - self.alpha*grad_update

# Create Classification Dataset (use the breast cancer dataset)
* Download and use the breast cancer dataset with sklearn
* standard normalize the data

In [13]:
from sklearn.datasets import load_breast_cancer
data = load_breast_cancer()

In [14]:
X = data['data']
X = X - np.mean(X,0)
X = X / np.std(X,0)
Y = data['target']

# Attempt to do logistic regression proof of concept

In [15]:
neuron = Neuron(30)

In [16]:
activation = Sigmoid()

In [17]:
loss_fn = BCE()

In [18]:
alpha = 0.5
print_every = 1
for i in range(10):
    mean_loss = []
    for x,y in zip(X,Y):
        x = np.expand_dims(x,0)

        # forward pass
        output = neuron.forward(x)
        a = activation.forward(output)
        
        loss = loss_fn.forward(a,y)
        mean_loss.append(loss)

        # backward pass
        loss_grad = loss_fn.backward()
        a_grad = activation.backward(loss_grad)
        _,weight_grad = neuron.backward(a_grad)

        # update model
        neuron.weights = neuron.weights - alpha*weight_grad
        
    if i % print_every == 0:
        print("epoch {}: loss = {}".format(i,np.mean(mean_loss)))

epoch 0: loss = [[0.21130652]]
epoch 1: loss = [[0.13074446]]
epoch 2: loss = [[0.15526014]]
epoch 3: loss = [[0.18240245]]
epoch 4: loss = [[0.14471847]]
epoch 5: loss = [[0.1039467]]
epoch 6: loss = [[0.08903964]]
epoch 7: loss = [[0.16790819]]
epoch 8: loss = [[0.11648342]]
epoch 9: loss = [[0.12915032]]


In [19]:
y_predictions = []
for x,y in zip(X,Y):
    x = np.expand_dims(x,0)

    # forward pass
    output = neuron.forward(x)
    a = activation.forward(output)
    
    y_predictions.append(a)

In [20]:
y_pred = np.round(np.array(y_predictions).squeeze())

In [21]:
accuracy = np.mean(y_pred == Y)
print(accuracy)

0.9789103690685413


# Neural Network Classification proof of concept

In [22]:
model = [neural_layer(30,10),softmax(10),neural_layer(10,5),action_layer(5,"Sigmoid"),neural_layer(5,1),action_layer(a_type='Sigmoid')]
nn = neural_network(model)
loss_fn = BCE()

alpha = 0.05

optimize = optimizer(nn,alpha)

In [23]:
print_every = 5
for i in range(100):
    mean_loss = []
    for x,y in zip(X,Y):
        x = np.expand_dims(x,0)

        # forward pass
        output = nn.forward(x)
        loss = loss_fn.forward(output,y)
        mean_loss.append(loss)

        # backward pass
        loss_back = loss_fn.backward()
        nn_back = nn.backward(loss_back)

        # update weights based on gradient
        optimize.step()
    if i % print_every == 0:
        print("epoch {}: loss = {}".format(i,np.mean(mean_loss)))

epoch 0: loss = 0.6001965922262358
epoch 5: loss = 0.0855513015127892
epoch 10: loss = 0.05689399736160389
epoch 15: loss = 0.045669018097616094
epoch 20: loss = 0.03997409560327071
epoch 25: loss = 0.03632225472235258
epoch 30: loss = 0.03326697022480707
epoch 35: loss = 0.029448062135308428
epoch 40: loss = 0.026539116424643086
epoch 45: loss = 0.023702383063878462
epoch 50: loss = 0.02129557995823119
epoch 55: loss = 0.01926942255645019
epoch 60: loss = 0.017489707620058234
epoch 65: loss = 0.012639474556840558
epoch 70: loss = 0.007315344330069655
epoch 75: loss = 0.0045712595510752066
epoch 80: loss = 0.003265488643004485
epoch 85: loss = 0.0025534488927371965
epoch 90: loss = 0.002097836955218203
epoch 95: loss = 0.0017786843291049143


In [24]:
y_predictions = []
for x,y in zip(X,Y):
    x = np.expand_dims(x,0)

    # forward pass
    output = nn.forward(x)
    
    y_predictions.append(output)

In [25]:
y_pred = np.round(np.array(y_predictions).squeeze())

In [26]:
accuracy = np.mean(y_pred == Y)
print(accuracy)

1.0


# Create Classification Dataset (Iris)
* Download and use the breast cancer dataset with sklearn
* standard normalize the data

In [27]:
from sklearn.datasets import load_iris
iris = load_iris()

In [28]:
X = iris.data  # we only take the first two features.
Y = iris.target

In [29]:
print(X.shape)

(150, 4)


In [30]:
X = X - np.mean(X,0)
X = X / np.std(X,0)

In [31]:
n_values = np.max(Y)+1
Y = np.eye(n_values)[Y]

In [32]:
model = [neural_layer(4,10),action_layer(10,"ReLu"),neural_layer(10,5),softmax(5),neural_layer(5,3),softmax(3)]
nn = neural_network(model)
loss_fn = CE()

alpha = 0.05

optimize = optimizer(nn,alpha)

In [33]:
print_every = 5
for i in range(40):
    mean_loss = []
    for x,y in zip(X,Y):
        x = np.expand_dims(x,0)

        # forward pass
        output = nn.forward(x)
        loss = loss_fn.forward(output,y)
        mean_loss.append(loss)

        # backward pass
        loss_back = loss_fn.backward()
        nn_back = nn.backward(loss_back)

        # update weights based on gradient
        optimize.step()
        
    if i % print_every == 0:
        print("epoch {}: loss = {}".format(i,np.mean(mean_loss)))

epoch 0: loss = -0.604351507393727
epoch 5: loss = -0.8962895936953006
epoch 10: loss = -0.9549603063334765
epoch 15: loss = -0.9666086656608385
epoch 20: loss = -0.9634456576638566
epoch 25: loss = -0.9639632463232573
epoch 30: loss = -0.9670948405623269
epoch 35: loss = -0.9560960001618146


In [34]:
y_predictions = []
for x,y in zip(X,Y):
    x = np.expand_dims(x,0)

    # forward pass
    output = nn.forward(x)
    
    y_predictions.append(output)

In [35]:
y_pred = np.argmax(np.array(y_predictions).squeeze(),1)

In [36]:
y_pred

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 2, 1, 2, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2], dtype=int64)

In [37]:
iris.target

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2])

In [38]:
accuracy = np.mean(np.argmax(Y,1) == y_pred)

In [39]:
print(accuracy)

0.9733333333333334
