In [5]:
import numpy as np  

In [6]:
def hypothesis(theta, X, logistic=False):
    if logistic:
        return 1/(1 + np.exp(-np.matmul(X, theta)))
    else:
        return np.matmul(X, theta)
        

In [7]:
def cost_function(X, Y, theta, learning_rate, regularization, logistic):
    """Calculates the cost and new theta for the given parameters"""
    
    # Update theta using the grdient descent formula.
    # Remember this requires simultaneous updating.
    prediction = hypothesis(theta, X, logistic)

    m = X.shape[0]

    difference = prediction - Y       

    # Calculate the sum.
    sigma = np.dot((difference)[:,0], X).reshape((-1,1))

    theta_0 = theta[0:1, :] - ((learning_rate / m) * sigma[0:1, :])
    theta_rest = theta[1:, :] * \
                    (1 - (learning_rate * regularization / m)) - ((learning_rate / m) * sigma[1:,:])
    
    # We do not choose the first feature.
    square_theta = theta[1:, :] * theta[1:, :]
    square_theta = np.sum(square_theta, axis=0)
    
    if not logistic:           
        
        square_difference = difference * difference
        
        sigma = square_difference.sum()

        
        cost = (1/(2*m)) * (sigma + regularization * square_theta)
        
    else:
        
        # Calculate the cost using the logistic function.
        log_hypoth = np.log(prediction)
      
        sigma = (Y * log_hypoth + (1-Y) * np.log(1 - prediction)).sum()
        
        cost = - (((1/m) * (sigma)) + (regularization/(2*m)) * square_theta)      

    return cost, np.insert(theta_rest, 0, theta_0, axis=0)


In [8]:
def gradient_descent(X, Y, learning_rate=0.01, max_iters=1000, regularization=0.1, logistic=False):
    """Returns the optimum theta using gradient descent.
       Remember that you may need to Op&miza&on    algorithms:    normalize your data.
       We use regu"""
    
    print("Learning rate", learning_rate)
    
    # Number of training Accuracye examples.
    m = X.shape[0]

    # When true we stop calculating the opAccuracytimum value of theta.
    done = False
    
    # We declare convergence if J(theta) decreases by less than below.
    convergence = 10 ** -3
    
    # Assign theta to be a value of 0s initially.
    theta = np.array([0 for x in range(X.shape[1] + 1)], dtype='float')
    theta = np.reshape(theta, (-1,1))
    # There should be as many thetas as features.
    
    
    # Need to add ones to X
    ones = np.ones((1,m))
    
    # Append the columns of ones to x
    X = np.insert(X, 0, ones, axis=1)        
    
    for i in range(max_iters):
        
        # Output if the maximum number of iterations has been reached.
        if i == max_iters - 1:
            print("The maximum number of iterations has been reached!")
        
        # Need to copy theta
        prev_theta = theta
    
        
        cost, theta = cost_function(X, Y, theta, learning_rate, regularization, logistic)
    
        
        if abs((theta - prev_theta).max(axis=0)) < convergence:
            print("Converged!")
            break            
        
    return X, theta

In [9]:
X = [[1], [2], [3], [4], [5], [6], [7], [8]]
Y = [[3], [6], [9], [12], [15], [18], [21], [24]]

X = np.array(X, dtype='float')
Y = np.array(Y, dtype='float')
gradient_descent(X, Y, regularization=0.0001)

Learning rate 0.01
Converged!


(array([[1., 1.],
        [1., 2.],
        [1., 3.],
        [1., 4.],
        [1., 5.],
        [1., 6.],
        [1., 7.],
        [1., 8.]]),
 array([[0.49266742],
        [2.91039308]]))

In [10]:
def predict_class(prob):
    """Returns an array of the hypothesis."""
    return np.where(prob > 0.5, 1, 0)

In [11]:
def feature_scaling(X):
    """Uses mean normalization to scale the input."""
    
    # Calculate the mean of each feature.
    X_mean = np.mean(X, axis=0).reshape((-1,1))
    X_std = np.std(X, axis=0).reshape((-1,1))

    # If the standard deviation is 0 then we encounter problems.
    if X_std.min(axis=0)[0] == 0:
        return None
    
    return (X - X_mean.T) / X_std.T 

In [12]:
def logistic_classification_multi(X, Y, learning_rate=0.01, regularization=0.01, max_iters=1000):
    """Performs logistic regression multiclass classification"""
    # We need to find the classes in Y
    classes = np.unique(Y.flatten())
    
    # Iterate over the classes. Find the class with the highest probabiity.
    # We predict that class as the class that we are going to use for predictions.

    # Probabilities of each class.
    probabilities = [[0,0] for i in range(Y.shape[0])]

    for i in range(classes.shape[0]):
        # Choose close i as the positive class and the others as the negative class.
        Y_temp = np.where(Y == i, 1, 0)
        
        X_temp, theta_temp = gradient_descent(X, Y_temp, learning_rate=learning_rate, 
                                              logistic=True, max_iters=max_iters, regularization=regularization)
        probs = hypothesis(theta_temp, X_temp, True)
        temp_classes = predict_class(probs)
                
        for j in range(len(probabilities)):
            if probs[j, 0] > probabilities[j][0]:
                probabilities[j] = [probs[j, 0], i]
                
    return probabilities
      
X = [[3], [4], [5], [3], [4], [5], [3], [4], [5], [3], [4], [5], [3], [4], [5], [3], [4], [5]]
Y = [[0], [1], [2], [0], [1], [2], [0], [1], [2], [3], [4], [5], [3], [4], [5], [3], [4], [5]]

X = np.array(X, dtype='float')
Y = np.array(Y, dtype='float')

    
logistic_classification_multi(X, Y, learning_rate=0.5, regularization=0.1, max_iters=10000)

Learning rate 0.5
Converged!
Learning rate 0.5
Converged!
Learning rate 0.5
Converged!
Learning rate 0.5
Converged!
Learning rate 0.5
Converged!
Learning rate 0.5
Converged!


[[0.43525346375480567, 0],
 [0.1711736992260901, 1],
 [0.38376150131459924, 2],
 [0.43525346375480567, 0],
 [0.1711736992260901, 1],
 [0.38376150131459924, 2],
 [0.43525346375480567, 0],
 [0.1711736992260901, 1],
 [0.38376150131459924, 2],
 [0.43525346375480567, 0],
 [0.1711736992260901, 1],
 [0.38376150131459924, 2],
 [0.43525346375480567, 0],
 [0.1711736992260901, 1],
 [0.38376150131459924, 2],
 [0.43525346375480567, 0],
 [0.1711736992260901, 1],
 [0.38376150131459924, 2]]

In [88]:
def logistic_classification_multi(X, Y, learning_rate=0.01, regularization=0.01, max_iters=1000):
    X, theta = gradient_descent(X, Y, learning_rate=learning_rate, logistic=True, max_iters=max_iters, 
                               regularization=regularization)
    probs = hypothesis(theta, X)
    return predict_class(probs)

logistic_classification_multi(X, Y)

Learning rate 0.01
Converged!


array([[0],
       [1],
       [0],
       [1],
       [0],
       [1],
       [0],
       [1]])

In [293]:
class NeuralNetwork:
    
    def __init__(self, X, Y, num_features, *layers):
        """Sets up the hideen layers in the network and the inital values of theta."""
        
        self.number_of_layers = len(layers) + 1
        self.size_of_layers = list(layers)
        self.size_of_layers.insert(0, num_features)
        
        self.theta = []
        self.nodes = []
        
        self.num_features = num_features
        
        self.X = X
        self.Y = Y
        
        self.regularization = 0.1
        self.learning_rate = 0.01
        
        self.max_iters = 1000
        
        
    def sigmoid_activation(self, np_array):
        """Apply sigmoid activation on the given layer."""
        return 1/(1 + np.exp(-np_array))

    def forward_propagation(self, X_inp):
        """Calcules the hypothesis via feed forward propagation."""
           
        # Insert the X_input into X
        self.nodes[0][1:, :] = X_inp

        for i in range(len(self.theta)):
            if i < len(self.theta) - 1:
                self.nodes[i+1][1:, :] = np.matmul(self.theta[i], self.nodes[i])
                self.nodes[i+1][1:, :] = self.sigmoid_activation(self.nodes[i+1][1:, :])
            else:
                self.nodes[i+1] = np.matmul(self.theta[i], self.nodes[i])
                self.nodes[i+1] = self.sigmoid_activation(self.nodes[i+1])

        return self.nodes[-1]

       
    
    def backward_propagation(self, hypothesis, Y_test, accumulator):
        """Perform backwards propagation which is used to trin the neural network."""
        
        # Going to do backwards propagation again.
        last_error = hypothesis - Y_test.reshape((-1,1))      
        
        # Create a list of the errors.
        errors = [np.zeros((self.nodes[i].shape)) for i in range(1, len(self.nodes) - 1)]
        errors.append(last_error)
        
        # Now we need to actually perform the backwards propagation.
        for i in reversed(range(1, len(self.nodes) - 1)):
            # No bias unit on the final layer. 
            if i == len(self.nodes) - 2:
                temp_error = np.matmul(self.theta[i].T, errors[i])
            else:
                temp_error = np.matmul(self.theta[i].T, errors[i][1:, :])
                    
            errors[i - 1] = temp_error * (self.nodes[i] * (1 - self.nodes[i]))
        
            
            
        # Now we have a list of all the errors.
        for i in range(len(accumulator)):
            if i == len(accumulator) - 1:
                accumulator[i] = accumulator[i] + np.matmul(errors[i], self.nodes[i].T)  
            else:
                accumulator[i] = accumulator[i] + np.matmul(errors[i][1:, :], self.nodes[i].T)
   
        return accumulator

    def gradient_descent(self, delt):
        """This method performs gradient descent using the given delt."""
        
        max_descent = 0
        prev_theta = self.theta
        
        for i in range(len(delt)):
            self.theta[i] = self.theta[i] - self.learning_rate * delt[i]
            difference = prev_theta[i] - self.theta[i]
            
            # Get the maximum
            if np.max(difference) > max_descent:
                max_descent = np.max(difference)
                
        if max_descent < 10 ** -4:
            print("Converged!")
            return True
        
        return False
                
        
            
    def predict(self, X):
        predictions = self.forward_propagation(X)
        
        prediction = np.where(predictions > 0.5, 1, 0)
        
        return prediction
        
            
    
        
    def train_neural_network(self, X, Y):
        """Trains the neural network model. 
        Layers passed in contains two keys,
        number of layers and array containing
        the size of each layer."""
        
        # Now we need to instantiate the layers.
        # We need to use random initialization this time.
        # Zero initialization will not work.

        # We're going to find epsilon for random initialisation
        epsilon = 2

        self.theta = []
        self.nodes = [] # We're also creating the hidden node matrices.

        # Append the X input nodes into the activation nodes.
        self.nodes.append(np.array([0 for i in range(self.num_features)], dtype='float').reshape((-1,1)))
        # We are using a list and not a 2 dimensional matrix as a two dimensional
        # so that we can store different number of nodes in different layers.
        self.nodes[0] = np.insert(self.nodes[0], 0, 1).reshape((-1,1))

        for i in range(self.number_of_layers - 1):
            self.theta.append(np.random.rand(self.size_of_layers[i+1], 
                                        self.size_of_layers[i] + 1) * (2*epsilon) - (epsilon))
            self.nodes.append(np.zeros(self.size_of_layers[i+1]))
            
            if i != self.number_of_layers - 2:
                self.nodes[i+1] = np.insert(self.nodes[i+1], 0, 1).reshape((-1,1)) 
                # Do not want to insert a 1 in the last layer
            else:
                self.nodes[i+1] = self.nodes[i+1].reshape((-1,1))
                
        # We need an accumulator.
        accumulator = [np.zeros((self.theta[i].shape[0], self.theta[i].shape[1])) \
                       for i in range(len(self.theta))]        
    
        # Now we need to perform forward propagation.
        # Perform backward propagatio on all the examples.
        for j in range(self.max_iters):
            for i in range(X.shape[0]):
                
                hypothesis = self.forward_propagation(X[i, :].reshape((-1,1)))
                accumulator = self.backward_propagation(hypothesis, Y[i, :], accumulator)

            m = X.shape[0]

            delt = [(1/m) * accumulator[i] for i in range(len(self.theta))]

            # Add regularization. We do not regularize the bias term.
            for i in range(len(delt)):
                delt[i][:, 1:] = delt[i][:, 1:] + self.regularization * self.theta[i][:, 1:]


            done = self.gradient_descent(delt)
            
            if j == self.max_iters - 1:
                print("The maximum number of iterations has been reached!")
            
            if done:
                print("Converged!")
                break
            
        
            
      
X = []
Y = []
        
for i in range(1000):
    if i % 2 == 0:
        X.append([1])
        Y.append([0])
    else:
        X.append([2])
        Y.append([1])
        



X = np.array(X, dtype='float')
Y = np.array(Y, dtype='float')       
    
# l and sizes must match
nn = NeuralNetwork(X, Y, X.shape[1], 5, 5, Y.shape[1])
nn.train_neural_network(X, Y)

example = np.array([[1]])


nn.predict(np.array([2]))

Converged!
Converged!


array([[0]])