In [2]:
import numpy as np  

In [61]:
def hypothesis(theta, X, logistic=False):
    if logistic:
        return 1/(1 + np.exp(-np.matmul(X, theta)))
    else:
        return np.matmul(X, theta)
        

In [310]:
def cost_function(X, Y, theta, learning_rate, regularization, logistic):
    """Calculates the cost and new theta for the given parameters"""
    
    # Update theta using the grdient descent formula.
    # Remember this requires simultaneous updating.
    prediction = hypothesis(theta, X, logistic)

    m = X.shape[0]

    difference = prediction - Y       

    # Calculate the sum.
    sigma = np.dot((difference)[:,0], X).reshape((-1,1))

    theta_0 = theta[0:1, :] - ((learning_rate / m) * sigma[0:1, :])
    theta_rest = theta[1:, :] * \
                    (1 - (learning_rate * regularization / m)) - ((learning_rate / m) * sigma[1:,:])
    
    # We do not choose the first feature.
    square_theta = theta[1:, :] * theta[1:, :]
    square_theta = np.sum(square_theta, axis=0)
    
    if not logistic:           
        
        square_difference = difference * difference
        
        sigma = square_difference.sum()

        
        cost = (1/(2*m)) * (sigma + regularization * square_theta)
        
    else:
        
        # Calculate the cost using the logistic function.
        log_hypoth = np.log(prediction)
      
        sigma = (Y * log_hypoth + (1-Y) * np.log(1 - prediction)).sum()
        
        cost = - (((1/m) * (sigma)) + (regularization/(2*m)) * square_theta)      

    return cost, np.insert(theta_rest, 0, theta_0, axis=0)


In [317]:
def gradient_descent(X, Y, learning_rate=0.01, max_iters=1000, regularization=0.1, logistic=False):
    """Returns the optimum theta using gradient descent.
       Remember that you may need to Op&miza&on    algorithms:    normalize your data.
       We use regu"""
    
    print("Learning rate", learning_rate)
    
    # Number of training Accuracye examples.
    m = X.shape[0]

    # When true we stop calculating the opAccuracytimum value of theta.
    done = False
    
    # We declare convergence if J(theta) decreases by less than below.
    convergence = 10 ** -3
    
    # Assign theta to be a value of 0s initially.
    theta = np.array([0 for x in range(X.shape[1] + 1)], dtype='float')
    theta = np.reshape(theta, (-1,1))
    # There should be as many thetas as features.
    
    
    # Need to add ones to X
    ones = np.ones((1,m))
    
    # Append the columns of ones to x
    X = np.insert(X, 0, ones, axis=1)        
    
    for i in range(max_iters):
        
        # Output if the maximum number of iterations has been reached.
        if i == max_iters - 1:
            print("The maximum number of iterations has been reached!")
        
        # Need to copy theta
        prev_theta = theta
    
        
        cost, theta = cost_function(X, Y, theta, learning_rate, regularization, logistic)
    
        
        if abs((theta - prev_theta).max(axis=0)) < convergence:
            print("Converged!")
            break            
        
    return X, theta
 
"""
X = [[1], [4], [3], [9], [22], [133], [33], [92], [44], [54], [66], [22], [27]]
Y = [[2], [8], [6], [18], [44], [266], [66], [184], [88], [108], [132], [44], [54]]
Y = np.array(Y, dtype='float')

"""


#X = np.array(X, dtype='float')

#gradient_descehttps://\.com/questions/51822589/compare-a-numpy-array-to-each-element-of-another-onent(X, Y, learning_rate=0.01)



"\nX = [[1], [4], [3], [9], [22], [133], [33], [92], [44], [54], [66], [22], [27]]\nY = [[2], [8], [6], [18], [44], [266], [66], [184], [88], [108], [132], [44], [54]]\nY = np.array(Y, dtype='float')\n\n"

In [318]:
X = [[1], [2], [3], [4], [5], [6], [7], [8]]
Y = [[3], [6], [9], [12], [15], [18], [21], [24]]

X = np.array(X, dtype='float')
Y = np.array(Y, dtype='float')
gradient_descent(X, Y, regularization=0.0001)

Learning rate 0.01
Converged!


(array([[1., 1.],
        [1., 2.],
        [1., 3.],
        [1., 4.],
        [1., 5.],
        [1., 6.],
        [1., 7.],
        [1., 8.]]),
 array([[0.49266742],
        [2.91039308]]))

In [319]:
def predict_class(prob):
    """Returns an array of the hypothesis."""
    return np.where(prob > 0.5, 1, 0)

In [320]:
def feature_scaling(X):
    """Uses mean normalization to scale the input."""
    
    # Calculate the mean of each feature.
    X_mean = np.mean(X, axis=0).reshape((-1,1))
    X_std = np.std(X, axis=0).reshape((-1,1))

    # If the standard deviation is 0 then we encounter problems.
    if X_std.min(axis=0)[0] == 0:
        return None
    
    return (X - X_mean.T) / X_std.T 

In [328]:
def logistic_classification_multi(X, Y, learning_rate=0.01, regularization=0.01, max_iters=1000):
    """Performs logistic regression multiclass classification"""
    # We need to find the classes in Y
    classes = np.unique(Y.flatten())
    
    # Iterate over the classes. Find the class with the highest probabiity.
    # We predict that class as the class that we are going to use for predictions.

    # Probabilities of each class.
    probabilities = [[0,0] for i in range(Y.shape[0])]

    for i in range(classes.shape[0]):
        # Choose close i as the positive class and the others as the negative class.
        Y_temp = np.where(Y == i, 1, 0)
        
        X_temp, theta_temp = gradient_descent(X, Y_temp, learning_rate=learning_rate, 
                                              logistic=True, max_iters=max_iters, regularization=regularization)
        probs = hypothesis(theta_temp, X_temp, True)
        temp_classes = predict_class(probs)
                
        for j in range(len(probabilities)):
            if probs[j, 0] > probabilities[j][0]:
                probabilities[j] = [probs[j, 0], i]
                
    return probabilities
      
X = [[3], [4], [5], [3], [4], [5], [3], [4], [5], [3], [4], [5], [3], [4], [5], [3], [4], [5]]
Y = [[0], [1], [2], [0], [1], [2], [0], [1], [2], [3], [4], [5], [3], [4], [5], [3], [4], [5]]

X = np.array(X, dtype='float')
Y = np.array(Y, dtype='float')

    
logistic_classification_multi(X, Y, learning_rate=0.5, regularization=0.1, max_iters=10000)

Learning rate 0.5
Converged!
Learning rate 0.5
Converged!
Learning rate 0.5
Converged!
Learning rate 0.5
Converged!
Learning rate 0.5
Converged!
Learning rate 0.5
Converged!


[[0.43525346375480567, 0],
 [0.1711736992260901, 1],
 [0.38376150131459924, 2],
 [0.43525346375480567, 0],
 [0.1711736992260901, 1],
 [0.38376150131459924, 2],
 [0.43525346375480567, 0],
 [0.1711736992260901, 1],
 [0.38376150131459924, 2],
 [0.43525346375480567, 0],
 [0.1711736992260901, 1],
 [0.38376150131459924, 2],
 [0.43525346375480567, 0],
 [0.1711736992260901, 1],
 [0.38376150131459924, 2],
 [0.43525346375480567, 0],
 [0.1711736992260901, 1],
 [0.38376150131459924, 2]]

In [330]:
def logistic_classification_multi(X, Y, learning_rate=0.01, regularization=0.01, max_iters=1000):
    X, theta = gradient_descent(X, Y, learning_rate=learning_rate, logistic=True, max_iters=max_iters, 
                               regularization=regularization)
    probs = hypothesis(theta, X)
    return predict_class(probs)

X = [[1], [2], [1], [2], [1], [2], [1], [2]]
Y = [[0], [1], [0], [1], [0], [1], [0], [1]]

X = np.array(X, dtype='float',)
Y = np.array(Y, dtype='float')

logistic_classification_multi(X, Y)

Learning rate 0.01
Converged!


array([[0],
       [1],
       [0],
       [1],
       [0],
       [1],
       [0],
       [1]])

In [402]:
class NeuralNetwork:
    
    def __init__(self, X, Y, num_features, *layers):
        """Sets up the hideen layers in the network and the inital values of theta."""
        
        self.number_of_layers = len(layers) + 1
        self.size_of_layers = list(layers)
        self.size_of_layers.insert(0, num_features)
        
        self.theta = []
        self.nodes = []
        
        self.num_features = num_features
        
        self.X = X
        self.Y = Y

    def forward_propagation(self, X_inp):
        """Calcules the hypothesis via feed forward propagation."""
        print(X_inp)
    
        # Insert the X_input into X
        self.nodes[0][1:] = X_inp
        
        for node in self.nodes:
            print(node)
    
        for i in range(len(self.theta)):
            pass
            
    
    
    def train_neural_network(self, X, Y):
        """Trains the neural network model. 
        Layers passed in contains two keys,
        number of layers and array containing
        the size of each layer."""
        
        # Now we need to instantiate the layers.
        # We need to use random initialization this time.
        # Zero initialization will not work.

        # We're going to find epsilon for random initialisation
        epsilon = 2

        self.theta = []
        self.nodes = [] # We're also creating the hidden node matrices.

        # Append the X input nodes into the activation nodes.
        self.nodes.append(np.array([0 for i in range(self.num_features)], dtype='float'))
        # We are using a list and not a 2 dimensional matrix as a two dimensional
        # so that we can store different number of nodes in different layers.
        self.nodes[0] = np.insert(self.nodes[0], 0, 1)

        for i in range(self.number_of_layers - 1):
            self.theta.append(np.random.rand(self.size_of_layers[i+1], 
                                        self.size_of_layers[i] + 1) * (2*epsilon) - (epsilon))
            self.nodes.append(np.zeros(self.size_of_layers[i+1]))
            
            if i != self.number_of_layers - 2:
                self.nodes[i+1] = np.insert(self.nodes[i+1], 0, 1) # Do not want to insert a 1 in the last layer

    
        # Now we need to perform forward propagation.
        self.forward_propagation(X[0])
 

        
       
    
# l and sizes must match
nn = NeuralNetwork(X, Y, X.shape[1], 3, 3, 1)
nn.train_neural_network(X, Y)

[1.]
[1. 1.]
[1. 0. 0. 0.]
[1. 0. 0. 0.]
[0.]
