In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import math


In [15]:
class ML(object):
    """
    Two-layer neural network (one hidden layer) trained with batch gradient descent.

    Data layout used throughout the class: every array holds one example per
    COLUMN, so an input matrix is (n_x, m), the labels are (n_y, m), and the
    weights of a layer are (units_in_layer, units_in_previous_layer).

    Typical use: construct, call run_machine() to train, then
    machine_accuracy() to score the training and testing sets.
    A deep learning (more than two layers) version is not implemented here.
    """

    # Activation names understood by the forward/backward passes.  "soft_max"
    # is accepted as an alias of "softmax" because both spellings were in use.
    _VALID_ACTIVATIONS = ('relu', 'sigmoid', 'softmax', 'soft_max')

    ## constructor
    def __init__(self, size, input_layer, true_values, testing_input, testing_true,
                 layers_dims, learning_rate = 0.0075, hidden_function = 'relu',
                 last_function = 'sigmoid', num_iterations = 3000,
                 print_cost = False, plot_cost = True):
        """
        Store the data sets and hyper-parameters; no training happens here.

        size            -- number of layers including input and output layers
        input_layer     -- training inputs, one example per column
        true_values     -- training labels, one example per column
        testing_input   -- testing inputs, same layout as input_layer
        testing_true    -- testing labels, same layout as true_values
        layers_dims     -- (n_x, n_h, n_y) node counts of the three layers
        learning_rate   -- gradient descent step size
        hidden_function -- activation name of the hidden layer
        last_function   -- activation name of the output layer
        num_iterations  -- number of gradient descent iterations
        print_cost      -- print the cost every 100th iteration when True
        plot_cost       -- plot the recorded costs after training when True
        """
        self.size = size
        self.X = input_layer
        self.Y = true_values
        self.test_X = testing_input
        self.test_Y = testing_true
        self.dimensions = layers_dims
        self.learning_rate = learning_rate
        self.last_function = last_function
        self.hidden_function = hidden_function
        self.num_iterations = num_iterations
        # Honour the caller's choice (it used to be forced to False).
        self.print_cost = print_cost
        # Filled in by run_machine() or set_final_params().
        self.final_parameters = {}
        self.plot_cost_bool = plot_cost

    ##function to list all possible commands
    def help_me(self):
        """Print a summary of the constructor arguments and public methods."""
        print(" mandatory initializer variables: ",
        "    \033[1m size (int):\033[0m the number of layers including input and output layers",
        "    \033[1m input_layer (array):\033[0m the array of input variables for every instance",
        "    \033[1m true_values (array):\033[0m the array of true values to train with",
        "    \033[1m testing_input (array):\033[0m in the same form as input-layer, used to test the final solution",
        "    \033[1m testing_true (array):\033[0m in the same form as output-layer, used to test the final solution",
        "    \033[1m layers_dims (array):\033[0m array of dimensions of each layer",
        "   optional variables for initializer:\033[0m",
        "    \033[1m learning_rate (float):\033[0m defualt set to 0.0075 determines step size of gradient descent",
        "    \033[1m hidden_function (string):\033[0m default set to 'relu', determines activation function of hidden layer",
        "    \033[1m last_function (string):\033[0m default set to 'sigmoid', determines activation function of output layer",
        "    \033[1m num_iteration (int):\033[0m default set to 3000, determines number of iterations in gradient descent phase",
        "    \033[1m print_cost (boolean):\033[0m default set to False, when true prints the cost at every 100th iteration",
        "    \033[1m plot_cost (boolean):\033[0m default set to True, when true makes plot of output of cost function",
        "\033[1m set_layers_dims(array layers_dims):\033[0m sets the dimensions (number of nodes) of each layer",
        "\033[1m set_learning_rate(int rate):\033[0m set the learning rate for gradient descent",
        "\033[1m set_hidden_function(string function):\033[0m set the function for the hidden layer. options:",
        "    \033[1m 'relu':\033[0m use the Rectified Linear Unit",
        "    \033[1m 'sigmoid':\033[0m use the sigmpoid function",
        "    \033[1m 'softmax':\033[0m use the Softmax function ***WIP***",
        "\033[1m set_last_function(string function):\033[0m set the function for the output layer. options same as for set_hidden_function",
        "\033[1m set_num_iterations(int num):\033[0m set the number of iterations for the gradient descent",
        "\033[1m set_print_cost(bool value):\033[0m turn on/off printing every 100th iteration of gradient descent",
        "\033[1m set_plot_cost(bool value):\033[0m turn on/off plotting the cost function output",
        "\033[1m set_final_params(array parameters):\033[0m manually set final parameters",
        "\033[1m run_machine():\033[0m runs the Machine learning algorithm",
        "\033[1m machine_accuracy():\033[0m compares the accuracy of the machine solution to weights and biases using the training data and the testing data",
        "\033[1m optimize_dimensions(int max_size):\033[0m searches hidden layer sizes 1..max_size for the best testing accuracy",
        "\033[1m run_optimal(int max_size):\033[0m finds the best hidden layer size, retrains with it and reports the accuracy",
        sep = "\n")

    ## method for recieving number of dimensions for each layer
    ## TODO add a posibility to iterate and find ideal dimensions
    def set_layers_dims(self, layers_dims):
        """Replace the layer dimensions; rejected (with a message) unless one entry per layer is given."""
        if len(layers_dims) != self.size:
            print("you must enter dimensions for "+str(self.size)+" layers or reset the size")
        else:
            self.dimensions = layers_dims

    ##methods to update the various varaibles
    def set_learning_rate(self, rate):
        """Set the gradient descent step size."""
        self.learning_rate = rate

    def set_num_iterations(self, num):
        """Set the number of gradient descent iterations."""
        self.num_iterations = num

    def set_print_cost(self, boo):
        """Turn printing of the cost (every 100th iteration) on or off."""
        self.print_cost = boo

    def set_plot_cost(self, boo):
        """Turn plotting of the cost curve after training on or off."""
        # run_machine() reads plot_cost_bool, so that is the attribute to update.
        self.plot_cost_bool = boo

    def set_final_params(self, params):
        """Manually install trained parameters (dict with keys W1, b1, W2, b2)."""
        self.final_parameters = params

    ## set hidden layer activation function if different then default
    ## options limited to the functions coded inside
    def set_hidden_function(self, function):
        """Choose the hidden layer activation; unknown names are rejected with a message."""
        if function not in self._VALID_ACTIVATIONS:
            print("function must be 'relu', 'sigmoid', or 'softmax', new functions may be added in the future")
        else:
            self.hidden_function = function

    ## set output layer activation function if different then default
    ## options limited to the functions coded inside
    def set_last_function(self, function):
        """Choose the output layer activation; unknown names are rejected with a message."""
        if function not in self._VALID_ACTIVATIONS:
            print("function must be 'relu', 'sigmoid', or 'softmax', new functions may be added in the future")
        else:
            self.last_function = function

    ## definitions for the activation layers functions
    ## also definitions for the backward propogation using derivitives
    def __sigmoid(self, Z):
        """Return (sigmoid(Z), Z); Z is cached for the backward pass."""
        Z = Z.astype(float)
        A = 1/(1+np.exp(-Z))
        activation_cache = Z
        return A, activation_cache

    def __relu(self, Z):
        """Return (max(0, Z), Z); Z is cached for the backward pass."""
        A = np.maximum(0,Z)
        activation_cache = Z
        return A, activation_cache

    def __soft_max(self, Z):
        """
        Column-wise softmax: every column of the result sums to 1.

        Returns (A, Z) where Z (as float) is cached for the backward pass.
        """
        # Work in float so integer input is not truncated on division.
        Z = np.asarray(Z, dtype=float)
        # Subtracting the per-column maximum leaves the result unchanged
        # mathematically but keeps np.exp from overflowing on large logits.
        exps = np.exp(Z - np.max(Z, axis=0, keepdims=True))
        A = exps / np.sum(exps, axis=0, keepdims=True)
        activation_cache = Z
        return A, activation_cache

    def __sigmoid_backward(self, dA, activation_cache):
        """Return dZ = dA * s * (1 - s) with s = sigmoid(cached Z)."""
        S, _ = self.__sigmoid(activation_cache)
        dZ = dA * (S*(1.0-S))
        return dZ

    def __relu_backward(self, dA, activation_cache):
        """Return dZ: dA where the cached Z is positive, 0 elsewhere."""
        Z = activation_cache
        return np.where(Z > 0, dA, 0.0)

    def __soft_max_backward(self, dA, activation_cache):
        """
        Return dZ for the column-wise softmax.

        For one column with softmax output s, the Jacobian is
        diag(s) - s s^T, so dZ = s * (dA - sum(dA * s)).  This is applied to
        all columns at once without materialising the Jacobian.
        """
        S, _ = self.__soft_max(activation_cache)
        weighted = np.sum(dA * S, axis=0, keepdims=True)
        dZ = S * (dA - weighted)
        return dZ

    ## initialises weights and bias variables for a two-layer NN
    def __initialize_parameters(self, n_x, n_h, n_y):
        """
        Create small random weights and zero biases.

        Seeds NumPy's global generator with 1 so runs are reproducible.
        Returns a dict with W1 (n_h, n_x), b1 (n_h, 1), W2 (n_y, n_h), b2 (n_y, 1).
        """
        np.random.seed(1)

        W1 = np.random.randn(n_h, n_x) * 0.01
        b1 = np.zeros((n_h, 1))
        W2 = np.random.randn(n_y, n_h) * 0.01
        b2 = np.zeros((n_y,1))

        parameters = {"W1": W1,
                      "b1": b1,
                      "W2": W2,
                      "b2": b2}
        return parameters

    ## linear forward propogation step, taken at each layer
    def __linear_forward(self, A, W, b):
        """Return (W.A + b, (A, W, b)); the tuple is cached for the backward pass."""
        Z = np.dot(W,A) + b
        cache = (A,W,b)
        return Z, cache

    ## forward propagation for activation step taken at each layer
    def __linear_activation_forward(self, A_prev, W, b, activation):
        """
        Run the linear step followed by the named activation.

        Returns (A, (linear_cache, activation_cache)).
        Raises ValueError for an activation name that is not implemented.
        """
        Z, linear_cache = self.__linear_forward(A_prev, W, b)

        if activation == "sigmoid":
            A, activation_cache = self.__sigmoid(Z)
        elif activation == "relu":
            A, activation_cache = self.__relu(Z)
        elif activation in ("softmax", "soft_max"):
            A, activation_cache = self.__soft_max(Z)
        else:
            raise ValueError("unknown activation function: " + repr(activation))

        cache = (linear_cache, activation_cache)

        return A, cache

    ## cost function
    def __compute_cost(self, AL, Y):
        """
        Cross-entropy cost -sum(Y*log(AL) + (1-Y)*log(1-AL)) averaged over
        every element of Y.  AL values of exactly 0 or 1 (or outside [0, 1])
        produce inf/nan, as the formula is undefined there.
        """
        m = Y.shape[1]*Y.shape[0]
        cost = -(np.sum(Y*np.log(AL) + (1-Y)*np.log(1-AL)))/m
        cost = np.squeeze(cost)
        return cost

    ## linear backward propogation step, taken at each layer
    def __linear_backward(self, dZ, cache):
        """
        Back-propagate through the linear step.

        Returns (dA_prev, dW, db); dW and db are averaged over the number of
        columns (examples) of the cached input.
        """
        A_prev, W, b = cache
        m = A_prev.shape[1]

        dW = (np.dot(dZ,A_prev.T))/m
        db = np.sum(dZ, axis=1, keepdims=True)/m
        dA_prev = np.dot(W.T, dZ)

        return dA_prev, dW, db

    ## backward propagation for activation step taken at each layer
    def __linear_activation_backward(self, dA, cache, activation):
        """
        Back-propagate through the named activation and then the linear step.

        Returns (dA_prev, dW, db).
        Raises ValueError for an activation name that is not implemented.
        """
        linear_cache, activation_cache = cache

        if activation == "relu":
            dZ = self.__relu_backward(dA, activation_cache)
        elif activation == "sigmoid":
            dZ = self.__sigmoid_backward(dA, activation_cache)
        elif activation in ("softmax", "soft_max"):
            dZ = self.__soft_max_backward(dA, activation_cache)
        else:
            raise ValueError("unknown activation function: " + repr(activation))

        return self.__linear_backward(dZ, linear_cache)

    ## update parameters using gradient descent
    def __update_parameters(self, params, grads, learning_rate):
        """Return a new parameter dict after one gradient descent step; params is not modified."""
        parameters = params.copy()
        num_layers = len(parameters) // 2  # one W and one b per layer

        for layer in range(1, num_layers + 1):
            suffix = str(layer)
            parameters["W" + suffix] = parameters["W" + suffix] - learning_rate*grads["dW" + suffix]
            parameters["b" + suffix] = parameters["b" + suffix] - learning_rate*grads["db" + suffix]

        return parameters

    ## runs the two layer NN
    def __two_layer_model(self, X, Y):
        """
        Train the network on (X, Y) for self.num_iterations iterations.

        Returns (parameters, costs) where costs holds the cost of every 100th
        iteration plus the final iteration.
        """
        grads = {}
        costs = []
        (n_x, n_h, n_y) = self.dimensions

        # __initialize_parameters seeds the random generator itself.
        parameters = self.__initialize_parameters(n_x, n_h, n_y)

        last_iteration = self.num_iterations - 1
        for i in range(0, self.num_iterations):
            W1 = parameters["W1"]
            b1 = parameters["b1"]
            W2 = parameters["W2"]
            b2 = parameters["b2"]

            A1, cache1 = self.__linear_activation_forward(X, W1, b1, self.hidden_function)
            A2, cache2 = self.__linear_activation_forward(A1, W2, b2, self.last_function)
            cost = self.__compute_cost(A2, Y)

            # Derivative of the cross-entropy cost with respect to A2.
            dA2 = -(np.divide(Y, A2) - np.divide(1-Y, 1-A2))
            dA1, dW2, db2 = self.__linear_activation_backward(dA2, cache2, self.last_function)
            dA0, dW1, db1 = self.__linear_activation_backward(dA1, cache1, self.hidden_function)
            grads['dW1'] = dW1
            grads['db1'] = db1
            grads['dW2'] = dW2
            grads['db2'] = db2

            parameters = self.__update_parameters(parameters, grads, self.learning_rate)

            # Record/print on every 100th iteration and on the last one
            # (the last index is num_iterations - 1, not num_iterations).
            checkpoint = (i % 100 == 0) or (i == last_iteration)
            if self.print_cost and checkpoint:
                print("Cost after iteration {}: {}".format(i, np.squeeze(cost)))
            if checkpoint:
                costs.append(cost)

        return parameters, costs

    def __plot_costs(self, costs):
        """Plot the recorded costs with matplotlib and show the figure."""
        plt.plot(np.squeeze(costs))
        plt.ylabel('cost')
        plt.xlabel('iterations (per hundreds)')
        plt.title("Learning rate =" + str(self.learning_rate))
        plt.show()

    def __predict(self, parameters, X, Y, option="binary", print_accuracy = True):
        """
        Run a forward pass with the given parameters and score it against Y.

        option "binary"     -- round every output and compare element-wise
        option "multiclass" -- compare the arg-max row of every column
        Returns the fraction of correct predictions.
        Raises ValueError for an unknown option or when there is nothing to score.
        """
        W1 = parameters["W1"]
        b1 = parameters["b1"]
        W2 = parameters["W2"]
        b2 = parameters["b2"]
        A1, _ = self.__linear_activation_forward(X, W1, b1, self.hidden_function)
        A2, _ = self.__linear_activation_forward(A1, W2, b2, self.last_function)

        if option == "binary":
            predictions = np.round(A2)
            correct = int(np.count_nonzero(predictions == np.asarray(Y)))
            total = int(predictions.size)
        elif option == "multiclass":
            true_classes = np.argmax(Y, axis = 0)
            predicted_classes = np.argmax(A2, axis = 0)
            correct = int(np.count_nonzero(true_classes == predicted_classes))
            total = int(true_classes.size)
        else:
            raise ValueError("option must be 'binary' or 'multiclass', got " + repr(option))

        if total == 0:
            raise ValueError("cannot compute accuracy without any examples")

        accuracy = correct/total
        if print_accuracy:
            print("total = "+str(total)+" correct = "+str(correct))
            print("Accuracy = "+str(accuracy))
        return accuracy

    def run_machine(self):
        """Train on the stored training set, keep the parameters, and plot the costs if enabled."""
        self.final_parameters, costs = self.__two_layer_model(self.X, self.Y)
        if self.plot_cost_bool:
            self.__plot_costs(costs)

    def machine_accuracy(self, option="multiclass"):
        """
        Print and return the accuracy on the training and testing sets.

        Returns (training_accuracy, testing_accuracy).
        Raises RuntimeError when no parameters are available yet.
        """
        if "W1" not in self.final_parameters:
            raise RuntimeError("no trained parameters: call run_machine() or set_final_params() first")
        train_accuracy = self.__predict(self.final_parameters, self.X, self.Y, option=option)
        test_accuracy = self.__predict(self.final_parameters, self.test_X, self.test_Y, option=option)
        return train_accuracy, test_accuracy

    def optimize_dimensions(self, max_size, plot_costs = True):
        """
        Try every hidden layer size from 1 to max_size and keep the best one.

        Each candidate is trained with run_machine() and scored on the testing
        set (multiclass accuracy, compared after rounding to 5 decimals; the
        smallest size wins ties).  plot_costs controls cost plotting during
        the search only; the previous plotting setting is restored afterwards.
        self.dimensions and self.final_parameters are left at the last
        candidate tried.

        Returns a NumPy array (n_x, best_hidden_size, n_y).
        Raises ValueError when max_size is smaller than 1.
        """
        if max_size < 1:
            raise ValueError("max_size must be at least 1")

        n_x, _, n_y = self.dimensions
        # Start below any reachable accuracy so the first candidate is always
        # accepted, even when every candidate scores 0.
        accuracy_high = -1.0
        optimal_dimension = 0

        previous_plot_setting = self.plot_cost_bool
        self.plot_cost_bool = plot_costs
        try:
            for i in range(1, max_size + 1):
                self.dimensions = (n_x, i, n_y)
                self.run_machine()
                accuracy = self.__predict(self.final_parameters, self.test_X, self.test_Y, option="multiclass")
                if round(accuracy_high,5) < round(accuracy,5):
                    accuracy_high = accuracy
                    optimal_dimension = i
        finally:
            self.plot_cost_bool = previous_plot_setting

        optimal_dims = np.array((n_x, optimal_dimension, n_y))
        print("optimal dimensions are: "+str(optimal_dims))
        return(optimal_dims)

    def run_optimal(self, max_size, plot_costs = True):
        """
        Find the best hidden layer size up to max_size, retrain with it and
        report the accuracy.  plot_costs becomes the new plotting setting.

        Returns (training_accuracy, testing_accuracy) of the final model.
        """
        self.plot_cost_bool = plot_costs
        self.dimensions = self.optimize_dimensions(max_size, plot_costs)
        self.run_machine()
        return self.machine_accuracy()
        
    
            
            