In [104]:
import numpy as np
import random
import math
from IPython.display import display, clear_output
from sklearn.utils import shuffle as shuffle_data

random.seed(0)

# Layer represents a MLP Layer
# It has two main properties:
#      - a weigth matrix containing the weights of the layer's neurons. Each line represents a neuron and 
#        the columns represent its corresponding weights
#      - a bias vector, containing the neurons's bias
# Since during the backpropagation we need to compute the weights variation using the old ones, the 
# updated_weights and updated_bias properties store the new values until the update method is called
class Layer:
    # Create a new Layer with 'size' neurons, each one linked to 'inputs_size' inputs
    def __init__(self, size, inputs_size):
        self.size = size
        self.inputs_size = inputs_size
        self.weights = np.array([[random.uniform(-0.1, 0.1) for j in range(inputs_size)] for i in range(size)])
        self.bias = np.array([random.uniform(-0.1,0.1) for i in range(size)])
        
        self.d_weights_current = np.zeros((size, inputs_size))
        self.d_bias_current = np.zeros(size)
        self.d_weights_old = np.zeros((size, inputs_size))
        self.d_bias_old = np.zeros(size)
    
    # update updates the weights and bias matrices with the values stored in the updated ones
    def update(self, eta, alpha):
        #self.weights = np.copy(self.updated_weights)
        #self.bias = np.copy(self.updated_bias)
        
        self.weights = self.weights + eta*self.d_weights_current + alpha*self.d_weights_old 
        self.bias = self.bias + eta*self.d_bias_current + alpha*self.d_bias_old
        
        self.d_weights_old = self.d_weights_current
        self.d_bias_old = self.d_bias_current
        
    # description prints a layer description
    def description(self):
        print("Layer Info")
        print("Weights: \n", self.weights)
        print("Bias: \n ", self.bias)

def logistic(x):
    return 1.0/(1.0+ math.exp(-x))

logistic_vec = np.vectorize(logistic)

def logistic_derivate(x):
    return x*(1.0-x)

In [4]:
class MLP:
    # MLP creation. One might pass the MLP layers as parameters or add them later using the add_layer method.
    # The classification parameter defines if the MLP will be used for a classification or regression problem
    def __init__(self, *layers, classifier=True):
        self.classifier = classifier
        if classifier:
            # Map each class label to a vector with a single 1
            # Ex: Class 0 -> [1,0]
            #     Class 1 -> [0,1]
            self.class_mapping = dict()  
            # Unmap each class vector to the corresponding class label
            # Ex: [1,0] -> Class 0 
            #     [0,1] -> Class 1
            self.class_unmapping = dict()
            
        self.layers = list()
        for layer in layers:
            self.add_layer(layer)
    
    # Shortcut to create a classifier MLP
    @classmethod
    def MLPClassifier(cls, *layers):
        return cls(classifier=True, *layers)   
    
    # Shortcut to create a regressor MLP
    @classmethod
    def MLPRegressor(cls, *layers):
        return cls(classifier=False, *layers)
    
    # add_layer adds a new layer on the MLP. It verifies whether or not the new layer is compatible with the MLP
    def add_layer(self, layer):
        # If there's already a layer in the MLP, verify if the new layer is compatible
        if len(self.layers) > 0:
            if layer.inputs_size != self.layers[-1].size:
                print("The new layer is incompatible with the MLP")
                print("Please, use a layer where each neuron has the same amount of inputs as the number" \
                     "of neurons in the MLP last layer")
        
        self.layers.append(layer)
    
    # description prints the info about the MLP layers
    def description(self):
        print("MLP Classifier?: ", self.classifier)
        print("-------------------------")
        print("MLP Info:")
        for layer, i in zip(self.layers, range(len(self.layers))):
            print("--- Layer: %d ---" % i)
            layer.description()
            
    # __get_class_mapping gets the class labels in the classes list and builds the mapping dicionaries
    # class_mapping and class_unmapping
    def __get_class_mapping(self, classes):
        class_labels = np.unique(classes)
        
        for c in range(len(class_labels)):
            class_label = class_labels[c]
            class_vector = np.zeros(len(class_labels))
            class_vector[c] = 1
    
            self.class_mapping[class_label] = class_vector
            
            # We can't use a list as a hash key. So transform it into a tuple
            self.class_unmapping[tuple(class_vector)] = class_label
        
    # __convert_class_labels_to_vectors converts a list with class labels to a list with 
    # vectors that maps each class label
    def __convert_class_labels_to_vectors(self, class_labels):
        return [self.class_mapping[c] for c in class_labels]
    
    # __convert_class_vectors_to_labels converts a list with class vectors to a list with 
    # the corresponding class labels
    def __convert_class_vectors_to_labels(self, class_vectors):
        return [self.class_unmapping[tuple(class_vector)] for class_vector in class_vectors]
        
        
    # fast_forward computes the ouput for a given input vector
    def fast_forward(self,input_v):
        # We need to store each layer input in order to perform the backpropagation
        self.inputs = list()
    
        # The input is applied in a layer weights matrix and the bias is added in the result
        # Then, the logistic function is applied to each layer neuron result
        # For a layer, we have a final output vector where each component i represents the output
        # of the neuron i
        for layer in self.layers:
            self.inputs.append(input_v)
            output = logistic_vec(layer.weights @ input_v + layer.bias)
            
            # The output of the current layer is the input of the next one
            input_v = output
        
        return output
    
    # train trains the MLP using the examples passed in the samples parameter
    # The expected output for each example must be passed in the classes parameter;
    # eta represents the MLP learning rate;
    # tol represents the error tolerance. The MLP is trained until the cumulative squared error for all example
    #     is less than the tol value
    # print_status prints the output for each example during the training phase
    def train(self, samples, classes, eta=0.5, alpha=0, tol=1e-2, epoch_max=2000, 
              print_status=False, shuffle=True):
        # Map the class labels to output vectors if it's a classification problem
        if self.classifier:
            self.__get_class_mapping(classes)
            classes = self.__convert_class_labels_to_vectors(classes)
                
        error = tol
        new_error = 3*tol
        epoch = 0
        
        # The training stops when the max number of epochs is reached or the Kramer and Sangiovanni-Vicentelly
        # criteria is valid. According to it, we can consider that the BP converged when the average mean squared
        # error is less than a given tolerance
        while (abs(new_error - error) > tol and epoch < epoch_max):
            epoch += 1
            error = 0
            new_error = 0
            
            # Suffles samples to avoid saturation if training with samples beloging to the same class
            # one after another
            if shuffle:
                samples, classes = shuffle_data(samples, classes)
            
            for input_v, t in zip(samples, classes):  
                # ---- Compute the output for the given input vector ----
                output = self.fast_forward(input_v)
                
                # Compute the mean squared error before the backpropagation
                error_sample = pow((np.array(t)-np.array(output)),2)
                # We need to sum the error of each component when the output is a vector
                error += sum(error_sample)/len(samples)
                
                if (print_status == True):
                    print("\ttraining example: %s from class %s" % (input_v, t), end = " ")
                    print("y = ", output)
     
                # ---- Backpropagation ----
                # Compute the new weights of each layer
                # Remark: the udpated weights are stored as a layer property and the layer is updated once 
                # the backpropagation is finished
                # It's necessary to do so in order to compute the delta value for the inner layers. We need 
                # to use the weights that caused the error to compute the delta instead of the updated weights
                for l in reversed(range(len(self.layers))): # Traverse the layers in reversed order
                    layer = self.layers[l]
             
                    deltas = list()
                    # Compute the delta for each layer neuron n
                    for n in range(len(layer.weights)):
                        # Last Layer
                        if l == (len(self.layers)-1):
                            delta = (t[n]-output[n])*logistic_derivate(output[n])
                            
                        # Inner Layer
                        else:
                            # output of the current layer is the input of the next one
                            neuron_output = self.inputs[l+1][n]
                            # weights of each neuron output
                            errors_weights = self.layers[l+1].weights[:,n]
                            
                            delta = np.dot(delta_next_layer,errors_weights)*logistic_derivate(neuron_output)
                              
                        # Computes the weights and bias variation for the neuron n
                        for w in range(len(layer.weights[n])):
                            layer.d_weights_current[n][w] = delta*self.inputs[l][w]
                        layer.d_bias_current[n] = delta*1 # bias input = 1
                        
                        #for w in range(len(layer.weights[n])):
                        #    layer.updated_weights[n][w] = layer.weights[n][w] + eta*delta*self.inputs[l][w]
                        #layer.updated_bias[n] = layer.bias[n] + (eta*delta*1) # bias input = 1

                        # Store the neuron delta
                        deltas.append(delta)
                    
                    # The neurons' delta of the current layer will be used to compute the deltas of the 
                    # next inner layer
                    delta_next_layer = np.array(deltas)
                     
                # Once the backpropagation is finished for the current example, update all the weigths and bias
                for layer in self.layers:
                    layer.update(eta, alpha)
                    
                # Compute the new error mean squared error
                output = self.fast_forward(input_v)
                error_sample = pow((np.array(t)-np.array(output)),2)
                #print("error sample: ",error_sample)
                new_error += sum(error_sample)/len(samples)
            
            # End of a epoch
            if epoch%1 == 0: # Print status only after each 100 iterations 
                clear_output(wait=True)
                display("End of epoch " + str(epoch) + ". Total Error = " + str(new_error))
        
        # End of training         
        clear_output(wait=True)
        display("End of epoch " + str(epoch) + ". Total Error = " + str(new_error))
        
    # predicts gets a list of input samples and returns a list with the predicted outputs
    def predict(self, samples):
        outputs = list()
        for input_v in samples:
            probs = self.fast_forward(input_v)
            
            if self.classifier:
                class_pos = np.argmax(probs)
                output = np.zeros(len(probs))
                output[class_pos] = 1
            
                #outputs.append(self.class_unmapping[tuple(output)])
                outputs.append(output)
                
            else:
                outputs.append(probs)
    
        if self.classifier:
            return self.__convert_class_vectors_to_labels(outputs)
        
        else:
            return outputs

# Pre Processing Data

In [105]:
# normalize data transforms data in order to all points have mean 0 and variance 1
def normalize_data(data):
    normalized_columns = list()
    for c in range(len(data[0])):
        col = data[:,c]
        normalized_columns.append((col - np.mean(col))/np.std(col))

    return np.array(normalized_columns).T

In [106]:
# scale_data transforms data in order to all points be in the interval [0,1]
def scale_data(data):
    normalized_columns = list()
    for c in range(len(data[0])):
        col = data[:,c]
        normalized_columns.append((col-np.min(col))/(np.max(col)-np.min(col)))

    return np.array(normalized_columns).T

# Evaluation

In [107]:
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
import pandas as pd

def evaluate(real_classes, predicted_classes, display=True):
    acc = accuracy_score(real_classes, predicted_classes)
    
    class_labels = np.sort(np.unique(real_classes))
    cm = confusion_matrix(real_classes, predicted_classes, labels=class_labels)
    df = pd.DataFrame(cm)
    df.columns = class_labels
    df.index = class_labels
    
    # Acurracy per class
    accs = list()
    for c in range(len(cm)):
        accs.append(cm[c,c]/sum(cm[c,:]))
    df["Accuracy"] = accs
    
    avg_acc = np.average(accs)
    
    if display == True:
        print("Accuracy: %.2f%%" % (acc*100))
        
        print("Confusion Matrix and Accuracy per class:")
        print(df)

        print("Average accuracy per class: %.2f%%" % (avg_acc*100))
        
    return acc, avg_acc

# Classification

Para o problema de classificação, vamos pegar o conjunto de dados fornecidos e vamos normalizá-los de forma que cada valor de feature esteja no intervalo (0,1). Isso será feito para evitar a saturação da saída dos neurônios e melhorar a convergência do algoritmo de aprendizagem.

A seguir, tomaremos um conjunto de treinamento consistindo em 70% da base original. Iremos avaliar o impacto da arquitetura da rede assim como da variação de parâmetros de aprendizado no valor de acurácia obtido na classificação do conjunto de teste. 

Iremos calcular a acurácia total e a acurácia por classe. Uma vez que não estamos impondo penalidades diferentes para erros cometidos em determinadas classes, vamos considerar como melhor arquitetura aquela que fornece maior valor para a acurácia total, embora isso possa não refletir em uma acurácia por classe elevada! Mais tarde, iremos tratar melhor o balanceamento entre as classes a fim de que o aprendizado da rede seja adequado para todas elas.

## Original DataSet

In [108]:
import pandas as pd

df = pd.read_csv('winequality-red.csv')
df.head(5)

Unnamed: 0.1,Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,category
0,0,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,Mid
1,1,7.8,0.88,0.0,2.6,0.098,25.0,67.0,0.9968,3.2,0.68,9.8,Mid
2,2,7.8,0.76,0.04,2.3,0.092,15.0,54.0,0.997,3.26,0.65,9.8,Mid
3,3,11.2,0.28,0.56,1.9,0.075,17.0,60.0,0.998,3.16,0.58,9.8,Mid
4,4,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,Mid


In [8]:
# data separation
inputs = df[df.columns[1:-1]].values
classes = df[df.columns[-1]].values

print(inputs[0:2,:])
print(classes)

[[ 7.4     0.7     0.      1.9     0.076  11.     34.      0.9978  3.51
   0.56    9.4   ]
 [ 7.8     0.88    0.      2.6     0.098  25.     67.      0.9968  3.2
   0.68    9.8   ]]
['Mid' 'Mid' 'Mid' ... 'Mid' 'Mid' 'Mid']


In [26]:
unique, counts = np.unique(classes, return_counts=True)
print(unique)
print(counts)

['Bad' 'Good' 'Mid']
[  63  217 1319]


In [110]:
scaled_inputs = scale_data(inputs)

### Split - Training - Test

In [111]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(scaled_inputs, classes, test_size=0.3, 
                                                    stratify=classes,random_state=42)

# Testing different network architectures

### 1 layer - 5 Neurons

In [None]:
# Training
N1 = 5
N = len(X_train[0])
n_classes = len(np.unique(y_train))

random.seed(0)
mlp = MLP.MLPClassifier(Layer(N1, N), Layer(n_classes, N1))
mlp.train(X_train, y_train, eta=0.5, alpha=0.5, tol=1e-4, epoch_max=400, print_status=False, shuffle=True)

In [None]:
predicted = mlp.predict(X_test)
evaluate(y_test, predicted)

### 1 Layer - 11 Neurons

In [None]:
N1 = 11
N = len(X_train[0])
n_classes = len(np.unique(y_train))

random.seed(0)
mlp = MLP.MLPClassifier(Layer(N1, N), Layer(n_classes, N1))
mlp.train(X_train, y_train, eta=0.5, alpha=0.5, tol=1e-4, epoch_max=400, print_status=False, shuffle=True)

In [None]:
predicted = mlp.predict(X_test)
evaluate(y_test, predicted)

### 1 Layer - 22 Neurons

In [None]:
N1 = 22
N = len(X_train[0])
n_classes = len(np.unique(y_train))

random.seed(0)
mlp = MLP.MLPClassifier(Layer(N1, N), Layer(n_classes, N1))
mlp.train(X_train, y_train, eta=0.5, alpha=0.5, tol=1e-4, epoch_max=400, print_status=False, shuffle=True)

In [None]:
predicted = mlp.predict(X_test)
evaluate(y_test, predicted)

### 2 layers: 5 Neurons - 5 Neurons

In [None]:
N1 = 5
N2 = 5
N = len(X_train[0])
n_classes = len(np.unique(y_train))

random.seed(0)
mlp = MLP.MLPClassifier(Layer(N1, N), Layer(N2, N1), Layer(n_classes, N2))
mlp.train(X_train, y_train, eta=0.5, alpha=0.5, tol=1e-4, epoch_max=400, print_status=False, shuffle=True)

In [None]:
predicted = mlp.predict(X_test)
evaluate(y_test, predicted)

### 2 layers: 5 Neurons - 11 Neurons

In [None]:
N1 = 5
N2 = 11
N = len(X_train[0])
n_classes = len(np.unique(y_train))

random.seed(0)
mlp = MLP.MLPClassifier(Layer(N1, N), Layer(N2, N1), Layer(n_classes, N2))
mlp.train(X_train, y_train, eta=0.5, alpha=0.5, tol=1e-4, epoch_max=400, print_status=False, shuffle=True)

In [None]:
predicted = mlp.predict(X_test)
evaluate(y_test, predicted)

### 2 layers: 5 Neurons - 22 Neurons

In [None]:
N1 = 5
N2 = 22
N = len(X_train[0])
n_classes = len(np.unique(y_train))

random.seed(0)
mlp = MLP.MLPClassifier(Layer(N1, N), Layer(N2, N1), Layer(n_classes, N2))
mlp.train(X_train, y_train, eta=0.5, alpha=0.5, tol=1e-4, epoch_max=400, print_status=False, shuffle=True)

In [None]:
predicted = mlp.predict(X_test)
evaluate(y_test, predicted)

### 2 layers: 11Neurons - 5 Neurons

In [None]:
N1 = 11
N2 = 5
N = len(X_train[0])
n_classes = len(np.unique(y_train))

random.seed(0)
mlp = MLP.MLPClassifier(Layer(N1, N), Layer(N2, N1), Layer(n_classes, N2))
mlp.train(X_train, y_train, eta=0.5, alpha=0.5, tol=1e-4, epoch_max=400, print_status=False, shuffle=True)

In [None]:
predicted = mlp.predict(X_test)
evaluate(y_test, predicted)

### 2 layers: 11Neurons - 11 Neurons

In [None]:
N1 = 11
N2 = 11
N = len(X_train[0])
n_classes = len(np.unique(y_train))

random.seed(0)
mlp = MLP.MLPClassifier(Layer(N1, N), Layer(N2, N1), Layer(n_classes, N2))
mlp.train(X_train, y_train, eta=0.5, alpha=0.5, tol=1e-4, epoch_max=400, print_status=False, shuffle=True)

In [None]:
predicted = mlp.predict(X_test)
evaluate(y_test, predicted)

### 2 layers: 11Neurons - 22 Neurons

In [None]:
N1 = 11
N2 = 22
N = len(X_train[0])
n_classes = len(np.unique(y_train))

random.seed(0)
mlp = MLP.MLPClassifier(Layer(N1, N), Layer(N2, N1), Layer(n_classes, N2))
mlp.train(X_train, y_train, eta=0.5, alpha=0.5, tol=1e-4, epoch_max=400, print_status=False, shuffle=True)

In [None]:
predicted = mlp.predict(X_test)
evaluate(y_test, predicted)

### 2 layers: 22Neurons - 5 Neurons

In [None]:
N1 = 22
N2 = 5
N = len(X_train[0])
n_classes = len(np.unique(y_train))

random.seed(0)
mlp = MLP.MLPClassifier(Layer(N1, N), Layer(N2, N1), Layer(n_classes, N2))
mlp.train(X_train, y_train, eta=0.5, alpha=0.5, tol=1e-4, epoch_max=400, print_status=False, shuffle=True)

In [None]:
predicted = mlp.predict(X_test)
evaluate(y_test, predicted)

### 2 layers: 22Neurons - 11 Neurons

In [None]:
N1 = 22
N2 = 11
N = len(X_train[0])
n_classes = len(np.unique(y_train))

random.seed(0)
mlp = MLP.MLPClassifier(Layer(N1, N), Layer(N2, N1), Layer(n_classes, N2))
mlp.train(X_train, y_train, eta=0.5, alpha=0.5, tol=1e-4, epoch_max=400, print_status=False, shuffle=True)

In [None]:
predicted = mlp.predict(X_test)
evaluate(y_test, predicted)

### 2 layers: 22Neurons - 22 Neurons

In [None]:
N1 = 22
N2 = 22
N = len(X_train[0])
n_classes = len(np.unique(y_train))

random.seed(0)
mlp = MLP.MLPClassifier(Layer(N1, N), Layer(N2, N1), Layer(n_classes, N2))
mlp.train(X_train, y_train, eta=0.5, alpha=0.5, tol=1e-4, epoch_max=400, print_status=False, shuffle=True)

In [None]:
predicted = mlp.predict(X_test)
evaluate(y_test, predicted)

# Number of epochs used during training

Vamos analisar como o número de epochs usadas durante o treinamento interfere na acurácia obtida no conjunto de testes! Para isso, vamos utilizar a arquitetura de rede que obteve melhor desempenho:



In [112]:
accs = dict()
best_N1 = 11
best_N2 = 11

# 200 epochs

In [None]:
epochs = 200

random.seed(0)
best_layers = [Layer(best_N1, N), Layer(best_N2, best_N1), Layer(n_classes, best_N2)]
mlp = MLP.MLPClassifier(*best_layers)
mlp.train(X_train, y_train, eta=0.5, alpha=0.5, tol=1e-4, epoch_max=epochs, print_status=False, shuffle=True)

predicted = mlp.predict(X_test)
accs[epochs] = evaluate(y_test, predicted)

# 400 epochs

In [None]:
epochs = 400

random.seed(0)
best_layers = [Layer(best_N1, N), Layer(best_N2, best_N1), Layer(n_classes, best_N2)]
mlp = MLP.MLPClassifier(*best_layers)
mlp.train(X_train, y_train, eta=0.5, alpha=0.5, tol=1e-4, epoch_max=epochs, print_status=False, shuffle=True)

predicted = mlp.predict(X_test)
accs[epochs] = evaluate(y_test, predicted)

# 800 epochs

In [None]:
epochs = 800

random.seed(0)
best_layers = [Layer(best_N1, N), Layer(best_N2, best_N1), Layer(n_classes, best_N2)]
mlp = MLP.MLPClassifier(*best_layers)
mlp.train(X_train, y_train, eta=0.5, alpha=0.5, tol=1e-4, epoch_max=epochs, print_status=False, shuffle=True)

predicted = mlp.predict(X_test)
accs[epochs] = evaluate(y_test, predicted)

# 1000 epochs

In [None]:
epochs = 1000

random.seed(0)
best_layers = [Layer(best_N1, N), Layer(best_N2, best_N1), Layer(n_classes, best_N2)]
mlp = MLP.MLPClassifier(*best_layers)
mlp.train(X_train, y_train, eta=0.5, alpha=0.5, tol=1e-4, epoch_max=epochs, print_status=False, shuffle=True)

predicted = mlp.predict(X_test)
accs[epochs] = evaluate(y_test, predicted)

# 2000 epochs

In [None]:
epochs = 2000

random.seed(0)
best_layers = [Layer(best_N1, N), Layer(best_N2, best_N1), Layer(n_classes, best_N2)]
mlp = MLP.MLPClassifier(*best_layers)
mlp.train(X_train, y_train, eta=0.5, alpha=0.5, tol=1e-4, epoch_max=epochs, print_status=False, shuffle=True)

predicted = mlp.predict(X_test)
accs[epochs] = evaluate(y_test, predicted)

In [None]:
print(accs)

In [None]:
epochs_accs = pd.DataFrame(accs)
epochs_accs.index = ["Total accuracy", "Accuracy per class"]
print(epochs_accs)

Repare que embora a Loss Function decresça para 1000 e 2000 epochs, ao avaliar o modelo no conjunto de testes, há uma queda na acurácia! isso ilustra a ocorrência de um overfitting do modelo. Portanto, nas próximas etapas, iremos tomar 800 epochs como o máximo de iterações durante a fase de treinamento, uma vez que ela mantém um bom nível de acurácia total, equanto eleva a acurácia por classe.

In [113]:
best_epochs = 800

# Learning Rate and Momentum

Fixando o número de epochs em 500 e utilizando a arquitura de rede com melhor desempenho (2 camadas intermediárias: 5 neurônios na primeira e 11 na segunda), vamos variar os parâmetros learning rate (eta) e momentum (alfa), e vamos observar como eles interferem no aprendizado.

## Learning Rate = 0.3 e Momentum = 0.3

In [None]:
accs = dict()

In [None]:
eta = 0.3
alpha = 0.3

random.seed(0)
best_layers = [Layer(best_N1, N), Layer(best_N2, best_N1), Layer(n_classes, best_N2)]
mlp = MLP.MLPClassifier(*best_layers)
mlp.train(X_train, y_train, eta=eta, alpha=alpha, tol=1e-4, epoch_max=best_epochs, 
          print_status=False, shuffle=True)

predicted = mlp.predict(X_test)
accs[(eta,alpha)] = evaluate(y_test, predicted)

## Learning Rate = 0.3 e Momentum = 0.5

In [None]:
eta = 0.3
alpha = 0.5

random.seed(0)
best_layers = [Layer(best_N1, N), Layer(best_N2, best_N1), Layer(n_classes, best_N2)]
mlp = MLP.MLPClassifier(*best_layers)
mlp.train(X_train, y_train, eta=eta, alpha=alpha, tol=1e-4, epoch_max=best_epochs, 
          print_status=False, shuffle=True)

predicted = mlp.predict(X_test)
accs[(eta,alpha)] = evaluate(y_test, predicted)

## Learning Rate = 0.3 e Momentum = 0.8

In [None]:
eta = 0.3
alpha = 0.8

random.seed(0)
best_layers = [Layer(best_N1, N), Layer(best_N2, best_N1), Layer(n_classes, best_N2)]
mlp = MLP.MLPClassifier(*best_layers)
mlp.train(X_train, y_train, eta=eta, alpha=alpha, tol=1e-4, epoch_max=best_epochs, 
          print_status=False, shuffle=True)

predicted = mlp.predict(X_test)
accs[(eta,alpha)] = evaluate(y_test, predicted)

## Learning Rate = 0.5 e Momentum = 0.3

In [None]:
eta = 0.5
alpha = 0.3

random.seed(0)
best_layers = [Layer(best_N1, N), Layer(best_N2, best_N1), Layer(n_classes, best_N2)]
mlp = MLP.MLPClassifier(*best_layers)
mlp.train(X_train, y_train, eta=eta, alpha=alpha, tol=1e-4, epoch_max=best_epochs, 
          print_status=False, shuffle=True)

predicted = mlp.predict(X_test)
accs[(eta,alpha)] = evaluate(y_test, predicted)

## Learning Rate = 0.5 e Momentum = 0.5

In [None]:
eta = 0.5
alpha = 0.5

random.seed(0)
best_layers = [Layer(best_N1, N), Layer(best_N2, best_N1), Layer(n_classes, best_N2)]
mlp = MLP.MLPClassifier(*best_layers)
mlp.train(X_train, y_train, eta=eta, alpha=alpha, tol=1e-4, epoch_max=best_epochs, 
          print_status=False, shuffle=True)

predicted = mlp.predict(X_test)
accs[(eta,alpha)] = evaluate(y_test, predicted)

## Learning Rate = 0.5 e Momentum = 0.8

In [None]:
eta = 0.5
alpha = 0.8

random.seed(0)
best_layers = [Layer(best_N1, N), Layer(best_N2, best_N1), Layer(n_classes, best_N2)]
mlp = MLP.MLPClassifier(*best_layers)
mlp.train(X_train, y_train, eta=eta, alpha=alpha, tol=1e-4, epoch_max=best_epochs, 
          print_status=False, shuffle=True)

predicted = mlp.predict(X_test)
accs[(eta,alpha)] = evaluate(y_test, predicted)

## Learning Rate = 0.8 e Momentum = 0.3

In [None]:
eta = 0.8
alpha = 0.3

random.seed(0)
best_layers = [Layer(best_N1, N), Layer(best_N2, best_N1), Layer(n_classes, best_N2)]
mlp = MLP.MLPClassifier(*best_layers)
mlp.train(X_train, y_train, eta=eta, alpha=alpha, tol=1e-4, epoch_max=best_epochs, 
          print_status=False, shuffle=True)

predicted = mlp.predict(X_test)
accs[(eta,alpha)] = evaluate(y_test, predicted)

## Learning Rate = 0.8 e Momentum = 0.5

In [None]:
eta = 0.8
alpha = 0.5

random.seed(0)
best_layers = [Layer(best_N1, N), Layer(best_N2, best_N1), Layer(n_classes, best_N2)]
mlp = MLP.MLPClassifier(*best_layers)
mlp.train(X_train, y_train, eta=eta, alpha=alpha, tol=1e-4, epoch_max=best_epochs, 
          print_status=False, shuffle=True)

predicted = mlp.predict(X_test)
accs[(eta,alpha)] = evaluate(y_test, predicted)

## Learning Rate = 0.8 e Momentum = 0.8

In [None]:
eta = 0.8
alpha = 0.8

random.seed(0)
best_layers = [Layer(best_N1, N), Layer(best_N2, best_N1), Layer(n_classes, best_N2)]
mlp = MLP.MLPClassifier(*best_layers)
mlp.train(X_train, y_train, eta=eta, alpha=alpha, tol=1e-4, epoch_max=best_epochs, 
          print_status=False, shuffle=True)

predicted = mlp.predict(X_test)
accs[(eta,alpha)] = evaluate(y_test, predicted)

## Learning Rate = 1 e Momentum = 1

In [None]:
eta = 1
alpha = 1

random.seed(0)
best_layers = [Layer(best_N1, N), Layer(best_N2, best_N1), Layer(n_classes, best_N2)]
mlp = MLP.MLPClassifier(*best_layers)
mlp.train(X_train, y_train, eta=eta, alpha=alpha, tol=1e-4, epoch_max=best_epochs, 
          print_status=False, shuffle=True)

predicted = mlp.predict(X_test)
accs[(eta,alpha)] = evaluate(y_test, predicted)

In [None]:
accs

In [None]:
accs = store
accs

In [None]:
store

In [None]:
for k, v in accs.items():
    print(k)
    print(v)
    
#store = accs
#store

In [None]:
total_acc = pd.DataFrame()
avg_acc = pd.DataFrame()

for run, acc in accs.items():
    total_acc.at[str(run[0]), str(run[1])] = acc[0]
    avg_acc.at[str(run[0]), str(run[1])] = acc[1]
    
print(total_acc)

In [None]:
print(avg_acc)

Podemos observar que, quando o learning rate e momentum são de magnitude elevada (próximos à 1)

In [114]:
best_eta   = 0.5
best_alpha = 0.5

# Training and Test Sets

<div style="text-align: justify"> Agora, vamos variar o tamanho dos conjuntos de treinamento e teste utilizando a melhor arquitetura encontrada acima e os melhores valores de learning rate e momentum. Utilizaremos incialmente 70% dos dados para treinamento, aumentando gradativamente esse valor até 90%.</div>

## 70% for training, 30% for test

In [None]:
X_train, X_test, y_train, y_test = train_test_split(scaled_inputs, classes, test_size=0.3, 
                                                    stratify=classes,random_state=42)

random.seed(0)
best_layers = [Layer(best_N1, N), Layer(best_N2, best_N1), Layer(n_classes, best_N2)]
mlp = MLP.MLPClassifier(*best_layers)
mlp.train(X_train, y_train, eta=best_eta, alpha=best_alpha, tol=1e-4, epoch_max=best_epochs, 
          print_status=False, shuffle=True)

predicted = mlp.predict(X_test)
evaluate(y_test, predicted)

## 75% for training, 25% for test

In [None]:
X_train, X_test, y_train, y_test = train_test_split(scaled_inputs, classes, test_size=0.25, 
                                                    stratify=classes,random_state=42)

random.seed(0)
best_layers = [Layer(best_N1, N), Layer(best_N2, best_N1), Layer(n_classes, best_N2)]
mlp = MLP.MLPClassifier(*best_layers)
mlp.train(X_train, y_train, eta=best_eta, alpha=best_alpha, tol=1e-4, epoch_max=best_epochs, 
          print_status=False, shuffle=True)

predicted = mlp.predict(X_test)
evaluate(y_test, predicted)

## 80% for training, 20% for test

In [None]:
X_train, X_test, y_train, y_test = train_test_split(scaled_inputs, classes, test_size=0.2, 
                                                    stratify=classes,random_state=42)

random.seed(0)
best_layers = [Layer(best_N1, N), Layer(best_N2, best_N1), Layer(n_classes, best_N2)]
mlp = MLP.MLPClassifier(*best_layers)
mlp.train(X_train, y_train, eta=best_eta, alpha=best_alpha, tol=1e-4, epoch_max=best_epochs, 
          print_status=False, shuffle=True)

predicted = mlp.predict(X_test)
evaluate(y_test, predicted)

## 85% for training, 15% for test

In [None]:
X_train, X_test, y_train, y_test = train_test_split(scaled_inputs, classes, test_size=0.15, 
                                                    stratify=classes,random_state=42)

random.seed(0)
best_layers = [Layer(best_N1, N), Layer(best_N2, best_N1), Layer(n_classes, best_N2)]
mlp = MLP.MLPClassifier(*best_layers)
mlp.train(X_train, y_train, eta=best_eta, alpha=best_alpha, tol=1e-4, epoch_max=best_epochs, 
          print_status=False, shuffle=True)

predicted = mlp.predict(X_test)
evaluate(y_test, predicted)

## 90% for training, 10% for test

In [None]:
X_train, X_test, y_train, y_test = train_test_split(scaled_inputs, classes, test_size=0.1, 
                                                    stratify=classes,random_state=42)

random.seed(0)
best_layers = [Layer(best_N1, N), Layer(best_N2, best_N1), Layer(n_classes, best_N2)]
mlp = MLP.MLPClassifier(*best_layers)
mlp.train(X_train, y_train, eta=best_eta, alpha=best_alpha, tol=1e-4, epoch_max=best_epochs, 
          print_status=False, shuffle=True)

predicted = mlp.predict(X_test)
evaluate(y_test, predicted)

## 95% for training, 5% for test

In [None]:
X_train, X_test, y_train, y_test = train_test_split(scaled_inputs, classes, test_size=0.05, 
                                                    stratify=classes,random_state=42)

random.seed(0)
best_layers = [Layer(best_N1, N), Layer(best_N2, best_N1), Layer(n_classes, best_N2)]
mlp = MLP.MLPClassifier(*best_layers)
mlp.train(X_train, y_train, eta=best_eta, alpha=best_alpha, tol=1e-4, epoch_max=best_epochs, 
          print_status=False, shuffle=True)

predicted = mlp.predict(X_test)
evaluate(y_test, predicted)

A partir das classificações realizadas, é possível observar que a variação nos tamanhos dos conjuntos de treinamento e teste resultaram em mudanças pouco significantes nos valores de acurácia obtidos. 

Vale destacar que utilizando 70% dos dados para treinamento e 30% dos dados para teste, obtivemos a melhor acurácia média por classe, enquanto que utilizando 85% dos dados para treinamento e 15% para teste, foi obtida a melhor acurácia geral.

Assim, como a diferença na acurácia geral de ambos os casos citados é de pouco mais de 1%, a divisão entre 70% e 30% será considerada como a que apresentou melhores resultados e será utilizada para os próximos testes daqui em diante.

In [115]:
best_test_size = 0.3

# Better Pre Processing Data

Como podemos observar nas classificações acima, na base de dados considerada, há poucos exemplos da classe Bad. Como consequência, como estamos dando o mesmo peso para um erro cometido em qualquer classe, a rede acaba por classificar a maioria dos exemplos como sendo pertecentes à classe com maior número de exemplos ('Mid').

A seguir, vamos considerar uma base de dados com um número balanceado entre as classes. Para tal, vamos tomar o tamanho da menor classe e escolher exemplos aleatórios das demais classes para igualar esse número.

Também vamos considerar uma base de dados com exemplos artificiais que serão criados para igualar o número de exemplos da menor classe com o número de exemplos da classe intermediária. Não igualaremos o número de exemplares pela classe de maior cardinalidade, uma vez que, no mundo real, é normal que haja mais itens de qualidade intermediária que itens de qualidade ruim ou boa. 

Para melhor comparar as base de dados e não ser influenciado pela aleatoriedade com a qual os conjuntos de treinamento e teste são escolhidos, iremos avaliar os modelos utilizando CrossValidation Stratified com 10 folds!

## Undersampling

In [17]:
# gets the minor quantity of examples, among all the classes from the dataset
examples = min(counts)

In [18]:
# randomly chooses 63 indexes of examples from each class 
bad_indices = np.random.choice(np.where(classes == 'Bad')[0], examples)
good_indices = np.random.choice(np.where(classes == 'Good')[0], examples)
mid_indices = np.random.choice(np.where(classes == 'Mid')[0], examples)

# stores the chosen examples from 'Bad' class, as well as the same amount of labels from it
under_sampled_examples = scaled_inputs[bad_indices]
under_sampled_classes = classes[bad_indices]

print(under_sampled_classes)

# stores the chosen examples from 'Good' class, as well as the same amount of labels from it
under_sampled_examples = np.append(under_sampled_examples,scaled_inputs[good_indices], axis=0)
under_sampled_classes = np.append(under_sampled_classes, classes[good_indices], axis=0)

# stores the chosen examples from 'Mid' class, as well as the same amount of labels from it 
under_sampled_examples = np.append(under_sampled_examples,scaled_inputs[mid_indices], axis=0)
under_sampled_classes = np.append(under_sampled_classes, classes[mid_indices], axis=0)

['Bad' 'Bad' 'Bad' 'Bad' 'Bad' 'Bad' 'Bad' 'Bad' 'Bad' 'Bad' 'Bad' 'Bad'
 'Bad' 'Bad' 'Bad' 'Bad' 'Bad' 'Bad' 'Bad' 'Bad' 'Bad' 'Bad' 'Bad' 'Bad'
 'Bad' 'Bad' 'Bad' 'Bad' 'Bad' 'Bad' 'Bad' 'Bad' 'Bad' 'Bad' 'Bad' 'Bad'
 'Bad' 'Bad' 'Bad' 'Bad' 'Bad' 'Bad' 'Bad' 'Bad' 'Bad' 'Bad' 'Bad' 'Bad'
 'Bad' 'Bad' 'Bad' 'Bad' 'Bad' 'Bad' 'Bad' 'Bad' 'Bad' 'Bad' 'Bad' 'Bad'
 'Bad' 'Bad' 'Bad']


In [None]:
# splits the new dataset in a training set and a test set, using 70% to 30% proportion
X_train, X_test, y_train, y_test = train_test_split(under_sampled_examples, under_sampled_classes, 
                                                    test_size=best_test_size, 
                                                    stratify=under_sampled_classes,random_state=42)

### Without Cross Validation metrics

In [None]:
# uses the best architecture, number of epochs and alpha and eta values, all previously obtained,
# for classifying the new examples, without using Stratified K-Fold metrics
random.seed(0)
best_layers = [Layer(best_N1, N), Layer(best_N2, best_N1), Layer(n_classes, best_N2)]
mlp = MLP.MLPClassifier(*best_layers)
mlp.train(X_train, y_train, eta=best_eta, alpha=best_alpha, tol=1e-4, epoch_max=best_epochs, 
          print_status=False, shuffle=True)

predicted = mlp.predict(X_test)
evaluate(y_test, predicted)

<div style="text-align: justify"> Como é possível observar, a acurácia obtida na classificação utilizando undersampling piorou em cerca de 10% em comparação com a classificação realizada utilizando a base de dados original. Porém, a acurácia média por classes melhorou consideravelmente, em cerca de 20%, quando comparada à melhor acurácia por classe obtida anteriormente, de aproximadamente 53%. Isso se deve ao fato de que a quantidade de exemplos disponíveis para cada classe é a mesma, de forma que a classificação não se torna enviesada, favorecendo os exemplos da classe majoritária em detrimento das outras.</div>

### With Cross Validation metrics

In [20]:
# uses the best architecture, number of epochs and alpha and eta values, all previously obtained,
# for classifying the new examples, using Stratified K-Fold metrics, for K = 10.
from sklearn.model_selection import StratifiedKFold

skf = StratifiedKFold(n_splits=10)
accuracies = list()

random.seed(0)
best_layers = [Layer(best_N1, N), Layer(best_N2, best_N1), Layer(n_classes, best_N2)]
mlp = MLP.MLPClassifier(*best_layers)

X = under_sampled_examples
y = under_sampled_classes

for train_index, test_index in skf.split(X, y):
    print(train_index, test_index)
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]
    mlp.train(X_train, y_train, eta=best_eta, alpha=best_alpha, tol=1e-4, epoch_max=best_epochs, 
              print_status=False, shuffle=True)

    predicted  = mlp.predict(X_test)
    acc = evaluate(y_test, predicted, display=False)
    accuracies.append(acc)
    
mean_acc = np.array([])
mean_classes_acc = np.array([])

for i in range(len(accuracies)):
    mean_acc = np.append(mean_acc, accuracies[i][0])
    mean_classes_acc = np.append(mean_classes_acc, accuracies[i][1])

print("Average accuracy: " + '{:.2f}'.format(np.mean(mean_acc)*100) + "%")
print("Average accuracy per class: " + '{:.2f}'.format(np.mean(mean_classes_acc)*100) + "%")

'End of epoch 800. Total Error = 0.04805420786560231'

Average accuracy: 87.30%
Average accuracy per class: 87.30%


### Testing on the complete dataset, trained with the undersampled dataset

In [22]:
predicted  = mlp.predict(X_test)
acc = evaluate(y_test, predicted)

Accuracy: 53.12%
Confusion Matrix and Accuracy per class:
      Bad  Good  Mid  Accuracy
Bad    19     0    0  1.000000
Good    2    47   16  0.723077
Mid   112    95  189  0.477273
Average accuracy per class: 73.34%


## Over Sampling

In [123]:
from imblearn.over_sampling import SMOTE

# specify the class targeted by the resampling. The number of samples in the different classes will be equalized
# 'not majority': resample all classes but the majority class
sm = SMOTE(ratio='not majority')

df_majority = df[df["category"] == "Mid"]
df_inter = df[df["category"] == "Good"]
df_minority = df[df["category"] == "Bad"]

# new_df containing only intermediate and minority classes
new_df = df_inter.append(df_minority)

# resample the dataset, using parameters: matrix containing the data which have to be sampled and corresponding 
# label for each sample in matrix
# 'over_sampled_dfX': The array containing the resampled data
# 'over_sampled_dfY': The corresponding label of over_sampled_dfX
over_sampled_dfX, over_sampled_dfY = sm.fit_sample(new_df.drop('category', axis=1), new_df['category'])

# over_sampled_df containing the resampled data (intermediate and minority classes)
over_sampled_df = pd.concat([pd.DataFrame(over_sampled_dfX), pd.DataFrame(over_sampled_dfY)], axis=1)
over_sampled_df.columns = new_df.columns

# append majority class
over_sampled_df = over_sampled_df.append(df_majority)

In [124]:
# Separação dos Dados
over_sampled_inputs = over_sampled_df[over_sampled_df.columns[1:-1]].values
over_sampled_classes = over_sampled_df[over_sampled_df.columns[-1]].values

print(over_sampled_inputs[0:2,:])
print(over_sampled_classes)

[[7.300e+00 6.500e-01 0.000e+00 1.200e+00 6.500e-02 1.500e+01 2.100e+01
  9.946e-01 3.390e+00 4.700e-01 1.000e+01]
 [7.800e+00 5.800e-01 2.000e-02 2.000e+00 7.300e-02 9.000e+00 1.800e+01
  9.968e-01 3.360e+00 5.700e-01 9.500e+00]]
['Good' 'Good' 'Good' ... 'Mid' 'Mid' 'Mid']


In [125]:
over_sampled_scaled_inputs = scale_data(over_sampled_inputs)
over_sampled_scaled_inputs

array([[0.23893805, 0.3630137 , 0.        , ..., 0.51181102, 0.08383234,
        0.24615385],
       [0.28318584, 0.31506849, 0.02      , ..., 0.48818898, 0.14371257,
        0.16923077],
       [0.34513274, 0.10958904, 0.56      , ..., 0.44094488, 0.25149701,
        0.32307692],
       ...,
       [0.15044248, 0.26712329, 0.13      , ..., 0.53543307, 0.25149701,
        0.4       ],
       [0.11504425, 0.35958904, 0.12      , ..., 0.65354331, 0.22754491,
        0.27692308],
       [0.12389381, 0.13013699, 0.47      , ..., 0.51181102, 0.19760479,
        0.4       ]])

In [126]:
unique, counts = np.unique(over_sampled_classes, return_counts=True)
print(unique)
print(counts)

['Bad' 'Good' 'Mid']
[ 217  217 1319]


In [127]:
# splits the new dataset in a training set and a test set, using 70% to 30% proportion
X_train, X_test, y_train, y_test = train_test_split(over_sampled_scaled_inputs, over_sampled_classes, 
                                                    test_size=best_test_size, 
                                                    stratify=over_sampled_classes,random_state=42)

### Without Cross Validation metrics

In [128]:
# uses the best architecture, number of epochs and alpha and eta values, all previously obtained,
# for classifying the new examples, without using Stratified K-Fold metrics
random.seed(0)
N = len(X_train[0])
n_classes = len(np.unique(y_train))
best_layers = [Layer(best_N1, N), Layer(best_N2, best_N1), Layer(n_classes, best_N2)]
mlp = MLP.MLPClassifier(*best_layers)
mlp.train(X_train, y_train, eta=best_eta, alpha=best_alpha, tol=1e-4, epoch_max=best_epochs, 
          print_status=False, shuffle=True)

predicted = mlp.predict(X_test)
evaluate(y_test, predicted)

'End of epoch 800. Total Error = 0.141255717398671'

Accuracy: 77.95%
Confusion Matrix and Accuracy per class:
      Bad  Good  Mid  Accuracy
Bad    27     1   37  0.415385
Good    0    24   41  0.369231
Mid    16    21  359  0.906566
Average accuracy per class: 56.37%


(0.779467680608365, 0.5637270137270137)

### With Cross Validation metrics

In [43]:
# uses the best architecture, number of epochs and alpha and eta values, all previously obtained,
# for classifying the new examples, using Stratified K-Fold metrics, for K = 10.
from sklearn.model_selection import StratifiedKFold

skf = StratifiedKFold(n_splits=10)
accuracies = list()

random.seed(0)
best_layers = [Layer(best_N1, N), Layer(best_N2, best_N1), Layer(n_classes, best_N2)]
mlp = MLP.MLPClassifier(*best_layers)

X = over_sampled_scaled_inputs
y = over_sampled_classes

for train_index, test_index in skf.split(X, y):
    print(train_index, test_index)
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]
    mlp.train(X_train, y_train, eta=best_eta, alpha=best_alpha, tol=1e-4, epoch_max=200, 
              print_status=False, shuffle=True)

    predicted  = mlp.predict(X_test)
    acc = evaluate(y_test, predicted, display=False)
    accuracies.append(acc)
    
mean_acc = np.array([])
mean_classes_acc = np.array([])

for i in range(len(accuracies)):
    mean_acc = np.append(mean_acc, accuracies[i][0])
    mean_classes_acc = np.append(mean_classes_acc, accuracies[i][1])

print("Average accuracy: " + '{:.2f}'.format(np.mean(mean_acc)*100) + "%")
print("Average accuracy per class: " + '{:.2f}'.format(np.mean(mean_classes_acc)*100) + "%")

'End of epoch 200. Total Error = 0.1484945660018888'

Average accuracy: 84.83%
Average accuracy per class: 84.83%


### Testing on the complete dataset, trained with the oversampled dataset

In [44]:
X_train, X_test, y_train, y_test = train_test_split(scaled_inputs, classes, test_size=0.3, 
                                                    stratify=classes,random_state=42)

predicted  = mlp.predict(X_test)
acc = evaluate(y_test, predicted)

Accuracy: 80.42%
Confusion Matrix and Accuracy per class:
      Bad  Good  Mid  Accuracy
Bad    13     0    6  0.684211
Good    1    54   10  0.830769
Mid    48    29  319  0.805556
Average accuracy per class: 77.35%


## Complete Dataset

Aqui será feito o treinamento e teste utilizando o conjunto de dados completo, sem utilização de undersampling ou oversampling, com o método de validação cruzada Stratified K-Fold.

In [None]:
# uses the best architecture, number of epochs and alpha and eta values, all previously obtained,
# for classifying the new examples, using Stratified K-Fold metrics, for K = 10.
from sklearn.model_selection import StratifiedKFold

skf = StratifiedKFold(n_splits=10)
accuracies = list()

random.seed(0)
best_layers = [Layer(best_N1, N), Layer(best_N2, best_N1), Layer(n_classes, best_N2)]
mlp = MLP.MLPClassifier(*best_layers)

X = scaled_inputs
y = classes

for train_index, test_index in skf.split(X, y):
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]
    mlp.train(X_train, y_train, eta=best_eta, alpha=best_alpha, tol=1e-4, epoch_max=best_epochs, 
              print_status=False, shuffle=True)

    predicted  = mlp.predict(X_test)
    acc = evaluate(y_test, predicted, display=False)
    accuracies.append(acc)
    
mean_acc = np.array([])
mean_classes_acc = np.array([])

for i in range(len(accuracies)):
    mean_acc = np.append(mean_acc, accuracies[i][0])
    mean_classes_acc = np.append(mean_classes_acc, accuracies[i][1])

print("Average accuracy: " + '{:.2f}'.format(np.mean(mean_acc)*100) + "%")
print("Average accuracy per class: " + '{:.2f}'.format(np.mean(mean_classes_acc)*100) + "%")

# Regressão