# Projeto 1 - Regressão

- Moisés Botarro Ferraz Silva, 8504135
- Thales de Lima Kobosighawa,  9897884
- Victor Rozzatti Tornisiello, 9806867

# Implementação de um MultiLayer Perceptron

Para o problema de Regressão, iremos utilizar a mesma classe MLP criada anteriormente. Entretanto, ao instanciá-la, iremos utilizar o construtor MLPRegressor. A diferença de comportamento em relação ao MLPClassifier ocorre apenas na conversão do vetor de saída da rede neural. Como não trabalhamos com classes na regressão, não há a necessidade de conversão entre os vetores binários que saem da rede e os labels de classes. Podemos tomar os próprios valores que saem da rede como os valores preditos.

In [None]:
import numpy as np
import random
import math
from IPython.display import display, clear_output
from sklearn.utils import shuffle as shuffle_data
import pandas as pd

random.seed(0)

# Layer represents a MLP Layer
# It has two main properties:
#      - a weight matrix containing the weights of the layer's neurons. Each line represents a neuron and
#        the columns represent its corresponding weights
#      - a bias vector, containing the neurons' bias
# The d_weights_current and d_bias_current properties hold the variations computed by the current
# backpropagation pass (they are filled in place by the trainer). The d_weights_old and d_bias_old
# properties hold the variations used by the previous call to update and feed the momentum term
class Layer:
    # Create a new Layer with 'size' neurons, each one linked to 'inputs_size' inputs
    # Weights and bias are drawn uniformly from [-0.1, 0.1] using the 'random' module, so the initial
    # values are reproducible through random.seed
    def __init__(self, size, inputs_size):
        self.size = size
        self.inputs_size = inputs_size
        self.weights = np.array([[random.uniform(-0.1, 0.1) for j in range(inputs_size)] for i in range(size)])
        self.bias = np.array([random.uniform(-0.1,0.1) for i in range(size)])

        self.d_weights_current = np.zeros((size, inputs_size))
        self.d_bias_current = np.zeros(size)
        self.d_weights_old = np.zeros((size, inputs_size))
        self.d_bias_old = np.zeros(size)

    # update applies the current variations to the weights and bias
    #   eta: learning rate, multiplies the current variations
    #   alpha: momentum, multiplies the variations used by the previous update
    def update(self, eta, alpha):
        self.weights = self.weights + eta*self.d_weights_current + alpha*self.d_weights_old
        self.bias = self.bias + eta*self.d_bias_current + alpha*self.d_bias_old

        # Keep a snapshot (not a reference): the current arrays are overwritten in place during the next
        # backpropagation pass, and an alias would make the momentum term follow the new values
        self.d_weights_old = np.copy(self.d_weights_current)
        self.d_bias_old = np.copy(self.d_bias_current)

    # description prints a layer description
    def description(self):
        print("Layer Info")
        print("Weights: \n", self.weights)
        print("Bias: \n ", self.bias)

# logistic computes the sigmoid 1/(1+e^-x) of a scalar x
# For negative x the equivalent form e^x/(1+e^x) is used, so math.exp never receives a large positive
# argument (which would raise OverflowError)
def logistic(x):
    if x >= 0:
        return 1.0/(1.0 + math.exp(-x))

    exp_x = math.exp(x)
    return exp_x/(1.0 + exp_x)

# logistic_vec applies logistic to each element of a numpy array
logistic_vec = np.vectorize(logistic)

# logistic_derivate computes the sigmoid derivative written in terms of the sigmoid output:
# x is expected to be a value already produced by logistic
def logistic_derivate(x):
    return x*(1.0-x)

In [None]:
# MLP is a multilayer perceptron with logistic neurons, trained sample by sample with backpropagation
# and momentum. As a classifier, class labels are mapped to vectors with a single 1; as a regressor,
# the values that leave the network are returned as the predictions
class MLP:
    # MLP creation. One might pass the MLP layers as parameters or add them later using the add_layer method.
    # The classifier parameter defines if the MLP will be used for a classification or regression problem
    def __init__(self, *layers, classifier=True):
        self.classifier = classifier
        if classifier:
            # Map each class label to a vector with a single 1
            # Ex: Class 0 -> [1,0]
            #     Class 1 -> [0,1]
            self.class_mapping = dict()
            # Unmap each class vector to the corresponding class label
            # Ex: [1,0] -> Class 0
            #     [0,1] -> Class 1
            self.class_unmapping = dict()

        self.layers = list()
        for layer in layers:
            self.add_layer(layer)

    # Shortcut to create a classifier MLP
    @classmethod
    def MLPClassifier(cls, *layers):
        return cls(*layers, classifier=True)

    # Shortcut to create a regressor MLP
    @classmethod
    def MLPRegressor(cls, *layers):
        return cls(*layers, classifier=False)

    # add_layer adds a new layer on the MLP
    # Raises ValueError when the new layer does not take as many inputs as the current last layer has
    # neurons; the incompatible layer is not added
    def add_layer(self, layer):
        if self.layers and layer.inputs_size != self.layers[-1].size:
            raise ValueError("The new layer is incompatible with the MLP. "
                             "Please, use a layer where each neuron has the same amount of inputs as the "
                             "number of neurons in the MLP last layer")

        self.layers.append(layer)

    # description prints the info about the MLP layers
    def description(self):
        print("MLP Classifier?: ", self.classifier)
        print("-------------------------")
        print("MLP Info:")
        for i, layer in enumerate(self.layers):
            print("--- Layer: %d ---" % i)
            layer.description()

    # __get_class_mapping gets the class labels in the classes list and builds the mapping dictionaries
    # class_mapping and class_unmapping. Mappings from a previous call are discarded, so the
    # dictionaries always describe the last set of labels only
    def __get_class_mapping(self, classes):
        class_labels = np.unique(classes)

        self.class_mapping = dict()
        self.class_unmapping = dict()

        for position, class_label in enumerate(class_labels):
            class_vector = np.zeros(len(class_labels))
            class_vector[position] = 1

            self.class_mapping[class_label] = class_vector

            # We can't use an array as a hash key. So transform it into a tuple
            self.class_unmapping[tuple(class_vector)] = class_label

    # __convert_class_labels_to_vectors converts a list with class labels to a list with
    # vectors that maps each class label
    def __convert_class_labels_to_vectors(self, class_labels):
        return [self.class_mapping[c] for c in class_labels]

    # __convert_class_vectors_to_labels converts a list with class vectors to a list with
    # the corresponding class labels
    def __convert_class_vectors_to_labels(self, class_vectors):
        return [self.class_unmapping[tuple(class_vector)] for class_vector in class_vectors]

    # fast_forward computes the output for a given input vector
    # As a side effect, self.inputs is replaced by the list with the input received by each layer,
    # which is needed to perform the backpropagation
    # With no layers, the input vector itself is returned
    def fast_forward(self, input_v):
        self.inputs = list()

        # The input is applied in a layer weights matrix and the bias is added in the result
        # Then, the logistic function is applied to each layer neuron result
        # The output of a layer (component i = output of neuron i) is the input of the next one
        signal = input_v
        for layer in self.layers:
            self.inputs.append(signal)
            signal = logistic_vec(layer.weights @ signal + layer.bias)

        return signal

    # train trains the MLP using the examples passed in the samples parameter
    # The expected output for each example must be passed in the classes parameter;
    # eta represents the MLP learning rate;
    # alpha represents the momentum;
    # tol represents the tolerance used by the stop criterion (see the main loop);
    # epoch_max is the maximum number of epochs;
    # print_status prints the output for each example during the training phase;
    # shuffle shuffles the examples at the beginning of each epoch
    def train(self, samples, classes, eta=0.5, alpha=0, tol=1e-2, epoch_max=2000,
              print_status=False, shuffle=True):
        # Map the class labels to output vectors if it's a classification problem
        if self.classifier:
            self.__get_class_mapping(classes)
            classes = self.__convert_class_labels_to_vectors(classes)

        # Initial values chosen only to make the first loop test succeed
        error = tol
        new_error = 3*tol
        epoch = 0
        last_layer = len(self.layers) - 1

        # The training stops when the max number of epochs is reached or when, within an epoch, the mean
        # squared error measured before the weight updates (error) and the one measured right after them
        # (new_error) differ by at most tol. This is meant as the Kramer and Sangiovanni-Vincentelli
        # convergence criterion
        while (abs(new_error - error) > tol and epoch < epoch_max):
            epoch += 1
            error = 0
            new_error = 0

            # Shuffles samples to avoid saturation if training with samples belonging to the same class
            # one after another
            if shuffle:
                samples, classes = shuffle_data(samples, classes)

            n_samples = len(samples)

            for input_v, t in zip(samples, classes):
                # ---- Compute the output for the given input vector ----
                output = self.fast_forward(input_v)
                target = np.asarray(t, dtype=float)

                # Squared error before the backpropagation, summed over the output components
                error += np.sum((target - output)**2)/n_samples

                if print_status:
                    print("\ttraining example: %s from class %s" % (input_v, t), end = " ")
                    print("y = ", output)

                # ---- Backpropagation ----
                # Compute the variations of each layer, from the last one to the first one
                # Remark: the layers are only updated once the backpropagation is finished, because the
                # deltas of the inner layers must be computed with the weights that caused the error
                delta_next_layer = None
                for idx in range(last_layer, -1, -1):
                    layer = self.layers[idx]

                    if idx == last_layer:
                        # Last Layer: one delta per output neuron
                        deltas = (target - output)*logistic_derivate(output)
                    else:
                        # Inner Layer: the output of the current layer is the input of the next one, and
                        # column n of the next layer weights holds the weights applied to neuron n output
                        layer_output = self.inputs[idx+1]
                        next_weights = self.layers[idx+1].weights
                        deltas = (next_weights.T @ delta_next_layer)*logistic_derivate(layer_output)

                    # Variation of weight [n][w] is delta[n]*input[w]; the bias input is 1
                    # The arrays are written in place, as done for the arrays preallocated by Layer
                    layer.d_weights_current[...] = np.outer(deltas, self.inputs[idx])
                    layer.d_bias_current[...] = deltas

                    # The deltas of the current layer are used to compute the deltas of the
                    # next inner layer
                    delta_next_layer = deltas

                # Once the backpropagation is finished for the current example, update all the weights and bias
                for layer in self.layers:
                    layer.update(eta, alpha)

                # Squared error of the same example after the update
                output = self.fast_forward(input_v)
                new_error += np.sum((target - output)**2)/n_samples

            # End of an epoch: show the status after every epoch
            clear_output(wait=True)
            display("End of epoch " + str(epoch) + ". Total Error = " + str(new_error))

        # End of training
        clear_output(wait=True)
        display("End of epoch " + str(epoch) + ". Total Error = " + str(new_error))

    # predict gets a list of input samples and returns a list with the predicted outputs:
    # class labels for a classifier, the network output vectors for a regressor
    def predict(self, samples):
        outputs = list()
        for input_v in samples:
            probs = self.fast_forward(input_v)

            if self.classifier:
                # Keep only the strongest output as a vector with a single 1
                class_vector = np.zeros(len(probs))
                class_vector[np.argmax(probs)] = 1
                outputs.append(class_vector)
            else:
                outputs.append(probs)

        if self.classifier:
            return self.__convert_class_vectors_to_labels(outputs)

        return outputs

# Pré Processamento dos Dados

Assim como feito na classificação, iremos pré-tratar os dados de forma que todos os atributos e valores de saída de cada amostra da base de dados estejam no intervalo [0,1]. Dessa forma, evitamos a saturação dos neurônios da rede. Além disso, como a função de ativação utilizada é a sigmoide, os seus valores possíveis de saída estão entre 0 e 1.

In [None]:
# normalize_data transforms each column of a 2D data set so that it has mean 0 and variance 1
# A constant column (standard deviation 0) is mapped to 0 instead of producing NaN from a 0/0 division
def normalize_data(data):
    data = np.asarray(data, dtype=float)

    col_std = data.std(axis=0)
    # Dividing by 1 leaves the (already zero) centered values of a constant column untouched
    col_std = np.where(col_std == 0, 1.0, col_std)

    return (data - data.mean(axis=0))/col_std

In [None]:
# scale_data transforms each column of a 2D data set so that its values lie in the interval [0,1]
# A constant column (max equal to min) is mapped to 0 instead of producing NaN from a 0/0 division
def scale_data(data):
    data = np.asarray(data, dtype=float)

    col_min = data.min(axis=0)
    col_range = data.max(axis=0) - col_min
    # Dividing by 1 leaves the (already zero) shifted values of a constant column untouched
    col_range = np.where(col_range == 0, 1.0, col_range)

    return (data - col_min)/col_range

# Avaliação do Modelo

Para avaliação do modelo de regressão, iremos calcular o erro quadrático médio existente entre os valores preditos e os valores esperados.

In [None]:
from sklearn.metrics import mean_squared_error
import pandas as pd

# evaluate prints the mean squared error between the expected and the predicted outputs
# and returns it
def evaluate(real_outputs, predicted_outputs):
    mse = mean_squared_error(real_outputs, predicted_outputs)
    print("MSE: %.4f" % mse)
    return mse

# Estudos dos Meta Parâmetros

## Leitura do Data Set

Vamos ler nosso conjunto de dados e escalá-lo, inclusive as saídas esperadas, para que todos os valores fiquem entre 0 e 1, evitando a saturação dos neurônios.

In [None]:
# Read the data set from a CSV file without a header row
df = pd.read_csv('default_features_1059_tracks.txt', header=None)
# NOTE(review): this is not the last expression of the cell, so a notebook will presumably not render it
df.head(5)

# Plain array with the DataFrame values, as expected by scale_data
data = df.values

In [None]:
# Scale every column (inputs and outputs) to be in the range [0,1]
scaled = scale_data(data)

O conjunto de dados consiste em 68 features, seguidas de duas últimas colunas que se referem à origem de cada música, representando sua longitude e latitude.

In [None]:
# Every column except the last two is a network input
inputs = scaled[:,:-2]
# The last two columns are the expected outputs
outputs = scaled[:,-2:]
# One output neuron per output column
n_outputs = 2

Novamente, vamos realizar as análises referentes à arquitetura da rede e parâmetros de aprendizado dividindo o conjunto de dados em 70% de treinamento e 30% de teste.

## Divisão em Conjunto de Treinamento e Teste


In [None]:
from sklearn.model_selection import train_test_split

# Hold out 30% of the samples for testing; random_state is fixed so the split is always the same
X_train, X_test, y_train, y_test = train_test_split(inputs, outputs, test_size=0.3, random_state=42)

## Teste de diferentes Arquiteturas de Camadas

Assim como feito para a classificação, vamos considerar diferentes arquiteturas de rede, com uma e duas camadas. Entretanto, ao variar o número de neurônios, não o igualaremos ao número de features, à sua metade e ao seu dobro, uma vez que o treinamento para uma camada com apenas 10 neurônios já é lento. Consideraremos camadas com 1/4, 1/2 e igual ao número de features.

Utilizaremos apenas 100 epochs (ciclos de treinamento) durante essa etapa!

In [None]:
# Maximum number of epochs used while comparing the architectures
epochs = 100
# Test MSE of each architecture, keyed by (neurons in the 1st hidden layer, neurons in the 2nd one)
# A 0 in the second position means that there is no second hidden layer
accs = dict()

### 1 Layer - 17 neurons

In [None]:
# Architecture: N inputs -> N1 hidden neurons -> n_outputs outputs
N1 = 17
N = len(X_train[0])

# Reseed so the layers are always created with the same initial weights
random.seed(0)
mlp = MLP.MLPRegressor(Layer(N1, N), Layer(n_outputs, N1))
mlp.train(X_train, y_train, eta=0.5, alpha=0.5, tol=1e-4, epoch_max=epochs, print_status=False, shuffle=True)

In [None]:
# Store the test MSE under (N1, 0): no second hidden layer
predicted = mlp.predict(X_test)
accs[(N1, 0)] = evaluate(y_test, predicted)

### 1 Layer -  34 neurons

In [None]:
# Architecture: N inputs -> N1 hidden neurons -> n_outputs outputs
N1 = 34
N = len(X_train[0])
n_outputs = len(y_train[0])

# Reseed so the layers are always created with the same initial weights
random.seed(0)
mlp = MLP.MLPRegressor(Layer(N1, N), Layer(n_outputs, N1))
mlp.train(X_train, y_train, eta=0.5, alpha=0.5, tol=1e-4, epoch_max=epochs, print_status=False, shuffle=True)

In [None]:
# Store the test MSE under (N1, 0): no second hidden layer
predicted = mlp.predict(X_test)
accs[(N1, 0)] = evaluate(y_test, predicted)

### 1 Layer - 68 neurons

In [None]:
# Architecture: N inputs -> N1 hidden neurons -> n_outputs outputs
N1 = 68
N = len(X_train[0])
n_outputs = len(y_train[0])

# Reseed so the layers are always created with the same initial weights
random.seed(0)
mlp = MLP.MLPRegressor(Layer(N1, N), Layer(n_outputs, N1))
mlp.train(X_train, y_train, eta=0.5, alpha=0.5, tol=1e-4, epoch_max=epochs, print_status=False, shuffle=True)

In [None]:
# Store the test MSE under (N1, 0): no second hidden layer
predicted = mlp.predict(X_test)
accs[(N1, 0)] = evaluate(y_test, predicted)

### 2 layers: 17 Neurons - 17 Neurons

In [None]:
# Architecture: N inputs -> N1 hidden neurons -> N2 hidden neurons -> n_outputs outputs
N1 = 17
N2 = 17
N = len(X_train[0])
n_outputs = len(y_train[0])

# Reseed so the layers are always created with the same initial weights
random.seed(0)
mlp = MLP.MLPRegressor(Layer(N1, N), Layer(N2, N1),Layer(n_outputs, N2))
mlp.train(X_train, y_train, eta=0.5, alpha=0.5, tol=1e-4, epoch_max=epochs, print_status=False, shuffle=True)

In [None]:
# Store the test MSE under the key (N1, N2)
predicted = mlp.predict(X_test)
accs[(N1, N2)] = evaluate(y_test, predicted)

### 2 layers: 17 Neurons - 34 Neurons

In [None]:
# Architecture: N inputs -> N1 hidden neurons -> N2 hidden neurons -> n_outputs outputs
N1 = 17
N2 = 34
N = len(X_train[0])
n_outputs = len(y_train[0])

# Reseed so the layers are always created with the same initial weights
random.seed(0)
mlp = MLP.MLPRegressor(Layer(N1, N), Layer(N2, N1),Layer(n_outputs, N2))
mlp.train(X_train, y_train, eta=0.5, alpha=0.5, tol=1e-4, epoch_max=epochs, print_status=False, shuffle=True)

In [None]:
# Store the test MSE under the key (N1, N2)
predicted = mlp.predict(X_test)
accs[(N1, N2)] = evaluate(y_test, predicted)

### 2 layers: 17 Neurons - 68 Neurons

In [None]:
# Architecture: N inputs -> N1 hidden neurons -> N2 hidden neurons -> n_outputs outputs
N1 = 17
N2 = 68
N = len(X_train[0])
n_outputs = len(y_train[0])

# Reseed so the layers are always created with the same initial weights
random.seed(0)
mlp = MLP.MLPRegressor(Layer(N1, N), Layer(N2, N1),Layer(n_outputs, N2))
mlp.train(X_train, y_train, eta=0.5, alpha=0.5, tol=1e-4, epoch_max=epochs, print_status=False, shuffle=True)

In [None]:
# Store the test MSE under the key (N1, N2)
predicted = mlp.predict(X_test)
accs[(N1, N2)] = evaluate(y_test, predicted)

### 2 layers: 34 Neurons - 17 Neurons

In [None]:
# Architecture: N inputs -> N1 hidden neurons -> N2 hidden neurons -> n_outputs outputs
N1 = 34
N2 = 17
N = len(X_train[0])
n_outputs = len(y_train[0])

# Reseed so the layers are always created with the same initial weights
random.seed(0)
mlp = MLP.MLPRegressor(Layer(N1, N), Layer(N2, N1),Layer(n_outputs, N2))
mlp.train(X_train, y_train, eta=0.5, alpha=0.5, tol=1e-4, epoch_max=epochs, print_status=False, shuffle=True)

In [None]:
# Store the test MSE under the key (N1, N2)
predicted = mlp.predict(X_test)
accs[(N1, N2)] = evaluate(y_test, predicted)

### 2 layers: 34 Neurons - 34 Neurons

In [None]:
# Architecture: N inputs -> N1 hidden neurons -> N2 hidden neurons -> n_outputs outputs
N1 = 34
N2 = 34
N = len(X_train[0])
n_outputs = len(y_train[0])

# Reseed so the layers are always created with the same initial weights
random.seed(0)
mlp = MLP.MLPRegressor(Layer(N1, N), Layer(N2, N1),Layer(n_outputs, N2))
mlp.train(X_train, y_train, eta=0.5, alpha=0.5, tol=1e-4, epoch_max=epochs, print_status=False, shuffle=True)

In [None]:
# Store the test MSE under the key (N1, N2)
predicted = mlp.predict(X_test)
accs[(N1, N2)] = evaluate(y_test, predicted)

### 2 layers: 34 Neurons - 68 Neurons

In [None]:
# Architecture: N inputs -> N1 hidden neurons -> N2 hidden neurons -> n_outputs outputs
N1 = 34
N2 = 68
N = len(X_train[0])
n_outputs = len(y_train[0])

# Reseed so the layers are always created with the same initial weights
random.seed(0)
mlp = MLP.MLPRegressor(Layer(N1, N), Layer(N2, N1),Layer(n_outputs, N2))
mlp.train(X_train, y_train, eta=0.5, alpha=0.5, tol=1e-4, epoch_max=epochs, print_status=False, shuffle=True)

In [None]:
# Store the test MSE under the key (N1, N2)
predicted = mlp.predict(X_test)
accs[(N1, N2)] = evaluate(y_test, predicted)

### 2 layers: 68 Neurons - 17 Neurons

In [None]:
# Architecture: N inputs -> N1 hidden neurons -> N2 hidden neurons -> n_outputs outputs
N1 = 68
N2 = 17
N = len(X_train[0])
n_outputs = len(y_train[0])

# Reseed so the layers are always created with the same initial weights
random.seed(0)
mlp = MLP.MLPRegressor(Layer(N1, N), Layer(N2, N1),Layer(n_outputs, N2))
mlp.train(X_train, y_train, eta=0.5, alpha=0.5, tol=1e-4, epoch_max=epochs, print_status=False, shuffle=True)

In [None]:
# Store the test MSE under the key (N1, N2)
predicted = mlp.predict(X_test)
accs[(N1, N2)] = evaluate(y_test, predicted)

### 2 layers: 68 Neurons - 34 Neurons

In [None]:
# Architecture: N inputs -> N1 hidden neurons -> N2 hidden neurons -> n_outputs outputs
N1 = 68
N2 = 34
N = len(X_train[0])
n_outputs = len(y_train[0])

# Reseed so the layers are always created with the same initial weights
random.seed(0)
mlp = MLP.MLPRegressor(Layer(N1, N), Layer(N2, N1),Layer(n_outputs, N2))
mlp.train(X_train, y_train, eta=0.5, alpha=0.5, tol=1e-4, epoch_max=epochs, print_status=False, shuffle=True)

In [None]:
# Store the test MSE under the key (N1, N2)
predicted = mlp.predict(X_test)
accs[(N1, N2)] = evaluate(y_test, predicted)

### 2 layers: 68 Neurons - 68 Neurons

In [None]:
# Architecture: N inputs -> N1 hidden neurons -> N2 hidden neurons -> n_outputs outputs
N1 = 68
N2 = 68
N = len(X_train[0])
n_outputs = len(y_train[0])

# Reseed so the layers are always created with the same initial weights
random.seed(0)
mlp = MLP.MLPRegressor(Layer(N1, N), Layer(N2, N1),Layer(n_outputs, N2))
mlp.train(X_train, y_train, eta=0.5, alpha=0.5, tol=1e-4, epoch_max=epochs, print_status=False, shuffle=True)

In [None]:
# Store the test MSE under the key (N1, N2)
predicted = mlp.predict(X_test)
accs[(N1, N2)] = evaluate(y_test, predicted)

In [None]:
# Show the test MSE collected for each architecture
accs

In [None]:
# Select the architecture with the lowest test MSE (despite the "acc" names, the values are MSEs)
# NOTE: the initial value 1 assumes that at least one MSE is below 1; otherwise best_N1 and best_N2
# keep the value 0
best_acc = 1
best_N1 = 0
best_N2 = 0
for layers, acc in accs.items():
    if acc < best_acc:
        best_acc = acc
        # layers is the (N1, N2) key; N2 is 0 when there is a single hidden layer
        best_N1 = layers[0]
        best_N2 = layers[1]

print("best MSE: ", best_acc)
print("best N1: ", best_N1)
print("best N2: ", best_N2)

## Número de epochs usadas durante o Treinamento

In [None]:
# Test MSE of each run, keyed by the maximum number of epochs used in the training
accs = dict()

# 100 epochs

In [None]:
# Maximum number of epochs for this run
epochs = 100

# Reseed so the layers are always created with the same initial weights
random.seed(0)
# Rebuild the best architecture: one hidden layer when best_N2 is 0, two otherwise
best_layers = [Layer(best_N1, N), ]
if best_N2 != 0:
    best_layers.extend([Layer(best_N2, best_N1), Layer(n_outputs, best_N2)])
else:
    best_layers.extend([Layer(n_outputs, best_N1)])

mlp = MLP.MLPRegressor(*best_layers)
mlp.train(X_train, y_train, eta=0.5, alpha=0.5, tol=1e-4, epoch_max=epochs, print_status=False, shuffle=True)

# Predictions on both sets, so the training and the test errors can be compared
predicted_train = mlp.predict(X_train)
predicted_test = mlp.predict(X_test)

In [None]:
# Report the MSE on both sets; only the test MSE is stored, keyed by the number of epochs
print("=== TRAINING SET ===")
evaluate(y_train, predicted_train)
print("=== TEST SET ===")
accs[epochs] = evaluate(y_test, predicted_test)

# 200 epochs

In [None]:
# Maximum number of epochs for this run
epochs = 200

# Reseed so the layers are always created with the same initial weights
random.seed(0)
# Rebuild the best architecture: one hidden layer when best_N2 is 0, two otherwise
best_layers = [Layer(best_N1, N), ]
if best_N2 != 0:
    best_layers.extend([Layer(best_N2, best_N1), Layer(n_outputs, best_N2)])
else:
    best_layers.extend([Layer(n_outputs, best_N1)])

mlp = MLP.MLPRegressor(*best_layers)
mlp.train(X_train, y_train, eta=0.5, alpha=0.5, tol=1e-4, epoch_max=epochs, print_status=False, shuffle=True)

# Predictions on both sets, so the training and the test errors can be compared
predicted_train = mlp.predict(X_train)
predicted_test = mlp.predict(X_test)

In [None]:
# Report the MSE on both sets; only the test MSE is stored, keyed by the number of epochs
print("=== TRAINING SET ===")
evaluate(y_train, predicted_train)
print("=== TEST SET ===")
accs[epochs] = evaluate(y_test, predicted_test)

# 400 epochs

In [None]:
# Maximum number of epochs for this run
epochs = 400

# Reseed so the layers are always created with the same initial weights
random.seed(0)
# Rebuild the best architecture: one hidden layer when best_N2 is 0, two otherwise
best_layers = [Layer(best_N1, N), ]
if best_N2 != 0:
    best_layers.extend([Layer(best_N2, best_N1), Layer(n_outputs, best_N2)])
else:
    best_layers.extend([Layer(n_outputs, best_N1)])

mlp = MLP.MLPRegressor(*best_layers)
mlp.train(X_train, y_train, eta=0.5, alpha=0.5, tol=1e-4, epoch_max=epochs, print_status=False, shuffle=True)

# Predictions on both sets, so the training and the test errors can be compared
predicted_train = mlp.predict(X_train)
predicted_test = mlp.predict(X_test)

In [None]:
# Report the MSE on both sets; only the test MSE is stored, keyed by the number of epochs
print("=== TRAINING SET ===")
evaluate(y_train, predicted_train)
print("=== TEST SET ===")
accs[epochs] = evaluate(y_test, predicted_test)

# 800 epochs

In [None]:
# Maximum number of epochs for this run
epochs = 800

# Reseed so the layers are always created with the same initial weights
random.seed(0)
# Rebuild the best architecture: one hidden layer when best_N2 is 0, two otherwise
best_layers = [Layer(best_N1, N), ]
if best_N2 != 0:
    best_layers.extend([Layer(best_N2, best_N1), Layer(n_outputs, best_N2)])
else:
    best_layers.extend([Layer(n_outputs, best_N1)])

mlp = MLP.MLPRegressor(*best_layers)
mlp.train(X_train, y_train, eta=0.5, alpha=0.5, tol=1e-4, epoch_max=epochs, print_status=False, shuffle=True)

# Predictions on both sets, so the training and the test errors can be compared
predicted_train = mlp.predict(X_train)
predicted_test = mlp.predict(X_test)

In [None]:
# Report the MSE on both sets; only the test MSE is stored, keyed by the number of epochs
print("=== TRAINING SET ===")
evaluate(y_train, predicted_train)
print("=== TEST SET ===")
accs[epochs] = evaluate(y_test, predicted_test)

# 1000 epochs

In [None]:
# Maximum number of epochs for this run
epochs = 1000

# Reseed so the layers are always created with the same initial weights
random.seed(0)
# Rebuild the best architecture: one hidden layer when best_N2 is 0, two otherwise
best_layers = [Layer(best_N1, N), ]
if best_N2 != 0:
    best_layers.extend([Layer(best_N2, best_N1), Layer(n_outputs, best_N2)])
else:
    best_layers.extend([Layer(n_outputs, best_N1)])

mlp = MLP.MLPRegressor(*best_layers)
mlp.train(X_train, y_train, eta=0.5, alpha=0.5, tol=1e-4, epoch_max=epochs, print_status=False, shuffle=True)

# Predictions on both sets, so the training and the test errors can be compared
predicted_train = mlp.predict(X_train)
predicted_test = mlp.predict(X_test)

In [None]:
# Report the MSE on both sets; only the test MSE is stored, keyed by the number of epochs
print("=== TRAINING SET ===")
evaluate(y_train, predicted_train)
print("=== TEST SET ===")
accs[epochs] = evaluate(y_test, predicted_test)

In [None]:
# Show the test MSE collected for each number of epochs
accs

In [None]:
# Select the number of epochs with the lowest test MSE
# NOTE: the initial value 1 assumes that at least one MSE is below 1; otherwise best_epoch stays 0
best_epoch = 0
best_mse = 1

for epoch, mse in accs.items():
    if mse < best_mse:
        best_mse = mse
        best_epoch = epoch

print("Best MSE:", best_mse)
print("Best epoch:", best_epoch)

# Adicionar comentário sobre possível overfitting

## Learning Rate and Momentum

In [None]:
# Test MSE of each run, keyed by (learning rate, momentum)
accs = dict()

### Learning Rate = 0.3 e Momentum = 0.3

In [None]:
# Values under test: learning rate (eta) and momentum (alpha)
eta = 0.3
alpha = 0.3

# Reseed so the layers are always created with the same initial weights
random.seed(0)
# Rebuild the best architecture: one hidden layer when best_N2 is 0, two otherwise
best_layers = [Layer(best_N1, N), ]
if best_N2 != 0:
    best_layers.extend([Layer(best_N2, best_N1), Layer(n_outputs, best_N2)])
else:
    best_layers.extend([Layer(n_outputs, best_N1)])

mlp = MLP.MLPRegressor(*best_layers)
mlp.train(X_train, y_train, eta=eta, alpha=alpha, tol=1e-4, epoch_max=best_epoch, print_status=False, shuffle=True)

# Store the test MSE under the key (eta, alpha)
predicted_test = mlp.predict(X_test)
accs[(eta,alpha)] = evaluate(y_test, predicted_test)

### Learning Rate = 0.3 e Momentum = 0.5

In [None]:
# Values under test: learning rate (eta) and momentum (alpha)
eta = 0.3
alpha = 0.5

# Reseed so the layers are always created with the same initial weights
random.seed(0)
# Rebuild the best architecture: one hidden layer when best_N2 is 0, two otherwise
best_layers = [Layer(best_N1, N), ]
if best_N2 != 0:
    best_layers.extend([Layer(best_N2, best_N1), Layer(n_outputs, best_N2)])
else:
    best_layers.extend([Layer(n_outputs, best_N1)])

mlp = MLP.MLPRegressor(*best_layers)
mlp.train(X_train, y_train, eta=eta, alpha=alpha, tol=1e-4, epoch_max=best_epoch, print_status=False, shuffle=True)

# Store the test MSE under the key (eta, alpha)
predicted_test = mlp.predict(X_test)
accs[(eta,alpha)] = evaluate(y_test, predicted_test)

### Learning Rate = 0.3 e Momentum = 0.8

In [None]:
# Values under test: learning rate (eta) and momentum (alpha)
eta = 0.3
alpha = 0.8

# Reseed so the layers are always created with the same initial weights
random.seed(0)
# Rebuild the best architecture: one hidden layer when best_N2 is 0, two otherwise
best_layers = [Layer(best_N1, N), ]
if best_N2 != 0:
    best_layers.extend([Layer(best_N2, best_N1), Layer(n_outputs, best_N2)])
else:
    best_layers.extend([Layer(n_outputs, best_N1)])

mlp = MLP.MLPRegressor(*best_layers)
mlp.train(X_train, y_train, eta=eta, alpha=alpha, tol=1e-4, epoch_max=best_epoch, print_status=False, shuffle=True)

# Store the test MSE under the key (eta, alpha)
predicted_test = mlp.predict(X_test)
accs[(eta,alpha)] = evaluate(y_test, predicted_test)

### Learning Rate = 0.5 e Momentum = 0.3

In [None]:
# Values under test: learning rate (eta) and momentum (alpha)
eta = 0.5
alpha = 0.3

# Reseed so the layers are always created with the same initial weights
random.seed(0)
# Rebuild the best architecture: one hidden layer when best_N2 is 0, two otherwise
best_layers = [Layer(best_N1, N), ]
if best_N2 != 0:
    best_layers.extend([Layer(best_N2, best_N1), Layer(n_outputs, best_N2)])
else:
    best_layers.extend([Layer(n_outputs, best_N1)])

mlp = MLP.MLPRegressor(*best_layers)
mlp.train(X_train, y_train, eta=eta, alpha=alpha, tol=1e-4, epoch_max=best_epoch, print_status=False, shuffle=True)

# Store the test MSE under the key (eta, alpha)
predicted_test = mlp.predict(X_test)
accs[(eta,alpha)] = evaluate(y_test, predicted_test)

### Learning Rate = 0.5 e Momentum = 0.5

In [None]:
# Values under test: learning rate (eta) and momentum (alpha)
eta = 0.5
alpha = 0.5

# Reseed so the layers are always created with the same initial weights
random.seed(0)
# Rebuild the best architecture: one hidden layer when best_N2 is 0, two otherwise
best_layers = [Layer(best_N1, N), ]
if best_N2 != 0:
    best_layers.extend([Layer(best_N2, best_N1), Layer(n_outputs, best_N2)])
else:
    best_layers.extend([Layer(n_outputs, best_N1)])

mlp = MLP.MLPRegressor(*best_layers)
mlp.train(X_train, y_train, eta=eta, alpha=alpha, tol=1e-4, epoch_max=best_epoch, print_status=False, shuffle=True)

# Store the test MSE under the key (eta, alpha)
predicted_test = mlp.predict(X_test)
accs[(eta,alpha)] = evaluate(y_test, predicted_test)

### Learning Rate = 0.5 e Momentum = 0.8

In [None]:
# Values under test: learning rate (eta) and momentum (alpha)
eta = 0.5
alpha = 0.8

# Reseed so the layers are always created with the same initial weights
random.seed(0)
# Rebuild the best architecture: one hidden layer when best_N2 is 0, two otherwise
best_layers = [Layer(best_N1, N), ]
if best_N2 != 0:
    best_layers.extend([Layer(best_N2, best_N1), Layer(n_outputs, best_N2)])
else:
    best_layers.extend([Layer(n_outputs, best_N1)])

mlp = MLP.MLPRegressor(*best_layers)
mlp.train(X_train, y_train, eta=eta, alpha=alpha, tol=1e-4, epoch_max=best_epoch, print_status=False, shuffle=True)

# Store the test MSE under the key (eta, alpha)
predicted_test = mlp.predict(X_test)
accs[(eta,alpha)] = evaluate(y_test, predicted_test)

### Learning Rate = 0.8 e Momentum = 0.3

In [None]:
# Values under test: learning rate (eta) and momentum (alpha)
eta = 0.8
alpha = 0.3

# Reseed so the layers are always created with the same initial weights
random.seed(0)
# Rebuild the best architecture: one hidden layer when best_N2 is 0, two otherwise
best_layers = [Layer(best_N1, N), ]
if best_N2 != 0:
    best_layers.extend([Layer(best_N2, best_N1), Layer(n_outputs, best_N2)])
else:
    best_layers.extend([Layer(n_outputs, best_N1)])

mlp = MLP.MLPRegressor(*best_layers)
mlp.train(X_train, y_train, eta=eta, alpha=alpha, tol=1e-4, epoch_max=best_epoch, print_status=False, shuffle=True)

# Store the test MSE under the key (eta, alpha)
predicted_test = mlp.predict(X_test)
accs[(eta,alpha)] = evaluate(y_test, predicted_test)

### Learning Rate = 0.8 e Momentum = 0.5

In [None]:
# Values under test: learning rate (eta) and momentum (alpha)
eta = 0.8
alpha = 0.5

# Reseed so the layers are always created with the same initial weights
random.seed(0)
# Rebuild the best architecture: one hidden layer when best_N2 is 0, two otherwise
best_layers = [Layer(best_N1, N), ]
if best_N2 != 0:
    best_layers.extend([Layer(best_N2, best_N1), Layer(n_outputs, best_N2)])
else:
    best_layers.extend([Layer(n_outputs, best_N1)])

mlp = MLP.MLPRegressor(*best_layers)
mlp.train(X_train, y_train, eta=eta, alpha=alpha, tol=1e-4, epoch_max=best_epoch, print_status=False, shuffle=True)

# Store the test MSE under the key (eta, alpha)
predicted_test = mlp.predict(X_test)
accs[(eta,alpha)] = evaluate(y_test, predicted_test)

### Learning Rate = 0.8 e Momentum = 0.8

In [None]:
# Values under test: learning rate (eta) and momentum (alpha)
eta = 0.8
alpha = 0.8

# Reseed so the layers are always created with the same initial weights
random.seed(0)
# Rebuild the best architecture: one hidden layer when best_N2 is 0, two otherwise
best_layers = [Layer(best_N1, N), ]
if best_N2 != 0:
    best_layers.extend([Layer(best_N2, best_N1), Layer(n_outputs, best_N2)])
else:
    best_layers.extend([Layer(n_outputs, best_N1)])

mlp = MLP.MLPRegressor(*best_layers)
mlp.train(X_train, y_train, eta=eta, alpha=alpha, tol=1e-4, epoch_max=best_epoch, print_status=False, shuffle=True)

# Store the test MSE under the key (eta, alpha)
predicted_test = mlp.predict(X_test)
accs[(eta,alpha)] = evaluate(y_test, predicted_test)

### Learning Rate = 1 e Momentum = 1

In [None]:
# Values under test: learning rate (eta) and momentum (alpha)
eta = 1
alpha = 1

# Reseed so the layers are always created with the same initial weights
random.seed(0)
# Rebuild the best architecture: one hidden layer when best_N2 is 0, two otherwise
best_layers = [Layer(best_N1, N), ]
if best_N2 != 0:
    best_layers.extend([Layer(best_N2, best_N1), Layer(n_outputs, best_N2)])
else:
    best_layers.extend([Layer(n_outputs, best_N1)])

mlp = MLP.MLPRegressor(*best_layers)
mlp.train(X_train, y_train, eta=eta, alpha=alpha, tol=1e-4, epoch_max=best_epoch, print_status=False, shuffle=True)

# Store the test MSE under the key (eta, alpha)
predicted_test = mlp.predict(X_test)
accs[(eta,alpha)] = evaluate(y_test, predicted_test)

In [None]:
# Show the test MSE collected for each (learning rate, momentum) pair
accs

In [None]:
# Table with one row per learning rate and one column per momentum, filled with the test MSEs
total_mse = pd.DataFrame()

# Select the (learning rate, momentum) pair with the lowest test MSE
# NOTE: the initial value 1 assumes that at least one MSE is below 1; otherwise best_eta and
# best_alpha keep the value 0
best_eta = 0
best_alpha = 0
best_mse = 1
for run, mse in accs.items():
    # run is the (eta, alpha) key; the values are used as text labels in the table
    total_mse.at[str(run[0]), str(run[1])] = mse
    if mse < best_mse:
        best_mse = mse
        best_eta = run[0]
        best_alpha = run[1]
    
print(total_mse)

print("Best MSE: ", best_mse)
print("Best eta: ", best_eta)
print("Best alpha: ", best_alpha)

## Variação do tamanho dos conjuntos de treinamento e teste

Agora, vamos variar o tamanho dos conjuntos de treinamento e teste utilizando a melhor arquitetura encontrada acima e os melhores valores de learning rate e momentum. Utilizaremos inicialmente 70% dos dados para treinamento, aumentando gradativamente esse valor até 95%.

### 70% for training, 30% for test

In [None]:
# Test MSE of each run, keyed by the fraction of the data used for testing
accs = dict()

In [None]:
# Fraction of the data set held out for testing
test_size = 0.3

X_train, X_test, y_train, y_test = train_test_split(inputs, outputs, test_size=test_size,random_state=42)

# Reseed so the layers are always created with the same initial weights
random.seed(0)
# Rebuild the best architecture: one hidden layer when best_N2 is 0, two otherwise
best_layers = [Layer(best_N1, N), ]
if best_N2 != 0:
    best_layers.extend([Layer(best_N2, best_N1), Layer(n_outputs, best_N2)])
else:
    best_layers.extend([Layer(n_outputs, best_N1)])

mlp = MLP.MLPRegressor(*best_layers)
mlp.train(X_train, y_train, eta=best_eta, alpha=best_alpha, tol=1e-4, epoch_max=best_epoch, 
          print_status=False, shuffle=True)

# Store the test MSE under the test fraction
predicted_test = mlp.predict(X_test)
accs[test_size] = evaluate(y_test, predicted_test)

### 75% for training, 25% for test

In [None]:
# Fraction of the data set held out for testing
test_size = 0.25

X_train, X_test, y_train, y_test = train_test_split(inputs, outputs, test_size=test_size,random_state=42)

# Reseed so the layers are always created with the same initial weights
random.seed(0)
# Rebuild the best architecture: one hidden layer when best_N2 is 0, two otherwise
best_layers = [Layer(best_N1, N), ]
if best_N2 != 0:
    best_layers.extend([Layer(best_N2, best_N1), Layer(n_outputs, best_N2)])
else:
    best_layers.extend([Layer(n_outputs, best_N1)])

mlp = MLP.MLPRegressor(*best_layers)
mlp.train(X_train, y_train, eta=best_eta, alpha=best_alpha, tol=1e-4, epoch_max=best_epoch, 
          print_status=False, shuffle=True)

# Store the test MSE under the test fraction
predicted_test = mlp.predict(X_test)
accs[test_size] = evaluate(y_test, predicted_test)

### 80% for training, 20% for test

In [None]:
# Fraction of the data set held out for testing
test_size = 0.2

X_train, X_test, y_train, y_test = train_test_split(inputs, outputs, test_size=test_size,random_state=42)

# Reseed so the layers are always created with the same initial weights
random.seed(0)
# Rebuild the best architecture: one hidden layer when best_N2 is 0, two otherwise
best_layers = [Layer(best_N1, N), ]
if best_N2 != 0:
    best_layers.extend([Layer(best_N2, best_N1), Layer(n_outputs, best_N2)])
else:
    best_layers.extend([Layer(n_outputs, best_N1)])

mlp = MLP.MLPRegressor(*best_layers)
mlp.train(X_train, y_train, eta=best_eta, alpha=best_alpha, tol=1e-4, epoch_max=best_epoch, 
          print_status=False, shuffle=True)

# Store the test MSE under the test fraction
predicted_test = mlp.predict(X_test)
accs[test_size] = evaluate(y_test, predicted_test)

### 85% for training, 15% for test

In [None]:
# Fraction of the data set held out for testing
test_size = 0.15

X_train, X_test, y_train, y_test = train_test_split(inputs, outputs, test_size=test_size,random_state=42)

# Reseed so the layers are always created with the same initial weights
random.seed(0)
# Rebuild the best architecture: one hidden layer when best_N2 is 0, two otherwise
best_layers = [Layer(best_N1, N), ]
if best_N2 != 0:
    best_layers.extend([Layer(best_N2, best_N1), Layer(n_outputs, best_N2)])
else:
    best_layers.extend([Layer(n_outputs, best_N1)])

mlp = MLP.MLPRegressor(*best_layers)
mlp.train(X_train, y_train, eta=best_eta, alpha=best_alpha, tol=1e-4, epoch_max=best_epoch, 
          print_status=False, shuffle=True)

# Store the test MSE under the test fraction
predicted_test = mlp.predict(X_test)
accs[test_size] = evaluate(y_test, predicted_test)

### 90% for training, 10% for test

In [None]:
# Fraction of the data set held out for testing
test_size = 0.1

X_train, X_test, y_train, y_test = train_test_split(inputs, outputs, test_size=test_size,random_state=42)

# Reseed so the layers are always created with the same initial weights
random.seed(0)
# Rebuild the best architecture: one hidden layer when best_N2 is 0, two otherwise
best_layers = [Layer(best_N1, N), ]
if best_N2 != 0:
    best_layers.extend([Layer(best_N2, best_N1), Layer(n_outputs, best_N2)])
else:
    best_layers.extend([Layer(n_outputs, best_N1)])

mlp = MLP.MLPRegressor(*best_layers)
mlp.train(X_train, y_train, eta=best_eta, alpha=best_alpha, tol=1e-4, epoch_max=best_epoch, 
          print_status=False, shuffle=True)

# Store the test MSE under the test fraction
predicted_test = mlp.predict(X_test)
accs[test_size] = evaluate(y_test, predicted_test)

### 95% for training, 5% for test

In [None]:
# Fraction of the data set held out for testing
test_size = 0.05

X_train, X_test, y_train, y_test = train_test_split(inputs, outputs, test_size=test_size,random_state=42)

# Reseed so the layers are always created with the same initial weights
random.seed(0)
# Rebuild the best architecture: one hidden layer when best_N2 is 0, two otherwise
best_layers = [Layer(best_N1, N), ]
if best_N2 != 0:
    best_layers.extend([Layer(best_N2, best_N1), Layer(n_outputs, best_N2)])
else:
    best_layers.extend([Layer(n_outputs, best_N1)])

mlp = MLP.MLPRegressor(*best_layers)
mlp.train(X_train, y_train, eta=best_eta, alpha=best_alpha, tol=1e-4, epoch_max=best_epoch, 
          print_status=False, shuffle=True)

# Store the test MSE under the test fraction
predicted_test = mlp.predict(X_test)
accs[test_size] = evaluate(y_test, predicted_test)

In [None]:
# Show the test MSE collected for each test fraction
accs

In [None]:
# Select the test fraction with the lowest test MSE
# NOTE: the initial value 1 assumes that at least one MSE is below 1; otherwise best_test_size stays 0
best_test_size = 0
best_mse = 1

for test_size, mse in accs.items():
    if mse < best_mse:
        best_mse = mse
        best_test_size = test_size
        
print("Best MSE: ", best_mse)
print("Best test size: ", best_test_size)

# Avaliação do Modelo final com Cross Validation 

In [None]:
from sklearn.model_selection import KFold

# 10-fold cross validation of the final model (best architecture, learning rate, momentum and epochs)
# NOTE(review): KFold is created without shuffle, so the folds presumably follow the data set order
kf = KFold(n_splits=10)
# MSE of each fold on its training and test parts
mses_train = list()
mses_test = list()

for train_index, test_index in kf.split(inputs, outputs):
    # Select the samples of the current fold
    X_train, X_test = inputs[train_index], inputs[test_index]
    y_train, y_test = outputs[train_index], outputs[test_index]
    
    # Reseed so every fold starts from the same initial weights
    random.seed(0)
    # Rebuild the best architecture: one hidden layer when best_N2 is 0, two otherwise
    best_layers = [Layer(best_N1, N), ]
    if best_N2 != 0:
        best_layers.extend([Layer(best_N2, best_N1), Layer(n_outputs, best_N2)])
    else:
        best_layers.extend([Layer(n_outputs, best_N1)])
    
    mlp = MLP.MLPRegressor(*best_layers)
    mlp.train(X_train, y_train, eta=best_eta, alpha=best_alpha, tol=1e-4, epoch_max=best_epoch, 
          print_status=False, shuffle=True)

    predicted_train  = mlp.predict(X_train)
    predicted_test  = mlp.predict(X_test)
    
    # evaluate also prints each MSE
    mses_train.append(evaluate(y_train, predicted_train))
    mses_test.append(evaluate(y_test, predicted_test))

In [None]:
# Print the training MSE of each fold, followed by the average over the folds
# NOTE: range(10) matches the n_splits=10 used to create the folds
print("==== MSE in the TRAINING SETS ====")
for (fold, mse_train) in zip(range(10), mses_train):
    print("Fold: %d\tMSE: %.4f" % (fold, mse_train))

print("-------------------")
print("MSE Médio:   %.4f" % np.mean(mses_train))

In [None]:
# Print the test MSE of each fold, followed by the average over the folds
# NOTE: range(10) matches the n_splits=10 used to create the folds
print("==== MSE in the TEST SETS ====")
for (fold, mse_test) in zip(range(10), mses_test):
    print("Fold: %d\tMSE: %.4f" % (fold, mse_test))

print("-------------------")
print("MSE Médio:   %.4f" % np.mean(mses_test))