In [230]:
import numpy as np
import random
import math
from IPython.display import display, clear_output
from sklearn.utils import shuffle as shuffle_data

random.seed(0)

# Layer represents a MLP Layer
# It has two main properties:
#      - a weigth matrix containing the weights of the layer's neurons. Each line represents a neuron and 
#        the columns represent its corresponding weights
#      - a bias vector, containing the neurons's bias
# Since during the backpropagation we need to compute the weights variation using the old ones, the 
# updated_weights and updated_bias properties store the new values until the update method is called
class Layer:
    # Create a new Layer with 'size' neurons, each one linked to 'inputs_size' inputs
    def __init__(self, size, inputs_size):
        self.size = size
        self.inputs_size = inputs_size
        self.weights = np.array([[random.uniform(-0.1, 0.1) for j in range(inputs_size)] for i in range(size)])
        self.bias = np.array([random.uniform(-0.1,0.1) for i in range(size)])
        
        self.d_weights_current = np.zeros((size, inputs_size))
        self.d_bias_current = np.zeros(size)
        self.d_weights_old = np.zeros((size, inputs_size))
        self.d_bias_old = np.zeros(size)
    
    # update updates the weights and bias matrices with the values stored in the updated ones
    def update(self, eta, alpha):
        #self.weights = np.copy(self.updated_weights)
        #self.bias = np.copy(self.updated_bias)
        
        self.weights = self.weights + eta*self.d_weights_current + alpha*self.d_weights_old 
        self.bias = self.bias + eta*self.d_bias_current + alpha*self.d_bias_old
        
        self.d_weights_old = self.d_weights_current
        self.d_bias_old = self.d_bias_current
        
    # description prints a layer description
    def description(self):
        print("Layer Info")
        print("Weights: \n", self.weights)
        print("Bias: \n ", self.bias)

def logistic(x):
    return 1.0/(1.0+ math.exp(-x))

logistic_vec = np.vectorize(logistic)

def logistic_derivate(x):
    return x*(1.0-x)

In [254]:
class MLP:
    # MLP creation. One might pass the MLP layers as parameters or add them later using the add_layer method.
    # The classification parameter defines if the MLP will be used for a classification or regression problem
    def __init__(self, *layers, classifier=True):
        self.classifier = classifier
        if classifier:
            # Map each class label to a vector with a single 1
            # Ex: Class 0 -> [1,0]
            #     Class 1 -> [0,1]
            self.class_mapping = dict()  
            # Unmap each class vector to the corresponding class label
            # Ex: [1,0] -> Class 0 
            #     [0,1] -> Class 1
            self.class_unmapping = dict()
            
        self.layers = list()
        for layer in layers:
            self.add_layer(layer)
    
    # Shortcut to create a classifier MLP
    @classmethod
    def MLPClassifier(cls, *layers):
        return cls(classifier=True, *layers)   
    
    # Shortcut to create a regressor MLP
    @classmethod
    def MLPRegressor(cls, *layers):
        return cls(classifier=False, *layers)
    
    # add_layer adds a new layer on the MLP. It verifies whether or not the new layer is compatible with the MLP
    def add_layer(self, layer):
        # If there's already a layer in the MLP, verify if the new layer is compatible
        if len(self.layers) > 0:
            if layer.inputs_size != self.layers[-1].size:
                print("The new layer is incompatible with the MLP")
                print("Please, use a layer where each neuron has the same amount of inputs as the number" \
                     "of neurons in the MLP last layer")
        
        self.layers.append(layer)
    
    # description prints the info about the MLP layers
    def description(self):
        print("MLP Classifier?: ", self.classifier)
        print("-------------------------")
        print("MLP Info:")
        for layer, i in zip(self.layers, range(len(self.layers))):
            print("--- Layer: %d ---" % i)
            layer.description()
            
    # __get_class_mapping gets the class labels in the classes list and builds the mapping dicionaries
    # class_mapping and class_unmapping
    def __get_class_mapping(self, classes):
        class_labels = np.unique(classes)
        
        for c in range(len(class_labels)):
            class_label = class_labels[c]
            class_vector = np.zeros(len(class_labels))
            class_vector[c] = 1
    
            self.class_mapping[class_label] = class_vector
            
            # We can't use a list as a hash key. So transform it into a tuple
            self.class_unmapping[tuple(class_vector)] = class_label
        
    # __convert_class_labels_to_vectors converts a list with class labels to a list with 
    # vectors that maps each class label
    def __convert_class_labels_to_vectors(self, class_labels):
        return [self.class_mapping[c] for c in class_labels]
    
    # __convert_class_vectors_to_labels converts a list with class vectors to a list with 
    # the corresponding class labels
    def __convert_class_vectors_to_labels(self, class_vectors):
        return [self.class_unmapping[tuple(class_vector)] for class_vector in class_vectors]
        
        
    # fast_forward computes the ouput for a given input vector
    def fast_forward(self,input_v):
        # We need to store each layer input in order to perform the backpropagation
        self.inputs = list()
    
        # The input is applied in a layer weights matrix and the bias is added in the result
        # Then, the logistic function is applied to each layer neuron result
        # For a layer, we have a final output vector where each component i represents the output
        # of the neuron i
        for l in range(len(self.layers)-1):
            layer = self.layers[l]
            self.inputs.append(input_v)
            output = logistic_vec(layer.weights @ input_v + layer.bias)
            
            # The output of the current layer is the input of the next one
            input_v = output
            
        # Last layer
        self.inputs.append(input_v)
        layer = self.layers[-1]
        output = layer.weights @ input_v + layer.bias
        
        # If it's a classification problem, apply the softmax function on the output of the last layer
        # It it's a regression problem, the activation function of the last layer is the identity function
        if self.classifier:
            #print("OUTPUT: ", output)
            #output = np.exp(output)/sum(np.exp(output))
            output = logistic_vec(output)
            
        else:
            output = logistic_vec(output)
          
        return output
    
    # train trains the MLP using the examples passed in the samples parameter
    # The expected output for each example must be passed in the classes parameter;
    # eta represents the MLP learning rate;
    # tol represents the error tolerance. The MLP is trained until the cumulative squared error for all example
    #     is less than the tol value
    # print_status prints the output for each example during the training phase
    def train(self, samples, classes, eta=0.5, alpha=0, tol=1e-2, epoch_max=2000, 
              print_status=False, shuffle=True):
        # Map the class labels to output vectors if it's a classification problem
        if self.classifier:
            self.__get_class_mapping(classes)
            classes = self.__convert_class_labels_to_vectors(classes)
                
        error = tol
        new_error = 3*tol
        epoch = 0
        
        # The training stops when the max number of epochs is reached or the Kramer and Sangiovanni-Vicentelly
        # criteria is valid. According to it, we can consider that the BP converged when the average mean squared
        # error is less than a given tolerance
        while (abs(new_error - error) > tol and epoch < epoch_max):
            epoch += 1
            error = 0
            new_error = 0
            
            if shuffle:
                samples, classes = shuffle_data(samples, classes)
            
            for input_v, t in zip(samples, classes):  
                # ---- Compute the output for the given input vector ----
                output = self.fast_forward(input_v)
                
                # Compute the mean squared error before the backpropagation
                error_sample = pow((np.array(t)-np.array(output)),2)
                # We need to sum the error of each component when the output is a vector
                error += sum(error_sample)/len(samples)
                
                if (print_status == True):
                    print("\ttraining example: %s from class %s" % (input_v, t), end = " ")
                    print("y = ", output)

                    
                # ---- Backpropagation ----
                # Compute the new weights of each layer
                # Remark: the udpated weights are stored as a layer property and the layer is updated once 
                # the backpropagation is finished
                # It's necessary to do so in order to compute the delta value for the inner layers. We need 
                # to use the weights that caused the error to compute the delta instead of the updated weights
                for l in reversed(range(len(self.layers))): # Traverse the layers in reversed order
                    layer = self.layers[l]
             
                    deltas = list()
                    # Compute the delta for each layer neuron n
                    for n in range(len(layer.weights)):
                        # Last Layer
                        if l == (len(self.layers)-1):
                            delta = (t[n]-output[n])*logistic_derivate(output[n])
                            
                        # Inner Layer
                        else:
                            # output of the current layer is the input of the next one
                            neuron_output = self.inputs[l+1][n]
                            # weights of each neuron output
                            errors_weights = self.layers[l+1].weights[:,n]
                            
                            delta = np.dot(delta_next_layer,errors_weights)*logistic_derivate(neuron_output)
                              
                        # Computes the weights and bias variation for the neuron n
                        for w in range(len(layer.weights[n])):
                            layer.d_weights_current[n][w] = delta*self.inputs[l][w]
                        layer.d_bias_current[n] = delta*1 # bias input = 1
                        
                        #for w in range(len(layer.weights[n])):
                        #    layer.updated_weights[n][w] = layer.weights[n][w] + eta*delta*self.inputs[l][w]
                        #layer.updated_bias[n] = layer.bias[n] + (eta*delta*1) # bias input = 1

                        # Store the neuron delta
                        deltas.append(delta)
                    
                    # The neurons' delta of the current layer will be used to compute the deltas of the 
                    # next inner layer
                    delta_next_layer = np.array(deltas)
                     
                # Once the backpropagation is finished for the current example, update all the weigths and bias
                for layer in self.layers:
                    layer.update(eta, alpha)
                    
                # Compute the new error mean squared error
                output = self.fast_forward(input_v)
                error_sample = pow((np.array(t)-np.array(output)),2)
                #print("error sample: ",error_sample)
                new_error += sum(error_sample)/len(samples)
            
            # End of a epoch
            if epoch%1 == 0: # Print status only after each 100 iterations 
                clear_output(wait=True)
                display("End of epoch " + str(epoch) + ". Total Error = " + str(new_error))
        
        # End of training         
        clear_output(wait=True)
        display("End of epoch " + str(epoch) + ". Total Error = " + str(new_error))
        
    # predicts gets a list of input samples and returns a list with the predicted outputs
    def predict(self, samples):
        outputs = list()
        for input_v in samples:
            probs = self.fast_forward(input_v)
            
            if self.classifier:
                class_pos = np.argmax(probs)
                output = np.zeros(len(probs))
                output[class_pos] = 1
            
                #outputs.append(self.class_unmapping[tuple(output)])
                outputs.append(output)
                
            else:
                outputs.append(probs)
    
        if self.classifier:
            return self.__convert_class_vectors_to_labels(outputs)
        
        else:
            return outputs

In [232]:
random.seed(0)

samples = [[0,0], [1,0], [1,1], [0,1]]
classes = [[1,0],[0,1],[1,0],[0,1]]

mlp = MLP.MLPClassifier(Layer(5,2), Layer(2,5))
mlp.train([[0,0], [1,0], [1,1], [0,1]], [0,1,0,1], eta=0.5, alpha=0.8, tol=1e-4, print_status=False)
predicted = mlp.predict(samples)

'End of epoch 1118. Total Error = 0.004833229768627075'

In [233]:
mlp.predict([[0,0],[0,1],[1,0],[1,1]])

[0, 1, 1, 0]

# Data Pre Processing

In [234]:
# normalize data transforms data in order to all points have mean 0 and variance 1
def normalize_data(data):
    normalized_columns = list()
    for c in range(len(data[0])):
        col = data[:,c]
        normalized_columns.append((col - np.mean(col))/np.std(col))

    return np.array(normalized_columns).T

In [235]:
# scale_data transforms data in order to all points be in the interval [0,1]
def scale_data(data):
    normalized_columns = list()
    for c in range(len(data[0])):
        col = data[:,c]
        normalized_columns.append((col-np.min(col))/(np.max(col)-np.min(col)))

    return np.array(normalized_columns).T

Read Data

In [236]:
import pandas as pd

df = pd.read_csv('winequality-red.csv')
df.head(5)

Unnamed: 0.1,Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,category
0,0,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,Mid
1,1,7.8,0.88,0.0,2.6,0.098,25.0,67.0,0.9968,3.2,0.68,9.8,Mid
2,2,7.8,0.76,0.04,2.3,0.092,15.0,54.0,0.997,3.26,0.65,9.8,Mid
3,3,11.2,0.28,0.56,1.9,0.075,17.0,60.0,0.998,3.16,0.58,9.8,Mid
4,4,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,Mid


In [237]:
# Separação dos Dados
inputs = df[df.columns[1:-1]].values
classes = df[df.columns[-1]].values

print(inputs[0:2,:])
print(classes)

[[ 7.4     0.7     0.      1.9     0.076  11.     34.      0.9978  3.51
   0.56    9.4   ]
 [ 7.8     0.88    0.      2.6     0.098  25.     67.      0.9968  3.2
   0.68    9.8   ]]
['Mid' 'Mid' 'Mid' ... 'Mid' 'Mid' 'Mid']


In [238]:
np.unique(classes)

array(['Bad', 'Good', 'Mid'], dtype=object)

In [64]:
n_classes = len(np.unique(classes))
N = len(inputs[0])
mlp = MLP(Layer(2*N, N), Layer(n_classes, 2*N))

mlp.train(inputs, classes, eta=0.5, alpha=1.2, tol=1e-2, print_status=False)


'End of epoch 10. Total Error = 0.23409733106997108'

KeyboardInterrupt: 

CUIDADO: Problema. math range na função math.exp -> saturação da saída dos neurônios provavelmente devido ao fato dos atributos não estarem normalizados entre 0 e 1.

Outro problema: Os atributos não possuem mesma variância.

Pré-Processamento: Colocar mais exemplos para que todas classes fiquem com mesma quantidade de exemplos

In [239]:
unique, counts = np.unique(classes, return_counts=True)

In [240]:
unique

array(['Bad', 'Good', 'Mid'], dtype=object)

In [241]:
counts

array([  63,  217, 1319])

In [242]:
normalized = normalize_data(inputs)
normalized

array([[-0.52835961,  0.96187667, -1.39147228, ...,  1.28864292,
        -0.57920652, -0.96024611],
       [-0.29854743,  1.96744245, -1.39147228, ..., -0.7199333 ,
         0.1289504 , -0.58477711],
       [-0.29854743,  1.29706527, -1.18607043, ..., -0.33117661,
        -0.04808883, -0.58477711],
       ...,
       [-1.1603431 , -0.09955388, -0.72391627, ...,  0.70550789,
         0.54204194,  0.54162988],
       [-1.39015528,  0.65462046, -0.77526673, ...,  1.6773996 ,
         0.30598963, -0.20930812],
       [-1.33270223, -1.21684919,  1.02199944, ...,  0.51112954,
         0.01092425,  0.54162988]])

Compara with SKLearn standard scaler

In [243]:
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
print(scaler.fit_transform(inputs))

[[-0.52835961  0.96187667 -1.39147228 ...  1.28864292 -0.57920652
  -0.96024611]
 [-0.29854743  1.96744245 -1.39147228 ... -0.7199333   0.1289504
  -0.58477711]
 [-0.29854743  1.29706527 -1.18607043 ... -0.33117661 -0.04808883
  -0.58477711]
 ...
 [-1.1603431  -0.09955388 -0.72391627 ...  0.70550789  0.54204194
   0.54162988]
 [-1.39015528  0.65462046 -0.77526673 ...  1.6773996   0.30598963
  -0.20930812]
 [-1.33270223 -1.21684919  1.02199944 ...  0.51112954  0.01092425
   0.54162988]]


In [244]:
N = len(inputs[0])
mlp = MLP(Layer(N, N), Layer(n_classes, N))

random.seed(0)
mlp.train(normalized, classes, eta=0.5, alpha=0.5, tol=1e-4, epoch_max=500, print_status=False, shuffle=False)

'End of epoch 500. Total Error = 0.14032719381344355'

In [245]:
predicted = mlp.predict(normalized)

In [246]:
classes[8]

'Good'

In [247]:
from sklearn.metrics import accuracy_score
accuracy_score(np.array(classes), np.array(predicted))

0.8974358974358975

In [248]:
from sklearn.metrics import confusion_matrix
confusion_matrix(classes, predicted)

array([[  13,    3,   47],
       [   1,  118,   98],
       [   0,   15, 1304]])

Com Shuffling

In [251]:
N = len(inputs[0])
mlp = MLP(Layer(N, N), Layer(n_classes, N))

random.seed(0)
mlp.train(normalized, classes, eta=0.5, alpha=0.5, tol=1e-4, epoch_max=500, print_status=False, shuffle=False)

'End of epoch 500. Total Error = 0.1184189503961069'

In [252]:
predicted = mlp.predict(normalized)
accuracy_score(np.array(classes), np.array(predicted))

0.9299562226391495

In [253]:
confusion_matrix(classes, predicted)

array([[  27,    3,   33],
       [   0,  163,   54],
       [   3,   19, 1297]])

Shuffle a cada iteração

In [165]:
N = len(inputs[0])
mlp = MLP(Layer(N, N), Layer(n_classes, N))

random.seed(0)
mlp.train(normalized, classes, eta=0.5, alpha=0.5, tol=1e-4, epoch_max=500, print_status=False, shuffle=False)

'End of epoch 500. Total Error = 0.1184189503961069'

In [166]:
predicted = mlp.predict(normalized)
accuracy_score(np.array(classes), np.array(predicted))

0.9299562226391495

In [167]:
confusion_matrix(classes, predicted)

array([[  27,    3,   33],
       [   0,  163,   54],
       [   3,   19, 1297]])

## Balanceado

In [255]:
df = pd.read_csv('winequality-red.csv')
df.head(5)

inputs = df[df.columns[1:-1]].values
classes = df[df.columns[-1]].values

normalized = data_normalization(inputs)

unique, counts = np.unique(classes, return_counts=True)
print(unique)
print(counts)

['Bad' 'Good' 'Mid']
[  63  217 1319]


In [256]:
examples = min(counts)
examples

63

In [257]:
bad_indices = np.random.choice(np.where(classes == 'Bad')[0], examples)
good_indices = np.random.choice(np.where(classes == 'Good')[0], examples)
mid_indices = np.random.choice(np.where(classes == 'Mid')[0], examples)

under_sampled_examples = normalized[bad_indices]
under_sampled_classes = classes[bad_indices]

under_sampled_examples = np.append(under_sampled_examples,normalized[good_indices], axis=0)
under_sampled_classes = np.append(under_sampled_classes, classes[good_indices], axis=0)

under_sampled_examples = np.append(under_sampled_examples,normalized[mid_indices], axis=0)
under_sampled_classes = np.append(under_sampled_classes, classes[mid_indices], axis=0)

len(under_sampled_examples)
print(len(under_sampled_classes))

189


In [258]:
n_classes = len(np.unique(classes))
print("n classes:", n_classes)

n classes: 3


In [259]:
random.seed(1)
N = len(inputs[0])
mlp = MLP.MLPClassifier(Layer(N, N), Layer(n_classes, N))

mlp.train(under_sampled_examples, under_sampled_classes, eta=0.5, alpha=0.5, tol=1e-4, epoch_max=500, print_status=False)

'End of epoch 220. Total Error = 0.08730661839919339'

In [260]:
predicted = mlp.predict(under_sampled_examples)

In [261]:
from sklearn.metrics import accuracy_score
accuracy_score(under_sampled_classes, predicted)

0.9365079365079365

In [262]:
confusion_matrix(under_sampled_classes, predicted)

array([[60,  0,  3],
       [ 0, 59,  4],
       [ 0,  5, 58]])

## Data Scaling

In [263]:
df = pd.read_csv('winequality-red.csv')
df.head(5)

inputs = df[df.columns[1:-1]].values
classes = df[df.columns[-1]].values

scaled = scale_data(inputs)

unique, counts = np.unique(classes, return_counts=True)
print(unique)
print(counts)

['Bad' 'Good' 'Mid']
[  63  217 1319]


In [264]:
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler(feature_range=(0,1))

scaled_sk = scaler.fit_transform(inputs)
scaled_sk

array([[0.24778761, 0.39726027, 0.        , ..., 0.60629921, 0.13772455,
        0.15384615],
       [0.28318584, 0.52054795, 0.        , ..., 0.36220472, 0.20958084,
        0.21538462],
       [0.28318584, 0.43835616, 0.04      , ..., 0.40944882, 0.19161677,
        0.21538462],
       ...,
       [0.15044248, 0.26712329, 0.13      , ..., 0.53543307, 0.25149701,
        0.4       ],
       [0.11504425, 0.35958904, 0.12      , ..., 0.65354331, 0.22754491,
        0.27692308],
       [0.12389381, 0.13013699, 0.47      , ..., 0.51181102, 0.19760479,
        0.4       ]])

In [265]:
scaled

array([[0.24778761, 0.39726027, 0.        , ..., 0.60629921, 0.13772455,
        0.15384615],
       [0.28318584, 0.52054795, 0.        , ..., 0.36220472, 0.20958084,
        0.21538462],
       [0.28318584, 0.43835616, 0.04      , ..., 0.40944882, 0.19161677,
        0.21538462],
       ...,
       [0.15044248, 0.26712329, 0.13      , ..., 0.53543307, 0.25149701,
        0.4       ],
       [0.11504425, 0.35958904, 0.12      , ..., 0.65354331, 0.22754491,
        0.27692308],
       [0.12389381, 0.13013699, 0.47      , ..., 0.51181102, 0.19760479,
        0.4       ]])

In [266]:
examples = min(counts)
examples

bad_indices = np.random.choice(np.where(classes == 'Bad')[0], examples)
good_indices = np.random.choice(np.where(classes == 'Good')[0], examples)
mid_indices = np.random.choice(np.where(classes == 'Mid')[0], examples)

under_sampled_examples = scaled[bad_indices]
under_sampled_classes = classes[bad_indices]

under_sampled_examples = np.append(under_sampled_examples,scaled[good_indices], axis=0)
under_sampled_classes = np.append(under_sampled_classes, classes[good_indices], axis=0)

under_sampled_examples = np.append(under_sampled_examples,scaled[mid_indices], axis=0)
under_sampled_classes = np.append(under_sampled_classes, classes[mid_indices], axis=0)

len(under_sampled_examples)
print(len(under_sampled_classes))

189


In [267]:
n_classes = len(np.unique(classes))
print("n classes:", n_classes)

n classes: 3


In [268]:
random.seed(1)
N = len(inputs[0])
mlp = MLP.MLPClassifier(Layer(6, N), Layer(n_classes, 6))

mlp.train(under_sampled_examples, under_sampled_classes, eta=0.3, alpha=0.1, tol=1e-4, epoch_max=500, 
          print_status=False)

'End of epoch 500. Total Error = 0.22749142563479316'

In [269]:
predicted = mlp.predict(under_sampled_examples)
accuracy_score(under_sampled_classes, predicted)

0.8518518518518519

In [270]:
confusion_matrix(under_sampled_classes, predicted)

array([[52,  0, 11],
       [ 0, 57,  6],
       [ 2,  9, 52]])

### Com SKLearn

In [None]:
clf = MLPClassifier(solver='sgd', alpha=1e-5, activation='logistic', learning_rate='constant', tol=1e-10,
                    learning_rate_init=0.5, momentum=0.5, hidden_layer_sizes=(N), random_state=1,
                    shuffle=False, max_iter=6000)


clf.fit(under_sampled_examples, mapped_under_sampled_classes)

In [None]:
predicted = clf.predict(under_sampled_examples)
accuracy_score(np.array(mapped_under_sampled_classes), predicted)

## MLP Sklearn

In [None]:
from sklearn.neural_network import MLPClassifier

clf = MLPClassifier(solver='sgd', alpha=1e-5, activation='logistic', learning_rate='constant', tol=1e-10,
                    learning_rate_init=0.5, momentum=0.5, hidden_layer_sizes=(N), random_state=1,
                    shuffle=False, max_iter=1000)


clf.fit(normalized, mapped_classes)

In [None]:
clf.loss_

In [None]:
mapped_classes[8]

In [None]:
predicted = clf.predict(normalized)

In [None]:
from sklearn.metrics import accuracy_score

In [None]:
accuracy_score(np.array(mapped_classes), predicted)

# Regression

In the 'default_features_1059_tracks.txt' file, the first 68 columns are audio features of the track, and the last two columns are the origin of the music, represented by latitude and longitude. 

In [271]:
df = pd.read_csv('default_features_1059_tracks.txt', header=None)
df.head(5)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,60,61,62,63,64,65,66,67,68,69
0,7.161286,7.835325,2.911583,0.984049,-1.499546,-2.094097,0.576,-1.205671,1.849122,-0.425598,...,-1.504263,0.351267,-1.018726,-0.174878,-1.089543,-0.66884,-0.914772,-0.83625,-15.75,-47.95
1,0.225763,-0.094169,-0.603646,0.497745,0.874036,0.29028,-0.077659,-0.887385,0.432062,-0.093963,...,-0.495712,-0.465077,-0.157861,-0.157189,0.380951,1.088478,-0.123595,1.391141,14.91,-23.51
2,-0.692525,-0.517801,-0.788035,1.214351,-0.907214,0.880213,0.406899,-0.694895,-0.901869,-1.701574,...,-0.637167,0.14726,0.217914,2.718442,0.972919,2.081069,1.375763,1.063847,12.65,-8.0
3,-0.735562,-0.684055,2.058215,0.716328,-0.011393,0.805396,1.497982,0.114752,0.692847,0.052377,...,-0.178325,-0.065059,-0.724247,-1.020687,-0.75138,-0.385005,-0.012326,-0.392197,9.03,38.74
4,0.570272,0.273157,-0.279214,0.083456,1.049331,-0.869295,-0.265858,-0.401676,-0.872639,1.147483,...,-0.919463,-0.667912,-0.820172,-0.190488,0.306974,0.119658,0.271838,1.289783,34.03,-6.85


In [272]:
data = df.values
#n_data = data_normalization(data)

In [273]:
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler(feature_range=(0,1))

scaled = scaler.fit_transform(data)
scaled

array([[0.79230368, 0.76352811, 0.55478876, ..., 0.06751881, 0.2172705 ,
        0.17155709],
       [0.16001751, 0.11334916, 0.07269012, ..., 0.37235778, 0.55801289,
        0.27429797],
       [0.07630057, 0.07861345, 0.04740195, ..., 0.32756458, 0.5328962 ,
        0.33949891],
       ...,
       [0.0690223 , 0.06608503, 0.04021757, ..., 0.1034345 , 0.8516337 ,
        0.45636455],
       [0.04854589, 0.03092565, 0.6375824 , ..., 0.0680776 , 1.        ,
        0.47952749],
       [0.12567753, 0.11333572, 0.07745744, ..., 0.38213486, 1.        ,
        0.47952749]])

In [274]:
inputs = scaled[:,:-2]
outputs = scaled[:,-2:]

In [275]:
inputs

array([[0.79230368, 0.76352811, 0.55478876, ..., 0.07543846, 0.04642531,
        0.06751881],
       [0.16001751, 0.11334916, 0.07269012, ..., 0.30406071, 0.16613812,
        0.37235778],
       [0.07630057, 0.07861345, 0.04740195, ..., 0.43319408, 0.39300562,
        0.32756458],
       ...,
       [0.0690223 , 0.06608503, 0.04021757, ..., 0.06902011, 0.03600491,
        0.1034345 ],
       [0.04854589, 0.03092565, 0.6375824 , ..., 0.14456785, 0.06666088,
        0.0680776 ],
       [0.12567753, 0.11333572, 0.07745744, ..., 0.84472242, 0.56410745,
        0.38213486]])

In [276]:
outputs

array([[0.2172705 , 0.17155709],
       [0.55801289, 0.27429797],
       [0.5328962 , 0.33949891],
       ...,
       [0.8516337 , 0.45636455],
       [1.        , 0.47952749],
       [1.        , 0.47952749]])

In [277]:
n_features = len(inputs[0])
mlpR = MLP.MLPRegressor(Layer(int(math.floor(n_features/2)), n_features), Layer(2, int(math.floor(n_features/2))))

In [278]:
mlpR.train(inputs, outputs, eta=0.5, alpha=0.5, tol=1e-4, epoch_max=2000, print_status=False)

'End of epoch 2000. Total Error = 0.0029200341639355025'

In [None]:
predicted = mlpR.predict(inputs)

In [None]:
def mean_squared_error(y_true, y_predicted):
    y_true = np.array(y_true)
    y_predicted = np.array(y_predicted)
    
    # Average Mean Squared error per sample
    samples_error = np.average((y_true-y_predicted)**2, axis=1) 
    
    # Average Mean Squared Error for all samples
    return np.average(samples_error, axis=0)

In [None]:
mean_squared_error(outputs, predicted)

In [None]:
from sklearn.metrics import mean_squared_error
mean_squared_error(outputs, predicted)

In [None]:
predicted[4]

In [None]:
outputs[4]