# Grupo 3 - MLP Applied on Wine and Musical Datasets (Trabalho 1)

---------------------------------------------

### Participants:
 - Francielle Vargas - 9527629
 - Lucas Nunes Sequeira - 9009642
 - Emanuel Huber - 12110113

#### Date: 12/09/2021

---------------------------------------------

#### Descrição e Instrução

Este notebook foi feito para a disciplina SCC5809 - Redes Neurais

Este notebook contém:

1. A implementação da classe **Perceptron**
2. A implementação da classe **PerceptronLayer**
3. A implementação da classe **MLP** + *Momentum*
4. Resolução do problema **Wine**
5. Resolução do problema de **Geographical Origin of Music**

Para utilizá-lo basta executar todas as células.

**Importante:** certifique-se de utilizar este notebook no Colab, isto evita erros de versionamento de dependências e permite o uso de comandos bash necessários para baixar os datasets.

_link de acesso ao colab: https://colab.research.google.com/drive/14XRBYj-n1FgYXNStgNd1rn6zcmty8T0p?usp=sharing_

### Libs

In [None]:
# Install Libs
!pip install numpy==1.19.5
!pip install tqdm==4.62.0
!pip install plotly==4.4.1
!pip install pandas==1.1.5
!pip install scikit-learn==0.24.2



In [None]:
# Math lib
import numpy as np

# Log lib
from tqdm.auto import tqdm

# Visualization Lib
import plotly.express as px

# Copy for deepcopy
import copy

# Pandas for visualization
import pandas as pd

# Wine data
from sklearn.datasets import load_wine

# Split data
from sklearn.model_selection import train_test_split

# Scaler
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler

# Metrics
from sklearn.metrics import accuracy_score

# Feature selection
from sklearn.ensemble import ExtraTreesRegressor
from sklearn.feature_selection import SelectFromModel

# Seed
SEED = 42

# Set random seed
np.random.seed(SEED)

### Activation function

In [None]:
class Sigmoid(object):
  '''
  Sigmoid Activation Function
  
    f(x) =  1 / (1 + e^(-x))

  The function is evaluated in a numerically stable form, so inputs with a
  large magnitude do not raise "overflow encountered in exp" warnings.
  '''

  def __init__(self):

    # Last call data (filled in by calculate)
    self.last_grad = 0
    self.last_input = 0
    self.last_output = 0

  def _update_last_call(self, x, y, grad = True):
    '''Store the input, the output and (optionally) the gradient of the last call'''

    # Gradient
    if grad:
      self.last_grad = self.gradient(x)

    # Update last input and output
    self.last_input = x
    self.last_output = y

  def __call__(self, x):
    '''Calculate sigmoid function of x

      Params:
        x: scalar, list or np.ndarray

      Returns:
        sigmoid of x (a numpy scalar for scalar input, an array otherwise)
    '''

    x = np.asarray(x)

    # e^(-|x|) is always in (0, 1], so it can never overflow
    decay = np.exp(-np.abs(x))

    # x >= 0: 1 / (1 + e^(-x));  x < 0: e^(x) / (1 + e^(x)) -- same function
    y = np.where(x >= 0, 1 / (1 + decay), decay / (1 + decay))

    # Unwrap 0-d results so scalar input still yields a numpy scalar
    return y[()]

  def calculate(self, x, grad: bool = True):
    '''Calculate sigmoid function of x and record the call

      Params:
        x: scalar, list or np.ndarray
        grad (bool): also store the gradient at x in last_grad
    '''

    # Calculation
    y = self(x)

    # Update last call data
    self._update_last_call(x, y, grad)

    return y

  def copy(self):
    '''Return a fresh Sigmoid (last-call data is not carried over)'''
    return Sigmoid()

  def gradient(self, x):
    '''Calculate sigmoid gradient at x: f(x) * (1 - f(x))'''

    # Evaluate the sigmoid once and reuse it
    s = self(x)

    return s*(1 - s)

In [None]:
class Swish(object):
  '''
  Swish Activation Function

    f(x)  = x * sigmoid(x)
    f'(x) = sigmoid(x) * (1 + x * (1 - sigmoid(x)))
  '''

  def __init__(self):

    # Inner sigmoid used by both the function and its gradient
    self.sigmoid = Sigmoid()

    # Bookkeeping of the most recent calculate() call
    self.last_grad = self.last_input = self.last_output = 0

  def _update_last_call(self, x, y, grad = True):
    '''Remember input/output of the last call and, if asked, the gradient'''

    if grad:
      self.last_grad = self.gradient(x)

    self.last_input, self.last_output = x, y

  def __call__(self, x):
    '''Evaluate swish at x (lists are converted to arrays first)'''

    inputs = np.array(x) if isinstance(x, list) else x

    return inputs*self.sigmoid(inputs)

  def calculate(self, x, grad: bool = True):
    '''Evaluate swish at x and record the call in the last_* attributes'''

    result = self(x)
    self._update_last_call(x, result, grad)

    return result

  def copy(self):
    '''Return a new, independent Swish instance'''
    return Swish()

  def gradient(self, x):
    '''Evaluate the derivative of swish at x'''

    gate = self.sigmoid(x)

    return gate*(1 + x*(1 - gate))

In [None]:
class Relu(object):
  '''
  Relu Activation Function
  
    f(x) =  
      0 if x < 0
      x if x >= 0
  '''

  def __init__(self):

    # Last call data (filled in by calculate)
    self.last_grad = 0
    self.last_input = 0
    self.last_output = 0

  def _update_last_call(self, x, y, grad = True):
    '''Store the input, the output and (optionally) the gradient of the last call'''

    # Gradient
    if grad:
      self.last_grad = self.gradient(x)

    # Update last input and output
    self.last_input = x
    self.last_output = y

  def __call__(self, x):
    '''Calculate relu function of x

      Params:
        x: scalar, list or np.ndarray
    '''

    # Accept plain lists, like the other activation classes do
    x = np.asarray(x)

    return np.where(x < 0, 0.0, x)

  def calculate(self, x, grad: bool = True):
    '''Calculate relu function of x and record the call

      Params:
        x: scalar, list or np.ndarray
        grad (bool): also store the gradient at x in last_grad
    '''

    # Calculation
    y = self(x)

    # Update last call data
    self._update_last_call(x, y, grad)
    
    return y

  def copy(self):
    '''Return a fresh Relu (last-call data is not carried over)'''
    return Relu()

  def gradient(self, x):
    '''Calculate relu gradient at x: 0 where x < 0, 1 elsewhere'''

    # Accept plain lists, like the other activation classes do
    x = np.asarray(x)

    return np.where(x < 0, 0.0, 1)

### Loss Function

In [None]:
class MSE(object):
  '''
  Mean Squared Error Loss Function
  
    f(x) =  1/(2*m) * sum((ref_y - hyp_y)**2)

  where m = len(ref) (the number of rows for 2D input) and the sum runs
  over every element.
  '''

  def __init__(self):
    
    # Last call data (filled in by calculate)
    self.last_grad = 0
    self.last_input = 0
    self.last_output = 0

  def _update_last_call(self, x, y, grad = True):
    '''Store the (ref, hyp) pair, the loss and (optionally) the gradient of the last call'''

    # Gradient
    if grad:
      self.last_grad = self.gradient(x[0], x[1])

    # Update last input and output
    self.last_input = x
    self.last_output = y

  def __call__(self, ref: np.ndarray, hyp: np.ndarray):
    '''Calculate mean squared error between ref and hyp

      Params:
        ref: reference values (list or np.ndarray)
        hyp: predicted values (list or np.ndarray), same shape as ref
    '''

    ref = np.asarray(ref)
    hyp = np.asarray(hyp)

    size = len(ref)

    # Quadratic Error Sum
    quadratic_sum = np.sum((ref - hyp)**2)

    return quadratic_sum/(2*size)

  def calculate(self, ref: np.ndarray, hyp: np.ndarray, grad: bool = True):
    '''Calculate mean squared error between ref and hyp and record the call

      Params:
        ref: reference values (list or np.ndarray)
        hyp: predicted values (list or np.ndarray)
        grad (bool): also store the gradient in last_grad
    '''

    # Calculation
    y = self(ref, hyp)

    # Update last call data
    self._update_last_call((ref, hyp), y, grad)

    return y

  def copy(self):
    '''Return a fresh MSE (last-call data is not carried over)'''
    return MSE()

  def gradient(self, ref: np.ndarray, hyp: np.ndarray):
    '''Calculate MSE gradient within hyp

      Returns a single scalar: the sum of (hyp - ref) over every element,
      divided by len(ref).
    '''

    # Accept plain lists here too, so calculate() works with the same
    # inputs that __call__ accepts
    ref = np.asarray(ref)
    hyp = np.asarray(hyp)

    size = len(ref)

    return -np.sum(ref - hyp)/size

### Metrics Function

In [None]:
class MultiClassAccuracy(object):
  '''
  MultiClass Accuracy Score

  Both the references and the predictions are given as one row per item
  with one column per class; the class of a row is the column holding its
  largest value.
  '''

  def __init__(self):
    
    # Scoring function applied to the decoded class indices
    self.func = accuracy_score

  @staticmethod
  def one_hot_decode(y):
    '''One hot decode y: index of the largest value of each row'''

    return np.argmax(y, axis=1)

  def __call__(self, ref: np.ndarray, hyp: np.ndarray):
    '''Apply one hot decode and calculate accuracy between ref and hyp

      Params:
        ref: reference rows (list or np.ndarray), one column per class
        hyp: predicted rows (list or np.ndarray), one column per class

      Returns:
        score as returned by self.func
    '''

    if isinstance(ref, list):
      ref = np.array(ref)
    if isinstance(hyp, list):
      hyp = np.array(hyp)

    # One hot decode (use the class's own decoder, not a module-level name)
    y_true = self.one_hot_decode(ref)
    y_pred = self.one_hot_decode(hyp)

    # Calculate score
    score = self.func(y_true, y_pred)

    return score

  def calculate(self, ref: np.ndarray, hyp: np.ndarray):
    '''Calculate the accuracy between ref and hyp (same as calling the object)'''

    # Calculation
    y = self(ref, hyp)

    return y

  def copy(self):
    '''Return a fresh MultiClassAccuracy'''
    return MultiClassAccuracy()

### Perceptron Class

In [None]:
class Perceptron():
  '''Single unit: bias plus weighted sum of the inputs, passed through an activation'''

  def __init__(self, input_size: int = 2, init_rule: str = 'zero', activation = None):
    '''Build the unit, initialise its weights and run a smoke-test prediction

      Params:
        input_size (int): number of input features
        init_rule (str): 'zero' or 'rand', how the initial weights are set
        activation (func): activation object; a private copy is kept
                           (Sigmoid when None)
    '''

    # One weight per feature plus one for the bias term
    self.input_size = input_size
    self.size = input_size + 1

    self.init_weights(init_rule)

    # Every unit owns its own activation instance
    self.activation = (Sigmoid() if activation is None else activation).copy()

    self._health_check()

  def _health_check(self):
    '''Run one single-item (1D) and one 4-item batch (2D) random prediction'''

    for shape in ((self.input_size,), (4, self.input_size)):
      self(np.random.rand(*shape))

  def init_weights(self, init_rule: str = 'zero'):
    '''Set the initial weights

      Params:
        init_rule (str): 'zero' for all zeros, 'rand' for uniform random values
    '''

    assert init_rule in ['zero', 'rand'], "'init_rule' must be zero or rand"

    if init_rule == 'rand':
      # Uniform samples from [0, 1) shifted and scaled to [-1, 1)
      self.weights = (np.random.rand(self.size) - 0.5) / 0.5

    elif init_rule == 'zero':
      self.weights = np.zeros(self.size)

  def set_weights(self, weights: np.ndarray):
    '''Replace the weights with a copy of the given ones

      Params:
        weights (np.ndarray): bias followed by one weight per feature
    '''

    assert self.size == len(weights), f"Perceptron size ({self.size}) != weights size ({len(weights)})"

    self.weights = weights.copy()

  def get_weights(self) -> np.ndarray:
    '''Return a copy of the current weights (bias first)'''

    return self.weights.copy()

  def _add_bias_term(self, X: np.ndarray, value = 1.0, is_batch = True) -> np.ndarray:
    '''Prepend the bias input to X, e.g. [1, 0] -> [value, 1, 0]

      Params:
        X (np.ndarray): batch of items (2D) or a single item (1D)
        value (float): value used as the bias input. Default = 1.0
        is_batch (bool): True when X is 2D, False when X is a single 1D item

      Returns:
        X (np.ndarray) with the bias input as first column / first element
    '''

    if not is_batch:
      return np.hstack(([value], X))

    bias_column = value*np.ones((X.shape[0], 1))

    return np.hstack((bias_column, X))

  def _prepare_input(self, X: np.ndarray) -> np.ndarray:
    '''Turn X into a 2D batch with a leading bias column

      Params:
        X (np.ndarray): batch of items (2D) or a single item (1D)
    '''

    batch = np.array(X)

    # A single item becomes a batch of one
    if batch.ndim == 1:
      batch = batch[np.newaxis, :]

    return self._add_bias_term(batch)


  def forward(self, X: np.ndarray) -> np.ndarray:
    '''Predict for a batch or a single item

      Params:
        X (np.ndarray): batch of items (2D) or a single item (1D)

      Returns:
        output (np.ndarray): activation output, one value per item
    '''

    batch = self._prepare_input(X)

    # Inner product of every row with the weights (net value)
    net = np.sum(self.weights * batch, axis = 1)

    return self.activation.calculate(net)

  def __call__(self, X: np.ndarray) -> np.ndarray:
    '''Shortcut for forward(X)'''

    return self.forward(X)

### Perceptron Layer Class

In [None]:
class PerceptronLayer():
  '''Layer made of independent Perceptron units that all read the same input'''

  def __init__(self, input_size: int = 2, units: int = 2, init_rule: str = 'zero',
               activation = None):
    '''Build the layer, create its units and run a smoke-test prediction

      Params:
        input_size (int): number of input features
        units (int): number of perceptron units in the layer
        init_rule (str): weight initialisation rule handed to every unit
        activation (func): activation object handed to every unit
                           (Sigmoid when None)
    '''

    # Sizes: each unit holds input_size weights plus a bias
    self.input_size = input_size
    self.size = input_size + 1
    self.num_units = units
    self.output_size = self.num_units

    self.activation = Sigmoid() if activation is None else activation
    self.init_rule = init_rule

    self._init_units()
    self._health_check()

  def _save_foward_transform(self, x, y):
    '''Remember the input and output of the latest call'''
    self.last_input, self.last_output = x, y

  def _init_units(self):
    '''Create the num_units Perceptron objects of the layer'''

    self.units = [
        Perceptron(
            input_size=self.input_size,
            init_rule=self.init_rule,
            activation=self.activation
        )
        for _ in range(self.num_units)
    ]

  def _health_check(self):
    '''Run one single-item (1D) and one 4-item batch (2D) random prediction'''

    for shape in ((self.input_size,), (4, self.input_size)):
      self(np.random.rand(*shape))


  def set_weights(self, weights: np.ndarray):
    '''Assign one row of weights to each unit

      Params:
        weights (np.ndarray): array of weights; size: (num_units, len(unit.weights))
    '''

    assert len(weights) == self.num_units, "Array of weigths must have shape (num_units, len(unit.weights))"

    for unit, row in zip(self.units, weights):
      unit.set_weights(row)

  def get_weights(self) -> np.ndarray:
    '''Return an array with a copy of every unit's weights, one row per unit'''

    return np.array([unit.get_weights() for unit in self.units])

  def backward(self, delta, value):
    '''
    Propagate delta one layer back

    Computes (delta @ weights) without the bias column, multiplied
    element-wise by this layer's activation gradient evaluated at value.

    NOTE(review): value is whatever the caller passes in, and the gradient
    used is the one of this layer's activation -- confirm this matches the
    intended backpropagation formula.
    '''

    slope = self.activation.gradient(value)
    propagated = delta @ self.get_weights()

    # Column 0 belongs to the bias weights and has no upstream input
    return propagated[:, 1:] * slope

  def forward(self, X: np.ndarray) -> np.ndarray:
    '''Predict for a batch or a single item

      Params:
        X (np.ndarray): batch of items (2D) or a single item (1D)

      Returns:
        output (np.ndarray): unit outputs, transposed so that rows are
        items and columns are units
    '''

    per_unit = [unit(X) for unit in self.units]

    return np.array(per_unit).T

  def __call__(self, X: np.ndarray) -> np.ndarray:
    '''Run forward(X) and remember the input/output pair

      Params:
        X (np.ndarray): batch of items (2D) or a single item (1D)

      Returns:
        output (np.ndarray): same as forward(X)
    '''

    output = self.forward(X)
    self._save_foward_transform(X, output)

    return output

### MultiLayerPerceptron Class

In [None]:
class MultiLayerPerceptron():
  '''
  Feed-forward network: an ordered list of layers applied in sequence,
  trained batch by batch with gradient descent plus a momentum term.
  '''

  def __init__(self, layers: list, loss_func = None, metric = None):
    '''Initializes MultiLayerPerceptron Model
    
      Params:
        layers (list): list of perceptron layers, in forward order
        loss_func (func): loss function to be applied (MSE when None)
        metric (func): optional callable metric(reference_y, predicted_y)
                       used by evaluate and logged during fit
      '''

    # Perceptron Layers
    self.layers = layers
    self.num_layers = len(layers)

    # Input and output sizes
    self.input_size = layers[0].input_size
    self.output_size = layers[-1].output_size

    # Save loss
    if loss_func is None:
      self.loss_func = MSE()
    else:
      self.loss_func = loss_func

    # Save metric
    self.metric = metric

    # Last dWs (weight variations of the previous update, used by momentum)
    self.last_dWs = {}

    # Health check
    self._health_check()
  
  def _health_check(self):
    '''Perform a health check prediction (one 1D item and one 2D batch)'''

    # 1D (input_size)
    X = np.random.rand(self.input_size)
    self(X)

    # 2D (4 items, input_size)
    X = np.random.rand(4, self.input_size)
    self(X)

  def _extend(self, vec):
    '''Prepend a column of ones (the bias input) to a 2D array'''
    
    return np.hstack([np.ones((vec.shape[0], 1)), vec])

  def _backpropagate(self, x_input: np.ndarray, predicted_y: np.ndarray, 
                     reference_y: np.ndarray, learning_rate: float, momentum_rate: float):
    '''
    Backpropagate loss to update each perceptron set of weigths in
    each layer of the model

    Relies on every layer's last_output, i.e. a forward pass over x_input
    must have been run right before.

    Params:
      x_input (np.ndarray): batch of input x
      reference_y (np.ndarray): batch of reference y's
      predicted_y (np.ndarray): batch of predicted y's
      learning_rate (float): learning rate param
      momentum_rate (float): momentum rate param
    '''

    # Get first delta
    delta = predicted_y - reference_y

    # Initialize weights dict variations (keyed by negative layer index)
    dWs = {}

    # Iterate backwards over layers (all but the first one)
    for i in range(-1, -len(self.layers), -1):
      
      # Output of the previous layer = input of layer i
      last_output = self.layers[i - 1].last_output

      # Update layer weights variation
      dWs[i] = delta.T @ self._extend(last_output)

      # Get new delta
      # NOTE(review): layer i's activation gradient is evaluated at the
      # previous layer's output -- confirm this is the intended formula.
      delta = self.layers[i].backward(delta, last_output)

    # Update layer weights variation (first layer)
    dWs[-self.num_layers] = delta.T @ self._extend(x_input)

    # Initialize current dWs variations
    current_dWs = {}

    # Update each layer weights
    for k, dW in dWs.items():
      
      # Get current weights (a copy)
      weights = self.layers[k].get_weights()

      # Plain gradient step
      variation = -(learning_rate * dW)

      # Apply momentum once a previous update exists
      if self.last_dWs:
        variation = variation + momentum_rate * self.last_dWs[k]

      # Update weights
      self.layers[k].set_weights(weights + variation)

      # Update current dWs
      current_dWs[k] = variation
    
    # Update last dWs
    self.last_dWs = current_dWs

  def get_weights(self) -> np.ndarray:
    '''Return a list of arrays of a copy of current weights per layer and unit'''

    return [layer.get_weights() for layer in self.layers]

  def _get_batch(self, X: np.ndarray, y: np.ndarray, batch_size: int):
    '''Generator of batch of items from X and y input data

      Yields dicts {'X': ..., 'y': ...} of consecutive slices holding at
      most batch_size items; the last one may be shorter.
    '''

    # X and y lengths must match
    assert len(X) == len(y), f"X (len = {len(X)}) and y (len = {len(y)}) lengths must match"

    if batch_size < 1:
      raise ValueError(f"batch_size must be >= 1 (got {batch_size})")

    for start in range(0, len(X), batch_size):
      stop = start + batch_size

      yield {
          'X': X[start:stop],
          'y': y[start:stop]
      }

  def fit(self, X: np.ndarray, y: np.ndarray, learning_rate: float = 0.1, momentum_rate: float = 1e-5,
          max_epochs: int = 5, stop_threshold: float = 1e-3, batch_size: int = 1):
    '''
    Fit the MLP model using a max_epochs steps, or when the stop_threshold
    is met

      Params:
        X (np.ndarray): a array of inputs, each input must match model input_size (number of features)
        y (np.ndarray): a array of target values (labels)
        learning_rate (float): hyperparameter to be used on backpropagation
        momentum_rate (float): hyperparameter to be used on backpropagation
        max_epochs (int): number of maximum epochs to iterate
        stop_threshold (float): number to be used to stop training if epoch loss is lower
        batch_size (int): size of each batch for the training steps

      Returns:
        history (dict): 'loss' holds one value per epoch run; 'score' holds
        one value per epoch when a metric was given, else stays empty
    '''

    # Assure X and y has same size
    assert len(X) == len(y), f"X (len = {len(X)}) and y (len = {len(y)}) lengths must match"
    assert len(y[0]) == self.layers[-1].num_units, f"y_i (len = {len(y[0])}) and output layer (len = {self.layers[-1].num_units}) lengths must match"

    # Epoch iterator
    iterator = tqdm(range(max_epochs), leave=False)

    # Num items
    num_items = len(y)

    # History of train
    history = {'loss': [], 'score': []}

    for epoch in iterator:

      # Log epoch
      iterator.set_description(f'Epoch {epoch+1}/{max_epochs}')

      # Initialize epoch loss (item-weighted mean of the batch losses)
      epoch_loss = 0
      
      # Iterate over all items
      for batch in self._get_batch(X, y, batch_size):

        # Retriever X and y batch
        input_x = batch['X']
        reference_y = batch['y']
        
        # Make prediction
        predicted_y = self(input_x)

        # Calculate error (loss)
        loss = self.loss_func.calculate(reference_y, predicted_y)

        # Weight by the real batch length: the last batch may be shorter
        epoch_loss += len(input_x) * loss/num_items

        # Update weights
        self._backpropagate(input_x, predicted_y, reference_y, learning_rate, momentum_rate)

      # Calculate metric
      if self.metric:
        score = self.evaluate(X, y)
      
        # Log epoch loss and metric
        iterator.set_postfix({'loss': epoch_loss, 'score': score})
        history['score'].append(score)
      else:
        # Log epoch loss
        iterator.set_postfix({'loss': epoch_loss})

      # Append history
      history['loss'].append(epoch_loss)

      # Stop Threshold
      if epoch_loss < stop_threshold:
        print(f'>> Loss met stop condition (at epoch {epoch+1}): loss = {epoch_loss} < {stop_threshold}')
        break

    return history

  def evaluate(self, X: np.ndarray, y: np.ndarray):
    '''Predict X with this model and score the prediction against y
    
      Params:
        X (np.ndarray): Batch of items (2D array) or a item (1D array)
        y (np.ndarray): Batch of expected predictions (2D array) or a item (1D array)
        
      Returns:
        score: whatever self.metric(y, prediction) returns

      Raises:
        ValueError: when the model was built without a metric
    '''

    if self.metric is None:
      raise ValueError("evaluate requires the model to be built with a metric")

    # Make prediction with this instance (not with a global model)
    y_pred = self(X)

    return self.metric(y, y_pred)

  def forward(self, X: np.ndarray) -> np.ndarray:
    '''Make a batch or single prediction
    
      Params:
        X (np.ndarray): Batch of items (2D array) or a item (1D array)
        
      Returns:
        output (np.ndarray): output of the last layer
    '''

    # Apply forward on each sequential layer
    for layer in self.layers:
      X = layer(X)
    
    return X

  def __call__(self, X: np.ndarray) -> np.ndarray:
    '''Make a batch or single prediction (runs forward method)
    
      Params:
        X (np.ndarray): Batch of items (2D array) or a item (1D array)
        
      Returns:
        output (np.ndarray): output of the last layer
    '''

    return self.forward(X)

### Wine Problem

#### Load Dataset

In [None]:
# Load the Wine dataset as pandas objects
dataset = load_wine(as_frame=True)

# Feature table and target classes
X, y = dataset['data'], dataset['target']

# Summary statistics of both
print('>> X:', X.describe(), sep='\n')
print()
print('>> Y:', y.describe(), sep='\n')

>> X:
          alcohol  malic_acid  ...  od280/od315_of_diluted_wines      proline
count  178.000000  178.000000  ...                    178.000000   178.000000
mean    13.000618    2.336348  ...                      2.611685   746.893258
std      0.811827    1.117146  ...                      0.709990   314.907474
min     11.030000    0.740000  ...                      1.270000   278.000000
25%     12.362500    1.602500  ...                      1.937500   500.500000
50%     13.050000    1.865000  ...                      2.780000   673.500000
75%     13.677500    3.082500  ...                      3.170000   985.000000
max     14.830000    5.800000  ...                      4.000000  1680.000000

[8 rows x 13 columns]

>> Y:
count    178.000000
mean       0.938202
std        0.775035
min        0.000000
25%        0.000000
50%        1.000000
75%        2.000000
max        2.000000
Name: target, dtype: float64


#### Preprocess Dataset

In [None]:
def one_hot_encode(y):
  '''
  One hot encode y

  y is expected to offer .nunique() and .values (e.g. a pandas Series) and
  its values are used directly as column indices.
  '''

  # One row per item, one column per distinct class
  encoded = np.zeros(shape=(len(y), y.nunique()))

  # Each row is a view into encoded: flag the column of the item's class
  for row, class_id in zip(encoded, y.values):
    row[class_id] = 1

  return encoded

def one_hot_decode(y):
  '''
  One hot decode y: for every row, the index of its largest column
  '''

  class_ids = np.argmax(y, axis=1)

  return class_ids

##### One hot / Data split

In [None]:
# Fraction of the items held out for testing
test_size = 0.2

# Targets as one-hot rows
one_hot_y = one_hot_encode(y)

# Seeded train/test split
X_train, X_test, Y_train, Y_test = train_test_split(
    X.values,
    one_hot_y,
    test_size = test_size,
    random_state = SEED
)

##### Scale data

In [None]:
# Fit the scaler on the training split only
scaler = StandardScaler()
scaler.fit(X_train)

# Apply the same transformation to both splits
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Number of input features of the Wine problem
WINE_NUM_FEATURES = X_train_scaled.shape[1]

X_train_scaled.shape, X_test_scaled.shape

((142, 13), (36, 13))

#### Build Model

In [None]:
def build_model_wine(kind = 'single'):
  '''
  Return untrained model for Wine Problem

  kind = 'single' builds one hidden layer, kind = 'double' builds two.
  Hidden layers have 16 Swish units; the output layer has 3 Sigmoid units.
  The numpy random seed is reset first, so repeated calls start from the
  same random state.
  '''

  # Reset the random state before any weight is drawn
  np.random.seed(SEED)

  assert kind in ['single', 'double'], "kind must be 'single' or 'double'"

  hidden_layers = {'single': 1, 'double': 2}[kind]

  # Hidden stack: the first layer reads the features, the next ones read 16 units
  stack = []
  fan_in = WINE_NUM_FEATURES
  for _ in range(hidden_layers):
    stack.append(PerceptronLayer(input_size=fan_in, units=16, init_rule='rand', activation=Swish()))
    fan_in = 16

  # Output layer: one unit per wine class
  stack.append(PerceptronLayer(input_size=fan_in, units=3, init_rule='rand', activation=Sigmoid()))

  return MultiLayerPerceptron(layers = stack, loss_func=MSE(), metric=MultiClassAccuracy())

In [None]:
# Instantiate the untrained two-hidden-layer variant
model = build_model_wine(kind='double')

#### Train

In [None]:
# Baseline: score of the still untrained model on the test split.
# The bare expression on the last line is the value shown by the notebook.

print('>> Accuracy (before on test set)')
model.evaluate(X_test_scaled, Y_test)

>> Accuracy (before on test set)


0.19444444444444445

In [None]:
# Train item by item (batch_size=1) for at most 250 epochs; fit stops
# earlier once the epoch loss falls below its stop_threshold.
# hist holds the per-epoch 'loss' and 'score' lists.
hist = model.fit(X_train_scaled, Y_train, max_epochs=250, batch_size=1, learning_rate=0.03, momentum_rate=0.3)

  0%|          | 0/250 [00:00<?, ?it/s]

>> Loss met stop condition (at epoch 13): loss = 0.0007580471728399052 < 0.001


In [None]:
# Score of the trained model on the test split.
# The bare expression on the last line is the value shown by the notebook.

print('>> Accuracy (after train on test set)')
model.evaluate(X_test_scaled, Y_test)

>> Accuracy (after train on test set)


1.0

#### Training Visualization

In [None]:
# Line plot of the training loss recorded at each epoch
fig = px.line(y=hist['loss'])
fig.update_layout(
    title = 'Wine | Loss vs Epochs',
    xaxis_title = 'Epoch',
    yaxis_title = 'Loss'
)

In [None]:
# Line plot of the metric score recorded at each epoch
fig = px.line(y=hist['score'])
fig.update_layout(
    title = 'Wine | Accuracy vs Epochs',
    xaxis_title = 'Epoch',
    yaxis_title = 'Accuracy'
)

#### Evaluation Pipeline

In [None]:
# Search space
models = ['single', 'double']
learning_rates = [0.001, 0.01, 0.03]
momentum_rates = [0, 0.03, 0.3]
max_epochs_range = [100, 250]

# One row per trained configuration
evaluation_df = pd.DataFrame(columns=["model", "learning_rate", "momentum_rate", "epochs_taken", "score (accuracy)"])

# Total number of configurations
num_models = len(momentum_rates)*len(learning_rates)*len(max_epochs_range)*len(models)
print('>> Models to be trained:', num_models)

# Training settings per model kind, in nested-loop order:
# learning rate (outer) -> momentum rate -> max epochs (inner)
grid = [
    (lr, mr, me)
    for lr in learning_rates
    for mr in momentum_rates
    for me in max_epochs_range
]

# Progress bar over model kinds; the postfix counts trained configurations
pbar = tqdm(models)
count = 1

for model_kind in pbar:

  for learning_rate, momentum_rate, max_epochs in grid:

    # Fresh, untrained model for every configuration
    model = build_model_wine(kind=model_kind)

    # Train and note how many epochs were actually run
    history = model.fit(X_train_scaled, Y_train, max_epochs=max_epochs, batch_size=1, learning_rate=learning_rate, momentum_rate=momentum_rate)
    epochs_taken = len(history['loss'])

    # Score on the test split
    score = model.evaluate(X_test_scaled, Y_test)

    # Append the result row
    evaluation_df.loc[len(evaluation_df)] = [model_kind, learning_rate, momentum_rate, epochs_taken, score]

    # Progress log
    pbar.set_postfix({'models_trained': f'{count}/{num_models}'})
    count += 1

# Best score first; ties broken by fewer epochs, then by model name (descending)
evaluation_df = evaluation_df.sort_values(by=['score (accuracy)', 'epochs_taken', 'model'], ascending=[False, True, False]).reset_index(drop=True)

>> Models to be trained: 60


  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/250 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/250 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/250 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow 

  0%|          | 0/250 [00:00<?, ?it/s]


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow 

  0%|          | 0/100 [00:00<?, ?it/s]


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in multiply


invalid value encountered in reduce



  0%|          | 0/250 [00:00<?, ?it/s]


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in multiply


invalid value encountered in reduce



  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/250 [00:00<?, ?it/s]

>> Loss met stop condition (at epoch 144): loss = 0.0009919758837462168 < 0.001


  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/250 [00:00<?, ?it/s]

>> Loss met stop condition (at epoch 139): loss = 0.0009987840380699014 < 0.001


  0%|          | 0/100 [00:00<?, ?it/s]

>> Loss met stop condition (at epoch 100): loss = 0.0009887820493131626 < 0.001


  0%|          | 0/250 [00:00<?, ?it/s]

>> Loss met stop condition (at epoch 100): loss = 0.0009887820493131626 < 0.001


  0%|          | 0/100 [00:00<?, ?it/s]


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow 

  0%|          | 0/250 [00:00<?, ?it/s]


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow 

  0%|          | 0/100 [00:00<?, ?it/s]


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in multiply


overflow encountered in reduce


invalid value encountered in reduce



  0%|          | 0/250 [00:00<?, ?it/s]


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in multiply


overflow encountered in reduce


invalid value encountered in reduce



  0%|          | 0/100 [00:00<?, ?it/s]

>> Loss met stop condition (at epoch 47): loss = 0.0009922178021361742 < 0.001


  0%|          | 0/250 [00:00<?, ?it/s]

>> Loss met stop condition (at epoch 47): loss = 0.0009922178021361742 < 0.001


  0%|          | 0/100 [00:00<?, ?it/s]

>> Loss met stop condition (at epoch 46): loss = 0.0009749205267746628 < 0.001


  0%|          | 0/250 [00:00<?, ?it/s]

>> Loss met stop condition (at epoch 46): loss = 0.0009749205267746628 < 0.001


  0%|          | 0/100 [00:00<?, ?it/s]

>> Loss met stop condition (at epoch 33): loss = 0.0009674761798016317 < 0.001


  0%|          | 0/250 [00:00<?, ?it/s]

>> Loss met stop condition (at epoch 33): loss = 0.0009674761798016317 < 0.001


  0%|          | 0/100 [00:00<?, ?it/s]


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow 

  0%|          | 0/250 [00:00<?, ?it/s]


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow 

  0%|          | 0/100 [00:00<?, ?it/s]


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in multiply


invalid value encountered in reduce



  0%|          | 0/250 [00:00<?, ?it/s]


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in multiply


invalid value encountered in reduce



  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/250 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/250 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/250 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in multiply


invalid value encountered in multiply


invalid value encountered in multiply



  0%|          | 0/250 [00:00<?, ?it/s]


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in multiply


invalid value encountered in multiply


invalid value encountered in multiply



  0%|          | 0/100 [00:00<?, ?it/s]


overflow encountered in exp


overflow encountered in multiply


invalid value encountered in multiply


invalid value encountered in reduce


invalid value encountered in matmul



  0%|          | 0/250 [00:00<?, ?it/s]


overflow encountered in exp


overflow encountered in multiply


invalid value encountered in multiply


invalid value encountered in reduce


invalid value encountered in matmul



  0%|          | 0/100 [00:00<?, ?it/s]

>> Loss met stop condition (at epoch 39): loss = 0.0009909082142814548 < 0.001


  0%|          | 0/250 [00:00<?, ?it/s]

>> Loss met stop condition (at epoch 39): loss = 0.0009909082142814548 < 0.001


  0%|          | 0/100 [00:00<?, ?it/s]

>> Loss met stop condition (at epoch 38): loss = 0.000989533307771962 < 0.001


  0%|          | 0/250 [00:00<?, ?it/s]

>> Loss met stop condition (at epoch 38): loss = 0.000989533307771962 < 0.001


  0%|          | 0/100 [00:00<?, ?it/s]

>> Loss met stop condition (at epoch 31): loss = 0.0009764703262407298 < 0.001


  0%|          | 0/250 [00:00<?, ?it/s]

>> Loss met stop condition (at epoch 31): loss = 0.0009764703262407298 < 0.001


  0%|          | 0/100 [00:00<?, ?it/s]


overflow encountered in exp


overflow encountered in exp


overflow encountered in multiply


overflow encountered in matmul


invalid value encountered in reduce



  0%|          | 0/250 [00:00<?, ?it/s]


overflow encountered in exp


overflow encountered in exp


overflow encountered in multiply


overflow encountered in matmul


invalid value encountered in reduce



  0%|          | 0/100 [00:00<?, ?it/s]


overflow encountered in exp


overflow encountered in multiply


invalid value encountered in multiply


invalid value encountered in multiply



  0%|          | 0/250 [00:00<?, ?it/s]


overflow encountered in exp


overflow encountered in multiply


invalid value encountered in multiply


invalid value encountered in multiply



  0%|          | 0/100 [00:00<?, ?it/s]

>> Loss met stop condition (at epoch 20): loss = 0.0008719681020952562 < 0.001


  0%|          | 0/250 [00:00<?, ?it/s]

>> Loss met stop condition (at epoch 20): loss = 0.0008719681020952562 < 0.001


  0%|          | 0/100 [00:00<?, ?it/s]

>> Loss met stop condition (at epoch 19): loss = 0.0009131292096746537 < 0.001


  0%|          | 0/250 [00:00<?, ?it/s]

>> Loss met stop condition (at epoch 19): loss = 0.0009131292096746537 < 0.001


  0%|          | 0/100 [00:00<?, ?it/s]

>> Loss met stop condition (at epoch 13): loss = 0.0007580471728399052 < 0.001


  0%|          | 0/250 [00:00<?, ?it/s]

>> Loss met stop condition (at epoch 13): loss = 0.0007580471728399052 < 0.001


  0%|          | 0/100 [00:00<?, ?it/s]


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow 

  0%|          | 0/250 [00:00<?, ?it/s]


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow encountered in exp


overflow 

  0%|          | 0/100 [00:00<?, ?it/s]


overflow encountered in exp


overflow encountered in multiply


invalid value encountered in multiply


invalid value encountered in multiply



  0%|          | 0/250 [00:00<?, ?it/s]


overflow encountered in exp


overflow encountered in multiply


invalid value encountered in multiply


invalid value encountered in multiply



In [None]:
# Display the evaluation results table (the notebook renders the value of a
# cell's last expression).
evaluation_df

Unnamed: 0,model,learning_rate,momentum_rate,epochs_taken,score (accuracy)
0,double,0.03,0.3,13,1.0
1,double,0.03,0.3,13,1.0
2,double,0.03,0.03,19,1.0
3,double,0.03,0.03,19,1.0
4,double,0.03,0.0,20,1.0
5,double,0.03,0.0,20,1.0
6,double,0.01,0.3,31,1.0
7,double,0.01,0.3,31,1.0
8,single,0.03,0.3,33,1.0
9,single,0.03,0.3,33,1.0


### Geographical Original of Music Problem

#### Load Dataset

In [None]:
# Download data
!wget https://archive.ics.uci.edu/ml/machine-learning-databases/00315/Geographical%20Original%20of%20Music.zip -O geo.zip
!unzip geo.zip
!mv /content/Geographical\ \Original\ \of\ \Music/default_features_1059_tracks.txt /content

--2021-09-19 16:57:05--  https://archive.ics.uci.edu/ml/machine-learning-databases/00315/Geographical%20Original%20of%20Music.zip
Resolving archive.ics.uci.edu (archive.ics.uci.edu)... 128.195.10.252
Connecting to archive.ics.uci.edu (archive.ics.uci.edu)|128.195.10.252|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 618424 (604K) [application/x-httpd-php]
Saving to: ‘geo.zip’


2021-09-19 16:57:06 (677 KB/s) - ‘geo.zip’ saved [618424/618424]

Archive:  geo.zip
   creating: Geographical Original of Music/
  inflating: Geographical Original of Music/.DS_Store  
   creating: __MACOSX/
   creating: __MACOSX/Geographical Original of Music/
  inflating: __MACOSX/Geographical Original of Music/._.DS_Store  
  inflating: Geographical Original of Music/default_features_1059_tracks.txt  
  inflating: Geographical Original of Music/default_plus_chromatic_features_1059_tracks.txt  


In [None]:
# The file has no header row: header=None keeps the first track as data and
# labels the columns 0..N-1. (Letting pandas read the first row as a header
# and transposing it back would mangle duplicate values, e.g. "0.5" -> "0.5.1",
# and leave the frame with object dtype.)
raw_data = pd.read_csv("default_features_1059_tracks.txt", header=None)
dataset = raw_data

# Features: every column except the last two
X = dataset[dataset.columns[:-2]].astype(float)

# Targets: the last two columns (two continuous values per track)
# NOTE(review): presumably the latitude/longitude of the track's origin -
# confirm against the dataset description.
y = dataset[dataset.columns[-2:]].astype(float)

# Summary statistics of features and targets
print('>> X:')
print(X.describe())
print()
print('>> Y:')
print(y.describe())

>> X:
                0            1   ...           66           67
count  1059.000000  1059.000000  ...  1059.000000  1059.000000
mean     -0.019312    -0.021095  ...     0.023401     0.022533
std       0.987911     0.986098  ...     1.011060     1.007017
min      -1.529463    -1.476560  ...    -1.221595    -1.329595
25%      -0.607617    -0.621808  ...    -0.641316    -0.658826
50%      -0.184135    -0.211257  ...    -0.315654    -0.304111
75%       0.312155     0.310377  ...     0.415290     0.352652
max       9.439499    10.719304  ...     5.387364     5.977184

[8 rows x 68 columns]

>> Y:
                68           69
count  1059.000000  1059.000000
mean     26.651294    38.405656
std      18.459432    50.419790
min     -35.300000   -88.760000
25%      14.660000     3.210000
50%      33.660000    32.830000
75%      39.910000    74.600000
max      54.680000   149.120000


#### Preprocess Dataset

##### Train/Test

In [None]:
# Hold out a fraction of the samples for testing; the split is seeded so it is
# reproducible. Both features and targets are passed as plain numpy arrays.
test_size = 0.2
X_train, X_test, Y_train, Y_test = train_test_split(
    X.values, y.values, test_size=test_size, random_state=SEED
)

##### Scale Data

In [None]:
# Scalers: standardisation for the features, min-max scaling for the targets.
# Both are fitted on the training split only and then applied to both splits.
X_scaler = StandardScaler().fit(X_train)
Y_scaler = MinMaxScaler().fit(Y_train)

# Features
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

# Targets
Y_train_scaled = Y_scaler.transform(Y_train)
Y_test_scaled = Y_scaler.transform(Y_test)

# Show the resulting shapes
tuple(part.shape for part in (X_train_scaled, X_test_scaled, Y_train_scaled, Y_test_scaled))

((847, 68), (212, 68), (847, 2), (212, 2))

##### Feature Selection

In [None]:
# Tree ensemble fitted on the training split; its feature importances drive
# the selection below.
reg = ExtraTreesRegressor(n_estimators=50, random_state=SEED).fit(X_train_scaled, Y_train_scaled)

# Selector built on the already-fitted regressor, keeping at most
# `max_features` features
max_features = 18
selector_model = SelectFromModel(reg, prefit=True, max_features=max_features)

# Apply the same column filter to both splits
X_train_filtred, X_test_filtred = (
    selector_model.transform(split) for split in (X_train_scaled, X_test_scaled)
)

# Number of inputs the network will receive
MUSIC_NUM_FEATURES = X_train_filtred.shape[1]

# Show the resulting shapes
X_train_filtred.shape, X_test_filtred.shape

((847, 18), (212, 18))

In [None]:
# 1.0 for features kept by the selector, 0.0 for the ones dropped
selected_featues = selector_model.get_support().astype(float)

# Bar chart of every feature's importance, coloured by the kept/dropped flag
importances = reg.feature_importances_
fig = px.bar(y=importances, color=selected_featues)
fig.update_layout(xaxis_title="Feature", title="Feature importances")
fig.show()

#### Build Model

In [None]:
def build_model_music(kind = 'single'):
  '''
  Build a fresh, untrained network for the Music problem.

    kind = 'single' -> one hidden layer
    kind = 'double' -> two hidden layers

  Hidden layers have 16 Swish units, the output layer has 2 Relu units, and
  MSE is used both as loss and as metric. The numpy RNG is re-seeded with SEED
  on every call (presumably so the 'rand' weight initialisation is
  reproducible between builds).
  '''

  # Re-seed first, exactly once per build
  np.random.seed(SEED)

  # Only the two supported architectures are accepted
  assert kind in ['single', 'double'], "kind must be 'single' or 'double'"

  hidden_count = 1 if kind == 'single' else 2

  # Hidden layers, created in order from input to output
  layers = []
  input_size = MUSIC_NUM_FEATURES
  for _ in range(hidden_count):
    layers.append(PerceptronLayer(input_size=input_size, units=16, init_rule='rand', activation=Swish()))
    input_size = 16

  # Output layer: one unit per target column
  layers.append(PerceptronLayer(input_size=16, units=2, init_rule='rand', activation=Relu()))

  return MultiLayerPerceptron(layers = layers, loss_func=MSE(), metric=MSE())

In [None]:
# Instantiate the untrained one-hidden-layer variant of the Music model
model = build_model_music(kind='single')

#### Train

In [None]:
# Baseline: test-set error of the model before any training.

# Error measured on the scaled targets
print('>> MSE Scaled (before train on test set)')
print(model.evaluate(X_test_filtred, Y_test_scaled), end="\n\n")

# Error measured on the original targets: the predictions are mapped back
# through the target scaler before being compared with the raw test targets
mse = MSE()
y_pred = Y_scaler.inverse_transform(model(X_test_filtred))
y_true = Y_test

print('>> MSE (before train on test set)')
print(mse(y_true, y_pred))

>> MSE Scaled (before train on test set)
2.838301242586433

>> MSE (before train on test set)
58847.03788816988


In [None]:
# Train on the selected features and scaled targets (batches of 6, learning
# rate 0.01, momentum disabled, max_epochs=500); the returned history is kept
# in `hist` for the training plot.
hist = model.fit(X_train_filtred, Y_train_scaled, max_epochs=500, batch_size=6, learning_rate=0.01, momentum_rate=0)

  0%|          | 0/500 [00:00<?, ?it/s]

In [None]:
# Test-set error of the model after training.

# Error measured on the scaled targets
print('>> MSE Scaled (after train on test set)')
print(model.evaluate(X_test_filtred, Y_test_scaled), end="\n\n")

# Error measured on the original targets: the predictions are mapped back
# through the target scaler before being compared with the raw test targets
mse = MSE()
y_pred = Y_scaler.inverse_transform(model(X_test_filtred))
y_true = Y_test

print('>> MSE (after train on test set)')
print(mse(y_true, y_pred))

>> MSE Scaled (after train on test set)
0.0339812716195145

>> MSE (after train on test set)
1092.5677970630195


#### Visualizing Train

In [None]:
# Line chart of the 'score' series stored in the training history
layout = dict(
    title = 'Geographical Music | MSE vs Epochs',
    xaxis_title = 'Epoch',
    yaxis_title = 'MSE - Scaled',
)
fig = px.line(y=hist['score'])

# update_layout returns the figure, so the notebook renders it
fig.update_layout(**layout)

#### Evaluation Pipeline

In [None]:
# Grid search over architecture and training hyperparameters for the Music
# problem. Every combination is trained from a freshly built model and scored
# on the test set; results are collected into `evaluation_df`, best first.
from itertools import product

# Hyperparams
momentum_rates = [0, 0.03, 0.3]
learning_rates = [0.0001, 0.001, 0.01]
max_epochs_range = [200, 500]
models = ['single', 'double']

# Columns of the evaluation dataframe
score_column = "score (mse scaled)"
evaluation_columns = ["model", "learning_rate", "momentum_rate", "epochs_taken", score_column]

# Full grid, in the same order as nesting the loops
# model -> learning rate -> momentum rate -> max epochs
grid = list(product(models, learning_rates, momentum_rates, max_epochs_range))

# Models to be trained
num_models = len(grid)
print('>> Models to be trained:', num_models)

# Rows are accumulated in a list and turned into a dataframe once at the end;
# this avoids growing the dataframe row by row and gives proper column dtypes
rows = []

# Logger: one tick per trained model
pbar = tqdm(grid)

for model_kind, learning_rate, momentum_rate, max_epochs in pbar:

  # Initialize model
  model = build_model_music(kind=model_kind)

  # Train
  history = model.fit(X_train_filtred, Y_train_scaled, max_epochs=max_epochs, batch_size=6, learning_rate=learning_rate, momentum_rate=momentum_rate, stop_threshold=0.01)
  epochs_taken = len(history['loss'])

  # Evaluate
  score = model.evaluate(X_test_filtred, Y_test_scaled)

  # Record this run
  rows.append([model_kind, learning_rate, momentum_rate, epochs_taken, score])

# Evaluation Dataframe
evaluation_df = pd.DataFrame(rows, columns=evaluation_columns)

# Sort by score -> epochs_taken -> model
evaluation_df = evaluation_df.sort_values(by=[score_column, 'epochs_taken', 'model'], ascending=[True, True, False]).reset_index(drop=True)

>> Models to be trained: 60


  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/200 [00:00<?, ?it/s]

  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/200 [00:00<?, ?it/s]

  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/200 [00:00<?, ?it/s]

  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/200 [00:00<?, ?it/s]


overflow encountered in exp


overflow encountered in square


overflow encountered in multiply


invalid value encountered in matmul


invalid value encountered in multiply


invalid value encountered in matmul


invalid value encountered in reduce



  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/200 [00:00<?, ?it/s]


overflow encountered in matmul


overflow encountered in matmul



  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/200 [00:00<?, ?it/s]

  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/200 [00:00<?, ?it/s]

  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/200 [00:00<?, ?it/s]

  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/200 [00:00<?, ?it/s]

  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/200 [00:00<?, ?it/s]

  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/200 [00:00<?, ?it/s]

  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/200 [00:00<?, ?it/s]

  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/200 [00:00<?, ?it/s]

  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/200 [00:00<?, ?it/s]

  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/200 [00:00<?, ?it/s]

  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/200 [00:00<?, ?it/s]

  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/200 [00:00<?, ?it/s]

  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/200 [00:00<?, ?it/s]

  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/200 [00:00<?, ?it/s]


invalid value encountered in multiply


invalid value encountered in multiply


invalid value encountered in matmul



  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/200 [00:00<?, ?it/s]

  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/200 [00:00<?, ?it/s]

  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/200 [00:00<?, ?it/s]

  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/200 [00:00<?, ?it/s]

  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/200 [00:00<?, ?it/s]

  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/200 [00:00<?, ?it/s]

  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/200 [00:00<?, ?it/s]

  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/200 [00:00<?, ?it/s]

  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/200 [00:00<?, ?it/s]

  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/200 [00:00<?, ?it/s]

  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/200 [00:00<?, ?it/s]

  0%|          | 0/500 [00:00<?, ?it/s]

In [None]:
# Display the evaluation results table (the notebook renders the value of a
# cell's last expression).
evaluation_df

Unnamed: 0,model,learning_rate,momentum_rate,epochs_taken,score (mse scaled)
0,single,0.01,0.0,500,0.033981
1,single,0.01,0.3,200,0.034015
2,single,0.01,0.03,500,0.034449
3,single,0.001,0.3,500,0.035183
4,single,0.01,0.3,500,0.035379
5,single,0.001,0.03,500,0.035442
6,single,0.001,0.0,500,0.035479
7,single,0.01,0.03,200,0.035488
8,double,0.01,0.0,200,0.035689
9,single,0.01,0.0,200,0.03572
