<a href="https://colab.research.google.com/github/scaverod/EasyNN/blob/master/EasyNN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Easy Neural Network

Este cuaderno permite la construcción de redes de neuronas de forma sencilla. Basta con determinar y configurar los diferentes elementos que componen la red. 


## 1. Preparación del entorno
Para el diseño de una red de neuronas artificiales haremos uso de Tensorflow y Keras. 

### 1.1 Carga de librerías

**TensorFlow** es una biblioteca implementada por Google para sus aplicaciones de aprendizaje automático y redes neuronales profundas. Es una librería para ejecutar operaciones matemáticas: es capaz de ejecutar de forma rápida y eficiente operaciones cuya entrada y salida son vectores multidimensionales (o tensores) de datos.

Por otro lado, **Keras** es una librería de redes neuronales desarrollada por François Chollet (entre otros), un ingeniero de Google. Keras es una abstracción para la creación de modelos de aprendizaje que opera con Tensorflow. 

En este trabajo haremos uso de la versión 2 de TensorFlow, la cual acaba de ser publicada. Por defecto no está instalada en las máquinas de Google Colab, por lo tanto instalarla será lo primero que haremos.

In [0]:
# Remove any existing `tensorflow` package, then install the GPU build pinned
# to version 2.0.0 (IPython shell commands, executed by the notebook kernel).
!pip uninstall tensorflow -y
!pip install tensorflow-gpu==2.0.0

Ahora importaremos todas las librerías necesarias para el desarrollo de esta práctica:

In [0]:
import tensorflow as tf
from tensorflow import keras
print("Tensorflow version: "+tf.__version__)
print("Keras version: "+keras.__version__)

# Helper libraries
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import seaborn as sns

from tqdm import tqdm
from time import gmtime, strftime
from enum import Enum
import time 

from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score

%matplotlib inline

# Google file system: mount Google Drive under /gdrive so that the dataset
# and model paths defined in the next cell (all rooted at "/gdrive/My Drive/")
# can be resolved.
from google.colab import drive
drive.mount('/gdrive', force_remount=True)
print("Librerías cargadas correctamente")

In [0]:
# Locations inside the mounted Google Drive. As written, all three are only the
# Drive root folder: ATT_FILE and LABEL_FILE are later passed to pd.read_csv,
# so they must be completed with the attribute / label CSV file names before
# running the notebook. MODEL_PATH is the prefix that save_model and
# load_model prepend to the model file name.
ATT_FILE = "/gdrive/My Drive/"
LABEL_FILE = "/gdrive/My Drive/"
MODEL_PATH = "/gdrive/My Drive/"

### 1.2 Definición de funciones útiles
En esta sección vamos a implementar variables, funciones y clases que nos serán útiles a lo largo de este trabajo:
* `SEED`: semilla que permite controlar la aleatoriedad de manera que los experimentos sean repetibles.  
* `save_model`: permite guardar un modelo de una red de neuronas
* `load_model`: permite cargar un modelo de una red de neuronas
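* `ActivationFunction`: enumeración de las funciones de activación disponibles para las capas ocultas.
* `Option`, `Initializers` y `Optimizers`: enumeraciones para activar o desactivar opciones, elegir el inicializador de los pesos y elegir el optimizador.
* `ask`, `ask_bool` y `select_optimizer`: piden por teclado los hiperparámetros del optimizador elegido y lo construyen.
* `get_act_fun`, `get_dropout` y `get_regularizer`: adaptan la función de activación, la probabilidad de dropout y la regularización al número de capas ocultas.
* `plot_cm`: dibuja la matriz de confusión.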


In [0]:
# Seed handed to the random weight initializers declared in `Initializers`,
# so that experiments are repeatable.
SEED = 42

In [0]:
def save_model(model, name=None):
    """Save a Keras model under ``MODEL_PATH``.

    Args:
        model: Object exposing ``save(path)`` (a Keras model in this notebook).
        name: File name appended to ``MODEL_PATH``. When omitted, the current
            UTC date formatted as ``YYYY-MM-DD`` is used.
    """
    # Resolve the default at call time. A default written in the signature is
    # evaluated only once, when the function is defined, so a long-running
    # session would keep reusing the date on which the cell was executed.
    if name is None:
        name = strftime("%Y-%m-%d", gmtime())
    model.save(MODEL_PATH + name)
    print("Model saved")

In [0]:
def load_model(model):
  """Load and return the Keras model stored under ``MODEL_PATH``.

  Args:
    model: File name of the saved model, appended to ``MODEL_PATH``.
  """
  model_location = MODEL_PATH + model
  restored = keras.models.load_model(model_location)
  return restored

In [0]:
class ActivationFunction(Enum):
  """Activation functions selectable for the hidden layers.

  Each member's value is the string identifier that ``create_model`` hands to
  ``keras.layers.Activation``.
  Reference: https://keras.io/activations/
  """
  ELU = "elu"
  SOFTMAX = "softmax"
  SELU = "selu"
  SOFTPLUS = "softplus"
  SOFTSIGN = "softsign"
  RELU = "relu"
  TANH = "tanh"
  SIGMOID = "sigmoid"
  H_SIGMOID = "hard_sigmoid"
  EXPONENTIAL = "exponential"
  LINEAR = "linear"

In [0]:
class Option(Enum):
  """Yes/no switch used for the boolean settings of the notebook.

  The member value is the matching ``bool``: ``create_model`` reads
  ``verbose.value`` and compares the other flags with ``is Option.YES``.
  """
  YES = True
  NO = False

In [0]:
class Initializers(Enum):
  """Weight initializers selectable for the hidden Dense layers.

  Each member's value is an already-constructed Keras initializer; the random
  ones are built with ``seed=SEED``. The instances are created once, when this
  class body runs, and ``create_model`` passes ``member.value`` to every
  hidden layer.
  """
  # Deterministic initializers (no seed involved).
  ZEROS = keras.initializers.Zeros()
  ONES = keras.initializers.Ones()
  CONSTANT = keras.initializers.Constant(value=0)
  # Random initializers, all seeded with SEED.
  RANDOMNORMAL = keras.initializers.RandomNormal(mean=0.0, stddev=0.05, seed=SEED)
  RANDOMUNIFORM = keras.initializers.RandomUniform(minval=-0.05, maxval=0.05, seed=SEED)
  TRUNCATEDNORMAL = keras.initializers.TruncatedNormal(mean=0.0, stddev=0.05, seed=SEED)
  ORTHOGONAL = keras.initializers.Orthogonal(gain=1.0, seed=SEED)
  IDENTITY = keras.initializers.Identity(gain=1.0)
  LECUN_UNIFORM = keras.initializers.lecun_uniform(seed=SEED)
  GLOROT_NORMAL = keras.initializers.glorot_normal(seed=SEED)
  GLOROT_UNIFORM = keras.initializers.glorot_uniform(seed=SEED)
  HE_NORMAL = keras.initializers.he_normal(seed=SEED)
  HE_UNIFORM = keras.initializers.he_uniform(seed=SEED)
  LECUN_NORMAL = keras.initializers.lecun_normal(seed=SEED)

In [0]:
class Optimizers(Enum):
  """Optimizers that ``select_optimizer`` knows how to build.

  The integer values are only distinct tags; ``select_optimizer`` compares
  members by identity and never reads the numbers.
  """
  SGD = 0
  RMSPROP = 1
  ADAGRAD = 2
  ADADELTA = 3
  ADAM = 4
  ADAMAX = 5 
  NADAM = 6

In [0]:
def ask(parameter, default_value):
  """Prompt on standard input for a numeric hyperparameter.

  Args:
    parameter: Human-readable parameter name shown in the prompt.
    default_value: Value returned when the answer cannot be parsed by
      ``float``.

  Returns:
    The answer converted to ``float``, or ``default_value`` (after printing a
    notice) when the conversion raises ``ValueError``.
  """
  prompt = "Please enter the " + parameter + " : "
  try:
    answer = float(input(prompt))
  except ValueError:
    notice = "Oops!  That was no valid input, " + parameter + " will be " + str(default_value)
    print(notice)
    answer = default_value
  return answer

In [0]:
def ask_bool(parameter, default_value):
  """Prompt on standard input for a yes/no hyperparameter.

  Args:
    parameter: Human-readable parameter name shown in the prompt.
    default_value: Value returned when the answer is neither a recognised
      affirmative nor a recognised negative word.

  Returns:
    ``True`` for an affirmative answer, ``False`` for a negative answer,
    otherwise ``default_value`` (after printing a notice). Matching ignores
    case and surrounding whitespace.
  """
  affirmative = ('true', '1', 't', 'y', 'yes', 'yeah', 'yup', 'certainly', 'uh-huh')
  negative = ('false', '0', 'f', 'n', 'no', 'nope', 'nah')
  answer = input("Please enter the "+  parameter + " : ").strip().lower()
  if answer in affirmative:
    return True
  # An explicit "no" must win over the default; otherwise answering "no" to a
  # parameter whose default is True would still return True.
  if answer in negative:
    return False
  print("Oops!  That was no valid input, "+  parameter + " will be " + str(default_value))
  return default_value

In [0]:
def select_optimizer(optimizer):
  """Build the Keras optimizer matching an ``Optimizers`` member.

  The optimizer's hyperparameters are requested interactively through ``ask``
  and ``ask_bool``, in the order written below; each call carries the default
  used when the answer is not accepted.

  Args:
    optimizer: Member of ``Optimizers`` (compared by identity).

  Returns:
    The configured ``keras.optimizers`` instance, or ``None`` when
    ``optimizer`` is not one of the members handled here.
  """
  if optimizer is Optimizers.SGD:
    lr = ask("learning rate", 0.01)
    momentum = ask("momentum", 0.0)
    nesterov = ask_bool("nesterov", False)
    return keras.optimizers.SGD(learning_rate=lr, momentum=momentum, nesterov=nesterov)

  if optimizer is Optimizers.RMSPROP:
    lr = ask("learning rate", 0.001)
    rho = ask("rho", 0.9)
    return keras.optimizers.RMSprop(learning_rate=lr, rho=rho)

  if optimizer is Optimizers.ADAGRAD:
    lr = ask("learning rate", 0.01)
    return keras.optimizers.Adagrad(learning_rate=lr)

  if optimizer is Optimizers.ADADELTA:
    lr = ask("learning rate", 1.0)
    rho = ask("rho", 0.95)
    return keras.optimizers.Adadelta(learning_rate=lr, rho=rho)

  if optimizer is Optimizers.ADAM:
    lr = ask("learning rate", 0.001)
    beta_1 = ask("beta 1", 0.9)
    beta_2 = ask("beta 2", 0.999)
    amsgrad = ask_bool("amsgrad", False)
    return keras.optimizers.Adam(learning_rate=lr, beta_1=beta_1, beta_2=beta_2, amsgrad=amsgrad)

  if optimizer is Optimizers.ADAMAX:
    lr = ask("learning rate", 0.002)
    beta_1 = ask("beta 1", 0.9)
    beta_2 = ask("beta 2", 0.999)
    return keras.optimizers.Adamax(learning_rate=lr, beta_1=beta_1, beta_2=beta_2)

  if optimizer is Optimizers.NADAM:
    lr = ask("learning rate", 0.002)
    beta_1 = ask("beta 1", 0.9)
    beta_2 = ask("beta 2", 0.999)
    return keras.optimizers.Nadam(learning_rate=lr, beta_1=beta_1, beta_2=beta_2)

  # Anything else: no optimizer is built.
  return None

In [0]:
def get_act_fun(a_f, n_layers):
  """Return one activation function per hidden layer.

  Args:
    a_f: Falsy value such as ``None`` or an empty list (every layer then uses
      ``ActivationFunction.RELU``), a single activation applied to all
      layers, or a list/tuple with the activation of each layer.
    n_layers: Number of hidden layers.

  Returns:
    A new list of length ``n_layers``. Surplus entries of a too-long sequence
    are dropped; a too-short sequence is padded by repeating its last entry.
  """
  if not a_f:
    return [ActivationFunction.RELU] * n_layers
  # isinstance (rather than an exact type comparison) so that tuples and list
  # subclasses are treated as per-layer sequences, not as a single activation.
  if not isinstance(a_f, (list, tuple)):
    return [a_f] * n_layers
  per_layer = list(a_f)
  missing = n_layers - len(per_layer)
  # A non-positive `missing` yields an empty padding list, so in that case
  # only the truncating slice has an effect.
  return per_layer[:n_layers] + [per_layer[-1]] * missing

In [0]:
def get_dropout(drp, n_layers):
  """Return one dropout probability per hidden layer.

  Args:
    drp: ``None`` or an empty list/tuple (every layer then uses 0.5), a
      single probability applied to all layers, or a list/tuple with the
      probability of each layer. An explicit 0 is honoured as "drop nothing".
    n_layers: Number of hidden layers.

  Returns:
    A new list of length ``n_layers``. Surplus entries of a too-long sequence
    are dropped; a too-short sequence is padded by repeating its last entry.
  """
  is_sequence = isinstance(drp, (list, tuple))
  # Only a missing value selects the 0.5 default. A plain truthiness test
  # would also replace a deliberate rate of 0 / 0.0 with 0.5.
  if drp is None or (is_sequence and not drp):
    return [0.5] * n_layers
  if not is_sequence:
    return [drp] * n_layers
  per_layer = list(drp)
  missing = n_layers - len(per_layer)
  # A non-positive `missing` yields an empty padding list, so in that case
  # only the truncating slice has an effect.
  return per_layer[:n_layers] + [per_layer[-1]] * missing

In [0]:
def get_regularizer(regularizer_L1, regularizer_L2):
  """Build the kernel regularizer for the given L1 / L2 factors.

  Args:
    regularizer_L1: L1 penalty factor; a falsy value (``None`` or 0) means
      no L1 term.
    regularizer_L2: L2 penalty factor; a falsy value (``None`` or 0) means
      no L2 term.

  Returns:
    ``keras.regularizers.l1_l2`` when both factors are set, the single
    matching ``l1`` or ``l2`` regularizer when only one is set, and ``None``
    when neither is set.
  """
  if regularizer_L1 and regularizer_L2:
    return keras.regularizers.l1_l2(l1=regularizer_L1, l2=regularizer_L2)
  if regularizer_L1:
    return keras.regularizers.l1(regularizer_L1)
  if regularizer_L2:
    # Only the L2 factor is set, so the penalty must be built with l2();
    # using l1() here would silently apply the wrong norm.
    return keras.regularizers.l2(regularizer_L2)
  return None

In [0]:
def plot_cm(y_true, y_pred, figsize=(10,10)):
    """Draw an annotated confusion-matrix heatmap.

    Rows are the actual classes and columns the predicted ones; the classes
    shown are the distinct values of ``y_true``. Diagonal cells are annotated
    with the row percentage and ``count/row total``; off-diagonal cells with
    the row percentage and the count, or left blank when the count is zero.

    Args:
        y_true: Actual class labels.
        y_pred: Predicted class labels, aligned with ``y_true``.
        figsize: Figure size forwarded to ``plt.subplots``.
    """
    labels = np.unique(y_true)
    cm = confusion_matrix(y_true, y_pred, labels=labels)
    row_totals = cm.sum(axis=1)
    cm_perc = cm / row_totals[:, np.newaxis].astype(float) * 100
    # Object dtype keeps every annotation at full length; a fixed-width
    # string array would truncate annotations longer than its item size.
    annot = np.empty(cm.shape, dtype=object)
    nrows, ncols = cm.shape
    for i in range(nrows):
        # Scalar row total: formatting a 1-element array with %d relies on an
        # implicit array-to-scalar conversion that NumPy has deprecated.
        total = row_totals[i]
        for j in range(ncols):
            count = cm[i, j]
            perc = cm_perc[i, j]
            if i == j:
                annot[i, j] = '%.1f%%\n%d/%d' % (perc, count, total)
            elif count == 0:
                annot[i, j] = ''
            else:
                annot[i, j] = '%.1f%%\n%d' % (perc, count)
    cm = pd.DataFrame(cm, index=labels, columns=labels)
    cm.index.name = 'Actual'
    cm.columns.name = 'Predicted'
    fig, ax = plt.subplots(figsize=figsize)
    # fmt='' because the annotations are already formatted strings.
    sns.heatmap(cm, cmap= "YlGnBu", annot=annot, fmt='', ax=ax)

### 1.3 División del conjunto de datos

In [0]:
# Fraction of the instances used for training; the remaining instances are
# split between the development set and the final test set.
TRAIN_RATE=0.8

attributes = pd.read_csv(ATT_FILE)
label = pd.read_csv(LABEL_FILE)

# Split sizes: the final test set takes whatever is left after the other two,
# so the three sizes always add up to n_instances.
n_instances = attributes.shape[0]
n_train = int(n_instances*TRAIN_RATE)
n_dev = int((n_instances-n_train)/2)
n_final_test = n_instances-n_train-n_dev

# Consecutive row ranges, in file order (the rows are not shuffled here).
train_rows = slice(None, n_train)
dev_rows = slice(n_train, n_train+n_dev)
final_test_rows = slice(n_train+n_dev, n_instances)

attribute_values = attributes.values
label_values = label.values

x_train, t_train = attribute_values[train_rows], label_values[train_rows]
x_dev, t_dev = attribute_values[dev_rows], label_values[dev_rows]
x_final_test, t_final_test = attribute_values[final_test_rows], label_values[final_test_rows]

# Network dimensions: one input per attribute column, one output per label column.
INPUTS = x_train.shape[1]
OUTPUTS = t_train.shape[1]

for split_name, split_data in (("x_train", x_train), ("t_train", t_train),
                               ("x_dev", x_dev), ("t_dev", t_dev),
                               ("x_final_test", x_final_test), ("t_final_test", t_final_test)):
  print (split_name + ":", split_data.shape)

print ("INPUTS: ",INPUTS)
print ("OUTPUTS: ",OUTPUTS)

In [0]:
# Sanity check shown as the cell output: total number of rows across the three
# splits, to be compared with n_instances.
x_train.shape[0] + x_dev.shape[0] + x_final_test.shape[0]

## 2. Definir valores de Hiperparámetros

### 2.1 Número de epochs


In [0]:
# Passed to model.fit as `epochs`.
n_epochs =1000

### 2.2 Tamaño del Batch

In [0]:
# Passed to model.fit as `batch_size`.
batch_size = 500

### 2.3 Arquitectura de la red


In [0]:
# One entry per hidden layer, in order: the number of units of each hidden
# Dense layer built by create_model.
n_neurons_per_hlayer = [128,64,32]

### 2.4 Optimizador

In [0]:
# Interactive: select_optimizer prompts for the Adam hyperparameters
# (learning rate, beta 1, beta 2, amsgrad) when this cell runs.
optimizer = select_optimizer(Optimizers.ADAM)

### 2.5 Función de Activación


In [0]:
# Activation of the hidden layers: either a single ActivationFunction member
# (applied to every hidden layer) or a list with one member per layer, e.g.
# [ActivationFunction.RELU, ActivationFunction.RELU, ActivationFunction.RELU].
activation = ActivationFunction.RELU

### 2.6 Inicialización de los pesos

In [0]:
# Initializers member whose value create_model uses as kernel_initializer of
# every hidden Dense layer.
kernel_initializer = Initializers.HE_NORMAL

### 2.7 Normalización del Batch

In [0]:
# Option.YES makes create_model insert a BatchNormalization layer between each
# hidden Dense layer and its activation.
normalization = Option.NO

### 2.8 Regularización

#### Dropout

In [0]:
# Option.YES makes create_model add a Dropout layer after each hidden activation.
dropout = Option.NO
# Drop probability of each hidden layer, in order; only read by create_model
# when dropout is Option.YES.
prob_per_hlayer = [0.05,0.02,0.01]
# Background on where BatchNormalization is placed relative to the other layers:
# https://stackoverflow.com/questions/34716454/where-do-i-call-the-batchnormalization-function-in-keras

#### Regularización L1 y L2

In [0]:
# Penalty factors handed to get_regularizer through create_model; leaving both
# as None means the hidden layers get no kernel regularizer.
regularizer_L1 = None
regularizer_L2 = None
# More info: https://medium.com/datadriveninvestor/l1-l2-regularization-7f1b4fe948f2

### 2.9 Mostrar trazas

In [0]:
# Option.YES makes create_model print its construction trace; the underlying
# bool is also forwarded to model.fit as `verbose`.
verbose = Option.YES

## 3. Construcción del modelo

In [0]:
def create_model(model_name ="FeedforwardNN",
                 n_neurons_per_hlayer=None,
                 optimizer = None,
                 verbose = Option.NO,
                 act_fun = None,
                 kernel_initializer=Initializers.GLOROT_UNIFORM,
                 normalization = Option.NO,
                 dropout = Option.NO,
                 drop_prop = None,
                 regularizer_L1 = None,
                 regularizer_L2 = None):
  """Build and compile a feed-forward softmax classifier.

  The input size and the number of output units are read from the
  module-level globals ``INPUTS`` and ``OUTPUTS``. Every hidden layer is added
  as: Dense, optional BatchNormalization, Activation, optional Dropout.

  Args:
    model_name: Name given to the ``keras.Sequential`` model.
    n_neurons_per_hlayer: Units of each hidden layer, in order. Defaults to
      ``[1000, 500, 250, 75, 25]``.
    optimizer: Keras optimizer. When falsy, an SGD optimizer is built through
      ``select_optimizer`` (which prompts for its hyperparameters).
    verbose: ``Option``; ``Option.YES`` prints a construction trace and the
      model summary.
    act_fun: Activation(s) of the hidden layers, resolved by ``get_act_fun``.
    kernel_initializer: ``Initializers`` member used for the hidden layers.
    normalization: ``Option.YES`` adds BatchNormalization before each
      hidden activation.
    dropout: ``Option.YES`` adds Dropout after each hidden activation.
    drop_prop: Dropout probability/probabilities, resolved by ``get_dropout``
      (only when ``dropout`` is ``Option.YES``).
    regularizer_L1: L1 factor handed to ``get_regularizer``.
    regularizer_L2: L2 factor handed to ``get_regularizer``.

  Returns:
    The compiled model (categorical cross-entropy loss,
    ``categorical_accuracy`` metric).
  """
  # Created per call instead of in the signature, so no list object is shared
  # between calls.
  if n_neurons_per_hlayer is None:
    n_neurons_per_hlayer = [1000, 500, 250, 75, 25]
  n_hidden = len(n_neurons_per_hlayer)

  a_f = get_act_fun(act_fun, n_hidden)
  if not optimizer:   optimizer = select_optimizer(Optimizers.SGD)
  if dropout is Option.YES: drop_prop = get_dropout(drop_prop, n_hidden)
  kernel_regularizer = get_regularizer(regularizer_L1, regularizer_L2)
  trace = verbose.value

  if trace: print("Creating model...")
  model = keras.Sequential(name=model_name)
  if trace: print("Model name set as: " + model_name)
  model.add(keras.layers.InputLayer(input_shape=(INPUTS,), batch_size=None))
  if trace: print("Input layer created. Number of inputs is " + str(INPUTS))

  for i, (neurons, act) in enumerate(zip(n_neurons_per_hlayer, a_f)):
    if trace: print("For layer "+ str(i+1)+"... \n   \t- Weight initializer is " + kernel_initializer.name + "\n   \t- Number of neurons is " + str(neurons))
    # kernel_regularizer=None is Dense's own default, so a single call covers
    # both the regularized and the unregularized case.
    model.add(keras.layers.Dense(neurons,
                                 kernel_initializer=kernel_initializer.value,
                                 kernel_regularizer=kernel_regularizer))
    if kernel_regularizer and trace:
      print("   \t- Kernel regularization is used. L1 is " + str(regularizer_L1) + " and L2 is " + str(regularizer_L2))
    if normalization is Option.YES :
      model.add(tf.keras.layers.BatchNormalization())
      if trace: print("   \t- Batch Normalization is used")
    # The activation is a separate layer so that BatchNormalization (when
    # enabled) sits between the Dense output and the non-linearity.
    model.add(tf.keras.layers.Activation(act.value))
    if trace: print("   \t- Activation Function is " + act.value)
    if dropout is Option.YES :
      model.add(tf.keras.layers.Dropout(drop_prop[i]))
      if trace: print("   \t- Dropout is used with p=" + str(drop_prop[i]))

  model.add(keras.layers.Dense(OUTPUTS, activation="softmax", name="Softmax"))
  if trace: print("Output layer created. Number of outputs is " + str(OUTPUTS))
  model.compile(loss=tf.keras.losses.categorical_crossentropy,
              optimizer=optimizer,
              metrics=["categorical_accuracy"])
  if trace:
    print("\n ########## SUMMARY ##########")
    model.summary()
  return model

## 4. Entrenamiento y validación

In [0]:
# Build the network from the hyperparameters chosen in section 2. model_name
# is not passed, so create_model's default name is used.
model = create_model(n_neurons_per_hlayer=n_neurons_per_hlayer,
                  optimizer = optimizer,
                  verbose = verbose,
                  act_fun = activation,
                  kernel_initializer=kernel_initializer, 
                  normalization = normalization,
                  dropout = dropout, 
                  drop_prop = prob_per_hlayer, 
                  regularizer_L1 = regularizer_L1, 
                  regularizer_L2 = regularizer_L2)

In [0]:
# Measure the elapsed training time. time.perf_counter() is used because
# time.clock() was removed in Python 3.8 and, on Unix, reported the CPU time
# of the process rather than the elapsed wall-clock time.
start = time.perf_counter()
history = model.fit(x_train, t_train, batch_size=batch_size, epochs=n_epochs, 
                     verbose=verbose.value, validation_data=(x_dev, t_dev))
elapsed = time.perf_counter() - start
print ("Numero de épocas " + str(n_epochs))
print ("Tamaño del lote " + str(batch_size))                       
print ("Tiempo necesitado " +  "{:.2f}".format(elapsed) + "s.")

In [0]:
# One column per quantity recorded in history.history, one row per epoch.
results=pd.DataFrame(history.history)
results.plot(figsize=(8, 5))
plt.grid(True)
plt.xlabel ("Epochs")
plt.ylabel ("Accuracy - Mean Log Loss")
# Values above 1 fall outside the visible range with this limit.
plt.gca().set_ylim(0, 1) # set the vertical range to [0,1]
plt.show()

In [0]:
# Training error = 1 - categorical accuracy recorded for the last epoch.
last_train_accuracy = results["categorical_accuracy"].values[-1]
print ("Error for the training set: ", (1 - last_train_accuracy))

In [0]:
# Development error = 1 - validation categorical accuracy of the last epoch.
last_dev_accuracy = results["val_categorical_accuracy"].values[-1]
print ("Error for the development test set: ", (1 - last_dev_accuracy))

## 5. Test final

In [0]:
# Predicted class = index of the largest softmax output. This is what
# Sequential.predict_classes computed for a multi-unit softmax layer; that
# method no longer exists in later TensorFlow 2.x releases, while the argmax
# over predict() works in all of them.
y_pred = np.argmax(model.predict(x_final_test, verbose=0), axis=-1)
# Targets are converted to class indices with argmax over the label columns
# (assumes one-hot encoded labels — TODO confirm against the label file).
y_true=np.argmax(t_final_test, axis=1)

In [0]:
# Final test error = 1 - fraction of test instances classified correctly.
final_test_accuracy = accuracy_score(y_true, y_pred)
print ("Error for the final test set: ", (1 - final_test_accuracy))

In [0]:
# Confusion matrix of the final test set (rows: actual class, columns: predicted class).
plot_cm(y_true, y_pred)