# SIMULATIONS

This notebook runs all the simulations used to produce and analyze the results included in the document. 

## MNIST SIMULATIONS

Load libraries

In [None]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
from operator import itemgetter
import pandas as pd
import random
from sklearn.model_selection import ShuffleSplit
!pip install fitter
import seaborn as sns
from fitter import Fitter, get_common_distributions, get_distributions
# import tensorflow_probability as tfp
from sklearn.metrics import accuracy_score, f1_score
from tensorflow.keras.utils import to_categorical
from os import listdir
from os.path import isfile, join
from sklearn.model_selection import train_test_split
from tensorflow import keras
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from utils_mnist import FluidNetwork
from utils_mnist import GA
from utils_mnist import make_confusion_matrix

In [None]:
def load_data_mnist():
  '''
  Fetches the MNIST digits through Keras and rescales the images
  # Returns: ((x_train, y_train), (x_test, y_test)); both image arrays are
  #          divided by 255, the label arrays are returned exactly as loaded
  '''
  train_split, test_split = tf.keras.datasets.mnist.load_data()
  train_images, train_labels = train_split
  test_images, test_labels = test_split
  return (train_images/255., train_labels), (test_images/255., test_labels)

In [None]:
def data_augmentation(x_train, horizontal_flip=False):
  '''
  Build an ImageDataGenerator for real-time augmentation and fit it on x_train
  # Args:
  #   x_train: image array passed to ImageDataGenerator.fit
  #   horizontal_flip: whether images are randomly mirrored left/right.
  #     Disabled by default because a mirrored handwritten digit is not a
  #     valid sample of its class; pass True to get the previous behaviour.
  # Returns: ImageDataGenerator instance fitted
  '''
  datagen = ImageDataGenerator(
      featurewise_center=False,
      featurewise_std_normalization=False,
      rotation_range=20,
      width_shift_range=0.2,
      height_shift_range=0.2,
      horizontal_flip=horizontal_flip,
      # Kept as is: the training cell requests subset='training' from datagen.flow().
      validation_split=0.2)
  datagen.fit(x_train)
  return datagen

### Data preparation

Load MNIST data

In [None]:
(x_train, y_train), (x_test, y_test) = load_data_mnist()

In [None]:
# Merge the original train/test partitions into one pool (images in X, labels in y)
# so that a new split can be drawn from it.
X = np.vstack((x_train, x_test))
y = np.hstack((y_train, y_test))

In [None]:
# Stratified 70/30 split drawn from a small subset of the pooled data.
# NOTE(review): the slice 1:2000 selects indices 1..1999 (1999 samples, index 0 skipped) -- confirm 0:2000 was not intended.
# NOTE(review): no random_state is passed, so the split differs on every run.
x_train, x_test, y_train, y_test = train_test_split(X[1:2000], y[1:2000], test_size=0.3, stratify=y[1:2000])

In [None]:
# One-hot encode the integer digit labels (the Keras model is compiled with categorical_crossentropy).
y_train = to_categorical(y_train)
y_test = to_categorical(y_test)

In [None]:
# Append a trailing channel axis: (n, height, width) -> (n, height, width, 1).
x_train_aug = np.reshape(x_train, (x_train.shape[0],x_train.shape[1], x_train.shape[2],1))

Fit ImageDataGenerator

In [None]:
datagen = data_augmentation(x_train_aug)

### Keras model implementation

In [None]:
# Parameters selection
# These notebook-level names are read by build_model() at call time.
# NOTE(review): input_dim / output_dim / num_max_units are reassigned further down in the
# FluidNet section (input_dim becomes 28*28), so re-run this cell before rebuilding the Keras model.
input_dim = 28
output_dim = 10
num_max_units = 128

In [None]:
def build_model(max_layers):
  '''
  Builds a fully connected Keras classifier for single-channel square images:
  Flatten -> (max_layers - 1) x Dense(num_max_units, relu) -> Dense(output_dim, softmax).
  Reads the notebook-level names input_dim, num_max_units and output_dim at call time.
  # Args: max_layers: number of layers before the classification head,
  #       counting the Flatten layer (must be >= 1)
  # Returns: model (not compiled)
  # Raises: ValueError if max_layers < 1
  '''
  # Fail early with a clear message; previously this case crashed on an unbound local.
  if max_layers < 1:
    raise ValueError('max_layers must be >= 1, got {}'.format(max_layers))

  inputs = keras.Input(shape=(input_dim, input_dim,1), name="digits")

  # The first "layer" only flattens the image; the remaining ones are hidden Dense layers.
  x = keras.layers.Flatten()(inputs)
  for _ in range(max_layers - 1):
    x = keras.layers.Dense(num_max_units, activation="relu")(x)

  outputs = keras.layers.Dense(output_dim, activation="softmax", name="classification")(x)
  return  keras.Model(inputs=inputs, outputs=outputs)


In [None]:
# With max_layers=4 build_model yields Flatten + 3 hidden Dense layers + the softmax head.
model = build_model(4)
# Compile the model
model.compile(optimizer='adam',
              loss='categorical_crossentropy',  
              metrics=[tf.keras.metrics.CategoricalAccuracy()]) 

In [None]:
# Append a trailing channel axis to both splits: (n, height, width) -> (n, height, width, 1).
# x_train_aug is recomputed here with the same expression used before fitting the generator.
x_train_aug = np.reshape(x_train, (x_train.shape[0],x_train.shape[1], x_train.shape[2],1))
x_test_aug = np.reshape(x_test, (x_test.shape[0],x_test.shape[1], x_test.shape[2],1))

In [None]:
model.summary()

Training 

In [None]:
# fits the model on batches with real-time data augmentation:
# NOTE(review): subset='training' makes flow() use only the training part of the generator's
# validation_split (0.2); the held-out part is never used because validation_data is the
# test arrays -- confirm that dropping those samples is intended.
history = model.fit(datagen.flow(x_train_aug, y_train, batch_size=128,
         subset='training'),
         validation_data =(x_test_aug, y_test), epochs=1000)

Results visualization

In [None]:
from sklearn.metrics import confusion_matrix, accuracy_score
# Class scores for every test image (one column per class).
y_pred = model.predict(x_test_aug)
# Column index of the largest score = predicted class.
y_pred_ = pd.DataFrame(y_pred).idxmax(axis=1).values
# Same reduction on the one-hot test labels recovers the true class index.
y_test_aux = pd.DataFrame(y_test).idxmax(axis=1).values
cf_matrix = confusion_matrix(y_test_aux, y_pred_)
print(pd.DataFrame(cf_matrix))
print('Accuracy:',accuracy_score(y_test_aux, y_pred_))

### FluidNet implementation

In [None]:
# Topological parameters
window_size = 40
max_layers = 3+3 # with 5 layers at the start it does not work
num_max_units = 128
# input_dim = window_size
input_dim = 28*28
# output_dim = 2
output_dim = 10

# Layer-size vector: every entry starts as a hidden width, then the two ends are
# overwritten (output first, input last, so the input size wins if they coincide).
layers = np.full(max_layers, num_max_units, dtype='uint32')
layers[-1] = output_dim
layers[0] = input_dim

In [None]:
# NOTE(review): this import rebinds load_data_mnist to the utils_mnist version, replacing the
# function defined earlier in this notebook.
# The call also rebinds x_train / y_train / x_test / y_test, discarding the subsampled,
# one-hot encoded arrays prepared for the Keras model. Presumably the utils_mnist loader
# returns the data in the format FluidNetwork.train expects -- confirm.
from utils_mnist import load_data_mnist
(x_train, y_train), (x_test, y_test) = load_data_mnist()

In [None]:
net = FluidNetwork(layers)

In [None]:
# Training parameters
batch_size = 256
epochs = 6000
lr = 1e-3
trigger = 0.01 # set <0.1 to enable AG flag
# Number of whole batches that fit in the training set.
steps_per_epoch = x_train.shape[0] // batch_size
print('Steps per epoch', steps_per_epoch)

In [None]:
# Train the FluidNet on MNIST; the returned history is kept for the plots.
# NOTE(review): the fitted ImageDataGenerator is passed in the position where the
# pattern-recognition section passes steps_per_epoch, and the steps_per_epoch computed in the
# parameters cell is not used here -- presumably utils_mnist.FluidNetwork.train has a
# different signature; confirm.
history = net.train(
    x_train,y_train,
    x_test, y_test,
    epochs, datagen,
    batch_size, lr, trigger)

## PATTERN RECOGNITION SIMULATIONS

Load libraries

In [None]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
from operator import itemgetter
import pandas as pd
import random
from sklearn.model_selection import ShuffleSplit
!pip install fitter
import seaborn as sns
from fitter import Fitter, get_common_distributions, get_distributions
# import tensorflow_probability as tfp
from sklearn.metrics import accuracy_score, f1_score
from tensorflow.keras.utils import to_categorical
from os import listdir
from os.path import isfile, join
from sklearn.model_selection import train_test_split
from tensorflow import keras
from utils_patterns import FluidNetwork
from utils_patterns import GA
from utils_mnist import make_confusion_matrix

In [None]:
# To skip the data preparation steps, run the next cell (it loads the saved train/test arrays) and skip the cells under "Data preparation"

In [None]:
import pickle

def _read_pickle(path):
  '''
  Reads one pickled object from path; the file is closed even if unpickling fails
  '''
  with open(path, 'rb') as f:
    return pickle.load(f)

# Load data
# WARNING: pickle.load can execute arbitrary code -- only load files from a trusted source.
y_train = _read_pickle('Data_stock_net_model/y_train_stock.pckl')
y_test = _read_pickle('Data_stock_net_model/y_test_stock.pckl')
x_train = _read_pickle('Data_stock_net_model/x_train_stock.pckl')
x_test = _read_pickle('Data_stock_net_model/x_test_stock.pckl')

#### Data preparation

In [None]:
def load_data_stock(data_dir='Data_stock_yahoo'):
  '''
  Loads stock data gathered from Yahoo Finance
  # Args: data_dir: folder holding date.csv, selectedSymbols.csv, ticker.csv and
  #       close.csv / open.csv / low.csv / high.csv / volume.csv (all header-less)
  # Returns: close_quotes, open_quotes, high_quotes, low_quotes, volume_quotes
  #          DataFrames indexed by the first column of date.csv, one column per
  #          selected symbol, named with the ticker of that symbol
  '''
  # Row index --> date index
  date = pd.read_csv(join(data_dir, 'date.csv'), header=None)
  date_index = date.iloc[:, 0].values
  # First row of selectedSymbols.csv lists the selected columns; the -1 shifts them to
  # 0-based positions (presumably the file stores 1-based indices -- confirm).
  validSymbols = pd.read_csv(join(data_dir, 'selectedSymbols.csv'), header=None)
  validCols = validSymbols.iloc[0,:].values - 1
  # Col names --> stock ticker names
  ticker = pd.read_csv(join(data_dir, 'ticker.csv'), header=None)

  def _read_quotes(file_name):
    '''Reads one quote file restricted to the selected columns and labels rows/columns'''
    quotes = pd.read_csv(join(data_dir, file_name), header=None, usecols=validCols)
    # read_csv ignores the order of usecols and returns columns in file order, so the
    # ticker names are looked up from the column positions that were actually read.
    quotes.columns = ticker.loc[quotes.columns, 0].values
    quotes.index = date_index
    return quotes

  close_quotes = _read_quotes('close.csv')
  open_quotes = _read_quotes('open.csv')
  low_quotes = _read_quotes('low.csv')
  high_quotes = _read_quotes('high.csv')
  volume_quotes = _read_quotes('volume.csv')
  return close_quotes, open_quotes, high_quotes, low_quotes, volume_quotes

In [None]:
close_quotes, open_quotes, high_quotes, low_quotes, volume_quotes = load_data_stock()

In [None]:
def normalize_data(close_quotes, open_quotes, high_quotes, low_quotes, volume_quotes):
  '''
  Converts every quote table to logarithmic returns, log(q_t) - log(q_{t-1});
  missing values in the result (such as the first row) are replaced by 0
  # Args: close, open, high, low and volume tables
  # Returns: the five transformed tables, in the same order as the arguments
  '''
  def _log_returns(quotes):
    return np.log(quotes).diff().fillna(0)

  tables = (close_quotes, open_quotes, high_quotes, low_quotes, volume_quotes)
  return tuple(_log_returns(table) for table in tables)

In [None]:
close_quotes_pu, open_quotes_pu, high_quotes_pu, low_quotes_pu, volume_quotes_pu = normalize_data(close_quotes, open_quotes, high_quotes, low_quotes, volume_quotes)

In [None]:
def download_files(data_dir='Data_labeled'):
  '''
  Load labeled data from the CSV files stored in data_dir
  # Args: data_dir: folder scanned (non-recursively) for files
  # Returns: DataFrame with columns 1-4 of every file, concatenated in
  #          file-name order (each file keeps its own row index)
  # Raises: ValueError (from pd.concat) when the folder contains no files
  '''
  # listdir returns entries in an OS-dependent order; sort so the row order is reproducible.
  file_names = sorted(f for f in listdir(data_dir) if isfile(join(data_dir, f)))
  frames = [pd.read_csv(join(data_dir, name), usecols=[1,2,3,4]) for name in file_names]
  return pd.concat(frames)

In [None]:
def get_master_table(close_quotes_pu):
  '''
  Builds the table of labeled windows used for training
  # Args: close_quotes_pu: table of normalized close quotes (rows = time, columns = stocks)
  # Returns: DataFrame with one row per labeled sample: 40 consecutive values of
  #          close_quotes_pu followed by the last column of the labeled data
  '''
  window_size = 40
  labeled = download_files()
  n_samples = labeled.shape[0]
  master_table = pd.DataFrame(np.zeros((n_samples, window_size+1)))
  for row in range(n_samples):
    # Column 1 of the labeled data is the starting row position, column 2 the stock column position.
    start, stock = labeled.iloc[row, 1], labeled.iloc[row, 2]
    window = close_quotes_pu.iloc[start:start+window_size, stock]
    master_table.iloc[row, 0:window_size] = window.values
  # Last column of the labeled data goes to the last column of the table.
  master_table.iloc[:, -1] = labeled.iloc[:, -1].values
  return master_table

In [None]:
master_table = get_master_table(close_quotes_pu)

In [None]:
# Features are every column but the last; the last column is the target.
features = master_table.iloc[:,:-1].values
targets = master_table.iloc[:,-1].values
# Stratified 70/30 split, then one-hot encoding of both target vectors.
x_train, x_test, y_train, y_test = train_test_split(features, targets, test_size=0.3, stratify=targets)
y_train, y_test = to_categorical(y_train), to_categorical(y_test)

### Keras model implementation

In [None]:
# Notebook-level parameters read by build_model() at call time:
# input_dim = length of one input vector, num_max_units = width of each hidden Dense layer,
# max_layers = number of hidden Dense layers.
input_dim = 40
num_max_units = 128
max_layers = 3

In [None]:
def build_model(max_layers):
  '''
  Builds a fully connected binary classifier:
  max_layers x Dense(num_max_units, relu) -> Dense(1, sigmoid).
  Reads the notebook-level names input_dim and num_max_units at call time.
  # Args: max_layers: number of hidden Dense layers; 0 connects the sigmoid
  #       unit directly to the input (this case used to crash)
  # Returns: model (not compiled)
  '''
  inputs = tf.keras.Input(shape=(input_dim, ), name="patterns")

  # Start the chain at the input so that an empty stack of hidden layers is valid.
  x = inputs
  for _ in range(max_layers):
    x = keras.layers.Dense(num_max_units, activation="relu")(x)

  outputs = keras.layers.Dense(1, activation="sigmoid", name="classification")(x)
  return  keras.Model(inputs=inputs, outputs=outputs)

In [None]:
model = build_model(max_layers)
model.summary()
# metrics is given as a list, like in the MNIST compile cell; the precision
# metric uses a decision threshold of 0.5.
model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=[tf.keras.metrics.Precision(0.5)])

In [None]:
# The labels are one-hot encoded; argmax(axis=1) recovers the class index and [None].T
# reshapes it into a column (n, 1) to match the single sigmoid output of the model.
history = model.fit(x_train, np.argmax(y_train, axis=1)[None].T, epochs=2, validation_data=(x_test, np.argmax(y_test, axis=1)[None].T), batch_size=128)


In [None]:
# Turn the sigmoid outputs into hard 0/1 labels, in place, with a 0.5 threshold
# (values equal to 0.5 go to class 0).
y_pred = model.predict(x_test)
y_pred[y_pred <= 0.5] = 0.
y_pred[y_pred > 0.5] = 1.
y_pred

In [None]:
from sklearn.metrics import confusion_matrix
# True class indices (argmax of the one-hot test labels) against the thresholded predictions.
cf_matrix = confusion_matrix(np.argmax(y_test, axis=1), y_pred)
# Group names passed to make_confusion_matrix for the 2x2 matrix.
labels = ['True Neg','False Pos','False Neg','True Pos']
categories = ['0', '1']
make_confusion_matrix(cf_matrix, 
                      group_names=labels,
                      categories=categories)

### FluidNet implementation

In [None]:
# Topological parameters
window_size = 40
max_layers = 3+3
num_max_units = 128
input_dim = window_size
output_dim = 2

# Layer-size vector: every entry starts as a hidden width, then the two ends are
# overwritten (output first, input last, so the input size wins if they coincide).
layers = np.full(max_layers, num_max_units, dtype='uint32')
layers[-1] = output_dim
layers[0] = input_dim

In [None]:
net = FluidNetwork(layers)

In [None]:
# Training parameters
batch_size = 128
epochs = 500
lr = 1e-3
trigger = 0.01 # set <0.1 to enable AG flag
# Number of whole batches that fit in the training set.
steps_per_epoch = x_train.shape[0] // batch_size
print('Steps per epoch', steps_per_epoch)

In [None]:
# Train the FluidNet on the pattern data with the values set in the training-parameters cell;
# the returned history is kept in `history`.
history = net.train(
    x_train,y_train,
    x_test, y_test,
    epochs, steps_per_epoch,
    batch_size, lr, trigger)

In [None]:
from sklearn.metrics import confusion_matrix
# NOTE(review): net.predict presumably returns class indices (it is compared directly
# with the class indices below) -- confirm against utils_patterns.
y_pred = net.predict(x_test)
# Column index of the 1 in each one-hot row = true class index.
y_test_aux = pd.DataFrame(y_test).idxmax(axis=1).values
cf_matrix = confusion_matrix(y_test_aux, y_pred)
labels = ['True Neg','False Pos','False Neg','True Pos']
categories = ['0', '1']
make_confusion_matrix(cf_matrix, 
                      group_names=labels,
                      categories=categories)

### SMOTE for unbalanced data

In [None]:
!pip install imbalanced-learn

In [None]:
!pip install imblearn

In [None]:
from imblearn.over_sampling import SMOTE
from imblearn.under_sampling import RandomUnderSampler
from imblearn.pipeline import Pipeline
from collections import Counter

In [None]:
# Define pipeline strategy
over = SMOTE(sampling_strategy=0.2)
under = RandomUnderSampler(sampling_strategy=0.3)
steps = [('o', over), ('u', under)]
pipeline = Pipeline(steps=steps)

In [None]:
# transform the dataset
# y_train is one-hot encoded at this point, so resample on the integer class labels:
# y_train_os is then a 1-D label vector, which is what Counter(), model.fit() and
# to_categorical() in the following cells work with.
x_train_os, y_train_os = pipeline.fit_resample(x_train, np.argmax(y_train, axis=1))

In [None]:
print(Counter(y_train_os))

In [None]:
from sklearn.utils import shuffle
# Shuffle samples and labels together; random_state=0 fixes the permutation.
x_train_os, y_train_os= shuffle(x_train_os, y_train_os,random_state=0)

Keras model implementation

In [None]:
# Notebook-level parameters read by build_model() at call time:
# input_dim = length of one input vector, num_max_units = width of each hidden Dense layer,
# max_layers = number of hidden Dense layers.
input_dim = 40
num_max_units = 128
max_layers = 3

In [None]:
def build_model(max_layers):
  '''
  Builds a fully connected binary classifier:
  max_layers x Dense(num_max_units, relu) -> Dense(1, sigmoid).
  Reads the notebook-level names input_dim and num_max_units at call time.
  # Args: max_layers: number of hidden Dense layers; 0 connects the sigmoid
  #       unit directly to the input (this case used to crash)
  # Returns: model (not compiled)
  '''
  inputs = tf.keras.Input(shape=(input_dim, ), name="patterns")

  # Start the chain at the input so that an empty stack of hidden layers is valid.
  x = inputs
  for _ in range(max_layers):
    x = keras.layers.Dense(num_max_units, activation="relu")(x)

  outputs = keras.layers.Dense(1, activation="sigmoid", name="classification")(x)
  return  keras.Model(inputs=inputs, outputs=outputs)

In [None]:
model = build_model(max_layers)
model.summary()
# metrics is given as a list, like in the MNIST compile cell; the precision
# metric uses a decision threshold of 0.5.
model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=[tf.keras.metrics.Precision(0.5)])

In [None]:
# Train on the resampled set; validation still uses the untouched test split, whose one-hot
# labels are reduced to a column of class indices with argmax(axis=1)[None].T.
history = model.fit(x_train_os, y_train_os, epochs=100, validation_data=(x_test, np.argmax(y_test, axis=1)[None].T), batch_size=128)


In [None]:
# Turn the sigmoid outputs into hard 0/1 labels, in place, with a 0.5 threshold
# (values equal to 0.5 go to class 0).
y_pred = model.predict(x_test)
y_pred[y_pred <= 0.5] = 0.
y_pred[y_pred > 0.5] = 1.
y_pred

In [None]:
from sklearn.metrics import confusion_matrix
# True class indices (argmax of the one-hot test labels) against the thresholded predictions.
cf_matrix = confusion_matrix(np.argmax(y_test, axis=1), y_pred)
# Group names passed to make_confusion_matrix for the 2x2 matrix.
labels = ['True Neg','False Pos','False Neg','True Pos']
categories = ['0', '1']
make_confusion_matrix(cf_matrix, 
                      group_names=labels,
                      categories=categories)

FluidNet implementation

In [None]:
# Topological parameters
window_size = 40
max_layers = 3+3
num_max_units = 128
input_dim = window_size
output_dim = 2

# Layer-size vector: every entry starts as a hidden width, then the two ends are
# overwritten (output first, input last, so the input size wins if they coincide).
layers = np.full(max_layers, num_max_units, dtype='uint32')
layers[-1] = output_dim
layers[0] = input_dim

In [None]:
net = FluidNetwork(layers)

In [None]:
# Training parameters
batch_size = 128
epochs = 6000
# The FluidNet in this section is trained on the resampled set (x_train_os), so the
# number of batches per epoch is counted on that set rather than on x_train.
steps_per_epoch = int(x_train_os.shape[0]/batch_size)
lr = 1e-3
trigger = 0.01
print('Steps per epoch', steps_per_epoch)

In [None]:
y_train_os_one_hot = to_categorical(y_train_os)

In [None]:
# Train the FluidNet on the resampled training set with one-hot labels.
# NOTE(review): the epoch count is the literal 1000, not the `epochs` value (6000) set in the
# training-parameters cell -- confirm which one is intended.
history = net.train(
    x_train_os,y_train_os_one_hot,
    x_test, y_test,
    1000, steps_per_epoch,
    batch_size, lr, trigger)

In [None]:
from sklearn.metrics import confusion_matrix
# NOTE(review): net.predict presumably returns class indices (it is compared directly
# with the class indices below) -- confirm against utils_patterns.
y_pred = net.predict(x_test)
# Column index of the 1 in each one-hot row = true class index.
y_test_aux = pd.DataFrame(y_test).idxmax(axis=1).values
cf_matrix = confusion_matrix(y_test_aux, y_pred)
labels = ['True Neg','False Pos','False Neg','True Pos']
categories = ['0', '1']
make_confusion_matrix(cf_matrix, 
                      group_names=labels,
                      categories=categories)

Plots

In [None]:
# Plots of the training history returned by the last net.train(...) call.
# NOTE(review): the titles mention MNIST, but at this point of the notebook `history`
# comes from the pattern-recognition FluidNet -- confirm which run these plots describe.

# Validation accuracy per epoch
plt.figure()
plt.plot(history['val_acc'])

plt.title('Accuracy para el MNIST con una red evolutiva')

# Training and validation loss per epoch
plt.figure()
plt.plot(history['train_loss'])
plt.plot(history['val_loss'])
plt.legend(['Train loss', 'Val loss'])

plt.title('Loss para el MNIST con una red evolutiva')

# Convergence check
# Zoom on the last epochs. A trailing window equals epochs 5500-6000 for a 6000-epoch
# run and, unlike that fixed slice, is not empty for shorter histories.
convergence_window = 500
plt.figure()
plt.plot(history['train_loss'][-convergence_window:])
plt.plot(history['val_loss'][-convergence_window:])
plt.legend(['Train loss', 'Val loss'])

plt.title('Comprobación convergencia del loss para el MNIST con una red evolutiva')

# Trigger analysis
# Epoch-to-epoch difference of the log validation loss; the first 5 differences are skipped.
aux = np.diff(np.log(np.array(history['val_loss'])))
plt.figure()
plt.plot(aux[5:])
plt.title('Diferencia del loss en cada epoch')