# <font color='blue'>Data Science Challenge @ ITA 2022</font>
# <font color='blue'>Equipe DIOMGIS</font>

## <font color='blue'>Fase 1</font>

### <font color='blue'>TEMA DO DESAFIO</font>

![title](../data/image/logo.jpeg)

In [None]:
# Report the version of the Python interpreter running this notebook.
from platform import python_version

print(
    'Versão da Linguagem Python Usada Neste Jupyter Notebook:',
    python_version(),
)

In [None]:
# Para atualizar um pacote, execute o comando abaixo no terminal ou prompt de comando:
# pip install -U nome_pacote

# Para instalar a versão exata de um pacote, execute o comando abaixo no terminal ou prompt de comando:
#!pip install nome_pacote==versão_desejada

# Depois de instalar ou atualizar o pacote, reinicie o jupyter notebook.

# Instala o pacote watermark. 
# Esse pacote é usado para gravar as versões de outros pacotes usados neste jupyter notebook.
#!pip install -q -U watermark

# Instala o pacote tensorboard-plugin-profile. 
# Esse pacote é usado para incrementar funcionalidades no TensorBoard.
#!pip install -U tensorboard-plugin-profile

In [None]:
# Bibliotecas e Frameworks

import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from keras.layers import Dense, Dropout
from keras.models import Sequential
from keras.optimizers import *
from keras.callbacks import ModelCheckpoint, TensorBoard, EarlyStopping, ReduceLROnPlateau, LambdaCallback, TerminateOnNaN
from keras.wrappers.scikit_learn import KerasClassifier #
from keras.wrappers.scikit_learn import KerasRegressor
from keras.initializers import GlorotUniform
from keras.regularizers import L1L2
from tensorboard import notebook
from sklearn.pipeline import Pipeline
from sklearn.model_selection import GridSearchCV
from sklearn.decomposition import PCA
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, make_scorer
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
from time import time
from datetime import datetime
import os

In [None]:
# Versions of the packages imported in this Jupyter notebook.
# Requires the `watermark` IPython extension to be installed.

%reload_ext watermark
%watermark -a "Equipe DIOMGIS" --iversions

In [None]:
# Notebook-wide plotting, logging and warning configuration.
sns.set_style('whitegrid')
# NOTE(review): TensorFlow has already been imported by the time this cell runs;
# this variable is normally set before the import - confirm it still takes effect here.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
%load_ext tensorboard
%matplotlib inline
# Hide FutureWarning / DeprecationWarning messages for the rest of the session.
warnings.filterwarnings("ignore", category=FutureWarning)
warnings.filterwarnings("ignore", category=DeprecationWarning)

In [None]:
# Global run settings.
verbose = 2  # verbosity level handed to the Keras callbacks, the wrapper and scikit-learn
seed = 25    # single seed shared by every random number source in the notebook

# Seed NumPy, which generates the synthetic data set.
np.random.seed(seed)
# Seed TensorFlow's global generator as well, so the random operations used
# while training do not differ from one kernel restart to the next.
tf.random.set_seed(seed)

In [None]:
# Make sure TensorFlow can reach a GPU; abort the run when it cannot.

device_name = tf.test.gpu_device_name()
if device_name:
    print('Found GPU at: {}'.format(device_name))
else:
    raise SystemError('GPU device not found')

In [None]:
# GPU status, as reported by the nvidia-smi command-line tool

!nvidia-smi

In [None]:
# Synthetic data generator

size_sample = 200000


def make_synthetic_data(n_samples):
    """Draw one synthetic regression data set.

    Seven features are sampled with the global NumPy generator, one
    ``np.random.random`` call per feature in the order x1..x7, each scaled
    by 10. The target is a fixed non-linear combination of the features.

    Parameters
    ----------
    n_samples : int
        Number of rows to generate.

    Returns
    -------
    x : numpy.ndarray
        Feature matrix of shape (n_samples, 7); column k holds feature x(k+1).
    y : numpy.ndarray
        Target vector of shape (n_samples,).
    """
    # A generator expression keeps the draws sequential, so the random stream
    # is consumed exactly as seven separate calls would consume it.
    x1, x2, x3, x4, x5, x6, x7 = (10 * np.random.random(n_samples) for _ in range(7))

    x = np.column_stack((x1, x2, x3, x4, x5, x6, x7))
    y = 3*(x1**(1/2)) + 2*(x2**2) + 4*x3 - 5*(x4**(3/2)) + x5 + x6**(3) - x7
    return x, y


# Training data
x_treino, y_treino = make_synthetic_data(size_sample)

# Second, independent draw with 10% of the training size.
# It is passed as validation data during fitting and reused for the final evaluation.
x_teste, y_teste = make_synthetic_data(int(0.1 * size_sample))

In [None]:
# Fixed training settings (not explored by the grid search)
epochs, batch_size = 200, 128  # forwarded to the Keras fit call
nKFold, nPCA = 5, 7            # cross-validation folds; PCA components / network input width

In [None]:
# Principal Component Analysis (PCA)
# The whole step is wrapped in a string literal, so none of it executes:
# x_treino and x_teste reach the model untransformed. Removing the triple
# quotes would fit the PCA on x_treino and overwrite both arrays in place.
# NOTE(review): as the only expression in the cell, the string is echoed as cell output.
'''
reduceDim = PCA(n_components = nPCA,
                # copy = True,
                # whiten = False,
                # svd_solver = 'auto',
                # tol = 0.0,
                # iterated_power = 'auto',
                # n_oversamples = 10,
                # power_iteration_normalizer = 'auto',
                # random_state = None
               )

reduceDim.fit(x_treino)

x_treino = reduceDim.transform(x_treino)
x_teste = reduceDim.transform(x_teste)
'''

In [None]:
# Callbacks

# Keeps the full model with the lowest val_loss (defined, but not in the active list).
checkpoint = ModelCheckpoint(
    filepath="saveModel/bestModel",
    monitor='val_loss',
    mode='min',
    save_best_only=True,
    save_weights_only=False,
    verbose=verbose,
)

# The log directory name carries the timestamp taken when this cell runs.
tensorboard = TensorBoard(log_dir="logs_reg/{}".format(time()))

# Stop after 20 epochs without a val_loss improvement and roll back to the best weights.
earlystop = EarlyStopping(
    monitor='val_loss',
    min_delta=0,
    patience=20,
    restore_best_weights=True,
    verbose=verbose,
)

# Multiply the learning rate by 0.2 after 3 epochs without a training-loss improvement.
reduce_lr = ReduceLROnPlateau(
    monitor='loss',
    mode="min",
    factor=0.2,
    patience=3,
    min_delta=0.00001,
    min_lr=0,
    verbose=verbose,
)

# Placeholder with every hook unset (defined, but not in the active list).
lambdaCB = LambdaCallback(
    on_epoch_begin=None,
    on_epoch_end=None,
    on_batch_begin=None,
    on_batch_end=None,
    on_train_begin=None,
    on_train_end=None,
)

# Active callbacks; `checkpoint` and `lambdaCB` are not included.
callbacks = [
    tensorboard,
    earlystop,
    reduce_lr,
    TerminateOnNaN(),
]

In [None]:
def create_model(optimizer,
                 layers,
                 n_dense,
                 activationA,
                 activationB,
                 regL1,
                 regL2,
                 dropout):
    """Build and compile the fully connected regression network.

    The network is a Sequential stack: one Dense layer taking ``nPCA``
    inputs, then ``layers`` repetitions of (Dropout, Dense), then a single
    linear output unit. Every Dense layer, the output included, shares one
    GlorotUniform initializer seeded with the global ``seed`` and one L1L2
    regularizer applied to both kernel and bias.

    Parameters
    ----------
    optimizer
        Optimizer handed to ``model.compile``.
    layers : int
        Number of (Dropout, Dense) pairs added after the first Dense layer.
    n_dense : int
        Units in each hidden Dense layer.
    activationA, activationB
        Activations alternated layer by layer. The first Dense layer uses
        ``activationB``, the next ``activationA``, and so on.
    regL1, regL2 : float
        L1 and L2 penalty factors of the shared regularizer.
    dropout : float
        Rate of each Dropout layer.

    Returns
    -------
    The compiled model, with mean squared error as loss and no extra metrics.
    """
    weight_init = GlorotUniform(seed)
    penalty = L1L2(l1=regL1, l2=regL2)

    # Settings common to every Dense layer of the network.
    dense_kwargs = dict(kernel_initializer=weight_init,
                        kernel_regularizer=penalty,
                        bias_regularizer=penalty)

    # Layer k (0 = first Dense layer) uses activation_cycle[k % 2].
    activation_cycle = (activationB, activationA)

    model = Sequential()
    model.add(Dense(n_dense,
                    activation=activation_cycle[0],
                    input_shape=(nPCA,),
                    **dense_kwargs))

    for depth in range(1, layers + 1):
        model.add(Dropout(dropout))
        model.add(Dense(n_dense,
                        activation=activation_cycle[depth % 2],
                        **dense_kwargs))

    # Single output unit without activation (linear regression head).
    model.add(Dense(1, **dense_kwargs))

    model.compile(loss=keras.losses.MeanSquaredError(),
                  optimizer=optimizer)

    return model

In [None]:
# scikit-learn compatible wrapper around the Keras network built by create_model
model = KerasRegressor(build_fn=create_model, verbose=verbose, callbacks=callbacks)

In [None]:
# Pipeline with the Keras regressor as its only step.
# The step name "model" is the prefix used by the "model__*" parameters further down.
steps = [("model", model)]
estimator = Pipeline(steps=steps, verbose=verbose)

In [None]:
# Parameter definitions for the grid search

# Optimizers: one pre-configured instance per candidate, all sharing one learning rate
learning_rate = 0.01

opt_SGD = SGD(learning_rate=learning_rate, momentum=0.0, nesterov=False)

opt_RMSprop = RMSprop(learning_rate=learning_rate, rho=0.9, momentum=0.0,
                      epsilon=1e-07, centered=False)

opt_Adam = Adam(learning_rate=learning_rate, beta_1=0.9, beta_2=0.999,
                epsilon=1e-07, amsgrad=False)

opt_Adadelta = Adadelta(learning_rate=learning_rate, rho=0.95, epsilon=1e-07)

opt_Adagrad = Adagrad(learning_rate=learning_rate, initial_accumulator_value=0.1,
                      epsilon=1e-07)

opt_Adamax = Adamax(learning_rate=learning_rate, beta_1=0.9, beta_2=0.999,
                    epsilon=1e-07)

opt_Nadam = Nadam(learning_rate=learning_rate, beta_1=0.9, beta_2=0.999,
                  epsilon=1e-07)

opt_Ftrl = Ftrl(learning_rate=learning_rate,
                learning_rate_power=-0.5,
                initial_accumulator_value=0.1,
                l1_regularization_strength=0.0,
                l2_regularization_strength=0.0,
                l2_shrinkage_regularization_strength=0.0,
                beta=0.0)

# Search space. Every key is "model__<create_model argument>".
params_grid = dict(
    # Candidates: opt_SGD, opt_RMSprop, opt_Adam, opt_Adadelta, opt_Adagrad,
    # opt_Adamax, opt_Nadam, opt_Ftrl
    model__optimizer=[opt_Adam],

    # Number of (Dropout, Dense) pairs after the first Dense layer
    model__layers=[3, 4],

    # Units per hidden layer
    model__n_dense=[128],

    # Alternated activations; candidates: 'relu', 'sigmoid', 'tanh', 'selu', 'elu'
    model__activationA=['sigmoid'],
    model__activationB=['sigmoid'],

    # L1 penalty factor
    model__regL1=[0],

    # L2 penalty factor
    model__regL2=[0],

    # Dropout rate
    model__dropout=[0],
)

In [None]:
# Grid search with cross-validation

grid = GridSearchCV(
    estimator=estimator,
    param_grid=params_grid,
    # Mean squared error with greater_is_better=False, i.e. scores are negated MSE
    scoring=make_scorer(score_func=mean_squared_error, greater_is_better=False),
    cv=nKFold,
    refit=True,
    return_train_score=False,
    verbose=verbose,
    # n_jobs / pre_dispatch are left at their defaults
)

In [None]:
# Display the configured grid-search object (it is fitted in a later cell)

grid

In [None]:
# Training monitor (TensorBoard)

# Embeds the TensorBoard page served on port 6006 in the notebook output.
# NOTE(review): the TensorBoard callback writes under "logs_reg/", so the server is
# presumably started separately with `tensorboard --logdir=logs_reg/` - confirm.
notebook.display(port=6006, height=1000)

In [None]:
# Training

# Keyword arguments routed to the pipeline's "model" step when fitting.
# The same (x_teste, y_teste) pair is used as validation data for every fit.
fit_params = dict(
    model__batch_size=batch_size,
    model__epochs=epochs,
    model__verbose=verbose,
    model__validation_data=(x_teste, y_teste),
    model__shuffle=True,
    model__validation_steps=None,
    model__validation_freq=1,
)

grid_result = grid.fit(x_treino, y_treino, **fit_params)

In [None]:
# GridSearchCV results: one row per parameter combination, ordered by rank.
# mean_test_score is the value computed by the GridSearchCV scorer.

cv_results = pd.DataFrame(grid.cv_results_)
cv_scores = cv_results[['rank_test_score', 'mean_test_score', 'mean_fit_time']]
cv_params = pd.DataFrame(grid.cv_results_['params'])

(
    pd.concat([cv_scores, cv_params], axis=1, join='inner')
    .set_index('rank_test_score')
    .sort_values('rank_test_score')
)

In [None]:
# Best parameter combination and the pipeline refitted with it
best_params, best_model = grid.best_params_, grid.best_estimator_

In [None]:
# Score of the best pipeline on (x_teste, y_teste), using the wrapped Keras
# model's own score function (negative mean squared error, per the original note).
best_model.score(x_teste, y_teste)

In [None]:
# Score of the best pipeline on the training data, using the wrapped Keras
# model's own score function (negative mean squared error, per the original note).
best_model.score(x_treino, y_treino)

In [None]:
# Side-by-side table: true targets ('y') and the best pipeline's predictions ('previsao') for x_teste
pd.DataFrame({'y': y_teste, 'previsao': best_model.predict(x_teste)})

In [None]:
# Residual plot for the best pipeline: predictions as x, true targets as y.
# x and y are passed by keyword because recent seaborn releases no longer
# accept the data vectors positionally.
sns.residplot(x=best_model.predict(x_teste), y=y_teste);

## Carregando o Conjunto de dados

## Análise Exploratória de Dados

### Análise n - XXX

## Pré-Processamento de Dados Para Construção de Modelos de Machine Learning

### Padronização

###  Construção, Treinamento e Avaliação do Modelo 1 com Regressão Linear (Benchmark)

In [None]:
# Benchmark: linear regression fitted on the training data
reg = LinearRegression().fit(
    X=x_treino,
    y=y_treino,
)

In [None]:
# Coefficient of determination of the linear benchmark on (x_teste, y_teste)

reg.score(x_teste, y_teste)

In [None]:
# Mean squared error of the linear benchmark on (x_teste, y_teste)
regLoss = np.mean(np.square(reg.predict(x_teste) - y_teste))
regLoss

In [None]:
# Side-by-side table: true targets ('y') and the linear benchmark's predictions ('previsao') for x_teste
pd.DataFrame({'y': y_teste, 'previsao': reg.predict(x_teste)})

In [None]:
 # Residual plot for the linear benchmark: predictions as x, true targets as y.
 # x and y are passed by keyword because recent seaborn releases no longer
 # accept the data vectors positionally.
 sns.residplot(x=reg.predict(x_teste), y=y_teste);

### Avaliação do Modelo

### Métricas

### Resíduos

###  Construção, Treinamento e Avaliação do Modelo n com XXX

## Seleção do Modelo

## Conclusão

# Fim