# <font color='blue'>Data Science Challenge @ ITA 2022</font>
# <font color='blue'>Equipe DIOMGIS</font>

## <font color='blue'>Fase 1</font>

### <font color='blue'>TEMA DO DESAFIO</font>

![title](../data/image/logo.jpeg)

In [1]:
# Report the Python interpreter version used to run this notebook
from platform import python_version

version_label = 'Versão da Linguagem Python Usada Neste Jupyter Notebook:'
print(version_label, python_version())

Versão da Linguagem Python Usada Neste Jupyter Notebook: 3.9.12


In [2]:
# To update a package, run the command below in a terminal or command prompt:
# pip install -U package_name

# To install an exact version of a package, run the command below in a terminal or command prompt:
#!pip install package_name==desired_version

# After installing or updating a package, restart the Jupyter notebook.

# Installs the watermark package.
# It is used to record the versions of the other packages used in this notebook.
#!pip install -q -U watermark

# Installs the tensorboard-plugin-profile package.
# It is used to add functionality to TensorBoard.
#!pip install -U tensorboard-plugin-profile

In [3]:
# Imports
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from keras.layers import *
from keras.models import *
from keras.optimizers import *
from keras.wrappers.scikit_learn import KerasClassifier
from keras.wrappers.scikit_learn import KerasRegressor
from keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau 
from keras.callbacks import TensorBoard
from sklearn.pipeline import Pipeline
from sklearn.model_selection import GridSearchCV
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
from time import time
import os

In [4]:
# Notebook-wide configuration: plot style, TensorFlow log level, extensions and warning filters.
sns.set_style('whitegrid')
# NOTE(review): tensorflow is already imported in an earlier cell; this variable is
# presumably read when TensorFlow loads, so it may need to be set before the import
# to have any effect — confirm.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
# Makes the %tensorboard magic used further down available.
%load_ext tensorboard
%matplotlib inline
# Silence FutureWarning / DeprecationWarning messages for the rest of the notebook.
warnings.filterwarnings("ignore", category=FutureWarning)
warnings.filterwarnings("ignore", category=DeprecationWarning)

In [5]:
# Single seed shared by every random number generator used in this notebook.
seed = 25

# NumPy's global generator drives the synthetic data created with np.random below.
np.random.seed(seed)

# TensorFlow/Keras keep their own global seed; without setting it the model
# training is not repeatable between kernel restarts.
tf.random.set_seed(seed)

In [6]:
# Versões dos pacotes usados neste jupyter notebook
%reload_ext watermark
%watermark -a "Equipe DIOMGIS" --iversions

Author: Equipe DIOMGIS

keras     : 2.10.0
tensorflow: 2.10.0
pandas    : 1.4.2
matplotlib: 3.5.1
numpy     : 1.22.3
seaborn   : 0.11.2



In [7]:
# Confirm that TensorFlow can reach a GPU; stop the notebook right here otherwise.
device_name = tf.test.gpu_device_name()

if device_name:
    print('Found GPU at: {}'.format(device_name))
else:
    # An empty device name means no GPU was reported.
    raise SystemError('GPU device not found')

Found GPU at: /device:GPU:0


In [8]:
# Shell escape: print the GPU/driver status table produced by the nvidia-smi tool
!nvidia-smi

Sun Oct  9 17:56:27 2022       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 516.94       Driver Version: 516.94       CUDA Version: 11.7     |
|-------------------------------+----------------------+----------------------+
| GPU  Name            TCC/WDDM | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  NVIDIA GeForce ... WDDM  | 00000000:65:00.0  On |                  N/A |
| 60%   26C    P2    27W / 220W |    979MiB /  8192MiB |      4%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [9]:
# Synthetic data generation


def target_function(x1, x2):
    """Return the regression target y = 3*sqrt(x1) + 2*x2**2 (element-wise).

    Parameters
    ----------
    x1, x2 : array-like of non-negative numbers with matching shapes.

    Returns
    -------
    numpy.ndarray of floats with the same shape as the inputs.
    """
    return 3*(x1**(1/2)) + 2*(x2**2)


def generate_samples(n_samples, low=0, high=100):
    """Draw `n_samples` random integer feature pairs and compute their targets.

    Both features are drawn from np.random.randint(low, high), i.e. integers in
    [low, high). The first feature column is drawn before the second, so the
    sequence of random draws matches drawing x1 and x2 one after the other.

    Returns
    -------
    (x, y) : x has shape (n_samples, 2) with columns [x1, x2];
             y has shape (n_samples,) and holds target_function(x1, x2).
    """
    x1 = np.random.randint(low, high, n_samples)
    x2 = np.random.randint(low, high, n_samples)
    return np.column_stack((x1, x2)), target_function(x1, x2)


# Number of samples in the training set
size_sample = 200000

# Training data
x_treino, y_treino = generate_samples(size_sample)

# Hold-out data (10% of the training size), passed to Keras as validation_data
x_teste, y_teste = generate_samples(int(0.1 * size_sample))

In [10]:
# Settings shared by the training cells below
verbose = 2       # verbosity level handed to the Keras / scikit-learn objects
nKFold = 5        # number of cross-validation folds given to GridSearchCV
batch_size = 128  # forwarded to fit through fit_params
epochs = 300      # forwarded to fit through fit_params

In [11]:
def create_model(optimizer,
                 n_dense1,
                 n_dense2,
                 n_dense3,
                 activation1,
                 activation2,
                 activation3,
                 dropout):
    """Build and compile the fully connected regression network.

    The network takes 2 input features and stacks three Dense layers
    (with Dropout after the first two) followed by a single-unit output layer.

    Parameters
    ----------
    optimizer : optimizer passed to ``compile``.
    n_dense1, n_dense2, n_dense3 : unit counts of the three hidden Dense layers.
    activation1, activation2, activation3 : activations of those layers.
    dropout : rate used by both Dropout layers.

    Returns
    -------
    The compiled Sequential model (loss "mse", with "mse" also tracked as a metric).
    """
    network = Sequential([
        Dense(n_dense1, input_shape=(2,), activation=activation1),
        Dropout(dropout),
        Dense(n_dense2, activation=activation2),
        Dropout(dropout),
        Dense(n_dense3, activation=activation3),
        Dense(1),  # single regression output, no activation
    ])

    # "mse" is listed as a metric too so that "val_mse" exists for the callbacks.
    network.compile(optimizer=optimizer, loss='mse', metrics=["mse"])

    return network

In [12]:
# Training callbacks. All of the monitoring ones watch the validation MSE.

# Each definition of this cell gets its own timestamped TensorBoard log folder.
log_dir = "logs/{}".format(time())
tensorboard_callback = TensorBoard(log_dir=log_dir)

# Stop after 20 epochs without improvement and roll back to the best weights.
earlystop = EarlyStopping(
    monitor='val_mse',
    patience=20,
    min_delta=0,
    restore_best_weights=True,
    verbose=verbose,
)

# Multiply the learning rate by 0.25 after 5 epochs without improvement.
reduce_lr = ReduceLROnPlateau(
    monitor='val_mse',
    mode="min",
    factor=0.25,
    patience=5,
    min_delta=0.00001,
    verbose=verbose,
)

# Saves the whole model (not only weights) whenever val_mse improves.
# Defined for convenience but not included in `callbacks` below.
checkpoint = ModelCheckpoint(
    filepath="saveModel/bestModel",
    monitor='val_mse',
    mode='min',
    save_best_only=True,
    save_weights_only=False,
    verbose=verbose,
)

callbacks = [tensorboard_callback, earlystop, reduce_lr]  # checkpoint

In [13]:
# Model: scikit-learn compatible wrapper around the Keras model factory
model = KerasRegressor(
    build_fn=create_model,
    callbacks=callbacks,
    verbose=verbose,
)

In [14]:
# Pipeline with a single step named "model"; that name is the prefix used by the
# "model__..." keys in the parameter grid and in fit_params.
steps = [
    ("model", model),
]

estimator = Pipeline(steps=steps, verbose=verbose)

In [15]:
# Parameter definitions (GridSearch)

# Optimizers: every candidate starts from the same learning rate
learning_rate = 0.01

# Keyword arguments shared verbatim by the Adam-style optimizers below
_adam_like = dict(
    learning_rate=learning_rate,
    beta_1=0.9,
    beta_2=0.999,
    epsilon=1e-07,
)

opt_SGD = SGD(learning_rate=learning_rate, momentum=0.0, nesterov=False)

opt_RMSprop = RMSprop(learning_rate=learning_rate,
                      rho=0.9,
                      momentum=0.0,
                      epsilon=1e-07,
                      centered=False)

opt_Adam = Adam(amsgrad=False, **_adam_like)

opt_Adadelta = Adadelta(learning_rate=learning_rate, rho=0.95, epsilon=1e-07)

opt_Adagrad = Adagrad(learning_rate=learning_rate,
                      initial_accumulator_value=0.1,
                      epsilon=1e-07)

opt_Adamax = Adamax(**_adam_like)

opt_Nadam = Nadam(**_adam_like)

opt_Ftrl = Ftrl(learning_rate=learning_rate,
                learning_rate_power=-0.5,
                initial_accumulator_value=0.1,
                l1_regularization_strength=0.0,
                l2_regularization_strength=0.0,
                l2_shrinkage_regularization_strength=0.0,
                beta=0.0)

In [16]:
# Search space explored by GridSearchCV. Keys are "model__<argument of create_model>".
# To search over every optimizer defined above, use instead:
#   [opt_SGD, opt_RMSprop, opt_Adam, opt_Adadelta, opt_Adagrad, opt_Adamax, opt_Nadam, opt_Ftrl]
params_grid = dict(
    model__optimizer=[opt_Ftrl],
    model__n_dense1=[128],
    model__n_dense2=[128],
    model__n_dense3=[128],
    model__activation1=['relu'],
    model__activation2=['relu'],
    model__activation3=['relu'],
    model__dropout=[0],
)

In [17]:
# Exhaustive search over params_grid with nKFold-fold cross-validation
grid = GridSearchCV(
    estimator=estimator,
    param_grid=params_grid,
    cv=nKFold,
    return_train_score=True,
    verbose=verbose,
)

In [18]:
# Training monitoring: open TensorBoard on the logs/ directory written by the TensorBoard callback
%tensorboard --logdir=logs/

Reusing TensorBoard on port 6006 (pid 13440), started 1 day, 6:40:11 ago. (Use '!kill 13440' to kill it.)

In [None]:
# Training: run the cross-validated grid search.
# The "model__" prefix addresses the pipeline step named "model", so these
# keyword arguments are meant for that step's fit call.
fit_params = dict(
    model__batch_size=batch_size,
    model__epochs=epochs,
    model__verbose=verbose,
    model__validation_data=(x_teste, y_teste),
    model__shuffle=True,
    model__validation_steps=None,
    model__validation_freq=1,
)

grid_result = grid.fit(x_treino, y_treino, **fit_params)

Fitting 5 folds for each of 1 candidates, totalling 5 fits
Epoch 1/300
1250/1250 - 7s - loss: 4013160.7500 - mse: 4013160.7500 - val_loss: 2798705.5000 - val_mse: 2798705.5000 - lr: 0.0100 - 7s/epoch - 5ms/step
Epoch 2/300
1250/1250 - 4s - loss: 2702635.0000 - mse: 2702635.0000 - val_loss: 2537774.0000 - val_mse: 2537774.0000 - lr: 0.0100 - 4s/epoch - 3ms/step
Epoch 3/300
1250/1250 - 4s - loss: 2396147.5000 - mse: 2396147.5000 - val_loss: 2137882.7500 - val_mse: 2137882.7500 - lr: 0.0100 - 4s/epoch - 3ms/step
Epoch 4/300
1250/1250 - 4s - loss: 1777276.8750 - mse: 1777276.8750 - val_loss: 1309144.1250 - val_mse: 1309144.1250 - lr: 0.0100 - 4s/epoch - 3ms/step
Epoch 5/300
1250/1250 - 5s - loss: 912096.1250 - mse: 912096.1250 - val_loss: 589907.5625 - val_mse: 589907.5625 - lr: 0.0100 - 5s/epoch - 4ms/step
Epoch 6/300
1250/1250 - 5s - loss: 441033.4688 - mse: 441033.4688 - val_loss: 334325.5938 - val_mse: 334325.5938 - lr: 0.0100 - 5s/epoch - 4ms/step
Epoch 7/300
1250/1250 - 5s - loss: 26

Epoch 58/300
1250/1250 - 4s - loss: 1314.1908 - mse: 1314.1908 - val_loss: 1348.0474 - val_mse: 1348.0474 - lr: 0.0100 - 4s/epoch - 3ms/step
Epoch 59/300
1250/1250 - 4s - loss: 1264.0657 - mse: 1264.0657 - val_loss: 1258.1970 - val_mse: 1258.1970 - lr: 0.0100 - 4s/epoch - 3ms/step
Epoch 60/300
1250/1250 - 4s - loss: 1213.8442 - mse: 1213.8442 - val_loss: 1278.9871 - val_mse: 1278.9871 - lr: 0.0100 - 4s/epoch - 3ms/step
Epoch 61/300
1250/1250 - 4s - loss: 1173.7528 - mse: 1173.7528 - val_loss: 1183.0587 - val_mse: 1183.0587 - lr: 0.0100 - 4s/epoch - 3ms/step
Epoch 62/300
1250/1250 - 4s - loss: 1135.9518 - mse: 1135.9518 - val_loss: 1119.3676 - val_mse: 1119.3676 - lr: 0.0100 - 4s/epoch - 3ms/step
Epoch 63/300
1250/1250 - 4s - loss: 1103.0491 - mse: 1103.0491 - val_loss: 1094.0964 - val_mse: 1094.0964 - lr: 0.0100 - 4s/epoch - 3ms/step
Epoch 64/300
1250/1250 - 4s - loss: 1063.4984 - mse: 1063.4984 - val_loss: 1063.8689 - val_mse: 1063.8689 - lr: 0.0100 - 4s/epoch - 3ms/step
Epoch 65/300


Epoch 118/300
1250/1250 - 4s - loss: 382.3553 - mse: 382.3553 - val_loss: 390.5056 - val_mse: 390.5056 - lr: 0.0100 - 4s/epoch - 3ms/step
Epoch 119/300
1250/1250 - 4s - loss: 378.9920 - mse: 378.9920 - val_loss: 413.5252 - val_mse: 413.5252 - lr: 0.0100 - 4s/epoch - 3ms/step
Epoch 120/300
1250/1250 - 4s - loss: 376.1096 - mse: 376.1096 - val_loss: 388.1307 - val_mse: 388.1307 - lr: 0.0100 - 4s/epoch - 3ms/step
Epoch 121/300
1250/1250 - 4s - loss: 373.3220 - mse: 373.3220 - val_loss: 365.3223 - val_mse: 365.3223 - lr: 0.0100 - 4s/epoch - 3ms/step
Epoch 122/300
1250/1250 - 4s - loss: 369.8264 - mse: 369.8264 - val_loss: 367.0096 - val_mse: 367.0096 - lr: 0.0100 - 4s/epoch - 3ms/step
Epoch 123/300
1250/1250 - 4s - loss: 366.0761 - mse: 366.0761 - val_loss: 455.5177 - val_mse: 455.5177 - lr: 0.0100 - 4s/epoch - 3ms/step
Epoch 124/300
1250/1250 - 4s - loss: 364.0102 - mse: 364.0102 - val_loss: 361.2805 - val_mse: 361.2805 - lr: 0.0100 - 4s/epoch - 3ms/step
Epoch 125/300
1250/1250 - 4s - los

Epoch 178/300
1250/1250 - 4s - loss: 251.7636 - mse: 251.7636 - val_loss: 260.0312 - val_mse: 260.0312 - lr: 0.0100 - 4s/epoch - 3ms/step
Epoch 179/300
1250/1250 - 4s - loss: 250.9942 - mse: 250.9942 - val_loss: 250.7883 - val_mse: 250.7883 - lr: 0.0100 - 4s/epoch - 3ms/step
Epoch 180/300
1250/1250 - 4s - loss: 250.2538 - mse: 250.2538 - val_loss: 289.8398 - val_mse: 289.8398 - lr: 0.0100 - 4s/epoch - 3ms/step
Epoch 181/300
1250/1250 - 4s - loss: 248.9292 - mse: 248.9292 - val_loss: 256.5995 - val_mse: 256.5995 - lr: 0.0100 - 4s/epoch - 3ms/step
Epoch 182/300
1250/1250 - 4s - loss: 247.7168 - mse: 247.7168 - val_loss: 248.3056 - val_mse: 248.3056 - lr: 0.0100 - 4s/epoch - 3ms/step
Epoch 183/300
1250/1250 - 4s - loss: 246.9342 - mse: 246.9342 - val_loss: 260.2282 - val_mse: 260.2282 - lr: 0.0100 - 4s/epoch - 3ms/step
Epoch 184/300
1250/1250 - 4s - loss: 246.4171 - mse: 246.4171 - val_loss: 244.2395 - val_mse: 244.2395 - lr: 0.0100 - 4s/epoch - 3ms/step
Epoch 185/300
1250/1250 - 4s - los

In [None]:
# Summarize the search: best cross-validation score and the parameters behind it
best_summary = (grid_result.best_score_, grid_result.best_params_)
print("Best: %f using %s" % best_summary)

In [None]:
# Show the grid search's cv_results_ as a table
pd.DataFrame(grid.cv_results_)

In [None]:
# Keep the winning hyper-parameter combination for the retraining cell below
best_params = grid_result.best_params_

In [None]:
# Estimator exposed by the grid search as best_estimator_
best_model = grid.best_estimator_

In [None]:
# Retrain the pipeline with the best hyper-parameters found by the grid search.
# NOTE(review): the original heading says "for more epochs", but `epochs` is the
# same value used during the search — raise it here if longer training is intended.

fit_params = {
    'model__batch_size': batch_size,
    'model__epochs': epochs,
    'model__verbose': verbose,
    'model__validation_data': (x_teste, y_teste),
    'model__shuffle': True,
    'model__validation_steps': None,
    'model__validation_freq': 1,
}

# GridSearchCV works on clones, so `estimator` itself still has no build
# hyper-parameters. Apply the winning combination first; otherwise create_model
# would be called without its required arguments and the fit would fail.
estimator.set_params(**best_params)

# NOTE(review): this rebinds `grid_result` to the value returned by the
# pipeline's fit, so the grid-search result is no longer reachable by this name.
grid_result = estimator.fit(x_treino, y_treino, **fit_params)

# Alternative: keep training the estimator already refit by the grid search.
#grid_result = best_model.fit(x_treino, y_treino, **fit_params)

In [None]:
# Sanity check on one point: for x1=16, x2=4 the generating formula gives 3*sqrt(16) + 2*4**2 = 44
best_model.predict(np.array([[16, 4]]))

## Carregando o Conjunto de dados

## Análise Exploratória de Dados

### Análise n - XXX

## Pré-Processamento de Dados Para Construção de Modelos de Machine Learning

### Padronização

###  Construção, Treinamento e Avaliação do Modelo 1 com Regressão Linear (Benchmark)

### Avaliação do Modelo

### Métricas

### Resíduos

###  Construção, Treinamento e Avaliação do Modelo n com XXX

## Seleção do Modelo

## Conclusão

# Fim