# Bibliotecas

In [None]:
# Bibliotecas mais comuns
import pandas as pd
import numpy as np
import os, time
import random
import statistics
import openpyxl
import sys
import Funcoes as fc

# Bibliotecas para leitura e processamento dos dados
import statsmodels.api as sm
import pickle

# Bibliotecas para criação de gráficos
from matplotlib import pyplot as plt
import seaborn as sns
import plotly.express as px
from scipy.stats import randint

# Bibliotecas para o Projeto RMN
from sklearn import preprocessing
from sklearn.preprocessing import MaxAbsScaler, OneHotEncoder, StandardScaler
from sklearn.model_selection import StratifiedShuffleSplit, train_test_split, RandomizedSearchCV, KFold, GridSearchCV
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor, HistGradientBoostingRegressor
from sklearn.neural_network import MLPRegressor
from sklearn.linear_model import SGDRegressor
from sklearn.metrics.cluster import adjusted_mutual_info_score
from sklearn.metrics import accuracy_score

# Leitura dos dados

In [None]:
# Load the training and test spreadsheets, then discard the 'Unnamed: 0'
# column (presumably a row index that was saved with the sheet — confirm).
treino = pd.read_excel('Dados-RMN/Dados-Mauricio/Dados_treino.xlsx')
treino = treino.drop(columns='Unnamed: 0')

teste = pd.read_excel('Dados-RMN/Dados-Mauricio/Dados_teste.xlsx')
teste = teste.drop(columns='Unnamed: 0')

In [None]:
# Columns removed from both data sets before modelling. The list is defined
# once so the training and test feature matrices always drop exactly the same
# columns (it includes the target column 'Permeabilidade Gas').
COLUNAS_DESCARTADAS = [
    'Amostra', 'Poço', 'Litofacies', 'Categoria Litofacies', 'Bioturbiditos',
    'Tempo Distribuicao', 'Dolowackstone', 'Grainstone', 'Brechado',
    'Porosidade Gas', 'Porosidade RMN', 'Distribuicao T2',
    'Permeabilidade Gas', 'Fracao Argila', 'Fitting Error',
    'T2 Ponderado Log', 'Porosidade i', 'S1', 'S2', 'S3', 'S4',
]

# Feature matrices: whatever columns remain after the drop.
X_treino = treino.drop(columns=COLUNAS_DESCARTADAS)
X_teste = teste.drop(columns=COLUNAS_DESCARTADAS)

# Targets: log10 of the gas permeability multiplied by 1000
# (presumably a unit conversion such as D -> mD — confirm).
y_treino = np.log10(treino['Permeabilidade Gas'] * 1000)
y_teste = np.log10(teste['Permeabilidade Gas'] * 1000)

In [None]:
def previsao_ML(modelo, X, y_log):
    """Return the error factor sigma = 10 ** RMSE of a model in log10 space.

    The RMSE is the root-mean-square difference between ``modelo.predict(X)``
    and ``y_log``. Both are compared directly on the log10 scale: any constant
    rescaling applied to both sides (such as converting back to linear units
    and taking log10 again) cancels in the difference, so skipping it avoids
    needless rounding and overflow for extreme values.

    Args:
        modelo: Fitted object exposing ``predict(X)``.
        X: Features, passed unchanged to ``modelo.predict``.
        y_log: Array-like of observed targets on the same log10 scale as the
            model's predictions.

    Returns:
        ``10 ** RMSE`` as a float; 1.0 means a perfect fit.

    Raises:
        ValueError: If there are no samples, or if the number of predictions
            differs from the number of observed values.
    """
    # Flatten both sides so a column-vector prediction (n, 1) cannot broadcast
    # against a 1-D target into an (n, n) matrix of differences.
    observado = np.asarray(y_log, dtype=float).ravel()
    previsto = np.asarray(modelo.predict(X), dtype=float).ravel()

    if previsto.size != observado.size:
        raise ValueError(
            f'Número de previsões ({previsto.size}) difere do número de '
            f'valores observados ({observado.size}).'
        )
    if observado.size == 0:
        raise ValueError('Não há amostras para calcular sigma.')

    rmse_log = np.sqrt(np.mean((previsto - observado) ** 2))
    return 10 ** rmse_log

## Random Forest Regressor

In [None]:
# Fit the Random Forest model through the project helper
# fc.fitRandomForestRegressor and report how long the call took.
# NOTE(review): every hyper-parameter is given as a list/array, presumably the
# candidate values of a search (the returned object exposes best_params_);
# Versao and Pasta_Salvamento presumably tag and locate the saved results —
# confirm in Funcoes.
# perf_counter() is a monotonic clock, so the elapsed time is not distorted by
# system clock adjustments during this long-running call.
tempo_inicial = time.perf_counter()
rfr = fc.fitRandomForestRegressor(
    X_treino,
    y_treino,
    Versao=2.9,
    n_jobs=64,
    Pasta_Salvamento='Dados-RMN/Dados-Mauricio/Machine-Learning/Random Forest/',
    n_estimators=np.arange(750, 802, 2),
    criterion=['squared_error', 'absolute_error'],
    max_depth=[None],
    min_samples_split=[2, 3, 4],
    min_samples_leaf=[1, 2, 3],
    min_weight_fraction_leaf=[0.0],
    max_features=[1.0, 'sqrt', 'log2'],
    max_leaf_nodes=[None],
    min_impurity_decrease=[0.0],
    bootstrap=[True],
    oob_score=[False],
    warm_start=[False],
    ccp_alpha=[0.0],
    max_samples=[None],
)
tempo_final = time.perf_counter()
print(f'{tempo_final - tempo_inicial} segundos')

1555.8646471500397 segundos


In [None]:
# Sigma returned by previsao_ML for the Random Forest model:
# training set first, then test set, one value per line.
print(
    previsao_ML(rfr, X_treino, y_treino),
    previsao_ML(rfr, X_teste, y_teste),
    sep='\n',
)

1.7054834787260134
3.6985990032900538


In [None]:
# Display the hyper-parameter combination stored in rfr.best_params_.
rfr.best_params_

{'bootstrap': True,
 'ccp_alpha': 0.0,
 'criterion': 'squared_error',
 'max_depth': None,
 'max_features': 'log2',
 'max_leaf_nodes': None,
 'max_samples': None,
 'min_impurity_decrease': 0.0,
 'min_samples_leaf': 1,
 'min_samples_split': 3,
 'min_weight_fraction_leaf': 0.0,
 'n_estimators': 756,
 'oob_score': False}

## Gradient Boosting Regressor

In [None]:
# Fit the Gradient Boosting model through the project helper
# fc.fitGradienteBoosting and report how long the call took.
# NOTE(review): every hyper-parameter is given as a list/array, presumably the
# candidate values of a search (the returned object exposes best_params_);
# Versao and Pasta_Salvamento presumably tag and locate the saved results —
# confirm in Funcoes.
# perf_counter() is a monotonic clock, so the elapsed time is not distorted by
# system clock adjustments during this long-running call.
tempo_inicial = time.perf_counter()
grb = fc.fitGradienteBoosting(
    X_treino,
    y_treino,
    Versao=2.1,
    n_jobs=64,
    Pasta_Salvamento='Dados-RMN/Dados-Mauricio/Machine-Learning/Gradient Boosting/',
    loss=['squared_error', 'absolute_error'],
    learning_rate=[0.001, 0.01, 0.05],
    n_estimators=np.arange(896, 1920, 128),
    subsample=[1.0],
    criterion=['friedman_mse', 'squared_error'],
    min_samples_split=[2, 3],
    min_samples_leaf=[3, 4, 5],
    min_weight_fraction_leaf=[0.0],
    max_depth=[4, 5, 6],
    min_impurity_decrease=[0.0],
    init=[None],
    max_features=[None, 'sqrt', 'log2'],
    alpha=[0.9],
    max_leaf_nodes=[None],
    warm_start=[False],
    validation_fraction=[0.1],
    n_iter_no_change=[None],
    tol=[1e-04, 1e-05],
    ccp_alpha=[0.0],
)
tempo_final = time.perf_counter()
print(f'{tempo_final - tempo_inicial} segundos')

7753.693201541901 segundos


In [None]:
# Sigma returned by previsao_ML for the Gradient Boosting model:
# training set first, then test set, one value per line.
print(
    previsao_ML(grb, X_treino, y_treino),
    previsao_ML(grb, X_teste, y_teste),
    sep='\n',
)

1.8357473166521678
3.56045195515427


In [None]:
# Display the hyper-parameter combination stored in grb.best_params_.
grb.best_params_

{'alpha': 0.9,
 'ccp_alpha': 0.0,
 'criterion': 'friedman_mse',
 'init': None,
 'learning_rate': 0.05,
 'loss': 'absolute_error',
 'max_depth': 5,
 'max_features': 'log2',
 'max_leaf_nodes': None,
 'min_impurity_decrease': 0.0,
 'min_samples_leaf': 5,
 'min_samples_split': 2,
 'min_weight_fraction_leaf': 0.0,
 'n_estimators': 1792,
 'n_iter_no_change': None,
 'subsample': 1.0,
 'tol': 0.0001,
 'validation_fraction': 0.1,
 'warm_start': False}

## Multi Layer Perceptron

In [None]:
# Fit the multi-layer perceptron through the project helper
# fc.fitMultiLayerPercepetronRegressor and report how long the call took.
# NOTE(review): most hyper-parameters are given as lists, presumably the
# candidate values of a search (the returned object exposes best_params_);
# max_iter is the only scalar, so it is presumably a fixed estimator setting
# rather than a searched one — confirm in Funcoes, together with the meaning
# of Versao and Pasta_Salvamento.
# perf_counter() is a monotonic clock, so the elapsed time is not distorted by
# system clock adjustments during this long-running call.
tempo_inicial = time.perf_counter()
mlp = fc.fitMultiLayerPercepetronRegressor(
    X_treino,
    y_treino,
    Pasta_Salvamento='Dados-RMN/Dados-Mauricio/Machine-Learning/MultiLayerPerceptron/',
    Versao=1.23,
    n_jobs=64,
    hidden_layer_sizes=[(72, 72, 80), (72, 80, 88), (72, 88, 88)],
    activation=['relu', 'tanh'],
    solver=['adam'],
    alpha=[1e-02, 1e-03],
    learning_rate=['constant'],
    learning_rate_init=[1e-02, 1e-03],
    power_t=[0.5],
    max_iter=32768,
    tol=[1e-04, 1e-03],
    warm_start=[False],
    momentum=[0.9],
    nesterovs_momentum=[True],
    early_stopping=[False],
    validation_fraction=[0.1],
    beta_1=[0.9, 0.999, 0.85],
    beta_2=[0.999, 0.9, 0.85],
    epsilon=[1e-08, 1e-07],
    n_iter_no_change=[256],
)
tempo_final = time.perf_counter()
print(f'{tempo_final - tempo_inicial} segundos')

2015.198823928833 segundos


In [None]:
# Sigma returned by previsao_ML for the multi-layer perceptron model:
# training set first, then test set, one value per line.
print(
    previsao_ML(mlp, X_treino, y_treino),
    previsao_ML(mlp, X_teste, y_teste),
    sep='\n',
)

3.155471762299259
4.3211121841866875


In [None]:
# Display the hyper-parameter combination stored in mlp.best_params_.
mlp.best_params_

{'activation': 'tanh',
 'alpha': 0.001,
 'beta_1': 0.999,
 'beta_2': 0.999,
 'epsilon': 1e-08,
 'hidden_layer_sizes': (72, 72, 80),
 'learning_rate': 'constant',
 'learning_rate_init': 0.001,
 'momentum': 0.9,
 'n_iter_no_change': 256,
 'nesterovs_momentum': True,
 'power_t': 0.5,
 'solver': 'adam',
 'tol': 0.001,
 'validation_fraction': 0.1,
 'warm_start': False}