<a href="https://colab.research.google.com/github/mantoan-thi/AutoML/blob/main/Day_Trading_with_Machine_Learning_(witn_H2O_AutoML_Automatic_Machine_Learning).ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Instalações

In [None]:
!pip install h2o

In [None]:
!pip install yfinance --upgrade --no-cache-dir

# Importando as bibliotecas

In [418]:
import pandas as pd
import h2o
from h2o.automl import H2OAutoML

# Treinando o modelo

In [420]:
def processar(dados, cls,tm):
  data = dados
  classe = cls
  tempo_maximo = tm

  # Identifica dinamicamente as colunas do arquivo csv
  colunas = data.columns.tolist()
  variaveis_independentes = [coluna for coluna in colunas if coluna != classe]

  # Divide os dados em treino e teste
  data = h2o.H2OFrame(data)
  treino, teste = data.split_frame(ratios=[.7])

  # Transforma a variavel dependente em fator
  treino[classe] = treino[classe].asfactor()
  teste[classe] = teste[classe].asfactor()

  # Auto ML
  # Busca o modelo valor gravado no atributo tempo_maximo em segundos. podemos em vez disso definir max_models
  modelo_automl = H2OAutoML(max_runtime_secs=tempo_maximo,sort_metric='AUC')
  #modelo_automl = H2OAutoML(max_models=20, seed=1)
  modelo_automl.train(y=classe, training_frame=treino)

  # Ranking dos melhores AutoML
  ranking = modelo_automl.leaderboard
  #ranking = ranking.as_data_frame()

  # Imprime todas as linhas em vez do padrão (10 linhas)
  ranking.head(rows=ranking.nrows)

  # O modelo líder é armazenado aqui
  modelo_automl.leader

  return ranking, modelo_automl,treino,teste

In [419]:
def prever(aml, dados_test):
  # Para gerar previsões em um conjunto de teste, você pode fazer previsões
  # diretamente no objeto `H2OAutoML` ou no modelo líder
  # objeto diretamente
  preds = aml.predict(test)

  # ou:
  #preds = aml.leader.predict(test)
  return preds

# Coletando e processando dados da Bolsa

importando as bibliotecas

In [421]:
import yfinance as yf #Instalando e importando a API no código

Coletando e processando

In [422]:
def coletar_dados(ativo):
    df = yf.download(ativo, start="2019-01-01", end="2021-04-30")
    df.reset_index(inplace=True)
    df['Tendencia_3d'] = (df['Close'].shift(-1)-df['Open'].shift(-1))+(df['Close'].shift(-2)-df['Open'].shift(-2))+(df['Close'].shift(-3)-df['Open'].shift(-3))
    df['Low_open'] = df.Low-df.Open
    df['High_open'] = df.High-df.Open
    df['Variação'] = df['Adj Close'].pct_change()
    df.dropna(inplace=True)
    df.drop(columns=['Date'],axis=1, inplace=True)
    df['Dir'] = df.apply(lambda x: 1 if x['Tendencia_3d'] > 0 else 0, axis=1)
    return df

Coletando dados do ITAU4

In [423]:
dados = coletar_dados('ITUB4.SA')

[*********************100%***********************]  1 of 1 completed


In [424]:
# Colunas
dados.columns

Index(['Open', 'High', 'Low', 'Close', 'Adj Close', 'Volume', 'Tendencia_3d',
       'Low_open', 'High_open', 'Variação', 'Dir'],
      dtype='object')

In [425]:
# Iremos pegar a coluna Dir
cls = 'Dir'
ranking, modelo_automl,treino,teste = processar(dados,cls,60)

Parse progress: |████████████████████████████████████████████████████████████████| (done) 100%
AutoML progress: |███████████████████████████████████████████████████████████████| (done) 100%


In [426]:
# Ranking
rank

model_id,auc,logloss,aucpr,mean_per_class_error,rmse,mse
StackedEnsemble_BestOfFamily_1_AutoML_5_20210918_72330,1.0,0.198083,1.0,0.0,0.221876,0.0492291
DRF_1_AutoML_5_20210918_72330,1.0,0.0969521,1.0,0.0,0.158947,0.0252641
GLM_1_AutoML_5_20210918_72330,1.0,0.0786486,1.0,0.0,0.137284,0.0188469
XRT_1_AutoML_5_20210918_72330,1.0,0.0989431,1.0,0.0,0.160515,0.0257651
StackedEnsemble_BestOfFamily_2_AutoML_5_20210918_72330,0.994152,0.235334,0.988293,0.0263158,0.261358,0.0683082
GBM_5_AutoML_5_20210918_72330,0.994152,0.078667,0.988293,0.0263158,0.164782,0.027153
GBM_grid_1_AutoML_5_20210918_72330_model_1,0.988304,0.122639,0.980161,0.0555556,0.18244,0.0332844
XGBoost_grid_1_AutoML_5_20210918_72330_model_6,0.988304,0.190047,0.975206,0.0263158,0.223554,0.0499765
DeepLearning_grid_1_AutoML_5_20210918_72330_model_1,0.982456,0.174949,0.970996,0.0555556,0.25384,0.0644348
GBM_grid_1_AutoML_5_20210918_72330_model_2,0.976608,0.289268,0.964425,0.0555556,0.27997,0.0783831




In [431]:
# Test
teste

Open,High,Low,Close,Adj Close,Volume,Tendencia_3d,Low_open,High_open,Variação,Dir
36.75,37.61,36.45,37.61,34.1662,21938600.0,0.349998,-0.299999,0.860001,0.0164865,1
37.22,37.71,36.75,36.98,33.5938,24873500.0,0.779999,-0.470001,0.489998,-0.0167507,1
37.68,37.74,36.88,37.29,33.8755,41119800.0,0.43,-0.799999,0.0600014,-0.0155753,1
37.35,37.73,37.24,37.49,34.0572,21283100.0,0.469997,-0.109997,0.380001,-0.00133146,1
37.07,38.35,37.07,38.16,34.6658,22555600.0,-0.300003,0.0,1.28,0.0178712,0
38.6,39.1,38.34,38.81,35.2699,13541300.0,-0.939999,-0.259998,0.5,0.00116112,0
38.67,39.79,38.4,39.69,36.0696,17686500.0,-2.25,-0.269997,1.12,0.0226743,0
38.56,38.8,37.62,38.0,34.5338,71190500.0,-0.859997,-0.940002,0.239998,-0.0425796,0
37.31,37.41,36.6,36.89,33.5251,18345900.0,0.0100021,-0.710003,0.0999985,-0.011257,1
37.69,37.83,37.19,37.5,34.0794,26851200.0,-0.759995,-0.5,0.140003,-0.010293,0




In [432]:
prever(modelo_ml,teste)

stackedensemble prediction progress: |███████████████████████████████████████████| (done) 100%


predict,p0,p1
1,0.020089,0.979911
0,0.769681,0.230319
0,0.969276,0.0307238
0,0.833906,0.166094
0,0.645678,0.354322
0,0.936728,0.063272
0,0.988631,0.0113695
0,0.689594,0.310406
0,0.974785,0.0252146
0,0.995266,0.00473401




In [434]:
modelo_ml.leader

Model Details
H2OStackedEnsembleEstimator :  Stacked Ensemble
Model Key:  StackedEnsemble_BestOfFamily_1_AutoML_5_20210918_72330

No model summary for this model

ModelMetricsBinomialGLM: stackedensemble
** Reported on train data. **

MSE: 0.025077299893514794
RMSE: 0.1583581380716343
LogLoss: 0.12868831570090986
Null degrees of freedom: 27
Residual degrees of freedom: 26
Null deviance: 35.16472896943475
Residual deviance: 7.2065456792509535
AIC: 11.206545679250954
AUC: 1.0
AUCPR: 1.0
Gini: 1.0

Confusion Matrix (Act/Pred) for max f1 @ threshold = 0.6007582682448778: 


Unnamed: 0,Unnamed: 1,0,1,Error,Rate
0,0,19.0,0.0,0.0,(0.0/19.0)
1,1,0.0,9.0,0.0,(0.0/9.0)
2,Total,19.0,9.0,0.0,(0.0/28.0)



Maximum Metrics: Maximum metrics at their respective thresholds


Unnamed: 0,metric,threshold,value,idx
0,max f1,0.600758,1.0,8.0
1,max f2,0.600758,1.0,8.0
2,max f0point5,0.600758,1.0,8.0
3,max accuracy,0.600758,1.0,8.0
4,max precision,0.967181,1.0,0.0
5,max recall,0.600758,1.0,8.0
6,max specificity,0.967181,1.0,0.0
7,max absolute_mcc,0.600758,1.0,8.0
8,max min_per_class_accuracy,0.600758,1.0,8.0
9,max mean_per_class_accuracy,0.600758,1.0,8.0



Gains/Lift Table: Avg response rate: 32.14 %, avg score: 32.47 %


Unnamed: 0,group,cumulative_data_fraction,lower_threshold,lift,cumulative_lift,response_rate,score,cumulative_response_rate,cumulative_score,capture_rate,cumulative_capture_rate,gain,cumulative_gain,kolmogorov_smirnov
0,1,0.035714,0.958697,3.111111,3.111111,1.0,0.967181,1.0,0.967181,0.111111,0.111111,211.111111,211.111111,0.111111
1,2,0.035714,0.950213,0.0,3.111111,0.0,0.0,1.0,0.967181,0.0,0.111111,-100.0,211.111111,0.111111
2,3,0.035714,0.941729,0.0,3.111111,0.0,0.0,1.0,0.967181,0.0,0.111111,-100.0,211.111111,0.111111
3,4,0.071429,0.93496,3.111111,3.111111,1.0,0.935759,1.0,0.95147,0.111111,0.222222,211.111111,211.111111,0.222222
4,5,0.071429,0.932262,0.0,3.111111,0.0,0.0,1.0,0.95147,0.0,0.222222,-100.0,211.111111,0.222222
5,6,0.107143,0.913756,3.111111,3.111111,1.0,0.925767,1.0,0.942902,0.111111,0.333333,211.111111,211.111111,0.333333
6,7,0.178571,0.891255,3.111111,3.111111,1.0,0.900816,1.0,0.926068,0.222222,0.555556,211.111111,211.111111,0.555556
7,8,0.214286,0.809841,3.111111,3.111111,1.0,0.857646,1.0,0.914664,0.111111,0.666667,211.111111,211.111111,0.666667
8,9,0.321429,0.571686,3.111111,3.111111,1.0,0.658618,1.0,0.829316,0.333333,1.0,211.111111,211.111111,1.0
9,10,0.392857,0.209395,0.0,2.545455,0.0,0.277182,0.818182,0.728928,0.0,1.0,-100.0,154.545455,0.894737




ModelMetricsBinomialGLM: stackedensemble
** Reported on cross-validation data. **

MSE: 0.04922906096164813
RMSE: 0.22187622892425435
LogLoss: 0.19808262938870064
Null degrees of freedom: 27
Residual degrees of freedom: 26
Null deviance: 40.15049280591354
Residual deviance: 11.092627245767236
AIC: 15.092627245767236
AUC: 1.0
AUCPR: 1.0
Gini: 1.0

Confusion Matrix (Act/Pred) for max f1 @ threshold = 0.4020081248630108: 


Unnamed: 0,Unnamed: 1,0,1,Error,Rate
0,0,19.0,0.0,0.0,(0.0/19.0)
1,1,0.0,9.0,0.0,(0.0/9.0)
2,Total,19.0,9.0,0.0,(0.0/28.0)



Maximum Metrics: Maximum metrics at their respective thresholds


Unnamed: 0,metric,threshold,value,idx
0,max f1,0.402008,1.0,8.0
1,max f2,0.402008,1.0,8.0
2,max f0point5,0.402008,1.0,8.0
3,max accuracy,0.402008,1.0,8.0
4,max precision,0.912146,1.0,0.0
5,max recall,0.402008,1.0,8.0
6,max specificity,0.912146,1.0,0.0
7,max absolute_mcc,0.402008,1.0,8.0
8,max min_per_class_accuracy,0.402008,1.0,8.0
9,max mean_per_class_accuracy,0.402008,1.0,8.0



Gains/Lift Table: Avg response rate: 32.14 %, avg score: 30.78 %


Unnamed: 0,group,cumulative_data_fraction,lower_threshold,lift,cumulative_lift,response_rate,score,cumulative_response_rate,cumulative_score,capture_rate,cumulative_capture_rate,gain,cumulative_gain,kolmogorov_smirnov
0,1,0.035714,0.908939,3.111111,3.111111,1.0,0.912146,1.0,0.912146,0.111111,0.111111,211.111111,211.111111,0.111111
1,2,0.035714,0.905732,0.0,3.111111,0.0,0.0,1.0,0.912146,0.0,0.111111,-100.0,211.111111,0.111111
2,3,0.035714,0.902526,0.0,3.111111,0.0,0.0,1.0,0.912146,0.0,0.111111,-100.0,211.111111,0.111111
3,4,0.071429,0.899234,3.111111,3.111111,1.0,0.900269,1.0,0.906207,0.111111,0.222222,211.111111,211.111111,0.222222
4,5,0.071429,0.895742,0.0,3.111111,0.0,0.0,1.0,0.906207,0.0,0.222222,-100.0,211.111111,0.222222
5,6,0.107143,0.843181,3.111111,3.111111,1.0,0.887334,1.0,0.899916,0.111111,0.333333,211.111111,211.111111,0.333333
6,7,0.178571,0.755699,3.111111,3.111111,1.0,0.790784,1.0,0.856264,0.222222,0.555556,211.111111,211.111111,0.555556
7,8,0.214286,0.668138,3.111111,3.111111,1.0,0.725085,1.0,0.834401,0.111111,0.666667,211.111111,211.111111,0.666667
8,9,0.321429,0.397131,3.111111,3.111111,1.0,0.506711,1.0,0.725171,0.333333,1.0,211.111111,211.111111,1.0
9,10,0.392857,0.29797,0.0,2.545455,0.0,0.331508,0.818182,0.653596,0.0,1.0,-100.0,154.545455,0.894737





