In [None]:
import pandas as pd
import evalml
import matplotlib.pyplot as plt

In [None]:
# Read the feature table, using the first CSV column as the row index.
df = pd.read_csv(
    filepath_or_buffer='best_features_close_classifier_lag_2.csv',
    index_col=0,
)
# Initialise Woodwork typing on the frame. NOTE(review): the `ww` accessor is
# presumably registered as a side effect of `import evalml` — there is no
# explicit woodwork import in this notebook; confirm.
df.ww.init()
df

In [None]:
# Summary statistics of the loaded frame, displayed as the cell output.
df.describe()

In [None]:
# Print a concise overview of the frame (columns, dtypes, non-null counts).
df.info()

## Dividindo a base entre treinamento e teste

In [None]:
# Separate the predictors from the label column 'Target'.
# `columns=` already identifies the axis, so the redundant `axis=1` is omitted.
X = df.drop(columns=['Target'])
y = df['Target']

### Inferindo os tipos de variáveis

In [None]:
from evalml.utils import infer_feature_types
# Rebind X to the result of EvalML's feature-type inference; the cells below
# read the typing information through `X.ww`.
X = infer_feature_types(X)

In [None]:
# Display the `ww` accessor of X to inspect the types inferred above.
X.ww

In [None]:
# Split features and target into train/test partitions for a binary problem;
# test_size=0.3 and a fixed seed keep the partition repeatable across runs.
X_train, X_test, y_train, y_test = evalml.preprocessing.split_data(
    X,
    y,
    problem_type='binary',
    test_size=0.3,
    random_seed=111,
)

### Checando a base de treino

In [None]:
from evalml.data_checks import DefaultDataChecks

# Build EvalML's default set of data checks for a binary problem scored with
# F1, then run them on the training partition only; the validation result is
# shown as the cell output.
data_checks = DefaultDataChecks(objective='f1', problem_type="binary")
data_checks.validate(X=X_train, y=y_train)

## Executando AutoML

In [None]:
from evalml.automl import AutoMLSearch

In [None]:
# Configure the AutoML search on the training partition and run it.
automl = AutoMLSearch(
    # data
    X_train=X_train,
    y_train=y_train,
    # problem definition and metrics
    problem_type='binary',
    objective='f1',
    additional_objectives=['auc'],
    optimize_thresholds=True,
    # search budget, reproducibility and logging
    max_batches=2,
    random_seed=111,
    verbose=True,
)
automl.search()

### Rankings dos melhores modelos

In [None]:
# Show the rankings table produced by the search above.
automl.rankings

In [None]:
# Best pipeline found by the search; used below for scoring and prediction.
pipeline = automl.best_pipeline

### Descrição do melhor pipeline

In [None]:
# Describe the pipeline in the first row of the rankings table instead of a
# hard-coded id: ids depend on the order in which the search evaluated
# pipelines, so a fixed number may refer to a different pipeline (or to none)
# on another run or configuration.
best_pipeline_id = automl.rankings.iloc[0]["id"]
automl.describe_pipeline(best_pipeline_id)

In [None]:
# Evaluate the selected pipeline on the held-out partition for both metrics.
scores = pipeline.score(X_test, y_test, objectives=["f1", "auc"])

In [None]:
# Report both held-out metrics with five decimal places.
print(f"F1 score: {scores['F1']:.5f} - AUC score: {scores['AUC']:.5f}")

In [None]:
# Render the selected pipeline as a graph in the cell output.
# NOTE(review): this likely needs graphviz to be installed — confirm.
pipeline.graph()

In [None]:
# Predictions on the held-out partition; consumed by the confusion matrix and
# classification report cells below.
y_pred = pipeline.predict(X_test)

### Matriz de Confusão

In [None]:
from evalml.model_understanding.graphs import graph_confusion_matrix

# Plot the confusion matrix of the held-out predictions, using the 'true'
# normalization method.
graph_confusion_matrix(y_true=y_test, y_pred=y_pred, normalize_method='true')

### Classification Report

In [None]:
from sklearn.metrics import classification_report

# Print scikit-learn's text classification report for the held-out predictions.
print(classification_report(y_true=y_test, y_pred=y_pred))