# PyCaret

In [None]:
import os

from google.colab import drive

# Mount Google Drive inside the Colab VM, then make the Drive root the
# working directory so that relative paths used later resolve against it.
drive.mount('/content/drive')
root_dir = '/content/drive/MyDrive/'
os.chdir(root_dir)

## Installation

In [None]:
# !pip install pycaret==2.3.10

In [None]:
# !pip install jinja2==3.1.2

In [None]:
# !pip install xgboost==1.6.0

In [None]:
import warnings

# Install the "ignore" filter before the heavy imports below so that
# warnings raised while importing them are silenced as well.
warnings.filterwarnings("ignore")

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from pycaret.classification import (
    add_metric,
    blend_models,
    compare_models,
    finalize_model,
    load_model,
    plot_model,
    predict_model,
    save_model,
    setup,
)
# For a regression task, import the same names from pycaret.regression instead:
# from pycaret.regression import setup, compare_models, predict_model, blend_models, finalize_model
# from pycaret.regression import plot_model, save_model, load_model, add_metric

# Metric used to rank models in compare_models() and to optimise the blend.
METRIC = 'Accuracy'
# Name of the target (label) column in the training data.
TARGET = 'label'

## Load Data

In [None]:
# TODO: fill in the path to the training CSV (relative paths resolve against
# the working directory set at the top of the notebook). The empty string is
# a placeholder and cannot be read as-is.
train = pd.read_csv(filepath_or_buffer='')

## Model Fitting (Accuracy)

In [None]:
# Initialise the PyCaret experiment on `train`:
#   - train_size=0.7 keeps 70% of the rows for training; the rest become the
#     hold-out set,
#   - session_id=0 fixes the random seed so runs are reproducible.
model = setup(
    data=train,
    target=TARGET,
    session_id=0,
    train_size=0.7,
)

In [None]:
# Train and rank the available estimators by METRIC and keep the best three.
# Other values accepted for `sort` are 'AUC', 'Recall', 'Precision', 'F1',
# 'Kappa' and 'MCC'.
top_3_models = compare_models(n_select=3, sort=METRIC)

In [None]:
# Take the first model of the selection and draw its ROC / AUC curve.
model_top = top_3_models[0]
plot_model(estimator=model_top, plot='auc')

## Blending

In [None]:
# Combine the three selected models into a single voting ensemble,
# evaluated with 10-fold cross-validation. method='hard' votes on the
# predicted class labels rather than on predicted probabilities.
blended = blend_models(
    estimator_list=top_3_models,
    method='hard',
    optimize=METRIC,
    fold=10,
)

In [None]:
# No `data` argument is given, so the blended model is scored on the
# hold-out split created by setup().
predict_model(estimator=blended)

In [None]:
# Refit the blended ensemble on the complete dataset (hold-out included)
# and print the resulting estimator for inspection.
final_blended = finalize_model(estimator=blended)
print(final_blended)

## Predict

In [None]:
# Score the finalized model. No `data` argument is given, so PyCaret predicts
# on the hold-out split created by setup(); `pred` therefore contains only
# those rows, together with the target column and the predicted 'Label'.
# NOTE: finalize_model() has already refit on the full dataset, hold-out
# included, so these predictions are not an unbiased performance estimate.
pred = predict_model(final_blended)

# Build the true-vs-predicted table from `pred` itself. Pairing
# `train[TARGET]` (every row) with `pred['Label']` (hold-out rows only)
# aligns on the index and yields NaN-padded or mismatched rows.
true_pred = pred[[TARGET, 'Label']]
true_pred.head()

In [None]:
# Overlay predicted and true labels for the scored rows on one wide figure.
plt.style.use('ggplot')
plt.figure(figsize=(20, 8))
for column, legend_name in (('Label', 'pred'), (TARGET, 'true')):
    plt.plot(pred[column], label=legend_name)
plt.legend(fontsize=20)
plt.show()

## Save & Load

In [None]:
# Persist the finalized pipeline to disk; PyCaret appends the '.pkl'
# extension, producing 'final_blended.pkl' in the working directory.
save_model(model=final_blended, model_name='final_blended')

In [None]:
from google.colab import files

# Trigger a browser download of the saved model file from the Colab VM.
files.download(filename='final_blended.pkl')

In [None]:
# Reload the saved pipeline; the name is given without the '.pkl' extension.
loaded_model = load_model(model_name='final_blended')

## Add Metric (LogLoss)

In [None]:
from sklearn.metrics import log_loss

# Register scikit-learn's log loss as a custom PyCaret metric and make it
# the active METRIC from here on. It is computed from predicted
# probabilities (target='pred_proba'), and lower values are better.
METRIC = 'logloss'
add_metric(
    id=METRIC,
    name='LogLoss',
    score_func=log_loss,
    target='pred_proba',
    greater_is_better=False,
)

In [None]:
# 'svm' and 'ridge' are excluded because they do not support predict_proba,
# which a metric registered with target='pred_proba' needs.
# n_select is 5 so that the result matches the `top_5_models` name; the
# previous value of 3 looked like a leftover from the top-3 comparison cell.
top_5_models = compare_models(sort=METRIC,
                              n_select=5,
                              exclude=['svm', 'ridge'])