In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Print the full path of every file found anywhere under the Kaggle input directory.
for root, _, names in os.walk('/kaggle/input'):
    for path in (os.path.join(root, name) for name in names):
        print(path)

# You can write up to 5GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

# **Beginner Notebook For PyCaret**

[Link to PyCaret Website](https://pycaret.org/)

![](https://pycaret.org/wp-content/uploads/2020/03/Divi93_43.png)

What is PyCaret?
PyCaret is an open-source, low-code machine learning library in Python that aims to reduce the hypothesis-to-insights cycle time in an ML experiment. It enables data scientists to perform end-to-end experiments quickly and efficiently. Compared with other open-source machine learning libraries, PyCaret is an alternative low-code library that can be used to perform complex machine learning tasks with only a few lines of code. PyCaret is simple and easy to use.

# **Import Libraries**

In [None]:
import missingno as msno
import seaborn as sns
import matplotlib.pyplot as plt

In [None]:
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

# **Extra Libraries**

In [None]:
!pip install pycaret

In [None]:
from pycaret.classification import *

# **Read Dataset**

In [None]:
# Load the heart-failure clinical records and preview the first five rows.
dataset = pd.read_csv(
    '/kaggle/input/heart-failure-clinical-data/heart_failure_clinical_records_dataset.csv'
)
dataset.head()

There are 13 columns and 299 entries in the dataset.

# **Missing Values**

Output shows we have no missing data

In [None]:
# Count the missing values in each column.
dataset.isna().sum()

# **Target Variable**

We have an imbalanced dataset:
label 0 has 203 entries and label 1 has 96.


In [None]:
# Bar chart of how many records fall into each DEATH_EVENT class.
# The column is passed by keyword (x=..., data=...): seaborn deprecated positional
# data arguments in 0.11 and made the plotting variables keyword-only in 0.12.
sns.countplot(x='DEATH_EVENT', data=dataset, palette='rainbow', alpha=0.75)

# **Data Processing**

In [None]:
# Hold out 5% of the rows as "unseen" data; the other 95% is used for modeling.
# data_unseen is built from the sampled index labels, so it has to be derived
# before that index is reset.
data = dataset.sample(frac=0.95, random_state=786)
data_unseen = dataset.drop(data.index).reset_index(drop=True)
data = data.reset_index(drop=True)
print(f'Data for Modeling: {data.shape}')
print(f'Unseen Data For Predictions: {data_unseen.shape}')

# **Setup PyCaret Environment**

We set `fix_imbalance` to True. We also set the `silent` parameter to True to prevent the PyCaret `setup` function from waiting interactively for an input response.

In [None]:
# Initialise the PyCaret classification experiment on the modeling split with
# DEATH_EVENT as the target. session_id is fixed at 55, silent=True is passed so
# setup does not wait for interactive input, and fix_imbalance=True is enabled
# because the target classes are imbalanced.
clf1 = setup(data = data, target = 'DEATH_EVENT', session_id=55, silent=True,fix_imbalance = True)


# **Compare All Models**

Only a specific subset of models is included, to show most of PyCaret's features in this notebook.

In [None]:
# Compare only the listed estimator IDs; the returned model is kept as best_model.
best_model = compare_models(include=['dt','knn','mlp','rf','et','xgboost'])

In [None]:
# Show the estimator object that compare_models returned.
print(best_model)

In [None]:
# Display the table returned by models() — presumably the catalogue of estimator
# IDs available to create_model/compare_models; confirm against the PyCaret docs.
models()

# **Create Model**

The best model was XGBoost, according to the output of the `compare_models` function.

In [None]:
# Build an XGBoost classifier by its PyCaret ID.
# NOTE(review): the ID is hard-coded rather than taken from best_model, so this cell
# will not follow a different winner if the comparison results change.
model = create_model('xgboost')

# **Interpret Model**

See feature importance output with SHAP


In [None]:
#FIX: https://github.com/pycaret/pycaret/issues/377
# Workaround for the issue linked above, applied before asking for the SHAP plot:
# the booster's raw serialisation is captured once with its first 4 bytes dropped,
# and save_raw is then replaced on this booster instance so every later call
# returns that truncated buffer.
# NOTE(review): presumably the 4-byte prefix is a header the SHAP side cannot parse;
# confirm this is still required for the installed xgboost/shap versions.
mybooster = model.get_booster()
# Must run before save_raw is overridden below, while it is still the original method.
model_bytearray = mybooster.save_raw()[4:]
# self=None lets the replacement be called with or without an instance argument.
def myfun(self=None):return model_bytearray

mybooster.save_raw = myfun

# Default interpret_model plot for the patched model.
interpret_model(model)

# **Interpret Observations with SHAP**

In [None]:
# 'reason' plot for a single observation (observation=10).
interpret_model(model, plot = 'reason', observation = 10)

# **Hyperparameter Tuning**

Automated tuning of the selected model for better accuracy with the `tune_model` function.

In [None]:
# Tune the XGBoost model created earlier; later cells plot and evaluate tuned_model.
tuned_model= tune_model(model)

# **Plot Model**

# **AUC Plot**

In [None]:
# AUC plot for the tuned model.
plot_model(tuned_model, plot = 'auc')

# **Precision Recall Curve**

In [None]:
# Precision-recall curve for the tuned model.
plot_model(tuned_model, plot = 'pr')

# **Model Feature Importance**

In [None]:
# Feature-importance plot for the tuned model.
plot_model(tuned_model, plot='feature')

# **Confusion Matrix**

In [None]:
# Confusion matrix for the tuned model.
plot_model(tuned_model, plot = 'confusion_matrix')

`evaluate_model` provides an interactive UI for model analysis.

# **Evaluate Model UI**

In [None]:
# Open the interactive model-analysis UI for the tuned model.
evaluate_model(tuned_model)

# **Finally Predict Test Data**

In [None]:
# Called without a data argument.
# NOTE(review): data_unseen, split off earlier for predictions, is not passed here
# or in any other visible cell — confirm whether it was meant to be scored.
predict_model(tuned_model)