# 1. Install and Import Dependencies

In [None]:
!pip install pycaret pandas shap

In [2]:
import pandas as pd
from pycaret.classification import *

# 2. Load Data

In [3]:
# Read heart.csv (path is relative to the notebook's working directory) into a DataFrame.
df = pd.read_csv(filepath_or_buffer='heart.csv')

In [4]:
# Preview the first five rows as a quick sanity check of the load.
df.head(n=5)

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
0,63,1,3,145,233,1,0,150,0,2.3,0,0,1,1
1,37,1,2,130,250,0,1,187,0,3.5,0,0,2,1
2,41,0,1,130,204,0,0,172,0,1.4,2,0,2,1
3,56,1,1,120,236,0,1,178,0,0.8,2,0,2,1
4,57,0,0,120,354,0,1,163,1,0.6,2,0,2,1


In [5]:
# Show the storage dtype of every column. Columns that are stored as integers
# are not recognisable as categorical from dtype alone, which is why the
# categorical ones are listed by hand before calling setup().
df.dtypes

age           int64
sex           int64
cp            int64
trestbps      int64
chol          int64
fbs           int64
restecg       int64
thalach       int64
exang         int64
oldpeak     float64
slope         int64
ca            int64
thal          int64
target        int64
dtype: object

# 3. Train and Evaluate Model

In [6]:
# Columns to be treated as categorical by PyCaret instead of as plain numbers.
cat_features = [
    'sex',
    'cp',
    'fbs',
    'restecg',
    'exang',
    'thal',
]

In [8]:
# Initialise the PyCaret classification experiment on the loaded frame, using
# the `target` column as the label and forcing the listed columns to be
# treated as categorical.
#
# session_id fixes the random seed. Without it PyCaret draws a fresh seed on
# every run, so the train/test split and the model ranking below can change
# between executions. 8760 is the seed reported in the setup summary of the
# recorded run, so pinning it keeps the notebook consistent with its outputs.
experiment = setup(
    df,
    target='target',
    categorical_features=cat_features,
    session_id=8760,
)

Unnamed: 0,Description,Value
0,session_id,8760
1,Target,target
2,Target Type,Binary
3,Label Encoded,"0: 0, 1: 1"
4,Original Data,"(303, 14)"
5,Missing Values,False
6,Numeric Features,5
7,Categorical Features,8
8,Ordinal Features,False
9,High Cardinality Features,False


In [9]:
# Train and cross-validate the library's candidate classifiers, then keep the
# top-ranked one. The ranking metric is accuracy (PyCaret's default for
# classification), written out explicitly here.
# NOTE(review): the recorded table shows AUC 0.0 for the Ridge Classifier;
# presumably AUC is not computed for that estimator - confirm before relying on it.
best_model = compare_models(sort='Accuracy')

Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
ridge,Ridge Classifier,0.8299,0.0,0.8788,0.8338,0.8488,0.6518,0.6672,0.003
lda,Linear Discriminant Analysis,0.8251,0.9058,0.8697,0.831,0.8428,0.6426,0.6581,0.003
lr,Logistic Regression,0.8206,0.9015,0.8606,0.8248,0.8384,0.6334,0.644,0.306
nb,Naive Bayes,0.8203,0.8805,0.8788,0.8131,0.8424,0.633,0.6423,0.003
et,Extra Trees Classifier,0.8108,0.8845,0.8371,0.8333,0.8269,0.6169,0.6327,0.085
rf,Random Forest Classifier,0.8065,0.8818,0.8455,0.8115,0.8229,0.6065,0.6165,0.095
ada,Ada Boost Classifier,0.7779,0.8087,0.8265,0.7883,0.8027,0.5453,0.5547,0.013
lightgbm,Light Gradient Boosting Machine,0.7729,0.8655,0.8182,0.7879,0.7966,0.5337,0.5443,0.008
gbc,Gradient Boosting Classifier,0.7496,0.8426,0.7932,0.764,0.7726,0.4884,0.4968,0.01
dt,Decision Tree Classifier,0.674,0.6681,0.7152,0.7021,0.7032,0.3368,0.3415,0.003


# 4. Test Model

In [13]:
# Score the last five rows of the frame with the selected model.
# These rows come from the same DataFrame that was passed to setup(), so this
# is a smoke test of the prediction path rather than a held-out evaluation.
predict_model(estimator=best_model, data=df.tail())

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target,Label
298,57,0,0,140,241,0,1,123,1,0.2,1,0,3,0,0
299,45,1,3,110,264,0,1,132,0,1.2,1,0,3,0,1
300,68,1,0,144,193,1,1,141,0,3.4,1,2,3,0,0
301,57,1,0,130,131,0,1,115,1,1.2,1,1,3,0,0
302,57,0,1,130,236,0,0,174,0,0.0,1,1,2,0,1


# 5. Save Model

In [16]:
# Persist the preprocessing pipeline together with the selected model.
# The file name is hard-coded and assumes the Ridge Classifier was the model
# picked by compare_models(), as in the recorded results table.
save_model(model=best_model, model_name='ridge-model')

Transformation Pipeline and Model Succesfully Saved


(Pipeline(memory=None,
          steps=[('dtypes',
                  DataTypes_Auto_infer(categorical_features=['sex', 'cp', 'fbs',
                                                             'restecg', 'exang',
                                                             'thal'],
                                       display_types=True, features_todrop=[],
                                       id_columns=[],
                                       ml_usecase='classification',
                                       numerical_features=[], target='target',
                                       time_features=[])),
                 ('imputer',
                  Simple_Imputer(categorical_strategy='not_available',
                                 fill_value_categorical=Non...
                 ('fix_perfect', Remove_100(target='target')),
                 ('clean_names', Clean_Colum_Names()),
                 ('feature_select', 'passthrough'), ('fix_multi', 'passthrough'),
               

In [18]:
# Reload the saved pipeline + model from disk to verify the artefact is usable.
model = load_model(model_name='ridge-model')

Transformation Pipeline and Model Successfully Loaded


In [19]:
# Predict on the last five rows with the reloaded pipeline. The recorded
# output matches the Label column produced by predict_model() on the same
# rows, which indicates the save/load round trip preserved the model.
model.predict(df.tail())

array([0, 1, 0, 0, 1])