<h2>PyCaret</h2><br>
<a href="https://pycaret.org/">https://pycaret.org/</a>

<h3>Run Classification Using PyCaret</h3>

<hr>
<h3>Load Data</h3>

In [1]:
import pandas as pd
from pycaret.classification import *

# Load the iris measurements; the path is relative to the working directory.
df = pd.read_csv(filepath_or_buffer='./iris_data/iris.csv')

# Show the first rows as a quick check of the loaded columns.
df.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa


<h3>Missing Values</h3>

In [2]:
# Count the missing entries in each column.
df.isna().sum()

sepal_length    0
sepal_width     0
petal_length    0
petal_width     0
species         0
dtype: int64

<h3>Setting up Environment</h3>

In [3]:
# Notes on the setup() parameters relevant to this cell.
#
# silent:
#   When set to True, confirmation of data types is not required.
#   All preprocessing will be performed assuming automatically inferred data types.
#   Not recommended for direct use except for established pipelines.
#
# html:
#   If set to False, prevents runtime display of the monitor.
#   This must be set to False when using an environment that doesn't support HTML.
#   It is not passed below, so the library default applies.
#
# session_id:
#   Seed for the experiment. When it is omitted a random value is drawn on every
#   run (the recorded run drew 7560), so the scores in the cells that follow
#   would differ after each kernel restart. It is pinned to that same value here
#   so that Restart & Run All is repeatable.
exp1 = setup(
    df,
    target='species',
    train_size=0.5,
    silent=True,
    session_id=7560,
)

Unnamed: 0,Description,Value
0,session_id,7560
1,Target,species
2,Target Type,Multiclass
3,Label Encoded,"setosa: 0, versicolor: 1, virginica: 2"
4,Original Data,"(150, 5)"
5,Missing Values,False
6,Numeric Features,4
7,Categorical Features,0
8,Ordinal Features,False
9,High Cardinality Features,False


<h3>Compare Models</h3>

In [4]:
# Score the candidate classifiers and display them as a ranked table.
compare_models()

# pull() hands back the compare_models result table so it can be saved;
# the row index is left out of the CSV.
result = pull()
result.to_csv(path_or_buf='./compare_models_result.csv', index=False)

Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
knn,K Neighbors Classifier,0.9875,0.995,0.9889,0.9906,0.9871,0.981,0.983,0.161
lda,Linear Discriminant Analysis,0.9875,1.0,0.9833,0.9906,0.9863,0.9805,0.9826,0.058
lr,Logistic Regression,0.975,1.0,0.9722,0.9812,0.9735,0.9614,0.9656,0.413
nb,Naive Bayes,0.975,0.995,0.9722,0.9812,0.9735,0.9614,0.9656,0.005
dt,Decision Tree Classifier,0.975,0.98,0.9722,0.9812,0.9735,0.9614,0.9656,0.029
rf,Random Forest Classifier,0.975,1.0,0.9722,0.9812,0.9735,0.9614,0.9656,0.102
qda,Quadratic Discriminant Analysis,0.975,1.0,0.9722,0.9812,0.9735,0.9614,0.9656,0.026
ada,Ada Boost Classifier,0.975,0.9794,0.9722,0.9812,0.9735,0.9614,0.9656,0.022
gbc,Gradient Boosting Classifier,0.975,0.9856,0.9722,0.9812,0.9735,0.9614,0.9656,0.028
et,Extra Trees Classifier,0.975,1.0,0.9722,0.9812,0.9735,0.9614,0.9656,0.093


<h3>Create Model</h3>
<hr>
‘lr’ : Logistic Regression<br>
‘knn’ : K Nearest Neighbour<br>
‘nb’ : Naive Bayes<br>
‘dt’ : Decision Tree Classifier<br>
‘svm’ : SVM – Linear Kernel<br>
‘rbfsvm’ : SVM – Radial Kernel<br>
‘gpc’ : Gaussian Process Classifier<br>
‘mlp’ : Multi-Layer Perceptron<br>
‘ridge’ : Ridge Classifier<br>
‘rf’ : Random Forest Classifier<br>
‘qda’ : Quadratic Discriminant Analysis<br>
‘ada’ : Ada Boost Classifier<br>
‘gbc’ : Gradient Boosting Classifier<br>
‘lda’ : Linear Discriminant Analysis<br>
‘et’ : Extra Trees Classifier<br>
‘xgboost’ : Extreme Gradient Boosting<br>
‘lightgbm’ : Light Gradient Boosting<br>
‘catboost’ : CatBoost Classifier<br>

In [5]:
# Build the model registered under the ID 'lda' (Linear Discriminant Analysis in
# the estimator list) and keep it as the untuned baseline.
lda = create_model('lda')

Unnamed: 0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
1,1.0,1.0,1.0,1.0,1.0,1.0,1.0
2,0.875,1.0,0.8333,0.9062,0.8631,0.8049,0.826
3,1.0,1.0,1.0,1.0,1.0,1.0,1.0
4,1.0,1.0,1.0,1.0,1.0,1.0,1.0
5,1.0,1.0,1.0,1.0,1.0,1.0,1.0
6,1.0,1.0,1.0,1.0,1.0,1.0,1.0
7,1.0,1.0,1.0,1.0,1.0,1.0,1.0
8,1.0,1.0,1.0,1.0,1.0,1.0,1.0
9,1.0,1.0,1.0,1.0,1.0,1.0,1.0


<h3>Tune Model</h3>

In [6]:
# Pass the baseline LDA model through tune_model(); the result is stored under a
# new name so the untuned `lda` stays available.
# NOTE(review): the recorded per-fold scores are identical to the untuned
# model's — confirm whether tuning actually changed anything.
tuned_lda = tune_model(lda)

Unnamed: 0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
1,1.0,1.0,1.0,1.0,1.0,1.0,1.0
2,0.875,1.0,0.8333,0.9062,0.8631,0.8049,0.826
3,1.0,1.0,1.0,1.0,1.0,1.0,1.0
4,1.0,1.0,1.0,1.0,1.0,1.0,1.0
5,1.0,1.0,1.0,1.0,1.0,1.0,1.0
6,1.0,1.0,1.0,1.0,1.0,1.0,1.0
7,1.0,1.0,1.0,1.0,1.0,1.0,1.0
8,1.0,1.0,1.0,1.0,1.0,1.0,1.0
9,1.0,1.0,1.0,1.0,1.0,1.0,1.0


<h3>Plot Model</h3>

In [10]:
# Produce one plot per kind for the tuned model, saving each one (save=True).
plot_kinds = ('auc', 'confusion_matrix', 'feature')
for plot_kind in plot_kinds:
    plot_model(tuned_lda, plot=plot_kind, save=True, scale=1)

<h3>Predict Model</h3>

In [None]:
# Run the tuned model over the full iris frame.
# NOTE(review): `df` is the same frame that was given to setup() with
# train_size=0.5, so it presumably includes the rows used for training —
# confirm that scoring on seen data is intended here.
pred_tuned_lda = predict_model(tuned_lda,data = df)
# Bare last expression so the notebook displays the result.
pred_tuned_lda

<h3>Evaluate Model</h3>

In [None]:
# Show PyCaret's evaluation output for the tuned model.
# NOTE(review): presumably an interactive widget that needs a notebook front end — confirm.
evaluate_model(tuned_lda)

<h3>Save Model</h3>

In [None]:
# Persist the tuned model under the name 'model'.
# NOTE(review): the output location and file extension are decided by save_model — confirm.
save_model(tuned_lda, 'model')

<h3>Load Model</h3>

In [None]:
# Load the artifact named 'model' (the name used when saving the tuned model).
model = load_model('model')