### This notebook demonstrates a basic classification workflow using PyCaret.
- It includes:
    - Data loading
    - Preprocessing
    - Model training and evaluation

### Installing and importing the necessary libraries

In [1]:
# One-time environment setup: uncomment and run these lines only if the
# packages are not already installed in the kernel's environment.
# %pip install pandas
# %pip install pycaret
# %pip install scikit-learn

In [2]:
import pandas as pd
from sklearn.datasets import load_iris

from pycaret.classification import *
from pycaret.classification import setup, compare_models
from pycaret.classification import ClassificationExperiment

### Loading the dataset

In [None]:
# Load the iris dataset (features and integer class labels)
iris = load_iris()

# Build a DataFrame with one column per feature...
df = pd.DataFrame(data=iris.data, columns=iris.feature_names)
# ...and attach the class labels: the PyCaret setup step is told to use a
# column named 'target', so it must exist in the frame.
df["target"] = iris.target
df.head()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),target
0,5.1,3.5,1.4,0.2,0
1,4.9,3.0,1.4,0.2,0
2,4.7,3.2,1.3,0.2,0
3,4.6,3.1,1.5,0.2,0
4,5.0,3.6,1.4,0.2,0


### Initializing the PyCaret environment

In [8]:
# Configure the PyCaret classification experiment on the iris frame;
# a fixed session_id keeps the run reproducible.
init = setup(
    data=df,
    target="target",
    session_id=123,
)

Unnamed: 0,Description,Value
0,Session id,123
1,Target,target
2,Target type,Multiclass
3,Original data shape,"(150, 5)"
4,Transformed data shape,"(150, 5)"
5,Transformed train set shape,"(105, 5)"
6,Transformed test set shape,"(45, 5)"
7,Numeric features,4
8,Preprocess,True
9,Imputation type,simple


#### Compare multiple machine learning models using PyCaret's `compare_models` function


In [9]:
# Compare the available classifiers on the experiment configured by setup();
# the returned model is kept in `best` for the evaluation step.
best = init.compare_models()

Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
lr,Logistic Regression,0.9718,0.0,0.9718,0.978,0.9712,0.9573,0.9609,0.591
knn,K Neighbors Classifier,0.9718,0.983,0.9718,0.978,0.9712,0.9573,0.9609,0.313
qda,Quadratic Discriminant Analysis,0.9718,0.0,0.9718,0.978,0.9712,0.9573,0.9609,0.009
lda,Linear Discriminant Analysis,0.9718,0.0,0.9718,0.978,0.9712,0.9573,0.9609,0.01
lightgbm,Light Gradient Boosting Machine,0.9536,0.9935,0.9536,0.9634,0.9528,0.9298,0.9356,0.072
nb,Naive Bayes,0.9445,0.9868,0.9445,0.9525,0.9438,0.9161,0.9207,0.009
et,Extra Trees Classifier,0.9445,0.9935,0.9445,0.9586,0.9426,0.9161,0.9246,0.038
gbc,Gradient Boosting Classifier,0.9355,0.0,0.9355,0.9416,0.9325,0.9023,0.9083,0.053
dt,Decision Tree Classifier,0.9264,0.9429,0.9264,0.9502,0.9201,0.8886,0.904,0.008
rf,Random Forest Classifier,0.9264,0.9909,0.9264,0.9343,0.9232,0.8886,0.8956,0.047


#### Evaluate the best model's performance using PyCaret's `evaluate_model` function


In [None]:
# Render PyCaret's interactive evaluation widget (plot-type toggle buttons)
# for the model returned by compare_models().
init.evaluate_model(best)

interactive(children=(ToggleButtons(description='Plot Type:', icons=('',), options=(('Pipeline Plot', 'pipelin…