# Building a Wine Classifier Model with PyCaret

In [1]:
import pandas as pd
import numpy as np

In [2]:
# Read the red-wine quality CSV (path is relative to this notebook's folder).
wine_csv_path = '../../data/winequality-red.csv'
wine_df = pd.read_csv(wine_csv_path)

In [3]:
# Preview the first five rows of the raw data (5 is also the default).
wine_df.head(n=5)

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
0,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,5
1,7.8,0.88,0.0,2.6,0.098,25.0,67.0,0.9968,3.2,0.68,9.8,5
2,7.8,0.76,0.04,2.3,0.092,15.0,54.0,0.997,3.26,0.65,9.8,5
3,11.2,0.28,0.56,1.9,0.075,17.0,60.0,0.998,3.16,0.58,9.8,6
4,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,5


## Transform the quality feature into a binary label (Good or Bad)

In [4]:
# Binarize the target: scores of 6 and above become 'Good', everything else 'Bad'.
# The transform is guarded on the column still being numeric so that re-running
# this cell is a no-op; without the guard, a second run would compare the
# 'Good'/'Bad' strings against 6 instead of the original integer scores.
# Bracket indexing is used rather than attribute assignment so the column
# write is explicit.
if pd.api.types.is_numeric_dtype(wine_df['quality']):
    wine_df['quality'] = np.where(wine_df['quality'] >= 6, 'Good', 'Bad')

In [5]:
# Preview the first five rows again to check the 'quality' column contents.
wine_df.head(n=5)

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
0,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,Bad
1,7.8,0.88,0.0,2.6,0.098,25.0,67.0,0.9968,3.2,0.68,9.8,Bad
2,7.8,0.76,0.04,2.3,0.092,15.0,54.0,0.997,3.26,0.65,9.8,Bad
3,11.2,0.28,0.56,1.9,0.075,17.0,60.0,0.998,3.16,0.58,9.8,Good
4,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,Bad


## Compare Models with Default Setup

In [7]:
# Import only the PyCaret classification helpers this notebook calls, instead
# of a wildcard import, so it is clear where each name comes from and nothing
# else is pulled into the notebook namespace.
from pycaret.classification import (
    compare_models,
    create_model,
    evaluate_model,
    predict_model,
    save_model,
    setup,
)

In [8]:
# Baseline experiment: only the data, the target column and a pinned
# session_id are passed; every other setup() option is left at its default.
exp_clf101 = setup(
    data=wine_df,
    target='quality',
    session_id=123,
)

Setup Succesfully Completed!


Unnamed: 0,Description,Value
0,session_id,123
1,Target Type,Binary
2,Label Encoded,"Bad: 0, Good: 1"
3,Original Data,"(1599, 12)"
4,Missing Values,False
5,Numeric Features,11
6,Categorical Features,0
7,Ordinal Features,False
8,High Cardinality Features,False
9,High Cardinality Method,


In [9]:
# Run PyCaret's model comparison for the experiment configured by the setup()
# call above; the table below is the leaderboard it displays. The return value
# is kept in `best` (presumably the top-ranked model -- confirm against the
# PyCaret docs for the installed version). Note that `best` is reassigned by
# the later compare_models() call in this notebook.
best = compare_models()

Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
0,Extra Trees Classifier,0.7954,0.8837,0.8112,0.8098,0.8089,0.5885,0.5912,0.1468
1,Light Gradient Boosting Machine,0.7936,0.8688,0.8096,0.8072,0.8072,0.5851,0.5872,0.0733
2,Extreme Gradient Boosting,0.79,0.8719,0.8112,0.8022,0.8054,0.5774,0.5797,0.1033
3,CatBoost Classifier,0.7829,0.8629,0.7896,0.8046,0.7952,0.5642,0.5671,1.6671
4,Gradient Boosting Classifier,0.7677,0.8482,0.7779,0.7901,0.7817,0.5334,0.5373,0.1671
5,Random Forest Classifier,0.7641,0.8427,0.756,0.7955,0.7739,0.5275,0.5303,0.1153
6,Logistic Regression,0.7435,0.8033,0.746,0.7695,0.7558,0.4859,0.4885,0.0348
7,Ridge Classifier,0.74,0.0,0.7344,0.7716,0.7506,0.4793,0.4824,0.0042
8,Linear Discriminant Analysis,0.74,0.8072,0.7377,0.7699,0.7515,0.479,0.4819,0.0073
9,Ada Boost Classifier,0.7373,0.8073,0.7677,0.7506,0.7577,0.4709,0.4732,0.0744


## Compare Models with Normalization and Transformation Enabled

In [10]:
# Second experiment: same data, target and session_id as the baseline, with
# the normalize and transformation options switched on.
exp_clf102 = setup(
    data=wine_df,
    target='quality',
    session_id=123,
    normalize=True,
    transformation=True,
)

Setup Succesfully Completed!


Unnamed: 0,Description,Value
0,session_id,123
1,Target Type,Binary
2,Label Encoded,"Bad: 0, Good: 1"
3,Original Data,"(1599, 12)"
4,Missing Values,False
5,Numeric Features,11
6,Categorical Features,0
7,Ordinal Features,False
8,High Cardinality Features,False
9,High Cardinality Method,


In [11]:
# Re-run the model comparison, now against the experiment configured with
# normalize=True and transformation=True. This overwrites the `best` value
# stored by the earlier compare_models() call.
best = compare_models()

Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
0,Extra Trees Classifier,0.8079,0.889,0.823,0.82,0.8206,0.6138,0.6155,0.1477
1,Extreme Gradient Boosting,0.79,0.8718,0.8112,0.8022,0.8054,0.5774,0.5797,0.0451
2,Light Gradient Boosting Machine,0.7837,0.867,0.7962,0.8017,0.7974,0.5654,0.5682,0.073
3,CatBoost Classifier,0.782,0.863,0.7879,0.8042,0.7942,0.5624,0.5654,1.662
4,Random Forest Classifier,0.7676,0.8451,0.7577,0.8011,0.7772,0.5347,0.538,0.1268
5,Gradient Boosting Classifier,0.7659,0.8491,0.7762,0.7881,0.78,0.5298,0.5334,0.1665
6,Quadratic Discriminant Analysis,0.7525,0.8081,0.7612,0.7724,0.766,0.5035,0.5047,0.0018
7,Logistic Regression,0.7418,0.8146,0.7661,0.756,0.7599,0.4805,0.4821,0.0089
8,Ridge Classifier,0.7391,0.0,0.7561,0.7575,0.7552,0.4758,0.4778,0.0028
9,Linear Discriminant Analysis,0.7391,0.8147,0.7561,0.7575,0.7552,0.4758,0.4778,0.0042


## Build Extra Trees Classifier

In [12]:
# Create the model registered under PyCaret's 'et' ID (the Extra Trees
# classifier that topped the comparison above) and keep it as `et_model`.
# The table below has ten metric rows, presumably one per cross-validation
# fold -- confirm against the PyCaret docs.
et_model = create_model('et')

Unnamed: 0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
0,0.8393,0.9224,0.85,0.85,0.85,0.6769,0.6769
1,0.8036,0.8816,0.8,0.8276,0.8136,0.6061,0.6065
2,0.6875,0.7675,0.7167,0.7049,0.7107,0.371,0.371
3,0.8214,0.8864,0.85,0.8226,0.8361,0.6401,0.6405
4,0.7589,0.8651,0.7333,0.8,0.7652,0.5185,0.5205
5,0.7857,0.8715,0.8,0.8,0.8,0.5692,0.5692
6,0.8571,0.9207,0.8667,0.8667,0.8667,0.7128,0.7128
7,0.8661,0.9447,0.8333,0.9091,0.8696,0.7325,0.7354
8,0.8393,0.9041,0.8644,0.8361,0.85,0.677,0.6775
9,0.8198,0.9258,0.9153,0.7826,0.8437,0.634,0.6449


## Evaluate Model

In [13]:
# Open PyCaret's interactive evaluation widget for et_model; it renders a
# 'Plot Type' toggle-button selector, so the output only works in a live
# notebook session.
evaluate_model(et_model)

interactive(children=(ToggleButtons(description='Plot Type:', icons=('',), options=(('Hyperparameters', 'param…

## Predict on the Test Set

In [14]:
# Score et_model with predict_model(); no data argument is passed, so PyCaret
# chooses the rows to score (presumably the hold-out split created by
# setup() -- confirm). The output shows one summary metrics row and a frame
# with the added 'Label' and 'Score' columns.
predict_model(et_model)

Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
0,Extra Trees Classifier,0.8146,0.8983,0.8288,0.8256,0.8272,0.6272,0.6272


Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality,Label,Score
0,-0.005604,0.394596,0.136158,1.140747,-0.747088,0.742605,1.668267,0.302880,-0.576412,-0.792626,-1.086355,1,0,0.17
1,-0.769331,0.600717,-0.999232,-0.883571,-0.126655,-0.796673,-0.707976,0.565035,1.088545,-0.904344,-1.749849,0,0,0.31
2,0.249192,1.056111,-0.293609,0.344431,-0.126655,-0.479653,-0.003577,0.329166,0.019022,0.145317,-1.086355,0,0,0.22
3,-0.522435,0.063170,0.336652,-0.184197,-0.482036,2.201986,0.964234,-1.371097,0.212476,1.130120,1.826208,1,1,0.91
4,-0.443413,2.926164,-1.502162,1.623971,0.754724,-1.325466,-1.326805,-0.067088,1.995006,-0.792626,0.494160,0,0,0.08
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
475,0.678236,0.177116,0.287344,0.185945,1.145283,-0.332051,-0.003577,0.139514,-0.442392,-0.578801,0.658770,0,0,0.46
476,-0.443413,-0.968102,0.187128,0.612182,0.399926,1.089754,1.006003,-0.146889,0.529757,0.426514,0.735837,0,1,0.83
477,1.888491,-0.692787,1.199605,-0.397363,-1.267137,-1.142205,-0.707976,1.395468,-1.814347,-0.190145,-1.245039,1,1,0.67
478,-1.495902,0.650741,0.287344,-1.158059,0.440161,0.183230,0.830077,0.091949,0.965523,1.003909,-1.245039,0,0,0.37


## Save Model

In [15]:
# Persist et_model under the name 'extra_tree_model'. The status message
# printed below reports that the transformation pipeline is saved with it.
save_model(
    et_model,
    model_name='extra_tree_model',
)

Transformation Pipeline and Model Succesfully Saved
