# Installing Dependencies

In [1]:
!pip install catboost
!pip install lightgbm
!pip install xgboost

Collecting catboost
  Downloading catboost-1.2.2-cp39-cp39-win_amd64.whl (101.0 MB)
     ------------------------------------- 101.0/101.0 MB 13.4 MB/s eta 0:00:00
Collecting graphviz
  Downloading graphviz-0.20.1-py3-none-any.whl (47 kB)
     ---------------------------------------- 47.0/47.0 KB ? eta 0:00:00
Installing collected packages: graphviz, catboost
Successfully installed catboost-1.2.2 graphviz-0.20.1


You should consider upgrading via the 'C:\Users\abhin\anaconda3\python.exe -m pip install --upgrade pip' command.




You should consider upgrading via the 'C:\Users\abhin\anaconda3\python.exe -m pip install --upgrade pip' command.


Collecting xgboost

You should consider upgrading via the 'C:\Users\abhin\anaconda3\python.exe -m pip install --upgrade pip' command.



  Using cached xgboost-2.0.0-py3-none-win_amd64.whl (99.7 MB)
Installing collected packages: xgboost
Successfully installed xgboost-2.0.0


# Importing Dependencies

In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import time

from sklearn import metrics
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.model_selection import RepeatedKFold
from sklearn.model_selection import cross_val_score
from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble import GradientBoostingClassifier
from catboost import CatBoostClassifier
from lightgbm import LGBMClassifier
from xgboost import XGBClassifier

import warnings
warnings.filterwarnings("ignore", category=FutureWarning, module="xgboost")

# Loading Dataset
(Unbalanced) Wine Dataset
You can download it from: https://archive.ics.uci.edu/dataset/109/wine

In [3]:
wine_df = pd.read_csv('wine.data', header=None)

In [4]:
wine_df.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13
0,1,14.23,1.71,2.43,15.6,127,2.8,3.06,0.28,2.29,5.64,1.04,3.92,1065
1,1,13.2,1.78,2.14,11.2,100,2.65,2.76,0.26,1.28,4.38,1.05,3.4,1050
2,1,13.16,2.36,2.67,18.6,101,2.8,3.24,0.3,2.81,5.68,1.03,3.17,1185
3,1,14.37,1.95,2.5,16.8,113,3.85,3.49,0.24,2.18,7.8,0.86,3.45,1480
4,1,13.24,2.59,2.87,21.0,118,2.8,2.69,0.39,1.82,4.32,1.04,2.93,735


In [5]:
wine_df.shape

(178, 14)

In [6]:
wine_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 178 entries, 0 to 177
Data columns (total 14 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   0       178 non-null    int64  
 1   1       178 non-null    float64
 2   2       178 non-null    float64
 3   3       178 non-null    float64
 4   4       178 non-null    float64
 5   5       178 non-null    int64  
 6   6       178 non-null    float64
 7   7       178 non-null    float64
 8   8       178 non-null    float64
 9   9       178 non-null    float64
 10  10      178 non-null    float64
 11  11      178 non-null    float64
 12  12      178 non-null    float64
 13  13      178 non-null    int64  
dtypes: float64(11), int64(3)
memory usage: 19.6 KB


In [23]:
# Unbalanced dataset
# First feature is the target

wine_df[0].value_counts()

2    71
1    59
3    48
Name: 0, dtype: int64

In [9]:
wine_df.isna().sum()

0     0
1     0
2     0
3     0
4     0
5     0
6     0
7     0
8     0
9     0
10    0
11    0
12    0
13    0
dtype: int64

In [10]:
X = wine_df.iloc[:, 1:]
y = wine_df.iloc[:, 0]

In [11]:
X.head()

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,11,12,13
0,14.23,1.71,2.43,15.6,127,2.8,3.06,0.28,2.29,5.64,1.04,3.92,1065
1,13.2,1.78,2.14,11.2,100,2.65,2.76,0.26,1.28,4.38,1.05,3.4,1050
2,13.16,2.36,2.67,18.6,101,2.8,3.24,0.3,2.81,5.68,1.03,3.17,1185
3,14.37,1.95,2.5,16.8,113,3.85,3.49,0.24,2.18,7.8,0.86,3.45,1480
4,13.24,2.59,2.87,21.0,118,2.8,2.69,0.39,1.82,4.32,1.04,2.93,735


In [12]:
X.shape

(178, 13)

In [13]:
y.head()

0    1
1    1
2    1
3    1
4    1
Name: 0, dtype: int64

In [14]:
y.shape

(178,)

In [15]:
# Since version 1.3.2 XGBoost needs target columns to start with 0 value

le = LabelEncoder()
y = le.fit_transform(y)

# Training and comparing

In [17]:
names = [
          'AdaBoost',
          'GradBoost',
          'CatBoost',
          'LightGBM',
          'XGBoost'
        ]

classifiers = [
                AdaBoostClassifier(),
                GradientBoostingClassifier(),
                CatBoostClassifier(silent=True),
                LGBMClassifier(verbosity=-1),
                XGBClassifier()
              ]

In [18]:
rkf = RepeatedKFold(n_splits=10, n_repeats=10, random_state=42)

In [19]:
wine_scores = []
wine_scores_mean = []
wine_scores_std = []
model_names = []
execution_times = []

for name, clf in zip(names, classifiers):
  start_time = time.time()
  results = cross_val_score(clf, X, y, cv=rkf)
  end_time = time.time()

  wine_scores.append(results)
  wine_scores_mean.append(results.mean()*100)
  wine_scores_std.append(results.std()*100)
  model_names.append(name)
  execution_time = end_time - start_time
  execution_times.append(execution_time)

  print(f'--------- {name} on Wine Dataset ---------')
  print('Accuracy: %.2f%% (%.2f%%)' % (results.mean()*100, results.std()*100))
  print(f'Execution Time: {execution_time:.2f} seconds')
  print('------------------------------')

--------- AdaBoost on Wine Dataset ---------
Accuracy: 88.77% (9.19%)
Execution Time: 5.96 seconds
------------------------------
--------- GradBoost on Wine Dataset ---------
Accuracy: 95.07% (4.75%)
Execution Time: 21.71 seconds
------------------------------
--------- CatBoost on Wine Dataset ---------
Accuracy: 97.98% (3.23%)
Execution Time: 189.78 seconds
------------------------------
--------- LightGBM on Wine Dataset ---------
Accuracy: 97.42% (3.21%)
Execution Time: 4.19 seconds
------------------------------
--------- XGBoost on Wine Dataset ---------
Accuracy: 96.68% (4.06%)
Execution Time: 4.72 seconds
------------------------------


In [20]:
Algo_results = pd.DataFrame()
Algo_results['Names'] = names

In [21]:
Algo_results['Wine'] = wine_scores_mean

In [22]:
Algo_results

Unnamed: 0,Names,Wine
0,AdaBoost,88.767974
1,GradBoost,95.071895
2,CatBoost,97.977124
3,LightGBM,97.421569
4,XGBoost,96.683007


In [31]:
Algo_results_std = pd.DataFrame()
Algo_results_std['Names'] = names

In [32]:
Algo_results_std['Wine'] = wine_scores_std

In [33]:
Algo_results_std

Unnamed: 0,Names,Wine
0,AdaBoost,8.935178
1,GradBoost,4.689441
2,CatBoost,3.231395
3,LightGBM,3.206245
4,XGBoost,4.235288


In [34]:
Algo_time_results = pd.DataFrame()
Algo_time_results['Names'] = names

In [35]:
Algo_time_results['Wine'] = pd.Series(execution_times)

In [36]:
Algo_time_results

Unnamed: 0,Names,Wine
0,AdaBoost,12.676296
1,GradBoost,35.455523
2,CatBoost,252.544651
3,LightGBM,3.015574
4,XGBoost,3.69169
