In [3]:
import pandas as pd
import numpy as np
import matplotlib.pylab as plt
import seaborn as sns
import xgboost as xgb

from sklearn.metrics import classification_report, confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_wine


In [4]:
# Load the wine dataset as a (feature matrix, target vector) pair.
X, y = load_wine(return_X_y=True)

In [5]:
# Column labels for the 13 wine measurements.
# NOTE(review): assumed to follow the column order of the array returned by
# load_wine -- confirm against the dataset's own feature names.
features = [
    'alcohol', 'malic_acid', 'ash', 'alcalinity_of_ash', 'magnesium',
    'total_phenols', 'flavanoids', 'nonflavanoid_phenols',
    'proanthocyanins', 'color_intensity', 'hue',
    'od280/od315_of_diluted_wines', 'proline'
]

# Wrap the raw arrays in labelled pandas objects; the target is kept as a
# single-column frame named 'classes'.
X = pd.DataFrame(data=X, columns=features)
y = pd.DataFrame(data=y, columns=['classes'])

# A notebook cell only renders its final expression, so a bare `X.head()` in
# the middle of the cell is silently discarded. Use display() for the feature
# preview and leave the target preview as the cell's result.
display(X.head())
y.head()

Unnamed: 0,classes
0,0
1,0
2,0
3,0
4,0


In [6]:
# Number of samples per class label, to check how balanced the target is.
y['classes'].value_counts()

1    71
0    59
2    48
Name: classes, dtype: int64

In [7]:
# Seeded, stratified split so that the class proportions of y are kept in both
# partitions; the test fraction is left at the library default.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    stratify=y,
    random_state=42,
)

# Shapes of the four partitions, in the order train X / train y / test X / test y.
tuple(part.shape for part in (X_train, y_train, X_test, y_test))

((133, 13), (133, 1), (45, 13), (45, 1))

In [8]:
# Convert the splits into XGBoost's DMatrix containers. Only the training
# matrix carries labels; the test matrix holds features alone.
dtest = xgb.DMatrix(X_test)
dtrain = xgb.DMatrix(X_train, label=y_train)

In [9]:
# Booster configuration passed to xgb.train.
params = {
    # Maximum depth of each tree.
    'max_depth': 6,
    # Training objective (not an evaluation metric): multiclass softmax, for
    # which predict() yields the class label itself.
    'objective': 'multi:softmax',
    # The wine target has three classes (0, 1, 2).
    'num_class': 3,
    # The former 'n_gpus' entry was removed: that parameter was deprecated and
    # later dropped by XGBoost, so recent releases only warn that it is unused.
    # Training runs on the CPU by default.
}

In [10]:
# Fit the booster on the training matrix; the number of boosting rounds is
# left at the library default.
bst = xgb.train(params=params, dtrain=dtrain)


In [11]:
# Predict on the held-out features.
pred = bst.predict(data=dtest)


In [12]:
# Per-class precision / recall / F1 on the test split. print() is needed here
# because the report is a pre-formatted multi-line string.
print(classification_report(y_true=y_test, y_pred=pred))


              precision    recall  f1-score   support

           0       0.94      1.00      0.97        15
           1       1.00      0.94      0.97        18
           2       1.00      1.00      1.00        12

    accuracy                           0.98        45
   macro avg       0.98      0.98      0.98        45
weighted avg       0.98      0.98      0.98        45



In [14]:
# Peek at the first ten predictions.
pred[0:10]

array([0., 1., 0., 0., 1., 0., 0., 1., 1., 2.], dtype=float32)