In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# import seaborn as sb

# Column names: five S<i>/C<i> pairs (i = 1..5) followed by the CLASS label.
cols = [f"{prefix}{i}" for i in range(1, 6) for prefix in ("S", "C")]
cols.append("CLASS")

# The data file has no header row, so the names above are supplied explicitly.
df = pd.read_csv(
    './data/poker-hand-testing.data',
    header=None,
    names=cols,
)
df.head()

Unnamed: 0,S1,C1,S2,C2,S3,C3,S4,C4,S5,C5,CLASS
0,1,1,1,13,2,4,2,3,1,12,0
1,3,12,3,2,3,11,4,5,2,5,1
2,1,9,4,6,1,4,3,2,3,9,1
3,1,4,3,13,2,13,2,1,3,6,1
4,3,10,2,7,1,2,2,11,4,9,0


In [2]:
def roc_plot(y_test, y_prob, classes=None):
    """Plot one-vs-rest ROC curves, one per class, on a single figure.

    Parameters
    ----------
    y_test : array-like of shape (n_samples,)
        True class labels.
    y_prob : array-like of shape (n_samples, >= len(classes))
        Per-class scores; column ``i`` is used as the score for
        ``classes[i]``.
    classes : sequence, optional
        Class labels in the column order of ``y_prob``. Defaults to the
        ten labels ``0..9`` that this function originally hard-coded.

    Raises
    ------
    ValueError
        If ``y_prob`` is not 2-D, has fewer columns than ``classes``, or
        its row count differs from ``len(y_test)``.

    Notes
    -----
    A class whose one-vs-rest target is constant in ``y_test`` (no positive
    or no negative samples) has no defined ROC curve; such classes are
    skipped instead of being drawn with a NaN area.
    """
    from sklearn.metrics import roc_curve, auc

    if classes is None:
        classes = list(range(10))

    # Accept lists / Series / column vectors as well as plain ndarrays.
    y_true = np.asarray(y_test).ravel()
    scores = np.asarray(y_prob)

    if scores.ndim != 2 or scores.shape[1] < len(classes):
        raise ValueError(
            f"y_prob must be 2-D with at least {len(classes)} columns, "
            f"got shape {scores.shape}")
    if scores.shape[0] != y_true.shape[0]:
        raise ValueError(
            f"y_test has {y_true.shape[0]} samples but y_prob has "
            f"{scores.shape[0]} rows")

    palette = ['aqua', 'darkorange', 'cornflowerblue', 'green',
               'red', 'purple', 'yellow', 'gray', 'pink', 'brown']

    plt.figure(figsize=(10, 10))
    n_drawn = 0
    for i, cls in enumerate(classes):
        is_positive = (y_true == cls)
        n_pos = int(is_positive.sum())
        # ROC is undefined when the binary target is constant.
        if n_pos == 0 or n_pos == is_positive.size:
            continue

        fpr, tpr, _ = roc_curve(is_positive.astype(int), scores[:, i])
        # Colour is tied to the class position so it stays stable even when
        # some classes are skipped.
        plt.plot(fpr, tpr, color=palette[i % len(palette)],
                 label=f'ROC Curve of class {cls} (area = {auc(fpr, tpr):.2f})')
        n_drawn += 1

    # Chance-level diagonal for reference.
    plt.plot([0, 1], [0, 1], 'k--')
    plt.xlim([-.05, 1.01])
    plt.ylim([0.0, 1.01])
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title('ROC for poker hands')
    if n_drawn:
        # Avoid matplotlib's "no handles" warning when nothing was plotted.
        plt.legend(loc='lower right')
    plt.show(block=True)

In [3]:
from sklearn.preprocessing import normalize
from sklearn.model_selection import train_test_split


# Use the ten raw feature columns directly (as float).
# sklearn's `normalize` defaults to per-sample (row-wise) L2 scaling, which
# maps proportional rows to the same vector: e.g. (1,1,1,2,1,3,1,4,1,5) and
# (2,2,2,4,2,6,2,8,2,10) would become identical, discarding information the
# labels may depend on. Tree-based models also gain nothing from feature
# scaling, so the values are left untouched.
x = df.iloc[:, :10].to_numpy(dtype=float)
y = df['CLASS'].values

# Stratified hold-out split; the seed is fixed so a re-run yields the same
# partition.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=.2, stratify=y, random_state=42)

In [None]:
from sklearn.model_selection import StratifiedKFold
from catboost import CatBoostClassifier, Pool
from sklearn.metrics import classification_report

n_splits = 3
skf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=42)

# Candidate hyper-parameter grid (kept for reference; not used below):
#   iterations:    250, 500, 1000
#   depth:         4, 6, 8, 10
#   learning_rate: 0.001, 0.01, 0.05, 0.1
#   l2_leaf_reg:   1, 3, 5, 7, 9

# Settings shared by the model trained in every fold.
cat_params = dict(
    iterations=500,
    learning_rate=.05,
    depth=10,
    l2_leaf_reg=10,
    loss_function='MultiClass',
    verbose=200,
    task_type='GPU',
)

# NOTE: the folds are drawn from the full x / y, and the loop rebinds
# x_train / x_test / y_train / y_test on every iteration, so after the loop
# these names (and cat_model, y_pred, y_prob) refer to the last fold.
for train_index, val_index in skf.split(x, y):
    x_train, y_train = x[train_index], y[train_index]
    x_test, y_test = x[val_index], y[val_index]

    cat_model = CatBoostClassifier(**cat_params)
    cat_model.fit(x_train, y_train)

    y_prob = cat_model.predict_proba(x_test)
    y_pred = cat_model.predict(x_test)

    # Per-fold precision / recall / F1 on the validation part.
    print(classification_report(y_test, y_pred, digits=5, zero_division=1))

In [None]:
# When the cells are run in order, y_test and y_prob here are the values left
# by the final iteration of the cross-validation loop, so the ROC curves
# describe the last fold only.
roc_plot(y_test, y_prob)