In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# import seaborn as sb

# Column names for the poker-hand file: an S<i>/C<i> pair for each of the
# five cards, followed by the label -> S1, C1, S2, C2, ..., S5, C5, CLASS.
cols = [f"{prefix}{card}" for card in range(1, 6) for prefix in ("S", "C")]
cols.append("CLASS")

# Passing names= makes pandas treat every row of the file as data.
df = pd.read_csv('./data/poker-hand-testing.data', names=cols)
df.head()

In [None]:
def roc_plot(y_test, y_prob):
    """Draw one-vs-rest ROC curves for the ten poker-hand classes.

    Args:
        y_test: True class labels (integers 0-9), one per sample.
        y_prob: 2-D array of class scores, indexed as ``y_prob[:, k]`` for
            class ``k``.

    The labels are binarized against classes 0..9, a ROC curve and its AUC
    are computed for every class, and all curves are drawn on one 10x10
    figure together with the chance diagonal. Nothing is returned; the
    figure is displayed with ``plt.show``.
    """
    from itertools import cycle
    from sklearn.metrics import auc, roc_curve
    from sklearn.preprocessing import label_binarize

    onehot = label_binarize(y_test, classes=list(range(10)))
    n_classes = onehot.shape[1]

    # class index -> (false-positive rates, true-positive rates, AUC)
    curves = {}
    for k in range(n_classes):
        fp_rate, tp_rate, _ = roc_curve(onehot[:, k], y_prob[:, k])
        curves[k] = (fp_rate, tp_rate, auc(fp_rate, tp_rate))

    palette = cycle(['aqua', 'darkorange', 'cornflowerblue', 'green',
                     'red', 'purple', 'yellow', 'gray', 'pink', 'brown'])

    plt.figure(figsize=(10, 10))
    for (k, (fp_rate, tp_rate, area)), color in zip(curves.items(), palette):
        plt.plot(fp_rate, tp_rate, color=color,
                 label=f'ROC Curve of class {k} (area = {area:.2f})')

    # Dashed diagonal = performance of a random classifier.
    plt.plot([0, 1], [0, 1], 'k--')
    plt.xlim([-.05, 1.01])
    plt.ylim([0.0, 1.01])
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title('ROC for poker hands')
    plt.legend(loc='lower right')
    plt.show(block=True)

In [None]:
from sklearn.preprocessing import normalize
from sklearn.model_selection import train_test_split


# Feature matrix: the first ten columns, passed through sklearn's normalize
# (row-wise scaling with its default settings). Target: the CLASS column.
x = normalize(df.iloc[:, :10].values)
y = df['CLASS'].values

# Stratified 80/20 hold-out split. The seed is fixed so that
# "Restart Kernel -> Run All" reproduces exactly the same partition.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=.2, stratify=y, random_state=42)

Gradient Boosting Classifier (CatBoost)


In [None]:
import matplotlib.pyplot as plt
from sklearn.model_selection import StratifiedKFold
from catboost import CatBoostClassifier, Pool
from sklearn.metrics import classification_report

# 3-fold stratified cross-validation with a fixed shuffle seed.
n_splits = 3
skf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=42)


# Candidate search grid, kept for reference only (not used below):
# params = {
#     'iterations': [250, 500, 1000],
#     'depth': [4, 6, 8, 10],
#     'learning_rate': [0.001, 0.01, 0.05, 0.1],
#     'l2_leaf_reg': [1, 3, 5, 7, 9]
# }

# Settings shared by the model that is trained on every fold.
cat_params = dict(
    iterations=500,
    learning_rate=.05,
    depth=10,
    l2_leaf_reg=10,
    loss_function='MultiClass',
    verbose=200,
    task_type='GPU',
)

# NOTE: every iteration rebinds x_train / x_test / y_train / y_test as well as
# cat_model, y_pred and y_prob, so once the loop finishes these names refer to
# the LAST fold only (and no longer to any earlier hold-out split).
for train_index, val_index in skf.split(x, y):
    x_train, y_train = x[train_index], y[train_index]
    x_test, y_test = x[val_index], y[val_index]

    cat_model = CatBoostClassifier(**cat_params)
    cat_model.fit(x_train, y_train)

    y_prob = cat_model.predict_proba(x_test)
    y_pred = cat_model.predict(x_test)

    # Per-fold precision / recall / F1 report.
    print(classification_report(y_test, y_pred, digits=5, zero_division=1))

In [None]:
# Plot ROC curves from the y_test / y_prob currently in the namespace, i.e.
# the values left behind by the most recently executed evaluation cell.
roc_plot(y_test, y_prob)

Random Forest Classifier


In [None]:
from sklearn.ensemble import RandomForestClassifier

# Parameter set used for the forest (mirrors the constructor call below):
#   n_estimators=200, min_samples_split=2, min_samples_leaf=1,
#   max_depth=40, criterion='entropy', bootstrap=False

rf_model = RandomForestClassifier(criterion='entropy', min_samples_leaf=1, n_jobs=8,
                                  max_depth=40, bootstrap=False, n_estimators=200,
                                  min_samples_split=2, verbose=1, warm_start=True)

rf_model.fit(x_train, y_train.ravel())
y_pred = rf_model.predict(x_test)
# The probabilities must come from the forest itself. Taking them from
# cat_model (as this cell previously did) made the ROC plot that follows show
# the CatBoost model again instead of the random forest.
y_prob = rf_model.predict_proba(x_test)

print(classification_report(y_test, y_pred, digits=5, zero_division=1))

In [None]:
# Plot ROC curves from the y_test / y_prob produced by the preceding cell.
roc_plot(y_test, y_prob)

Neural Network


In [None]:
import tensorflow as tf
import numpy as np
import pandas as pd

# Same column layout as before: S1, C1, ..., S5, C5 and finally CLASS.
cols = [f"{prefix}{card}" for card in range(1, 6) for prefix in ("S", "C")]
cols.append("CLASS")

# Re-read the data so the neural-network section starts from a fresh frame.
df = pd.read_csv('./data/poker-hand-testing.data', names=cols)

In [None]:
# Features are the first ten columns; column 10 holds the class label.
# `.values` already yields NumPy arrays, so no further conversion is needed.
x_train = df.iloc[:, 0:10].values
y_train = df.iloc[:, 10].values

# One-hot encode the labels for categorical cross-entropy. num_classes is
# pinned to the width of the softmax layer so the target shape matches the
# network output even if the highest class label is absent from the data.
y_train = tf.keras.utils.to_categorical(y_train, num_classes=10)

# Small fully connected network: 10 inputs -> 16 -> 8 -> 10-way softmax.
model = tf.keras.models.Sequential()
model.add(tf.keras.layers.Dense(16, input_shape=(10,),
          kernel_initializer='uniform', activation='relu'))
model.add(tf.keras.layers.Dense(
    8, kernel_initializer='uniform', activation='relu'))
model.add(tf.keras.layers.Dense(10, activation='softmax'))
model.compile(loss='categorical_crossentropy',
              optimizer='adam', metrics=['accuracy'],)

# `h` keeps the training history returned by fit().
h = model.fit(x_train, y_train, epochs=10, batch_size=32)

# Persist the trained model so later cells can reload it from disk.
model.save('model.keras')
# summary() prints the table itself and returns None, so it is not wrapped
# in print() (which would add a stray "None" line to the output).
model.summary()

In [None]:
# label_binarize is part of sklearn.preprocessing's public API; it is only
# reachable through sklearn.calibration as an internal import there.
from sklearn.preprocessing import label_binarize
from sklearn.metrics import roc_curve, auc
from itertools import cycle


model = tf.keras.models.load_model('model.keras')

# Load test data.
# header=None: like the other poker-hand file, this one is read without a
# header row. Without it pandas would consume the first hand as column names
# and silently drop that sample from the evaluation.
test_data = pd.read_csv('./data/poker-hand-training-true.data', header=None)
x_test = test_data.iloc[:, 0:10].values
y_test = test_data.iloc[:, 10].values

# Predict using the model: per-class probabilities and the arg-max class.
y_pred_probs = model.predict(x_test)
y_pred_classes = np.argmax(y_pred_probs, axis=1)

# Human-readable legend labels, keyed by class index.
hand_name = {
    0: 'Nothing in hand',
    1: 'One pair',
    2: 'Two pairs',
    3: 'Three of a kind',
    4: 'Straight',
    5: 'Flush',
    6: 'Full house',
    7: 'Four of a kind',
    8: 'Straight flush',
    9: 'Royal flush',
}

# Convert labels to one-hot encoding (one indicator column per class).
y_test_bin = label_binarize(y_test, classes=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
n_classes = y_test_bin.shape[1]

# Compute ROC curve and ROC area for each class (one-vs-rest).
fpr = dict()
tpr = dict()
roc_auc = dict()
for i in range(n_classes):
    fpr[i], tpr[i], _ = roc_curve(y_test_bin[:, i], y_pred_probs[:, i])
    roc_auc[i] = auc(fpr[i], tpr[i])

# Plot ROC curves, one colour per class.
plt.figure(figsize=(10, 8))
colors = cycle(['aqua', 'darkorange', 'cornflowerblue', 'green',
               'red', 'purple', 'yellow', 'gray', 'pink', 'brown'])
for i, color in zip(range(n_classes), colors):
    plt.plot(fpr[i], tpr[i], color=color, lw=2,
             label='{0} (area = {1:0.2f})'.format(hand_name[i], roc_auc[i]))

# Dashed diagonal = performance of a random classifier.
plt.plot([0, 1], [0, 1], 'k--', lw=2)
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Receiver Operating Characteristic (ROC) for Multi-Class')
plt.legend(loc="lower right")
plt.show()

In [None]:
from sklearn.metrics import classification_report
features = ['S1', 'C1', 'S2', 'C2', 'S3',
            'C3', 'S4', 'C4', 'S5', 'C5', 'CLASS']
# NOTE(review): this is the same file the network was fitted on in the
# training cell, so the numbers below are in-sample metrics - confirm whether
# a held-out file was intended here.
tdata = pd.read_csv('./data/poker-hand-testing.data', names=features)

x_test = tdata.iloc[:, 0:10].values
y_test = tdata.iloc[:, 10].values

# One-hot targets for model.evaluate; num_classes matches the 10-unit softmax.
y_test = tf.keras.utils.to_categorical(y_test, num_classes=10)

loss, accuracy = model.evaluate(x_test, y_test)

# Convert the network's class probabilities for THIS x_test to class labels.
# (The earlier `y_pred` variable holds 1-D random-forest labels for a
# different sample set, so it cannot be used here.)
y_pred_classes = np.argmax(model.predict(x_test), axis=1)
y_test_classes = np.argmax(y_test, axis=1)

# Generate classification report
report = classification_report(y_test_classes, y_pred_classes)

# print() renders the report as a table; a bare `report` expression would
# display the raw string with escaped newlines.
print(report)