## Dependencies

In [1]:
pip install ucimlrepo

Collecting ucimlrepo
  Downloading ucimlrepo-0.0.7-py3-none-any.whl (8.0 kB)
Installing collected packages: ucimlrepo
Successfully installed ucimlrepo-0.0.7


In [2]:
from ucimlrepo import fetch_ucirepo

# fetch dataset
poker_hand = fetch_ucirepo(id=158)

# data (as pandas dataframes)
X = poker_hand.data.features
y = poker_hand.data.targets


# variable information
print(poker_hand.variables)


     name     role     type demographic description units missing_values
0      S1  Feature  Integer        None        None  None             no
1      C1  Feature  Integer        None        None  None             no
2      S2  Feature  Integer        None        None  None             no
3      C2  Feature  Integer        None        None  None             no
4      S3  Feature  Integer        None        None  None             no
5      C3  Feature  Integer        None        None  None             no
6      S4  Feature  Integer        None        None  None             no
7      C4  Feature  Integer        None        None  None             no
8      S5  Feature  Integer        None        None  None             no
9      C5  Feature  Integer        None        None  None             no
10  CLASS   Target  Integer        None        None  None             no


In [3]:
# metadata
print(poker_hand.metadata)


{'uci_id': 158, 'name': 'Poker Hand', 'repository_url': 'https://archive.ics.uci.edu/dataset/158/poker+hand', 'data_url': 'https://archive.ics.uci.edu/static/public/158/data.csv', 'abstract': 'Purpose is to predict poker hands', 'area': 'Games', 'tasks': ['Classification'], 'characteristics': ['Multivariate'], 'num_instances': 1025010, 'num_features': 10, 'feature_types': ['Categorical', 'Integer'], 'demographics': [], 'target_col': ['CLASS'], 'index_col': None, 'has_missing_values': 'no', 'missing_values_symbol': None, 'year_of_dataset_creation': 2002, 'last_updated': 'Sat Mar 09 2024', 'dataset_doi': '10.24432/C5KW38', 'creators': ['Robert Cattral', 'Franz Oppacher'], 'intro_paper': None, 'additional_info': {'summary': 'Each record is an example of a hand consisting of five playing cards drawn from a standard deck of 52. Each card is described using two attributes (suit and rank), for a total of 10 predictive attributes. There is one Class attribute that describes the "Poker Hand". T

In [4]:
# variable information
print(poker_hand.variables)

     name     role     type demographic description units missing_values
0      S1  Feature  Integer        None        None  None             no
1      C1  Feature  Integer        None        None  None             no
2      S2  Feature  Integer        None        None  None             no
3      C2  Feature  Integer        None        None  None             no
4      S3  Feature  Integer        None        None  None             no
5      C3  Feature  Integer        None        None  None             no
6      S4  Feature  Integer        None        None  None             no
7      C4  Feature  Integer        None        None  None             no
8      S5  Feature  Integer        None        None  None             no
9      C5  Feature  Integer        None        None  None             no
10  CLASS   Target  Integer        None        None  None             no


In [5]:
import pandas as pd
from pathlib import Path
from sklearn import tree
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report

In [6]:
X.head()

Unnamed: 0,S1,C1,S2,C2,S3,C3,S4,C4,S5,C5
0,1,10,1,11,1,13,1,12,1,1
1,2,11,2,13,2,10,2,12,2,1
2,3,12,3,11,3,13,3,10,3,1
3,4,10,4,11,4,1,4,13,4,12
4,4,1,4,13,4,12,4,11,4,10


In [7]:
y[:5]

Unnamed: 0,CLASS
0,9
1,9
2,9
3,9
4,9


In [8]:
# Splitting into Train and Test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=78)

In [9]:
# Creating StandardScaler instance
scaler = StandardScaler()

In [10]:
# Fitting Standard Scaller
X_scaler = scaler.fit(X_train)

In [11]:
# Scaling data
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [12]:
# Creating the decision tree classifier instance
model = tree.DecisionTreeClassifier()

In [13]:
# Fitting the model
model = model.fit(X_train_scaled, y_train)

In [14]:
# Making predictions using the testing data
predictions = model.predict(X_test_scaled)

In [15]:
# Calculating the confusion matrix
cm = confusion_matrix(y_test, predictions)
cm_df = pd.DataFrame(
    cm, index=["Nothing in hand", "One Pair", "Two Pairs", "Three of a kind", "Straight", "Flush", "Full house", "Four of a kind", "Straight flush", "Royal flush"], columns=["Nothing in Hand", "One Pair", "Two Pairs", "Three of a kind", "Straight", "Flush", "Full house", "Four of a kind", "Straight flush", "Royal flush"]
)

# Calculating the accuracy score
acc_score = accuracy_score(y_test, predictions)


In [16]:
# Displaying results
print("Confusion Matrix")
display(cm_df)
print(f"Accuracy Score : {acc_score}")
print("Classification Report")
print(classification_report(y_test, predictions))

Confusion Matrix


Unnamed: 0,Nothing in Hand,One Pair,Two Pairs,Three of a kind,Straight,Flush,Full house,Four of a kind,Straight flush,Royal flush
Nothing in hand,90758,34575,1825,586,112,505,16,1,1,1
One Pair,32256,64784,7476,2774,605,113,124,7,4,0
Two Pairs,1451,6358,3884,409,65,5,115,18,1,0
Three of a kind,415,2447,345,2097,32,0,116,18,0,0
Straight,110,523,75,39,288,0,4,0,1,0
Flush,300,81,1,0,1,103,0,0,0,3
Full house,7,93,103,95,0,0,53,2,0,0
Four of a kind,1,4,18,26,0,0,3,13,0,0
Straight flush,0,2,0,0,3,1,0,0,0,0
Royal flush,0,0,0,0,0,1,0,0,0,0


Accuracy Score : 0.6321096728623665
Classification Report
              precision    recall  f1-score   support

           0       0.72      0.71      0.72    128380
           1       0.60      0.60      0.60    108143
           2       0.28      0.32      0.30     12306
           3       0.35      0.38      0.36      5470
           4       0.26      0.28      0.27      1040
           5       0.14      0.21      0.17       489
           6       0.12      0.15      0.14       353
           7       0.22      0.20      0.21        65
           8       0.00      0.00      0.00         6
           9       0.00      0.00      0.00         1

    accuracy                           0.63    256253
   macro avg       0.27      0.28      0.28    256253
weighted avg       0.64      0.63      0.63    256253

