In [1]:
pip install ucimlrepo

Collecting ucimlrepo
  Obtaining dependency information for ucimlrepo from https://files.pythonhosted.org/packages/3b/07/1252560194df2b4fad1cb3c46081b948331c63eb1bb0b97620d508d12a53/ucimlrepo-0.0.7-py3-none-any.whl.metadata
  Downloading ucimlrepo-0.0.7-py3-none-any.whl.metadata (5.5 kB)
Downloading ucimlrepo-0.0.7-py3-none-any.whl (8.0 kB)
Installing collected packages: ucimlrepo
Successfully installed ucimlrepo-0.0.7
Note: you may need to restart the kernel to use updated packages.


In [2]:
pip install pyspark




In [3]:
from ucimlrepo import fetch_ucirepo
import numpy as np

# fetch dataset
poker_hand = fetch_ucirepo(id=158)

# data (as pandas dataframes)
X = poker_hand.data.features
y = poker_hand.data.targets
y_shaped = np.ravel(y)

In [4]:
# metadata
print(poker_hand.metadata)


{'uci_id': 158, 'name': 'Poker Hand', 'repository_url': 'https://archive.ics.uci.edu/dataset/158/poker+hand', 'data_url': 'https://archive.ics.uci.edu/static/public/158/data.csv', 'abstract': 'Purpose is to predict poker hands', 'area': 'Games', 'tasks': ['Classification'], 'characteristics': ['Multivariate'], 'num_instances': 1025010, 'num_features': 10, 'feature_types': ['Categorical', 'Integer'], 'demographics': [], 'target_col': ['CLASS'], 'index_col': None, 'has_missing_values': 'no', 'missing_values_symbol': None, 'year_of_dataset_creation': 2002, 'last_updated': 'Sat Mar 09 2024', 'dataset_doi': '10.24432/C5KW38', 'creators': ['Robert Cattral', 'Franz Oppacher'], 'intro_paper': None, 'additional_info': {'summary': 'Each record is an example of a hand consisting of five playing cards drawn from a standard deck of 52. Each card is described using two attributes (suit and rank), for a total of 10 predictive attributes. There is one Class attribute that describes the "Poker Hand". T

In [5]:
# variable information
print(poker_hand.variables)

     name     role     type demographic description units missing_values
0      S1  Feature  Integer        None        None  None             no
1      C1  Feature  Integer        None        None  None             no
2      S2  Feature  Integer        None        None  None             no
3      C2  Feature  Integer        None        None  None             no
4      S3  Feature  Integer        None        None  None             no
5      C3  Feature  Integer        None        None  None             no
6      S4  Feature  Integer        None        None  None             no
7      C4  Feature  Integer        None        None  None             no
8      S5  Feature  Integer        None        None  None             no
9      C5  Feature  Integer        None        None  None             no
10  CLASS   Target  Integer        None        None  None             no


In [6]:
from sklearn.model_selection import train_test_split
from sklearn.svm import LinearSVC
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, classification_report

In [7]:
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y_shaped, test_size=0.2, random_state=42)

# Standardize the data (SVMs perform better with standardized data)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [8]:
linear_svm_model = LinearSVC(multi_class='ovr')
linear_svm_model.fit(X_train, y_train)



In [9]:
# Make predictions
predictions = linear_svm_model.predict(X_test)
print(predictions)

[0 0 0 ... 0 0 0]


In [10]:
# Evaluate the model
accuracy = accuracy_score(y_test, predictions)
print(f"Accuracy: {accuracy * 100:.2f}%")

Accuracy: 49.96%


In [11]:
from sklearn.metrics import classification_report
print(classification_report(y_test, predictions,
                            target_names=["S1", "C1", "S2", "C2", "S3", "C3", "S4", "C4", "S5", "C5"]))

              precision    recall  f1-score   support

          S1       0.50      1.00      0.67    102428
          C1       0.00      0.00      0.00     86945
          S2       0.00      0.00      0.00      9691
          C2       0.00      0.00      0.00      4352
          S3       0.00      0.00      0.00       808
          C3       0.00      0.00      0.00       405
          S4       0.00      0.00      0.00       308
          C4       0.00      0.00      0.00        60
          S5       0.00      0.00      0.00         3
          C5       0.00      0.00      0.00         2

    accuracy                           0.50    205002
   macro avg       0.05      0.10      0.07    205002
weighted avg       0.25      0.50      0.33    205002



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [14]:
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report
import pandas as pd
# Calculating the confusion matrix
cm = confusion_matrix(y_test, predictions)
cm_df = pd.DataFrame(
    cm, index=["Actual No Hand", "Actual Pair", "Actual Two Pair", "Actual Three of a Kind", "Actual Straight", "Actual Flush", "Actual Full House", 
               "Actual Four of a Kind", "Actual Straight Flush", "Actual Royal Flush"], columns=["Predicted No Hand", "Predicted Pair", "Predicted Two Pair", 
                                                                                          "Predicted Three of a Kind", "Predicted Straight", "Predicted Flush", 
                                                                                          "Predicted Full House", "Predicted Four of a Kind", 
                                                                                          "Predicted Straight Flush", "Predicted Royal Flush" ]
)

# Calculating the accuracy score
acc_score = accuracy_score(y_test, predictions)

In [15]:
# Displaying results
print("Confusion Matrix")
display(cm_df)
print(f"Accuracy Score : {acc_score}")
print("Classification Report")
print(classification_report(y_test, predictions))

Confusion Matrix


Unnamed: 0,Predicted No Hand,Predicted Pair,Predicted Two Pair,Predicted Three of a Kind,Predicted Straight,Predicted Flush,Predicted Full House,Predicted Four of a Kind,Predicted Straight Flush,Predicted Royal Flush
Actual No Hand,102428,0,0,0,0,0,0,0,0,0
Actual Pair,86945,0,0,0,0,0,0,0,0,0
Actual Two Pair,9691,0,0,0,0,0,0,0,0,0
Actual Three of a Kind,4352,0,0,0,0,0,0,0,0,0
Actual Straight,808,0,0,0,0,0,0,0,0,0
Actual Flush,405,0,0,0,0,0,0,0,0,0
Actual Full House,308,0,0,0,0,0,0,0,0,0
Actual Four of a Kind,60,0,0,0,0,0,0,0,0,0
Actual Straight Flush,3,0,0,0,0,0,0,0,0,0
Actual Royal Flush,2,0,0,0,0,0,0,0,0,0


Accuracy Score : 0.49964390591311303
Classification Report
              precision    recall  f1-score   support

           0       0.50      1.00      0.67    102428
           1       0.00      0.00      0.00     86945
           2       0.00      0.00      0.00      9691
           3       0.00      0.00      0.00      4352
           4       0.00      0.00      0.00       808
           5       0.00      0.00      0.00       405
           6       0.00      0.00      0.00       308
           7       0.00      0.00      0.00        60
           8       0.00      0.00      0.00         3
           9       0.00      0.00      0.00         2

    accuracy                           0.50    205002
   macro avg       0.05      0.10      0.07    205002
weighted avg       0.25      0.50      0.33    205002



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
