In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, classification_report, mean_squared_error, r2_score
from sklearn.preprocessing import OneHotEncoder, LabelEncoder, MinMaxScaler
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sqlalchemy import create_engine
import psycopg2
from config import engine_key
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.utils import to_categorical
from ucimlrepo import fetch_ucirepo

In [None]:
# This cell can be used to pull the feature and target data already seperated directly from UC Irvines website

# fetch dataset 
poker_hand = fetch_ucirepo(id=158) 
  
# data (as pandas dataframes) 
X = poker_hand.data.features 
y = poker_hand.data.targets

# Exporting Data to Postgres and importing back into Python

In [None]:
# Merge features and target data into one pandas dataframe
poker_data = pd.concat([X,y], axis=1)

In [None]:
# Export poker_data df as a csv 
poker_data.to_csv('Resources/poker_data.csv', index=False)

In [30]:
# Create the engine and connection to our SQL database
engine = create_engine(f'postgresql+psycopg2://{engine_key}')
conn = engine.raw_connection()

In [31]:
# Query All Records in the the Database
poker_df = pd.read_sql("SELECT * FROM poker_data", conn)

  poker_df = pd.read_sql("SELECT * FROM poker_data", conn)


In [32]:
# Close the connection after use
conn.close()

In [33]:
# Check to see if data has been pulled from SQL into pandas df 
poker_df.head()

Unnamed: 0,s1,c1,s2,c2,s3,c3,s4,c4,s5,c5,CLASS
0,1,10,1,11,1,13,1,12,1,1,9
1,2,11,2,13,2,10,2,12,2,1,9
2,3,12,3,11,3,13,3,10,3,1,9
3,4,10,4,11,4,1,4,13,4,12,9
4,4,1,4,13,4,12,4,11,4,10,9


In [34]:
# Separate the data into labels and features

# Separate the y variable, the labels
y = poker_df['CLASS']

# Separate the X variable, the features
X = poker_df.drop(columns=['CLASS'])

# Random Forest 

In [3]:
# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

In [4]:
# Define the model pipeline
# List of categorical columns to one-hot encode
suits_columns = ['S1', 'S2', 'S3', 'S4', 'S5']
ranks_columns = ['C1', 'C2', 'C3', 'C4', 'C5']


In [5]:
# Combine suit and rank columns into one list for one-hot encoding
categorical_columns = suits_columns + ranks_columns

In [6]:
# Define a ColumnTransformer to apply OneHotEncoding to the categorical columns
preprocessor = ColumnTransformer(
    transformers=[
        ('categorical', OneHotEncoder(), categorical_columns)
    ],
    remainder='passthrough'  # Leave the target column as is
)

In [7]:
# Create the full pipeline with preprocessor and model
model = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('classifier', RandomForestClassifier(n_estimators=100, random_state=42))
])

In [8]:
# Train the model
model.fit(X_train, y_train.to_numpy().ravel())

In [9]:
# Make predictions
y_pred = model.predict(X_test)

In [10]:
# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.4f}")

Accuracy: 0.9183


In [None]:
# Classification report to see more info on accuarcy and efficiency 
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.97      1.00      0.98    153717
           1       0.86      0.98      0.92    130369
           2       0.83      0.01      0.02     14545
           3       0.88      0.19      0.31      6524
           4       1.00      0.00      0.01      1203
           5       1.00      0.01      0.01       611
           6       1.00      0.01      0.02       446
           7       0.00      0.00      0.00        81
           8       0.00      0.00      0.00         4
           9       0.00      0.00      0.00         3

    accuracy                           0.92    307503
   macro avg       0.65      0.22      0.23    307503
weighted avg       0.92      0.92      0.89    307503



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


# Linear Regression

In [None]:
# Encode the categorical target variable
encoder = LabelEncoder()
y_encoded = encoder.fit_transform(y)

In [None]:
# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y_encoded, test_size=0.2, random_state=42)

In [None]:
# Initialize the linear regression model
model = LinearRegression()

In [None]:
# Fit the model to the training data
model.fit(X_train, y_train)

In [None]:
# Make predictions on the test data
y_pred = model.predict(X_test)

In [None]:
# Evaluate the model
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

# Display results
print(f"Mean Squared Error: {mse:.2f}")
print(f"R^2 Score: {r2:.2f}")

# K-Nearest Neighbor

In [None]:
# Split the dataset into training and test sets (80% train, 20% test)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [None]:
# Apply Min-Max scaling
scaler = MinMaxScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [None]:
# Convert y_train and y_test to NumPy arrays and fix their shape to be 1D arrays
y_train = y_train.values.ravel()  # Convert to NumPy array and flatten it
y_test = y_test.values.ravel()    # Convert to NumPy array and flatten it

# Initialize and train the KNN model with k=?
knn = KNeighborsClassifier(n_neighbors = 9)
knn.fit(X_train_scaled, y_train)

In [None]:
# Predict using the test set
y_pred = knn.predict(X_test_scaled)

In [None]:
# Evaluate the model performance
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy after scaling: {accuracy * 100:.2f}%")

In [None]:
# Print accuracy on the training data
print("Training accuracy:", knn.score(X_train_scaled, y_train))

# Print accuracy on the test data
print("Test accuracy:", knn.score(X_test_scaled, y_test))

In [None]:
# Predict on the test set
y_pred = knn.predict(X_test_scaled)

# Generate classification report
report = classification_report(y_test, y_pred, target_names=[
    "Nothing in hand", "One pair", "Two pairs", "Three of a kind",
    "Straight", "Flush", "Full house", "Four of a kind", "Straight flush", "Royal flush"
])

print(report)

# Neural Network


In [40]:
# Encode the categorical target variable
encoder = LabelEncoder()
y_encoded = encoder.fit_transform(y)

In [41]:
# One-hot encode the target variable for classification
y_onehot = to_categorical(y_encoded)

In [42]:
# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y_onehot, test_size=0.2, random_state=42)

In [None]:
# Define the neural network model
model = Sequential([
    Dense(64, input_dim=X_train.shape[1], activation='relu'),  # Input layer
    Dense(32, activation='relu'),  # Hidden layer
    Dense(y_onehot.shape[1], activation='softmax')  # Output layer with softmax for multiclass classification
])

In [None]:
# Compile the model
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

In [None]:
# Train the model
history = model.fit(X_train, y_train, epochs=20, batch_size=32, validation_split=0.2, verbose=1)

In [None]:
# Evaluate the model on the test data
test_loss, test_accuracy = model.evaluate(X_test, y_test, verbose=0)
print(f'Test Loss: {test_loss:.2f}')
print(f'Test Accuracy: {test_accuracy:.2f}')

In [9]:
# Predict classes for the test data
y_pred = np.argmax(model.predict(X_test), axis=1)
y_test_labels = np.argmax(y_test, axis=1)


  y = column_or_1d(y, warn=True)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/20
[1m20501/20501[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 623us/step - accuracy: 0.5428 - loss: 0.9611 - val_accuracy: 0.6679 - val_loss: 0.7764
Epoch 2/20
[1m20501/20501[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 614us/step - accuracy: 0.7439 - loss: 0.6460 - val_accuracy: 0.8892 - val_loss: 0.3201
Epoch 3/20
[1m20501/20501[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 606us/step - accuracy: 0.9153 - loss: 0.2574 - val_accuracy: 0.9864 - val_loss: 0.0734
Epoch 4/20
[1m20501/20501[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 607us/step - accuracy: 0.9881 - loss: 0.0637 - val_accuracy: 0.9936 - val_loss: 0.0339
Epoch 5/20
[1m20501/20501[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 620us/step - accuracy: 0.9925 - loss: 0.0377 - val_accuracy: 0.9944 - val_loss: 0.0258
Epoch 6/20
[1m20501/20501[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 623us/step - accuracy: 0.9939 - loss: 0.0285 - val_accuracy: 0.9968 - val

In [11]:
# Print a classification report
print(classification_report(y_test_labels, y_pred, target_names=[str(cls) for cls in encoder.classes_]))


              precision    recall  f1-score   support

           0       1.00      1.00      1.00    102428
           1       1.00      1.00      1.00     86945
           2       1.00      1.00      1.00      9691
           3       1.00      1.00      1.00      4352
           4       1.00      0.88      0.94       808
           5       1.00      0.36      0.53       405
           6       0.99      0.99      0.99       308
           7       1.00      0.97      0.98        60
           8       1.00      0.67      0.80         3
           9       0.00      0.00      0.00         2

    accuracy                           1.00    205002
   macro avg       0.90      0.79      0.82    205002
weighted avg       1.00      1.00      1.00    205002



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


# First Optimization Attempt on Neural Network
---

### Adding a third hidden layer with a ReLU activation


In [43]:
# Define the neural network model
model = Sequential([
    Dense(64, input_dim=X_train.shape[1], activation='relu'),  # Input layer
    Dense(32, activation='relu'),  # Hidden layer
    Dense(16, activation='relu'), # Hidden layer
    Dense(y_onehot.shape[1], activation='softmax')  # Output layer with softmax for multiclass classification
])

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [44]:
# Compile the model
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

In [45]:
# Train the model
history = model.fit(X_train, y_train, epochs=20, batch_size=32, validation_split=0.2, verbose=1)

Epoch 1/20
[1m20501/20501[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 615us/step - accuracy: 0.5533 - loss: 0.9404 - val_accuracy: 0.7164 - val_loss: 0.6784
Epoch 2/20
[1m20501/20501[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 618us/step - accuracy: 0.7623 - loss: 0.5769 - val_accuracy: 0.8974 - val_loss: 0.2915
Epoch 3/20
[1m20501/20501[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 615us/step - accuracy: 0.9156 - loss: 0.2377 - val_accuracy: 0.9840 - val_loss: 0.0697
Epoch 4/20
[1m20501/20501[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 613us/step - accuracy: 0.9591 - loss: 0.1291 - val_accuracy: 0.9924 - val_loss: 0.0397
Epoch 5/20
[1m20501/20501[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 619us/step - accuracy: 0.9735 - loss: 0.0936 - val_accuracy: 0.9936 - val_loss: 0.0384
Epoch 6/20
[1m20501/20501[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 615us/step - accuracy: 0.9819 - loss: 0.0762 - val_accuracy: 0.9130 - val

In [46]:
# Evaluate the model on the test data
test_loss, test_accuracy = model.evaluate(X_test, y_test, verbose=0)
print(f'Test Loss: {test_loss:.2f}')
print(f'Test Accuracy: {test_accuracy:.2f}')

Test Loss: 0.02
Test Accuracy: 1.00


In [47]:
# Predict classes for the test data
y_pred = np.argmax(model.predict(X_test), axis=1)
y_test_labels = np.argmax(y_test, axis=1)


[1m6407/6407[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 259us/step


In [48]:
# Print a classification report
print(classification_report(y_test_labels, y_pred, target_names=[str(cls) for cls in encoder.classes_]))


              precision    recall  f1-score   support

           0       1.00      1.00      1.00    102428
           1       1.00      1.00      1.00     86945
           2       0.99      0.99      0.99      9691
           3       0.98      1.00      0.99      4352
           4       0.98      0.89      0.93       808
           5       0.00      0.00      0.00       405
           6       0.96      0.95      0.95       308
           7       0.83      1.00      0.91        60
           8       0.00      0.00      0.00         3
           9       0.00      0.00      0.00         2

    accuracy                           1.00    205002
   macro avg       0.67      0.68      0.68    205002
weighted avg       0.99      1.00      1.00    205002



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


# Second Optimization Attempt on Neural Network
---

### Returning to only two layers and changing activation to Tanh 

In [50]:
# Define the neural network model
model = Sequential([
    Dense(64, input_dim=X_train.shape[1], activation='tanh'),  # Input layer
    Dense(32, activation='tanh'),  # Hidden layer
    Dense(y_onehot.shape[1], activation='softmax')  # Output layer with softmax for multiclass classification
])

In [51]:
# Compile the model
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

In [52]:
# Train the model
history = model.fit(X_train, y_train, epochs=20, batch_size=32, validation_split=0.2, verbose=1)

Epoch 1/20
[1m20501/20501[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 580us/step - accuracy: 0.5498 - loss: 0.9500 - val_accuracy: 0.6581 - val_loss: 0.7803
Epoch 2/20
[1m20501/20501[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 579us/step - accuracy: 0.6981 - loss: 0.7098 - val_accuracy: 0.8398 - val_loss: 0.4239
Epoch 3/20
[1m20501/20501[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 574us/step - accuracy: 0.8982 - loss: 0.2884 - val_accuracy: 0.9770 - val_loss: 0.0914
Epoch 4/20
[1m20501/20501[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 579us/step - accuracy: 0.9841 - loss: 0.0690 - val_accuracy: 0.9926 - val_loss: 0.0375
Epoch 5/20
[1m20501/20501[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 579us/step - accuracy: 0.9921 - loss: 0.0365 - val_accuracy: 0.9946 - val_loss: 0.0298
Epoch 6/20
[1m20501/20501[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 582us/step - accuracy: 0.9938 - loss: 0.0287 - val_accuracy: 0.9690 - val

In [53]:
# Evaluate the model on the test data
test_loss, test_accuracy = model.evaluate(X_test, y_test, verbose=0)
print(f'Test Loss: {test_loss:.2f}')
print(f'Test Accuracy: {test_accuracy:.2f}')

Test Loss: 0.01
Test Accuracy: 1.00


In [54]:
# Predict classes for the test data
y_pred = np.argmax(model.predict(X_test), axis=1)
y_test_labels = np.argmax(y_test, axis=1)


[1m6407/6407[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 258us/step


In [55]:
# Print a classification report
print(classification_report(y_test_labels, y_pred, target_names=[str(cls) for cls in encoder.classes_]))


              precision    recall  f1-score   support

           0       1.00      1.00      1.00    102428
           1       1.00      1.00      1.00     86945
           2       0.99      1.00      0.99      9691
           3       1.00      1.00      1.00      4352
           4       0.81      0.88      0.85       808
           5       0.99      0.26      0.41       405
           6       1.00      1.00      1.00       308
           7       1.00      0.98      0.99        60
           8       0.00      0.00      0.00         3
           9       0.00      0.00      0.00         2

    accuracy                           1.00    205002
   macro avg       0.78      0.71      0.72    205002
weighted avg       1.00      1.00      1.00    205002



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
