In [12]:
# Import the modules
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
from sklearn.preprocessing import LabelEncoder
import pandas as pd

In [2]:
# Read the CSV file from the Resources folder into a Pandas DataFrame
blackjack_df = pd.read_csv("Resources/blkjckhands_reduced.csv")

# Review the DataFrame
blackjack_df.head()

Unnamed: 0,PlayerName,PlayerNo,card1,card2,card3,card4,card5,sumofcards,dealcard1,dealcard2,...,dealcard4,dealcard5,sumofdeal,blkjck,winloss,plybustbeat,dlbustbeat,plwinamt,dlwinamt,ply2cardsum
0,0,Player1,7,10,0,0,0,17,10,8,...,0,0,18,nowin,Loss,Beat,Dlwin,0,10,17
1,1,Player2,10,9,0,0,0,19,10,8,...,0,0,18,nowin,Win,Plwin,Beat,20,0,19
2,2,Player3,9,8,0,0,0,17,10,8,...,0,0,18,nowin,Loss,Beat,Dlwin,0,10,17
3,3,Player4,2,10,0,5,0,17,10,8,...,0,0,18,nowin,Loss,Beat,Dlwin,0,10,12
4,4,Player5,10,2,0,5,0,17,10,8,...,0,0,18,nowin,Loss,Beat,Dlwin,0,10,12


In [5]:
# Separate the data into labels and features
label_encoder = LabelEncoder()
blackjack_df['winloss_encoded'] = label_encoder.fit_transform(blackjack_df['winloss'])

# Separate the y variable, the labels
y = blackjack_df['winloss_encoded']

# Separate the X variable, the features
X = blackjack_df[['card1','card2']]

In [6]:
# Review the y variable Series
print(y)

0         0
1         2
2         0
3         0
4         0
         ..
299995    0
299996    0
299997    0
299998    0
299999    1
Name: winloss_encoded, Length: 300000, dtype: int32


In [7]:
# Review the X variable DataFrame
X

Unnamed: 0,card1,card2
0,7,10
1,10,9
2,9,8
3,2,10
4,10,2
...,...,...
299995,5,8
299996,4,2
299997,2,8
299998,7,5


In [10]:
# Split the data using train_test_split
# Assign a random_state to the function
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1, stratify=y)

In [13]:
# Instantiate the Logistic Regression model
# Assign a random_state parameter to the model
classifier = LogisticRegression(solver='lbfgs', random_state=1)
classifier

# Fit the model using training data
classifier.fit(X_train, y_train)

In [14]:
# Make a prediction using the testing data
predictions = classifier.predict(X_test)

In [15]:
#Display predictions in a Dataframe
pd.DataFrame({"Prediction": predictions, "Actual": y_test})

Unnamed: 0,Prediction,Actual
18347,0,1
165043,2,0
222881,0,0
11634,2,1
20162,2,2
...,...,...
104452,0,0
191169,2,2
230375,0,0
105888,2,2


In [16]:
# Generate a confusion matrix for the model
cm = confusion_matrix(y_test, predictions)
cm_df = pd.DataFrame(
    cm, index=["Actual Push", "Actual Loss", "Actual Win"], 
    columns=["Predicted Push", "Predicted Loss", "Predicted Win"]
)

print("Confusion Matrix")
display(cm_df)

Confusion Matrix


Unnamed: 0,Predicted Push,Predicted Loss,Predicted Win
Actual Push,25201,0,10456
Actual Loss,3058,0,3941
Actual Win,15990,0,16354


In [17]:
# Calculating the accuracy score
accuracy = accuracy_score(y_test, predictions)

In [18]:
# Print the classification report for the model
print(f"Accuracy Score : {accuracy}")
print("Classification Report")
print(classification_report(y_test, predictions))

Accuracy Score : 0.5540666666666667
Classification Report
              precision    recall  f1-score   support

           0       0.57      0.71      0.63     35657
           1       0.00      0.00      0.00      6999
           2       0.53      0.51      0.52     32344

    accuracy                           0.55     75000
   macro avg       0.37      0.40      0.38     75000
weighted avg       0.50      0.55      0.52     75000



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
