In [36]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import pickle
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import  train_test_split
from sklearn.linear_model import LogisticRegression, LinearRegression, Lasso, Ridge
from sklearn.metrics import accuracy_score, mean_absolute_error, r2_score

In [4]:
df = pd.read_csv("poker_hand_train_true.data")
df.columns = ["suit_1", "rank_1", "suit_2", "rank_2",
              "suit_3", "rank_3", "suit_4", "rank_4",
              "suit_5", "rank_5", "hand"]

In [None]:
df.head()

In [6]:
df["hand"] = df["hand"].map({
    0: "nothing", 1: "One pairs", 2: "Two pairs", 3: "Three of a kind",
    4: "Straight", 5: "Flush", 6: "Full house", 7: "Four of a kind",
    8: "Straight flush", 9: "Royal flush"
})

In [None]:
df.head()

In [None]:
plt.figure(figsize=(16, 12))
sns.countplot(x="hand", data=df)
plt.show()

In [10]:
X = df.drop("hand", axis=1)
y = df["hand"]

In [11]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=4242)

In [23]:
rf_model = RandomForestClassifier(n_estimators=1000)
rf_model.fit(X_train, y_train)
y_pred = rf_model.predict(X_test)
accuracy_score(y_test, y_pred)

0.6339464214314274

In [27]:
dt_model = DecisionTreeClassifier(max_depth=5, min_samples_leaf=100)
dt_model.fit(X_train, y_train)
y_pred = dt_model.predict(X_test)
accuracy_score(y_test, y_pred)

0.5483806477409037

In [28]:
df["hand"] = df["hand"].map({
    "nothing": 0, "One pairs": 1, "Two pairs": 2, "Three of a kind": 3,
    "Straight": 4, "Flush": 5, "Full house": 6, "Four of a kind": 7,
    "Straight flush": 8, "Royal flush": 9
})

In [29]:
X = df.drop("hand", axis=1)
y = df["hand"]

In [30]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=4242)

In [33]:
logreg = LogisticRegression()
logreg.fit(X_train, y_train)
y_pred = logreg.predict(X_test)
accuracy_score(y_test, y_pred)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.5031987205117953

In [37]:
linreg = LinearRegression()
linreg.fit(X_train, y_train)
y_pred = linreg.predict(X_test)
print("R2: ", r2_score(y_test, y_pred))
print("MAE: ", mean_absolute_error(y_test, y_pred))

R2:  8.448313194242552e-05
MAE:  0.6226342740184215


In [38]:
lasso = Lasso()
lasso.fit(X_train, y_train)
y_pred = lasso.predict(X_test)
print("R2: ", r2_score(y_test, y_pred))
print("MAE: ", mean_absolute_error(y_test, y_pred))

R2:  -1.0796571683568956e-05
MAE:  0.6227274372402286


In [39]:
ridge = Ridge()
ridge.fit(X_train, y_train)
y_pred = ridge.predict(X_test)
print("R2: ", r2_score(y_test, y_pred))
print("MAE: ", mean_absolute_error(y_test, y_pred))

R2:  8.44907970375397e-05
MAE:  0.6226342720303358


In [40]:
pickle.dump(rf_model, open("rf.pkl", "wb"))