# Predicting Poker Hand

### STEP 1: IMPORTING LIBRARIES

In [None]:
import pandas as pd

import matplotlib.pyplot as plt

%matplotlib inline

### STEP 2: LOADING THE DATASET

In [None]:
# Read the raw poker-hand records.
# NOTE(review): assumes the file has no header row (as in the UCI poker-hand
# distribution) -- confirm. Without header=None pandas would consume the first
# record as column names, and that record would be lost once the columns are
# renamed in the next step.
poker_df = pd.read_csv('Data/poker_hand_test.data', header=None)

# Preview only the first rows instead of rendering the whole frame.
poker_df.head()

### STEP 3: ANALYSING AND PRE-PROCESSING THE DATA

### STEP 4: RENAMING THE COLUMNS ACCORDING TO THE DOCUMENTATION OF THE DATASET'S SOURCE

In [None]:
# Build the column names: a suit/rank pair for each of the five cards,
# followed by the target column `hand`.
card_positions = ('first', 'second', 'third', 'fourth', 'fifth')
card_columns = [
    position + '_' + attribute
    for position in card_positions
    for attribute in ('suit', 'rank')
]
poker_df.columns = card_columns + ['hand']

# Human-readable names for the ten hand classes
# (presumably ordered to match the integer codes 0-9 in `hand` -- confirm
# against the dataset description).
labels = [
    'zilch',
    'one_pair',
    'two_pair',
    'three_of_a_kind',
    'straight',
    'flush',
    'full_house',
    'four_of_a_kind',
    'straight_flush',
    'royal_flush',
]

poker_df

In [None]:
### 4) SEPARATING THE DATASET INTO FEATURES (X) AND TARGET VALUES (y)
# Features are every column except the target. The previous positional slice
# `iloc[:, 0:9]` stopped one column short and silently dropped `fifth_rank`;
# dropping the target by name keeps all ten card columns.
X = poker_df.drop(columns='hand')
y = poker_df.hand

### STEP 5: VISUALIZING WHETHER CLASS BALANCE IS PRESENT IN OUR DATASET OR NOT

In [None]:
from yellowbrick.classifier import ClassBalance, ROCAUC, ClassPredictionError

# Plot the class distribution of the target `y`, using `labels` for the class names.
balance = ClassBalance(labels=labels, size=(1080, 720))
balance.fit(y)
balance.show()

##### THUS THERE IS CLASS IMBALANCE PRESENT IN OUR DATASET, AND WE MUST REDUCE IT

### STEP 6: MERGING THE RAREST CLASSES (FLUSH AND ABOVE) INTO A SINGLE CLASS

In [None]:
# Collapse every hand coded 5 or higher into the single class 5.
flush_or_better = poker_df['hand'].ge(5)
poker_df.loc[flush_or_better, 'hand'] = 5

# Re-read the target so it reflects the merged classes.
y = poker_df.hand

# Class names after the merge: codes 0-4 keep their names, 5 is the merged class.
labels = [
    'zilch',
    'one_pair',
    'two_pair',
    'three_of_a_kind',
    'straight',
    'flush_or_better',
]

# Re-plot the class distribution with the reduced label set.
balance = ClassBalance(labels=labels, size=(1080, 720))
balance.fit(y)
balance.show()

### STEP 7: TRAINING THE MULTI-LAYER PERCEPTRON (MLP) CLASSIFIER

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.neural_network import MLPClassifier

# Fixed seed so the split and the network's weight initialisation are
# reproducible under Restart Kernel -> Run All.
RANDOM_STATE = 42

# 50/50 hold-out split. `stratify=y` keeps the class proportions identical in
# both halves, which matters because the classes are heavily imbalanced.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.5, stratify=y, random_state=RANDOM_STATE
)

# Three hidden layers (50, 100, 50) with tanh activations, trained with Adam.
CLF = MLPClassifier(
    solver='adam',
    alpha=0.05,
    hidden_layer_sizes=(50, 100, 50),
    # NOTE: per the scikit-learn docs `learning_rate` is only used when
    # solver='sgd'; it is kept here so switching solvers preserves the setting.
    learning_rate='adaptive',
    activation='tanh',
    random_state=RANDOM_STATE,
)

CLF.fit(X_train, y_train)

### STEP 8: EVALUATING THE MODEL

In [None]:
#### A) CLASSIFICATION ACCURACY

In [None]:
from sklearn.metrics import accuracy_score, roc_auc_score, f1_score, recall_score, precision_score

# Predict on both splits with the fitted classifier.
y_pred_test = CLF.predict(X_test)
y_pred_train = CLF.predict(X_train)

# Fraction of correctly classified samples on each split.
train_accuracy = accuracy_score(y_train, y_pred_train)
test_accuracy = accuracy_score(y_test, y_pred_test)

print("Accuracy of the Model on Train Data is : {}".format(train_accuracy))
print("Accuracy of the Model on Test Data is : {}".format(test_accuracy))

In [None]:
### B) ROC CURVE AND AUC:
from yellowbrick.classifier import ROCAUC

# Per-class ROC curves and AUC for the trained classifier.
rocauc = ROCAUC(CLF, size=(1080, 720), classes=labels)

# Follow the documented fit -> score -> show sequence. Calling score() on a
# visualizer that was never fit skips the setup done in fit().
# NOTE(review): with yellowbrick >= 1.0 an already-fitted estimator is expected
# to be detected and not retrained here -- confirm for the installed version.
rocauc.fit(X_train, y_train)
rocauc.score(X_test, y_test)
rocauc.show()

### STEP 9: CLASSIFICATION REPORT HEATMAP

In [None]:
from yellowbrick.classifier import ClassificationReport

# Heatmap of the classification report for the trained classifier on the test split.
report = ClassificationReport(
    CLF,
    classes=labels,
    cmap='PuBu',
    size=(720, 640),
)
report.score(X_test, y_test)
report.show()

### STEP 10: CLASS PREDICTION ERROR

In [None]:
# Class prediction error chart for the trained classifier on the test split.
error = ClassPredictionError(CLF, size=(1080, 720), classes=labels)
error.score(X_test, y_test)
# `poof()` is the deprecated alias of `show()`; use `show()` like the other
# visualizers in this notebook.
error.show()