In [1]:
import numpy as np
import pandas as pd
from sklearn.metrics import f1_score,precision_score,recall_score,accuracy_score
from tqdm import tqdm

In [2]:
train_data_path = '../pkhnd/99a27ff0-e213-4d84-a253-75f2dc1d4836_train/train.csv'

In [3]:
train_data = pd.read_csv(train_data_path)

In [4]:
train_data.head()

Unnamed: 0,S1,C1,S2,C2,S3,C3,S4,C4,S5,C5,label
0,1,1,1,13,2,4,2,3,1,12,0
1,3,12,3,2,3,11,4,5,2,5,1
2,1,9,4,6,1,4,3,2,3,9,1
3,1,4,3,13,2,13,2,1,3,6,1
4,3,10,2,7,1,2,2,11,4,9,0


In [21]:
def hand_check(hand_data):
    '''
        0: Nothing in hand; not a recognized poker hand 
        1: One pair; one pair of equal ranks within five cards
        2: Two pairs; two pairs of equal ranks within five cards
        3: Three of a kind; three equal ranks within five cards
        4: Straight; five cards, sequentially ranked with no gaps
        5: Flush; five cards with the same suit
        6: Full house; pair + different rank three of a kind
        7: Four of a kind; four equal ranks within five cards
        8: Straight flush; straight + flush
        9: Royal flush; {Ace, King, Queen, Jack, Ten} + flush
    '''
    val = 0
    
    suits = []
    ranks = []
    
    for i in range(0, 10, 2):
        suits.append(hand_data[i])
        ranks.append(hand_data[i+1])
        
    suits = np.array(suits)
    ranks = np.array(ranks)
    
    uniq_r, counts_r = np.unique(ranks, return_counts=True)
    uniq_s, counts_s = np.unique(suits, return_counts=True)
    
    #checking for one pair, two pairs, three of a kind and four of a kind
    cnt = 0
    for i in range(len(counts_r)):
        if counts_r[i] > 1:
            cnt += 1
            if counts_r[i] == 3:
                val = max(val, 3)
            if counts_r[i] == 4:
                val = max(val, 7)
    if cnt == 1:
        val = max(val, 1)
    if cnt == 2:
        val = max(val, 2)
        
    #checking for flush
    if len(uniq_s) == 1:
        val = max(val, 5)
    
    #checking for straight, straight flush and royal flush
    check = 1
    check2 = 0
    sorted_uniq_r = sorted(uniq_r)
    if len(uniq_r) == 5:
        for i in range(1, 5):
            if sorted_uniq_r[i] != sorted_uniq_r[i-1]+1:
                check = 0
        if sorted_uniq_r[0] == 1:
            if sorted_uniq_r[1] == 10 and sorted_uniq_r[2] == 11 and sorted_uniq_r[3] == 12 and sorted_uniq_r[4] == 13:
                check2 = 1
            
        if check == 1 or check2 == 1:
            val = max(val, 4)
            if len(uniq_s) == 1:
                val = max(val, 8)
                if check2 == 1:
                    val = max(val, 9)
                    
    #checking for full house
    cnt2 = 0
    cnt3 = 0
    for i in range(len(counts_r)):
        if counts_r[i] == 2:
            cnt2 = 1
        if counts_r[i] == 3:
            cnt3 = 1
    if cnt3 == cnt2 and cnt2 == 1:
        val = max(val, 6)
        
    return val

In [22]:
# row = np.array(train_data.iloc[100,:])
# print(row, row[0:10])
# hand_check(row[0:10])
train_data.shape

(1000000, 11)

In [26]:
def run_model(data_frame, mode='train'):
    rows = data_frame.shape[0]
    
    res = []
    
    for i in tqdm(range(rows)):
        res.append(hand_check(np.array(data_frame.iloc[i, :])[0:10]))
    
    if mode == 'train':
        acc = 0
        for i in range(rows):
            if res[i] == np.array(data_frame.iloc[i, :])[10]:
                acc += 1
            
        return res, acc/rows
    elif mode == 'test':
        return res

In [24]:
res, acc = run_model(train_data)

100%|██████████| 1000000/1000000 [05:45<00:00, 2897.90it/s]


In [25]:
acc

1.0

In [28]:
test_data_path = '../pkhnd/55afe0ff-5708-4005-9836-ecd85c695595_test.csv'
test_data = pd.read_csv(test_data_path)
submission = run_model(test_data, mode='test')

100%|██████████| 25010/25010 [00:05<00:00, 4237.64it/s]


In [29]:
submission = pd.DataFrame(submission)
submission.to_csv('submission.csv',header=['label'],index=False)