# Hand Clustering

Experimenting with bucketing hands together using different metrics.

Idea: Each time a public chance node is reached, a new board is created.  We group all hand combinations from both players ranges into these buckets

### Imports

In [90]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from time import time
from sklearn.cluster import KMeans
from sklearn import metrics
from sklearn.preprocessing import scale

from poker.hand import Range
from treys import Card, Deck, Evaluator

Helper Function to build integer based ranges from poker.hand ranges

In [2]:
def build_range(combos):
    # convert pretty suit to char
    def suit_to_char(s):
        if s == chr(9824): return 's'
        if s == chr(9829): return 'h'
        if s == chr(9830): return 'd'
        if s == chr(9827): return 'c'
    r = []
    for combo in combos:
        c1_str = str(combo.first)
        c1 = c1_str[0] + suit_to_char(c1_str[1])
        c2_str = str(combo.second)
        c2 = c2_str[0] + suit_to_char(c2_str[1])
        r.append((Card.new(c1), Card.new(c2)))
    return r

def cards_to_str(combo):
    s = ""
    for c in combo:
        s += Card.int_to_str(c) + " "
    return s

### Create Data for testing
Make hand ranges + board

In [47]:
# hand_range_1 = build_range(Range('22+ A2+ K2+ Q2+ J2+ T2+ 92+ 82+ 72+ 62+ 52+ 42+ 32+').combos)
hand_range_1 = build_range(Range('33+ ATo+ A8s+ KTs+ KJ+ QJ+ JTs T9s').combos)
hand_range_2 = build_range(Range('33+ ATo+ A8s+ KTs+ KJ+ QJ+ JTs T9s').combos)
board = [Card.new('Td'), Card.new('4h'), Card.new('5h'), Card.new('6s'), Card.new('7h')]

### Feature Engineering

First, we have to...
 - combine both ranges
 - remove duplicates
 - remove combos that conflict with board

In [48]:
combined_range = list(set(hand_range_1 + hand_range_2))
combined_range = list(filter(lambda c: not c[0] in board and not c[1] in board, combined_range))

Next, my first idea is to capture different features of each hand such as
 - Hand score
 - Hand score squared
 - Hand score per card

In [70]:
evaluator = Evaluator()

# create object to store data
data = pd.DataFrame(data=combined_range, columns=['card1', 'card2'])
# create map to store card scores
card_scores = dict()
# create map to store combo scores
scores = []
for (i, combo) in enumerate(combined_range):
    score = evaluator.evaluate(list(combo), board)
    scores.append(score)
    try: card_scores[combo[0]] += score
    except: card_scores[combo[0]] = score
    try: card_scores[combo[1]] += score
    except: card_scores[combo[1]] = score
        
# normalize value, could do this a better way
# max_score = np.max(scores)
# scores /= max_score
# add to object
data['ev'] = scores
data['ev_2'] = data['ev'] * data['ev']
# add card scores
data['card1_ev'] = data.apply(lambda row: card_scores[row['card1']], axis=1)
data['card2_ev'] = data.apply(lambda row: card_scores[row['card2']], axis=1)
# normalize card scores
# data['card1_ev'] = 1 - (data['card1_ev'] / np.max(data['card1_ev']))
# data['card2_ev'] = 1 - (data['card2_ev'] / np.max(data['card2_ev']))

data.head()

Unnamed: 0,card1,card2,ev,ev_2,card1_ev,card2_ev
0,1065995,1082379,2178,4743684,4356,4356
1,1057803,1082379,2178,4743684,4356,4356
2,1057803,1065995,2178,4743684,4356,4356
3,8406803,8423187,4596,21123216,20348,24724
4,8398611,8423187,4596,21123216,16051,24724


### Testing
Now lets try and use KMeans to group the data

In [100]:
X_train = data[['ev', 'ev_2', 'card1_ev', 'card2_ev']]
n_clusters = 87
estimator = KMeans(init='k-means++', n_clusters=n_clusters, n_init=10)
estimator.fit(X_train)

KMeans(algorithm='auto', copy_x=True, init='k-means++', max_iter=300,
       n_clusters=87, n_init=10, n_jobs=None, precompute_distances='auto',
       random_state=None, tol=0.0001, verbose=0)

Create a dict of all hands in their cluster to see

In [101]:
clusters = [None] * n_clusters
for (i, c) in enumerate(combined_range):
    bucket = estimator.predict(X_train[i:i+1])[0]
    if clusters[bucket] == None:
        clusters[bucket] = [cards_to_str(c)]
    else:
        clusters[bucket].append(cards_to_str(c))

### Results

In [120]:
print(cards_to_str(board))
print(clusters[10])
print("Unused:", n_clusters - sum(x is not None for x in clusters))

Td 4h 5h 6s 7h 
['Ad Qd ', 'Ac Qc ', 'As Qs ', 'Ac Qs ', 'Ac Qd ', 'Ad Qc ', 'Ad Qs ', 'As Qc ', 'As Qd ']
Unused: 0


This seems to work pretty well.  It's important to note that this will only work on the river.