In [None]:
import torch
import numpy as np
from torch.utils.data import TensorDataset, DataLoader
import torch.nn as nn
import torch.nn.functional as F
from torch.nn.utils.rnn import pad_sequence

In [41]:
def pad_to_length(tensor_list, max_len=7):
    padded = pad_sequence(tensor_list, batch_first=True, padding_value=0)
    if padded.size(1) < max_len:
        pad_size = max_len - padded.size(1)
        padded = F.pad(padded, (0, pad_size), value=0)
    else:
        padded = padded[:, :max_len]
    return padded

# Function to process and pad .npy dataset
def process_round_data(npy_file, max_len=7):
    data = np.load(npy_file)
    x = data[:, :-1]
    y = data[:, -1]
    x_tensor_list = [torch.tensor(seq, dtype=torch.long) for seq in x]
    x_padded = pad_to_length(x_tensor_list, max_len=max_len)
    y_tensor = torch.tensor(y, dtype=torch.float)
    return x_padded, y_tensor

# === Load and process each round ===
x_first, y_first = process_round_data('data_for_first_round_Q.npy', max_len=7)
x_second, y_second = process_round_data('data_for_second_round_Q.npy', max_len=7)
x_third, y_third = process_round_data('data_for_third_round_Q.npy', max_len=7)          

# Stack all tensors into shape (3, N, features)
x_stacked = torch.stack([x_first, x_second, x_third], dim=1)  # shape: (N, 3, features)
x_all = x_stacked.reshape(-1, x_first.shape[1])

y_stacked = torch.stack([y_first, y_second, y_third], dim=1)  # shape: (N, 3)
y_all = y_stacked.reshape(-1)

# === Final dataset and dataloader ===
dataset_all = TensorDataset(x_all, y_all)
dataloader_all = DataLoader(dataset_all, batch_size=32, shuffle=True)

# === Inspect sizes ===
print("First round input shape:", x_first.shape)
print("First round label shape:", y_first.shape)

print("Second round input shape:", x_second.shape)
print("Second round label shape:", y_second.shape)

print("Third round input shape:", x_third.shape)
print("Third round label shape:", y_third.shape)

print("Combined input shape:", x_all.shape)
print("Combined label shape:", y_all.shape)

First round input shape: torch.Size([464100, 7])
First round label shape: torch.Size([464100])
Second round input shape: torch.Size([464100, 7])
Second round label shape: torch.Size([464100])
Third round input shape: torch.Size([464100, 7])
Third round label shape: torch.Size([464100])
Combined input shape: torch.Size([1392300, 7])
Combined label shape: torch.Size([1392300])


In [None]:
class EmbeddingNetLinear(nn.Module):
    def __init__(self, num_cards=7, embed_dim=16):
        super(EmbeddingNetLinear, self).__init__()
        self.num_cards = num_cards
        self.embed_dim = embed_dim

        self.embedding = nn.Embedding(53, embed_dim, padding_idx=0)

        self.fc1 = nn.Linear(num_cards * embed_dim, 64)  # Increased capacity
        #self.fc2 = nn.Linear(128, 64)                     # New layer
        self.fc3 = nn.Linear(64, 32)
        self.fc4 = nn.Linear(32, 1)                       # Output layer

    def forward(self, x):
        embedded = self.embedding(x)  # Shape: [batch_size, num_cards, embed_dim]
        flat = embedded.view(x.size(0), self.num_cards * self.embed_dim)
        out = F.relu(self.fc1(flat))
        #out = F.relu(self.fc2(out))
        out = F.relu(self.fc3(out))
        return self.fc4(out).squeeze(1)
    

class EmbeddingNetConv1D(nn.Module):
    def __init__(self, num_cards=7, embed_dim=16):
        super(EmbeddingNetConv1D, self).__init__()
        self.num_cards = num_cards
        self.embed_dim = embed_dim

        self.embedding = nn.Embedding(53, embed_dim, padding_idx=0)

        self.conv1 = nn.Conv1d(in_channels=embed_dim, out_channels=16, kernel_size=2)
        self.conv2 = nn.Conv1d(in_channels=16, out_channels=32, kernel_size=3)
        self.conv3 = nn.Conv1d(in_channels=32, out_channels=64, kernel_size=4)

        self.pool = nn.AdaptiveMaxPool1d(1)  # Reduces to [batch, 64, 1]

        self.fc = nn.Linear(64, 1)  # Final output layer

    def forward(self, x):
        x = self.embedding(x)           # [batch, 7, embed_dim]
        x = x.transpose(1, 2)           # [batch, embed_dim, 7]
        x = F.relu(self.conv1(x))       # [batch, 32, 6]
        x = F.relu(self.conv2(x))       # [batch, 64, 4]
        x = F.relu(self.conv3(x))       # [batch, 64, 1]
        x = self.pool(x).squeeze(-1)    # [batch, 64]
        return self.fc(x).squeeze(1)

In [30]:
def train_model(model, dataloader, epochs=10, lr=0.001):

    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    criterion = nn.MSELoss() 

    for epoch in range(1, epochs + 1):
        model.train()
        total_loss = 0
        correct = 0
        total = 0

        for x_batch, y_batch in dataloader:
            x_batch = x_batch
            y_batch = y_batch

            optimizer.zero_grad()
            preds = model(x_batch)

            loss = criterion(preds, y_batch)
            loss.backward()
            optimizer.step()

            total_loss += loss.item()
            predicted = (preds > 0.5).float()
            # Accuracy: prediction within ±tolerance of target
            within_tolerance = (torch.abs(preds - y_batch) <= 0.1).float()
            correct += within_tolerance.sum().item()
            total += y_batch.size(0)

        acc = correct / total
        print(f"Epoch {epoch}/{epochs} | Loss: {total_loss:.4f} | Accuracy: {acc:.4f}")

In [76]:
#convModel = EmbeddingNetConv1D(7, 12) 

#train_model(convModel, dataloader_all, 100, 0.001)
## simple linear 85%
## simple linear 92% with 16 embedding and 0.01
linearModel = EmbeddingNetLinear(7, 12)

train_model(linearModel, dataloader_all, 20, 0.001)

Epoch 1/20 | Loss: 1557679.2357 | Accuracy: 0.0009
Epoch 2/20 | Loss: 1386813.8458 | Accuracy: 0.0014
Epoch 3/20 | Loss: 1328058.8799 | Accuracy: 0.0017
Epoch 4/20 | Loss: 1327648.3076 | Accuracy: 0.0021
Epoch 5/20 | Loss: 1313687.8679 | Accuracy: 0.0028
Epoch 6/20 | Loss: 1305188.8278 | Accuracy: 0.0033
Epoch 7/20 | Loss: 1302681.3804 | Accuracy: 0.0035
Epoch 8/20 | Loss: 1288982.4100 | Accuracy: 0.0035
Epoch 9/20 | Loss: 1285096.5682 | Accuracy: 0.0035
Epoch 10/20 | Loss: 1294963.1494 | Accuracy: 0.0040
Epoch 11/20 | Loss: 1284259.2015 | Accuracy: 0.0040
Epoch 12/20 | Loss: 1294057.9730 | Accuracy: 0.0041
Epoch 13/20 | Loss: 1298087.1357 | Accuracy: 0.0040
Epoch 14/20 | Loss: 1299889.8975 | Accuracy: 0.0042
Epoch 15/20 | Loss: 1295301.5253 | Accuracy: 0.0042
Epoch 16/20 | Loss: 1292186.6730 | Accuracy: 0.0043
Epoch 17/20 | Loss: 1290827.1182 | Accuracy: 0.0044
Epoch 18/20 | Loss: 1282046.0559 | Accuracy: 0.0046
Epoch 19/20 | Loss: 1282259.7582 | Accuracy: 0.0045
Epoch 20/20 | Loss: 1

## Get thresholds

In [77]:
from torch.utils.data import DataLoader
import torch
import pandas as pd

# 1. Create DataLoader
dataloader = DataLoader(dataset_all, batch_size=264, shuffle=False)

# 2. Set model to eval mode
linearModel.eval()

# 3. Collect everything
all_preds = []
all_inputs = []
all_targets = []

with torch.no_grad():
    for x_batch, y_batch in dataloader:
        preds = linearModel(x_batch)

        all_inputs.append(x_batch)
        all_targets.append(y_batch)
        all_preds.append(preds)

# 4. Concatenate everything
all_inputs = torch.cat(all_inputs, dim=0)    # shape: (N, 7)
all_targets = torch.cat(all_targets, dim=0)  # shape: (N,)
all_preds = torch.cat(all_preds, dim=0)      # shape: (N,)

# 5. Convert to NumPy for use with pandas
inputs_np = all_inputs.cpu().numpy()
targets_np = all_targets.cpu().numpy()
preds_np = all_preds.cpu().numpy()

# 6. Create DataFrame
columns = [f"C{i+1}" for i in range(inputs_np.shape[1])]
df_with_pred = pd.DataFrame(inputs_np, columns=columns)
df_with_pred["y_true"] = targets_np
df_with_pred["y_pred"] = preds_np

In [78]:
print(df_with_pred.head(10))

   C1  C2  C3  C4  C5  C6  C7  y_true    y_pred
0   1   2   0   0   0   0   0     5.0 -2.250086
1   1   2  38  26   5   0   0     3.0 -2.688885
2   1   2  38  26   5  31  52     2.0 -1.908117
3   1   2   0   0   0   0   0    -6.0 -2.250086
4   1   2  38  26   5   0   0    -4.0 -2.688885
5   1   2  38  26   5  31  52    -3.0 -1.908117
6   1   2   0   0   0   0   0    -6.0 -2.250086
7   1   2  38  26   5   0   0    -4.0 -2.688885
8   1   2  38  26   5  31  52    -3.0 -1.908117
9   1   2   0   0   0   0   0    -6.0 -2.250086


### Quantile

In [73]:
thresholds_quantile = {}
df_with_pred["round"] = df_with_pred.index % 3  # 0, 1, 2 repeating
df_with_pred["hand_id"] = df_with_pred.index // 3
for round_num in [0, 1, 2]:
    profitable_preds = df_with_pred[(df_with_pred["round"] == round_num)]["y_pred"]
    thresholds_quantile[round_num] = profitable_preds.quantile(0.60)  # or try 0.5 for median

print(thresholds_quantile)

{0: np.float64(-0.13816523551940918), 1: np.float64(-0.23101568222045898), 2: np.float64(-0.2757043600082424)}


### Brute force

In [83]:
df_with_pred["round"] = df_with_pred.index % 3  # 0, 1, 2 repeating
df_with_pred["hand_id"] = df_with_pred.index // 3
# Set random seed for reproducibility
np.random.seed(42)

# Get unique hand_ids
unique_hands = df_with_pred["hand_id"].unique()

# Sample a % of them
sampled_hands = np.random.choice(unique_hands, size=int(0.02 * len(unique_hands)), replace=False)

# Filter the DataFrame to keep only those hands
df_sample = df_with_pred[df_with_pred["hand_id"].isin(sampled_hands)].copy()
def evaluate_thresholds(df, thresholds):
    total_return = 0
    total_bet = 0

    for hand_id, group in df.groupby("hand_id"):
        group = group.sort_values("round")  # preflop → river
        bet_made = False

        for _, row in group.iterrows():
            threshold = thresholds[row["round"]]
            if row["y_pred"] >= threshold:
                total_return += row["y_true"]

                # Bet size depending on the round
                if row["round"] == 0:
                    total_bet += 6
                elif row["round"] == 1:
                    total_bet += 4
                else:
                    total_bet += 3

                bet_made = True
                break

        if not bet_made:
            total_return += -2  # penalty for folding

    return total_return / total_bet if total_bet > 0 else float('-inf')

In [None]:
import numpy as np
from itertools import product
import time

# Threshold ranges for preflop, flop, river
threshold_space = np.linspace(-3, 0, 5)  # adjust based on model output range

# Count total combinations
total_combinations = len(threshold_space) ** 3
checked = 0

best_thresholds = None
best_return = float('-inf')

start_time = time.time()

for t_pre, t_flop, t_river in product(threshold_space, repeat=3):
    thresholds = {0: t_pre, 1: t_flop, 2: t_river}
    total_ret = evaluate_thresholds(df_sample, thresholds)

    checked += 1
    if checked % 5 == 0 or checked == total_combinations:
        elapsed = time.time() - start_time
        print(f"Checked {checked}/{total_combinations} ({(checked/total_combinations)*100:.2f}%) in {elapsed:.1f}s")

    if total_ret > best_return:
        best_return = total_ret
        best_thresholds = thresholds

total_time = time.time() - start_time

print("\nSearch complete.")
print("Best thresholds:", best_thresholds)
print("Max return:", best_return)
print(f"Total combinations checked: {checked}")
print(f"Time taken: {total_time:.2f} seconds")


Checked 5/125 (4.00%) in 17.1s
Checked 10/125 (8.00%) in 33.7s
Checked 15/125 (12.00%) in 51.8s
Checked 20/125 (16.00%) in 69.5s
Checked 25/125 (20.00%) in 85.6s
Checked 30/125 (24.00%) in 101.6s
Checked 35/125 (28.00%) in 116.9s
Checked 40/125 (32.00%) in 132.2s
Checked 45/125 (36.00%) in 149.0s
Checked 50/125 (40.00%) in 165.7s
Checked 55/125 (44.00%) in 182.9s
Checked 60/125 (48.00%) in 199.8s
Checked 65/125 (52.00%) in 217.0s
Checked 70/125 (56.00%) in 234.0s
Checked 75/125 (60.00%) in 250.0s
Checked 80/125 (64.00%) in 267.3s
Checked 85/125 (68.00%) in 284.5s
Checked 90/125 (72.00%) in 302.6s
Checked 95/125 (76.00%) in 321.3s
Checked 100/125 (80.00%) in 340.4s
Checked 105/125 (84.00%) in 359.8s
Checked 110/125 (88.00%) in 378.9s
Checked 115/125 (92.00%) in 399.3s
Checked 120/125 (96.00%) in 420.7s
Checked 125/125 (100.00%) in 441.6s

Search complete.
Best thresholds: {0: np.float64(-0.25), 1: np.float64(-0.25), 2: np.float64(-2.75)}
Max return: -0.03928958518080599
Total combinatio

### SCIPY

In [None]:
from scipy.optimize import minimize
import numpy as np

# Objective function: negative of your custom evaluate_thresholds
def objective(threshold_array):
    thresholds = {0: threshold_array[0], 1: threshold_array[1], 2: threshold_array[2]}
    print(f"Evaluating thresholds: {thresholds}")
    return -evaluate_thresholds(df_sample, thresholds)

# Initial guess (you can tweak this)
initial_guess = [-0.5, -0.5, -0.5]

# Optional: bounds for each threshold
bounds = [(-4, 0), (-4, 0), (-4, 0)]

result = minimize(
    objective,
    initial_guess,
    method='Powell',  # or 'Powell' or 'L-BFGS-B'
    bounds=bounds,
    options={'disp': True, 'maxiter': 30}
)

# Extract results
best_thresholds = {i: t for i, t in enumerate(result.x)}
max_return = -result.fun

print("\n✅ Optimization complete.")
print("Best thresholds:", best_thresholds)
print("Max return:", max_return)

Evaluating thresholds: {0: np.float64(-0.5), 1: np.float64(-0.5), 2: np.float64(-0.5)}
Evaluating thresholds: {0: np.float64(-0.49999999), 1: np.float64(-0.5), 2: np.float64(-0.5)}
Evaluating thresholds: {0: np.float64(-0.5), 1: np.float64(-0.49999999), 2: np.float64(-0.5)}
Evaluating thresholds: {0: np.float64(-0.5), 1: np.float64(-0.5), 2: np.float64(-0.49999999)}

✅ Optimization complete.
Best thresholds: {0: np.float64(-0.5), 1: np.float64(-0.5), 2: np.float64(-0.5)}
Max return: -0.06252493815337962


## Testing model

In [8]:
import random
# define card set
suits = ['Hearts', 'Diamonds', 'Clubs', 'Spades']
ranks = ['2', '3', '4', '5', '6', '7', '8', '9', '10', 'J', 'Q', 'K', 'A']
rank_values = {rank: i for i, rank in enumerate(ranks, start=2)}

deck = [{'rank': rank, 'suit': suit} for suit in suits for rank in ranks]

combinations = ["High Card", "One Pair", "Two Pair", "Three of a Kind", "Four of a Kind", 
                "Full House", "Straight", "Flush", "Straight Flush", "Royal Flush"]
combinations_values = {combination: i for i, combination in enumerate(combinations, start=1)}
# set ordered winning combinations
winning_hands = ["High Card", "One Pair", "Two Pair", "Three of a Kind", "Straight", "Flush", 
                "Full House", "Four of a Kind", "Straight Flush", "Royal Flush"]

winning_hand_ranks = {hand: i for i, hand in enumerate(winning_hands)}
#enumerate the deck
enumerated_deck = dict(enumerate(deck, start=1))
num_deck = list(range(1, 53))     

In [9]:
def get_model_input_based_on_round(cards, round):
    if round == 0:
        input_cards = cards[:2]
    elif round == 1:
        input_cards = cards[:5]
    else:
        input_cards = cards[:7]
    
    # Convert to tensor
    input_tensor = torch.tensor(input_cards, dtype=torch.long)

    # Pad with zeros on the right if needed
    if len(input_tensor) < 7:
        pad_size = 7 - len(input_tensor)
        input_tensor = F.pad(input_tensor, (0, pad_size), value=0)
    else:
        input_tensor = input_tensor[:7]  # Truncate just in case

    return input_tensor.unsqueeze(0)  # shape: (max_len,)

In [63]:
def count_intervals(data, interval_size, min_val=-5, max_val=5):
    bins = {}
    num_bins = int((max_val - min_val) / interval_size)

    for value in data:
        if value < min_val or value >= max_val:
            continue  # skip out-of-range values

        # shift the value range to start at 0
        bin_index = int((value - min_val) / interval_size)
        bin_start = round(min_val + bin_index * interval_size, 10)
        bin_end = round(bin_start + interval_size, 10)

        bin_label = f"[{bin_start}, {bin_end})"
        if bin_label not in bins:
            bins[bin_label] = 0
        bins[bin_label] += 1

    return bins

In [64]:
import ultimate
def test_model_with_games(model, thresholds=[0, 0, 0], num_games = 100):
    model.eval()
    budget = 0
    allBet = 0
    folds = 0
    flops = 0
    rivers = 0
    preflops = 0
    
    # list of predictions
    preds = []
    for i in range(num_games): 
        # generate game (9 cards, played, river, dealer)
        cards = random.sample(num_deck, 9)
        winnings = 0

        # check what round we are in
        round = 0
        round_when_bet = None
        # play  game until it ends
        while True:
            model_input = get_model_input_based_on_round(cards, round)
            
            # Forward pass to get prediction (probability of betting 1)
            with torch.no_grad():
                pred = model(model_input)  # shape: [1, 1]

            pred_prob = pred.item()  # get scalar probability
            preds.append(pred_prob)
            # if prediction > 0.5 => bet, else don't bet
            if pred_prob > thresholds[round] and round_when_bet is None:
                round_when_bet = round
                break

            round += 1

            if round == 3:
                break

        # calculate winnings
        player_hand = [enumerated_deck[card] for card in cards[0:7]]
        dealer_hand = [enumerated_deck[card] for card in cards[2:]]

        player_combination = ultimate.get_best_hand(player_hand)
        dealer_combination = ultimate.get_best_hand(dealer_hand)

        player_rank = winning_hand_ranks[player_combination]
        dealer_rank = winning_hand_ranks[dealer_combination]

        victor = 0 # 0 = dealer, 1 = player
        
        if player_rank > dealer_rank:
            victor = 1
        elif player_rank == dealer_rank:
            result = ultimate.decider(player_combination, player_hand, 
                                    dealer_combination, dealer_hand)
            if result == "player":
                victor = 1
            elif result == "dealer":	
                victor = 0
            else:
                victor = 2	# need to decide about this
                winnings = 0
        else:
            victor = 0

        # check if ante is valid
        dealer_has_something = ultimate.dealer_has_pair_or_better(dealer_hand[:2], dealer_hand[2:])
        blind_won = ultimate.has_blind(1, player_combination) - 1 #how much blind got us

        # calculate rewards for first and second rounds (in third victory is already bet, defeat is fold)
        if round_when_bet == 0:
            if victor == 1:
                winnings =  4 + blind_won + (1 if dealer_has_something else 0)
            elif victor == 0:
                winnings =  -6 + (1 if not dealer_has_something else 0)
            allBet += 6
            preflops += 1
        elif round_when_bet == 1:
            if victor == 1:
                winnings =  2 + blind_won + (1 if dealer_has_something else 0)
            elif victor == 0:
                winnings =  -4 + (1 if not dealer_has_something else 0)
            allBet += 4
            flops += 1
        elif round_when_bet == 2:
            if victor == 1:
                winnings =  1 + blind_won + (1 if dealer_has_something else 0)
            elif victor == 0:
                winnings =  -3 + (1 if not dealer_has_something else 0)
            allBet += 3
            rivers += 1
        elif round_when_bet == None:
            winnings = -2
            allBet += 2
            folds += 1
        
 
        budget += winnings
        #print("Winnings: ", winnings)
    
    print("Budget is: ", budget)
    print("Betted: ", allBet)
    print("PreFlops: ", preflops)
    print("Flops:", flops)
    print("Rivers:", rivers)
    print("Folds: ", folds)
    print("Ratio: ", budget / allBet)
    #print(preds)
    interval_size = 0.2
    result = count_intervals(preds, interval_size)
    for interval, count in sorted(result.items()):
        print(f"{interval}: {count}")
    

In [85]:
test_model_with_games(linearModel, best_thresholds,  100000)

Budget is:  -27512.0
Betted:  458391
PreFlops:  47139
Flops: 21353
Rivers: 27129
Folds:  4379
Ratio:  -0.06001863038323178
[-0.2, 0.0): 11131
[-0.4, -0.2): 11545
[-0.6, -0.4): 12888
[-0.8, -0.6): 13406
[-1.0, -0.8): 11737
[-1.2, -1.0): 12460
[-1.4, -1.2): 10796
[-1.6, -1.4): 9861
[-1.8, -1.6): 8002
[-2.0, -1.8): 6864
[-2.2, -2.0): 5403
[-2.4, -2.2): 4149
[-2.6, -2.4): 2953
[-2.8, -2.6): 2295
[-3.0, -2.8): 1691
[-3.2, -3.0): 1114
[-3.4, -3.2): 775
[-3.6, -3.4): 507
[-3.8, -3.6): 387
[-4.0, -3.8): 201
[-4.2, -4.0): 132
[-4.4, -4.2): 122
[-4.6, -4.4): 40
[-4.8, -4.6): 38
[-5.0, -4.8): 27
[0.0, 0.2): 9698
[0.2, 0.4): 9051
[0.4, 0.6): 7837
[0.6, 0.8): 5724
[0.8, 1.0): 5265
[1.0, 1.2): 4357
[1.2, 1.4): 3673
[1.4, 1.6): 1993
[1.6, 1.8): 2130
[1.8, 2.0): 543
[2.0, 2.2): 497
[2.2, 2.4): 380
[2.4, 2.6): 439
[2.6, 2.8): 520
[2.8, 3.0): 394
[3.0, 3.2): 352
[3.2, 3.4): 282
[3.4, 3.6): 187
[3.6, 3.8): 493
[3.8, 4.0): 258
[4.0, 4.2): 161
[4.2, 4.4): 102
[4.4, 4.6): 124
[4.6, 4.8): 123
[4.8, 5.0): 192