In [3]:
import pandas as pd
import numpy as np
import random
import ultimate
import itertools

In [4]:
# --- Card set ---------------------------------------------------------------
# Four suits x thirteen ranks; rank_values maps '2' -> 2 up to 'A' -> 14.
suits = ['Hearts', 'Diamonds', 'Clubs', 'Spades']
ranks = ['2', '3', '4', '5', '6', '7', '8', '9', '10', 'J', 'Q', 'K', 'A']
rank_values = dict(zip(ranks, range(2, len(ranks) + 2)))

# One dict per card, grouped suit by suit (all Hearts first, then Diamonds, ...).
deck = [{'rank': rank, 'suit': suit}
        for suit, rank in itertools.product(suits, ranks)]

# NOTE(review): this list is not in the same order as `winning_hands` below, and
# neither it nor `combinations_values` is read by the visible cells -- confirm
# whether anything else depends on it before relying on these values.
combinations = ["High Card", "One Pair", "Two Pair", "Three of a Kind", "Four of a Kind",
                "Full House", "Straight", "Flush", "Straight Flush", "Royal Flush"]
combinations_values = dict(zip(combinations, range(1, len(combinations) + 1)))

# Hand categories from weakest to strongest; the list position is the hand's rank
# used when comparing player and dealer.
winning_hands = ["High Card", "One Pair", "Two Pair", "Three of a Kind", "Straight", "Flush",
                 "Full House", "Four of a Kind", "Straight Flush", "Royal Flush"]
winning_hand_ranks = {hand: position for position, hand in enumerate(winning_hands)}

# Cards are referred to by integer ids 1..52 everywhere else in the notebook.
enumerated_deck = {card_id: card for card_id, card in enumerate(deck, start=1)}
num_deck = list(range(1, len(deck) + 1))

In [5]:
# Stakes: ante = 1 and blind = 1. The three rewards returned below are the net
# results of committing to the play bet in the first, second or third round.
def decide_game_victor_and_calculate_rewards_for_bets(row):
    """Resolve one dealt game and return the net reward for betting in each round.

    Parameters
    ----------
    row : sequence of 9 card ids (keys of ``enumerated_deck``)
        Positions 0-1 are the player's cards, 2-6 the shared board cards and
        7-8 the dealer's cards (see the ``header`` used when the rows are built).

    Returns
    -------
    list
        ``[first_round, second_round, third_round]``.
        * Player wins: ``4 / 2 / 1`` plus the blind bonus
          (``ultimate.has_blind(1, ...) - 1``) plus 1 for the ante when
          ``ultimate.dealer_has_pair_or_better`` is truthy.
        * Dealer wins: ``-6 / -4 / -3``.
        * Tie: ``[0, 0, 0]``.

    Notes
    -----
    A tie used to be settled with ``random.randint(0, 1)``, which labelled drawn
    games as a random win or loss. That contradicted the "draw leaves the values
    at zero" intent of this function and the push handling in
    ``test_model_with_games_sharpe``; ties now yield zero rewards.
    """
    player_hand = [enumerated_deck[card] for card in row[0:7]]
    dealer_hand = [enumerated_deck[card] for card in row[2:]]

    player_combination = ultimate.get_best_hand(player_hand)
    dealer_combination = ultimate.get_best_hand(dealer_hand)

    player_rank = winning_hand_ranks[player_combination]
    dealer_rank = winning_hand_ranks[dealer_combination]

    # outcome is "player", "dealer", or anything else for a tie.
    if player_rank > dealer_rank:
        outcome = "player"
    elif player_rank < dealer_rank:
        outcome = "dealer"
    else:
        # Same hand category: let the tie-breaker compare the actual cards.
        outcome = ultimate.decider(player_combination, player_hand,
                                   dealer_combination, dealer_hand)

    # The ante only pays when the dealer holds a pair or better.
    dealer_has_something = ultimate.dealer_has_pair_or_better(dealer_hand[:2], dealer_hand[2:])
    blind_won = ultimate.has_blind(1, player_combination) - 1  # extra amount the blind pays

    # A tie leaves every round at zero.
    first_round = 0
    second_round = 0
    third_round = 0

    if outcome == "player":
        ante_won = 1 if dealer_has_something else 0
        first_round = 4 + blind_won + ante_won
        second_round = 2 + blind_won + ante_won
        third_round = 1 + blind_won + ante_won
    elif outcome == "dealer":
        first_round = -6
        second_round = -4
        third_round = -3

    return [first_round, second_round, third_round]

# Debug helper: turn the card ids of a generated row back into readable cards.
def convert_to_cards(row):
    """Return ``(rank, suit)`` tuples for the first nine card ids in ``row``.

    Each id is looked up in ``enumerated_deck``; the order of ``row`` is kept.
    """
    decoded = []
    for card_id in row[:9]:
        card = enumerated_deck[card_id]
        decoded.append((card["rank"], card["suit"]))
    return decoded


In [6]:
# Every unordered pair of distinct card ids the player can be dealt.
all_starting_hands = [*itertools.combinations(num_deck, 2)]
print(len(all_starting_hands))

1326


In [9]:
all_starting_hands = list(itertools.combinations(num_deck, 2))
multiplier_D = 10  # dealer hands sampled per complete board
multiplier_1 = 7   # first three board cards sampled per starting hand
multiplier_2 = 5   # last two board cards sampled per set of first three

header = ["C1", "C2", "R1", "R2", "R3", "R4", "R5", "D1", "D2"]
rows = []

# The candidate lists only depend on the cards already dealt, so each one is
# built once per level instead of once per innermost iteration. The lists have
# the same content and order as before, so the random draws are unchanged.
for player in all_starting_hands:
    player = list(player)
    remaining_deck = [card for card in num_deck if card not in player]
    for _ in range(multiplier_1):
        flop = random.sample(remaining_deck, 3)
        used = set(player + flop)
        deck_after_flop = [card for card in num_deck if card not in used]
        for _ in range(multiplier_2):
            river = random.sample(deck_after_flop, 2)
            used2 = set(player + flop + river)
            deck_after_board = [card for card in num_deck if card not in used2]
            for _ in range(multiplier_D):
                dealer = random.sample(deck_after_board, 2)
                rows.append(player + flop + river + dealer)

full_data_set = pd.DataFrame(rows, columns=header)
print("Generated all variables")

# One reward per betting round (Q1, Q2, Q3) for every generated game.
full_data_set[["Q1", "Q2", "Q3"]] = full_data_set.apply(
    decide_game_victor_and_calculate_rewards_for_bets, axis=1, result_type="expand"
)
print("Rewards calculated")


def calc_stats_with_cards(df, group_cols, target_col):
    """Attach per-group reward statistics to every row of ``df``.

    Rows are grouped by ``group_cols`` (in order of first appearance) and the
    following columns are computed from ``target_col`` for each group:

    * ``EV``      - mean
    * ``Std``     - sample standard deviation
    * ``WinRate`` - fraction of rows with a strictly positive value
    * ``Sharpe``  - ``EV / Std``; a zero ``Std`` is treated as missing

    Missing values in the aggregated table (for example ``Std`` of a single-row
    group, or ``Sharpe`` where ``Std`` is zero) are replaced by 0. The result is
    a copy of ``df`` left-joined with these statistics; ``df`` is not modified.
    """
    agg_stats = (
        df.groupby(group_cols, sort=False)
        .agg(
            EV=(target_col, "mean"),
            Std=(target_col, "std"),
            WinRate=(target_col, lambda values: (values > 0).mean()),
        )
        .reset_index()
        .assign(Sharpe=lambda table: table["EV"] / table["Std"].replace(0, np.nan))
        .fillna(0)
    )

    return df.copy().merge(agg_stats, on=group_cols, how="left")

# Per-group statistics are currently disabled; only the raw rewards are exported.
#stats_Q1 = calc_stats_with_cards(full_data_set, ["C1", "C2"], "Q1")
#stats_Q2 = calc_stats_with_cards(full_data_set, ["C1", "C2", "R1", "R2", "R3"], "Q2")
#stats_Q3 = calc_stats_with_cards(full_data_set, ["C1", "C2", "R1", "R2", "R3", "R4", "R5"], "Q3")

# Each round's export holds the cards visible in that round followed by its reward.
hole_cols = ["C1", "C2"]
flop_cols = ["R1", "R2", "R3"]
river_cols = ["R4", "R5"]

stats_Q1 = full_data_set.loc[:, hole_cols + ["Q1"]].copy()
stats_Q2 = full_data_set.loc[:, hole_cols + flop_cols + ["Q2"]].copy()
stats_Q3 = full_data_set.loc[:, hole_cols + flop_cols + river_cols + ["Q3"]].copy()

for out_path, frame in (
    ("data_for_first_round_Q.npy", stats_Q1),
    ("data_for_second_round_Q.npy", stats_Q2),
    ("data_for_third_round_Q.npy", stats_Q3),
):
    np.save(out_path, frame.to_numpy())

Generated all variables
Rewards calculated


In [10]:
# Quick look at the first rows of each per-round export.
for round_frame in (stats_Q1, stats_Q2, stats_Q3):
    print(round_frame.head(5))

   C1  C2   Q1
0   1   2  5.0
1   1   2 -6.0
2   1   2 -6.0
3   1   2 -6.0
4   1   2 -6.0
   C1  C2  R1  R2  R3   Q2
0   1   2  38  26   5  3.0
1   1   2  38  26   5 -4.0
2   1   2  38  26   5 -4.0
3   1   2  38  26   5 -4.0
4   1   2  38  26   5 -4.0
   C1  C2  R1  R2  R3  R4  R5   Q3
0   1   2  38  26   5  31  52  2.0
1   1   2  38  26   5  31  52 -3.0
2   1   2  38  26   5  31  52 -3.0
3   1   2  38  26   5  31  52 -3.0
4   1   2  38  26   5  31  52 -3.0


In [162]:
import torch
import numpy as np
from torch.utils.data import TensorDataset, DataLoader
import torch.nn as nn
import torch.nn.functional as F
from torch.nn.utils.rnn import pad_sequence

In [164]:
# ===== Pad sequences =====
def pad_to_length(tensor_list, max_len=9):
    """Stack 1-D tensors into a single batch that is exactly ``max_len`` wide.

    The tensors are first padded with zeros to the length of the longest one
    (batch-first). The result is then right-padded with zeros when it is
    narrower than ``max_len``, or cut to its first ``max_len`` columns otherwise.
    """
    batch = nn.utils.rnn.pad_sequence(tensor_list, batch_first=True, padding_value=0)
    shortfall = max_len - batch.size(1)
    if shortfall > 0:
        return F.pad(batch, (0, shortfall), value=0)
    return batch[:, :max_len]

def process_round_data(npy_file, max_len=9):
    """Load one saved round array and split it into padded card ids and targets.

    Parameters
    ----------
    npy_file : str or path-like
        ``.npy`` file holding a 2-D array whose last column is the target and
        whose leading columns are card ids.
    max_len : int
        Width of the returned feature tensor; rows are right-padded with 0.

    Returns
    -------
    (x_padded, y_tensor)
        ``x_padded`` is a ``torch.long`` tensor of shape ``(rows, max_len)`` and
        ``y_tensor`` a ``torch.float`` tensor of shape ``(rows,)``.

    Notes
    -----
    The features used to be ``data[:, :9]``, which silently included the target
    column whenever the file had nine or fewer feature columns. The last column
    is now always excluded before the first (at most nine) card columns are
    taken; files with more than nine leading columns give the same result as
    before. The whole array is converted in one call instead of one tensor per
    row.
    """
    data = np.load(npy_file)

    card_columns = 9  # a full game row holds at most nine card ids
    cards = data[:, :-1][:, :min(card_columns, max_len)]  # never include the target
    x_padded = torch.tensor(cards, dtype=torch.long)

    missing = max_len - x_padded.size(1)
    if missing > 0:
        x_padded = F.pad(x_padded, (0, missing), value=0)

    y_tensor = torch.tensor(data[:, -1], dtype=torch.float)  # target (last column)
    return x_padded, y_tensor

# ===== Load datasets =====
# NOTE(review): the generation cell earlier in this notebook writes
# "data_for_*_round_Q.npy", while the files read here have no "_Q" suffix --
# confirm these are the intended inputs.
round_files = (
    'data_for_first_round.npy',
    'data_for_second_round.npy',
    'data_for_third_round.npy',
)
(x_first, y_first), (x_second, y_second), (x_third, y_third) = [
    process_round_data(path, max_len=9) for path in round_files
]

# All three rounds are trained together as one dataset.
x_all = torch.cat((x_first, x_second, x_third), dim=0)
y_all = torch.cat((y_first, y_second, y_third), dim=0)

dataset_all = TensorDataset(x_all, y_all)
dataloader_all = DataLoader(dataset_all, batch_size=32, shuffle=True)

print("Combined input shape:", x_all.shape)
print("Combined label shape:", y_all.shape)

Combined input shape: torch.Size([11934000, 9])
Combined label shape: torch.Size([11934000])


In [228]:
class EmbeddingNetSharpe(nn.Module):
    """Card-embedding regressor with three stored per-round thresholds.

    Each of the ``num_cards`` input ids is embedded (table of 53 rows, id 0 is
    the padding index), the embeddings are concatenated and passed through two
    ReLU layers (64 and 32 units, dropout 0.2 after the first) and a linear head
    that yields one value per sample.

    ``thresholds`` is an ``nn.Parameter`` of per-round cut-offs
    (preflop, flop, river). It is not used by ``forward``.
    """

    def __init__(self, num_cards=9, embed_dim=16, init_thresholds=None):
        super().__init__()
        self.num_cards = num_cards
        self.embed_dim = embed_dim

        # Layers are created in this fixed order so seeded initialisation is stable.
        self.embedding = nn.Embedding(53, embed_dim, padding_idx=0)
        self.fc1 = nn.Linear(num_cards * embed_dim, 64)
        self.fc2 = nn.Linear(64, 32)
        self.dropout = nn.Dropout(0.2)
        self.sharpe_head = nn.Linear(32, 1)  # predicted score

        starting_thresholds = (
            [-0.5, -0.3, -0.1] if init_thresholds is None else init_thresholds
        )  # preflop, flop, river
        self.thresholds = nn.Parameter(
            torch.tensor(starting_thresholds, dtype=torch.float32)
        )

    def forward(self, x):
        """Return one prediction per sample for a batch of card-id rows."""
        batch_size = x.size(0)
        # Flatten all embedded cards of a sample into a single feature vector.
        flat = self.embedding(x).view(batch_size, self.num_cards * self.embed_dim)
        hidden = self.dropout(F.relu(self.fc1(flat)))
        hidden = F.relu(self.fc2(hidden))
        return self.sharpe_head(hidden).squeeze(1)

    def get_threshold(self, round_idx):
        """Return the threshold for the given round (0=preflop, 1=flop, 2=river)."""
        return self.thresholds[round_idx]


In [230]:
def train_model(model, dataloader, epochs=20, lr=0.001):
    """Fit ``model`` to standardised targets with Adam and an MSE loss.

    The target mean and standard deviation are computed from one full pass over
    ``dataloader``; every batch target is standardised with them before the loss
    is taken. The model's ``thresholds`` are only reported after each epoch.

    Parameters
    ----------
    model : nn.Module with a ``thresholds`` parameter
    dataloader : iterable of ``(x_batch, y_batch)`` tensor pairs
    epochs : int
    lr : float

    Returns
    -------
    (mean_sharpe, std_sharpe) : 0-dim tensors
        The statistics used for standardisation, needed to de-normalise
        predictions later.

    Notes
    -----
    * A zero or undefined standard deviation (constant targets, single sample)
      used to turn every loss into NaN; it is now replaced by 1 so the targets
      are only centred.
    * Batches are moved to the device of the model's parameters, so a model that
      was sent to another device can be trained from CPU-resident data.
    """
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    criterion = nn.MSELoss()

    # Mean and std of all targets, used for normalisation.
    all_targets = torch.cat([y_batch for _, y_batch in dataloader], dim=0)
    mean_sharpe = all_targets.mean()
    std_sharpe = all_targets.std()
    if not torch.isfinite(std_sharpe) or std_sharpe == 0:
        std_sharpe = torch.ones_like(std_sharpe)

    device = next(model.parameters()).device
    mean_on_device = mean_sharpe.to(device)
    std_on_device = std_sharpe.to(device)

    for epoch in range(1, epochs + 1):
        model.train()
        total_loss = 0

        for x_batch, y_batch in dataloader:
            x_batch = x_batch.to(device)
            y_batch = y_batch.to(device)
            optimizer.zero_grad()

            # Normalise the target.
            y_batch_norm = (y_batch - mean_on_device) / std_on_device

            pred_sharpe = model(x_batch)  # model returns the prediction only

            loss = criterion(pred_sharpe, y_batch_norm)
            loss.backward()
            optimizer.step()

            total_loss += loss.item()

        avg_loss = total_loss / len(dataloader)
        print(f"Epoch {epoch}/{epochs} | Avg Loss: {avg_loss:.6f} | Thresholds: {model.thresholds.data.tolist()}")

    return mean_sharpe, std_sharpe


In [197]:
# ===== Train =====
# Fresh model, three passes over the combined dataset at a small learning rate.
# The call is the last expression of the cell, so the returned
# (mean, std) pair is shown as the cell output.
model = EmbeddingNetSharpe(embed_dim=16, num_cards=9)
train_model(model, dataloader_all, lr=0.00001, epochs=3)

Epoch 1/3 | Avg Loss: 0.995715
Epoch 2/3 | Avg Loss: 0.991336
Epoch 3/3 | Avg Loss: 0.989106


(tensor(-0.0186), tensor(2.9630))

In [None]:
def optimize_thresholds(model, mean_sharpe, std_sharpe, num_games=500, steps=30, lr=0.05):
    """Tune the model's thresholds against the hard-decision game simulation.

    ``test_model_with_games_sharpe`` returns a plain Python number, so its score
    carries no autograd graph. The previous implementation called
    ``backward()`` on a tensor built from that number, which raises a
    ``RuntimeError`` because the tensor does not require grad.

    The gradient is now estimated with central finite differences: each
    threshold is shifted up and down by a fixed step and the simulation is
    re-run. Every evaluation within one step replays the same random games (the
    ``random`` state is restored before each run), so score differences come
    from the thresholds rather than from the deal. The estimate is handed to
    Adam through ``model.thresholds.grad``.

    Parameters
    ----------
    model : model exposing ``thresholds`` (``nn.Parameter``)
    mean_sharpe, std_sharpe : de-normalisation statistics passed to the simulation
    num_games : int, games per simulation run
    steps : int, optimisation steps (each step runs the simulation
        ``2 * len(thresholds) + 1`` times)
    lr : float, Adam learning rate
    """
    optimizer = torch.optim.Adam([model.thresholds], lr=lr)
    fd_step = 0.05  # half-width of the finite-difference probe

    def score_at(threshold_values, rng_state):
        # Replay the same games with the given thresholds installed.
        random.setstate(rng_state)
        with torch.no_grad():
            model.thresholds.copy_(threshold_values)
        return test_model_with_games_sharpe(model, num_games=num_games,
                                            mean_sharpe=mean_sharpe, std_sharpe=std_sharpe,
                                            verbose=False)

    for step in range(steps):
        optimizer.zero_grad()

        current = model.thresholds.detach().clone()
        rng_state = random.getstate()

        # We maximise the score, i.e. minimise its negative.
        grad = torch.zeros_like(current)
        for i in range(current.numel()):
            shift = torch.zeros_like(current)
            shift[i] = fd_step
            score_up = score_at(current + shift, rng_state)
            score_down = score_at(current - shift, rng_state)
            grad[i] = -(score_up - score_down) / (2 * fd_step)

        # Last evaluation restores the unshifted thresholds and gives the reported score.
        score = score_at(current, rng_state)

        model.thresholds.grad = grad
        optimizer.step()

        print(f"Step {step+1}/{steps} | Score: {score:.4f} | Thresholds: {model.thresholds.data.tolist()}")


In [198]:
def get_model_input_based_on_round(cards, round, max_len=9):
    """Build the model input for the cards visible in the given round.

    Round 0 exposes the first 2 cards, round 1 the first 5 and any other value
    the first 7. The visible ids are right-padded with zeros to ``max_len`` and
    returned as a ``torch.long`` tensor of shape ``(1, max_len)``.
    """
    visible_count = 2 if round == 0 else 5 if round == 1 else 7
    visible = torch.tensor(cards[:visible_count], dtype=torch.long)

    # Zero-pad so the length is always max_len.
    padded = F.pad(visible, (0, max_len - len(visible)), value=0)
    return padded.unsqueeze(0)


In [233]:
def test_model_with_games_sharpe(model, num_games=100, mean_sharpe=0.0, std_sharpe=1.0, verbose=True):
    """Simulate games with hard bet/fold decisions and return the ROI.

    For every game nine cards are dealt. The model is queried round by round
    (2, 5, then 7 visible cards); the player bets in the first round whose
    de-normalised prediction exceeds that round's threshold and folds if none
    does.

    Net result per game:

    * bet in round 0 / 1 / 2 and win: ``4 / 2 / 1`` + blind bonus + 1 if the
      dealer has a pair or better; lose: ``-6 / -4 / -3``; tie: ``0``.
      The amount staked is ``6 / 4 / 3``.
    * fold: ``-2`` with 2 staked.

    Parameters
    ----------
    model : model exposing ``thresholds`` and ``get_threshold`` (EmbeddingNetSharpe)
    num_games : int
    mean_sharpe, std_sharpe : statistics used to de-normalise predictions
    verbose : bool, print budget, house edge and the bet distribution

    Returns
    -------
    float
        ``budget / allBet``; ``0.0`` when nothing was staked (``num_games == 0``),
        which previously raised ``ZeroDivisionError``.

    Notes
    -----
    * The model now receives the ``(1, max_len)`` tensor from
      ``get_model_input_based_on_round`` directly instead of a copy with one
      more leading dimension.
    * Inputs are moved to the device of ``model.thresholds``.
    * Hands are only evaluated when the player actually bet; a fold is settled
      without consulting the hand evaluator.
    """
    model.eval()
    device = model.thresholds.device

    # round -> (base amount won, amount lost, amount staked)
    stakes = {0: (4, -6, 6), 1: (2, -4, 4), 2: (1, -3, 3)}

    budget = 0
    allBet = 0
    folds = 0
    bets_per_round = [0, 0, 0]  # preflop, flop, river

    for _ in range(num_games):
        # Nine cards per game: player (2), board (5), dealer (2).
        cards = random.sample(num_deck, 9)

        round_when_bet = None
        for round_idx in range(3):
            model_input = get_model_input_based_on_round(cards, round_idx).to(device)
            with torch.no_grad():
                pred_sharpe = model(model_input)  # input is [1, max_len]
            ev_value = (pred_sharpe * std_sharpe + mean_sharpe).item()

            if ev_value > model.get_threshold(round_idx).item():
                round_when_bet = round_idx
                break

        if round_when_bet is None:
            # Never bet: the player folds.
            winnings = -2
            allBet += 2
            folds += 1
        else:
            player_hand = [enumerated_deck[card] for card in cards[0:7]]
            dealer_hand = [enumerated_deck[card] for card in cards[2:]]

            player_combination = ultimate.get_best_hand(player_hand)
            dealer_combination = ultimate.get_best_hand(dealer_hand)

            player_rank = winning_hand_ranks[player_combination]
            dealer_rank = winning_hand_ranks[dealer_combination]

            # outcome is "player", "dealer", or anything else for a tie.
            if player_rank > dealer_rank:
                outcome = "player"
            elif player_rank < dealer_rank:
                outcome = "dealer"
            else:
                outcome = ultimate.decider(player_combination, player_hand,
                                           dealer_combination, dealer_hand)

            win_base, loss_amount, staked = stakes[round_when_bet]
            if outcome == "player":
                dealer_has_something = ultimate.dealer_has_pair_or_better(
                    dealer_hand[:2], dealer_hand[2:])
                blind_won = ultimate.has_blind(1, player_combination) - 1
                winnings = win_base + blind_won + (1 if dealer_has_something else 0)
            elif outcome == "dealer":
                winnings = loss_amount
            else:
                winnings = 0  # tie: nothing won or lost

            allBet += staked
            bets_per_round[round_when_bet] += 1

        budget += winnings

    house_edge = ((-budget) / allBet) * 100 if allBet > 0 else 0

    if verbose:
        print(f"Budget: {budget}")
        print(f"Betted: {allBet}")
        print(f"House Edge: {house_edge:.2f}%")
        print(f"PreFlops: {bets_per_round[0]}")
        print(f"Flops: {bets_per_round[1]}")
        print(f"Rivers: {bets_per_round[2]}")
        print(f"Folds: {folds}")
        print(f"Thresholds: {model.thresholds.data.tolist()}")

    # ROI is the score.
    return budget / allBet if allBet > 0 else 0.0


In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import random

class EmbeddingNetSharpe(nn.Module):
    """Embedding-based regressor over card ids, plus three per-round thresholds.

    Input ids index a 53-row embedding table (id 0 is the padding index). The
    ``num_cards`` embeddings of a sample are concatenated, passed through a
    64-unit and a 32-unit ReLU layer (dropout 0.2 in between) and reduced to a
    single value by a linear head. ``thresholds`` (preflop, flop, river) is a
    parameter that ``forward`` does not use.
    """

    def __init__(self, num_cards=9, embed_dim=16, init_thresholds=None):
        super().__init__()
        self.num_cards = num_cards
        self.embed_dim = embed_dim

        # Creation order is kept fixed so seeded initialisation is reproducible.
        self.embedding = nn.Embedding(53, embed_dim, padding_idx=0)
        self.fc1 = nn.Linear(num_cards * embed_dim, 64)
        self.fc2 = nn.Linear(64, 32)
        self.dropout = nn.Dropout(0.2)
        self.sharpe_head = nn.Linear(32, 1)

        default_thresholds = [-0.1, -0.1, -0.1]  # preflop, flop, river
        chosen = default_thresholds if init_thresholds is None else init_thresholds
        self.thresholds = nn.Parameter(torch.tensor(chosen, dtype=torch.float32))

    def forward(self, x):
        """Return one prediction per sample for a batch of card-id rows."""
        feature_width = self.num_cards * self.embed_dim
        features = self.embedding(x).view(x.size(0), feature_width)
        features = self.dropout(F.relu(self.fc1(features)))
        features = F.relu(self.fc2(features))
        return self.sharpe_head(features).squeeze(1)

    def get_threshold(self, round_idx):
        """Return the threshold stored for ``round_idx``."""
        return self.thresholds[round_idx]


def train_model(model, dataloader, epochs=20, lr=0.001, mean_offset=0.11):
    """Fit ``model`` to standardised targets with Adam and an MSE loss.

    Parameters
    ----------
    model : nn.Module with a ``thresholds`` parameter (reported after each epoch)
    dataloader : iterable of ``(x_batch, y_batch)`` tensor pairs
    epochs : int
    lr : float
    mean_offset : float
        Constant added to the empirical target mean before it is used for
        standardisation. The default keeps the previously hard-coded ``0.11``.
        NOTE(review): the reason for this offset is not visible in the notebook;
        pass ``0.0`` to centre on the true mean.

    Returns
    -------
    (mean_sharpe, std_sharpe) : 0-dim tensors
        The (offset) mean and the standard deviation used for standardisation,
        needed to de-normalise predictions later.

    Notes
    -----
    * A zero or undefined standard deviation (constant targets, single sample)
      used to turn every loss into NaN; it is now replaced by 1.
    * Batches are moved to the device of the model's parameters, so a model that
      was sent to another device can be trained from CPU-resident data.
    """
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    criterion = nn.MSELoss()

    all_targets = torch.cat([y for _, y in dataloader], dim=0)
    mean_sharpe = all_targets.mean() + mean_offset
    std_sharpe = all_targets.std()
    if not torch.isfinite(std_sharpe) or std_sharpe == 0:
        std_sharpe = torch.ones_like(std_sharpe)

    device = next(model.parameters()).device
    mean_on_device = mean_sharpe.to(device)
    std_on_device = std_sharpe.to(device)

    for epoch in range(1, epochs + 1):
        model.train()
        total_loss = 0
        for x_batch, y_batch in dataloader:
            x_batch = x_batch.to(device)
            y_batch = y_batch.to(device)
            optimizer.zero_grad()
            y_batch_norm = (y_batch - mean_on_device) / std_on_device
            pred_sharpe = model(x_batch)
            loss = criterion(pred_sharpe, y_batch_norm)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()

        avg_loss = total_loss / len(dataloader)
        print(f"Epoch {epoch}/{epochs} | Avg Loss: {avg_loss:.6f} | Thresholds: {model.thresholds.data.tolist()}")

    return mean_sharpe, std_sharpe


def test_model_with_games_sharpe_soft(model, num_games=100, mean_sharpe=0.0, std_sharpe=1.0, k=20.0):
    """Differentiable (soft) ROI estimate used to tune the thresholds.

    For each simulated deal the model's de-normalised prediction in every round
    is compared with that round's threshold through a sigmoid of steepness
    ``k``, giving a probability of betting in that round. From these the
    probability of betting *for the first time* in each round is derived and
    weighted with a fixed expected gain and stake per round.

    Parameters
    ----------
    model : model exposing ``thresholds`` (``nn.Parameter``)
    num_games : int, number of random deals
    mean_sharpe, std_sharpe : statistics used to de-normalise predictions
    k : float, sigmoid steepness (larger is closer to a hard decision)

    Returns
    -------
    torch.Tensor of shape ``(1,)``
        ``total expected gain / (total expected stake + 1e-6)``; differentiable
        with respect to ``model.thresholds``.

    Notes
    -----
    * Predictions are computed under ``torch.no_grad()``: only the thresholds
      are optimised, so no autograd graph through the network is needed. The
      returned value and its gradient w.r.t. the thresholds are unchanged.
    * The model receives the ``(1, max_len)`` input directly, without an extra
      leading dimension, and constant tensors are built once on the thresholds'
      device.
    * NOTE(review): the expected gain uses fixed 0.45 / 0.55 win / loss weights
      for every deal and assigns no cost to never betting, so the result does
      not depend on who actually wins the dealt hand -- confirm this is the
      intended objective.
    """
    model.eval()
    thresholds = model.thresholds
    device = thresholds.device

    # Per-round constants, shared by every simulated deal.
    winnings_per_round = torch.tensor([4, 2, 1], dtype=torch.float32, device=device)
    costs_per_round = torch.tensor([6, 4, 3], dtype=torch.float32, device=device)
    expected_gain = winnings_per_round * 0.45 - costs_per_round * 0.55
    not_bet_before_first_round = torch.ones(1, dtype=torch.float32, device=device)

    total_budget = torch.zeros(1, dtype=torch.float32, device=device)
    total_bet = torch.zeros(1, dtype=torch.float32, device=device)

    for _ in range(num_games):
        cards = random.sample(num_deck, 9)

        # Predictions are constants with respect to the thresholds.
        with torch.no_grad():
            ev_values = torch.stack([
                model(get_model_input_based_on_round(cards, round_idx).to(device))
                * std_sharpe + mean_sharpe
                for round_idx in range(3)
            ]).squeeze(1)

        p_bet_round = torch.sigmoid(k * (ev_values - thresholds))

        # Probability of still not having bet after each round.
        p_continue = torch.cumprod(1 - p_bet_round + 1e-6, dim=0)
        p_bet_first_time = p_bet_round * torch.cat([not_bet_before_first_round, p_continue[:-1]])

        total_budget = total_budget + (p_bet_first_time * expected_gain).sum()
        total_bet = total_bet + (p_bet_first_time * costs_per_round).sum()

    roi = total_budget / (total_bet + 1e-6)
    return roi


def optimize_thresholds_with_simulation_soft(model, mean_sharpe, std_sharpe,
                                             num_games=500, steps=30, lr=0.05, k=20.0):
    """Gradient-ascend the soft ROI with respect to the model's thresholds.

    Each step runs ``test_model_with_games_sharpe_soft`` on freshly sampled
    deals, back-propagates the negated ROI and lets Adam update
    ``model.thresholds`` only. Progress is printed after every step.
    """
    threshold_optimizer = torch.optim.Adam([model.thresholds], lr=lr)
    simulation_kwargs = dict(num_games=num_games,
                             mean_sharpe=mean_sharpe,
                             std_sharpe=std_sharpe,
                             k=k)

    for step in range(steps):
        threshold_optimizer.zero_grad()
        roi = test_model_with_games_sharpe_soft(model, **simulation_kwargs)
        # Maximising the ROI is minimising its negative.
        (-roi).backward()
        threshold_optimizer.step()

        print(f"Step {step+1}/{steps} | ROI: {roi.item():.4f} | Thresholds: {model.thresholds.data.tolist()}")


In [None]:
# Train a fresh model, then compare the soft ROI before and after threshold tuning.
# NOTE(review): the batches in dataloader_all are created without an explicit
# device in the visible cells -- confirm the helpers handle `device` before
# running this on a GPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = EmbeddingNetSharpe(num_cards=9, embed_dim=16).to(device)
mean_sharpe, std_sharpe = train_model(model, dataloader_all, epochs=3, lr=0.00001)

# Same evaluation settings for the "before" and "after" measurements.
soft_eval_kwargs = dict(
    num_games=1000,
    mean_sharpe=mean_sharpe,
    std_sharpe=std_sharpe,
    k=20.0,
)

roi_before = test_model_with_games_sharpe_soft(model, **soft_eval_kwargs)
print(f"[BEFORE] ROI (soft): {roi_before.item():.4f}")
print(f"[BEFORE] Thresholds: {model.thresholds.data.tolist()}")

optimize_thresholds_with_simulation_soft(
    model,
    mean_sharpe=mean_sharpe,
    std_sharpe=std_sharpe,
    num_games=2000,
    steps=30,
    lr=5e-2,
)

roi_after = test_model_with_games_sharpe_soft(model, **soft_eval_kwargs)
print(f"[AFTER ] ROI (soft): {roi_after.item():.4f}")
print(f"[AFTER ] Thresholds: {model.thresholds.data.tolist()}")

Epoch 1/3 | Avg Loss: 0.996283 | Thresholds: [-0.10000000149011612, -0.10000000149011612, -0.10000000149011612]
Epoch 2/3 | Avg Loss: 0.991269 | Thresholds: [-0.10000000149011612, -0.10000000149011612, -0.10000000149011612]
Epoch 3/3 | Avg Loss: 0.988946 | Thresholds: [-0.10000000149011612, -0.10000000149011612, -0.10000000149011612]
[BEFORE] ROI (soft): -0.2637
[BEFORE] Thresholds: [-0.10000000149011612, -0.10000000149011612, -0.10000000149011612]
Step 1/30 | ROI: -0.2651 | Thresholds: [-0.15000000596046448, -0.05000003054738045, -0.050000015646219254]
Step 2/30 | ROI: -0.2582 | Thresholds: [-0.198029562830925, -0.0005878768861293793, -0.0013807862997055054]
Step 3/30 | ROI: -0.2539 | Thresholds: [-0.24245710670948029, 0.04586785286664963, 0.04446312040090561]
Step 4/30 | ROI: -0.2515 | Thresholds: [-0.2823464870452881, 0.08799655735492706, 0.08537480980157852]
Step 5/30 | ROI: -0.2508 | Thresholds: [-0.31780827045440674, 0.1257762312889099, 0.12157359719276428]
Step 6/30 | ROI: -0.25