In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import random
import time
import pickle

# torch
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader


A module that was compiled using NumPy 1.x cannot be run in
NumPy 2.0.1 as it may crash. To support both 1.x and 2.x
versions of NumPy, modules must be compiled with NumPy 2.0.
Some module may need to rebuild instead e.g. with 'pybind11>=2.12'.

If you are a user of the module, the easiest solution will be to
downgrade to 'numpy<2' or try to upgrade the affected module.
We expect that some modules will need time to support NumPy 2.

Traceback (most recent call last):  File "<frozen runpy>", line 198, in _run_module_as_main
  File "<frozen runpy>", line 88, in _run_code
  File "/Users/TheColellas/Documents/GitHub/botzee/.venv/lib/python3.11/site-packages/ipykernel_launcher.py", line 18, in <module>
    app.launch_new_instance()
  File "/Users/TheColellas/Documents/GitHub/botzee/.venv/lib/python3.11/site-packages/traitlets/config/application.py", line 1075, in launch_instance
    app.start()
  File "/Users/TheColellas/Documents/GitHub/botzee/.venv/lib/python3.11/site-packages/ipykernel/

In [2]:
class GameEnvironment:
    def __init__(self):
        self.hand = []
        self.rolls_left = 3
        self.score_card = {
            'ones': -1,
            'twos': -1,
            'threes': -1,
            'fours': -1,
            'fives': -1,
            'sixes': -1,
            'three_of_a_kind': -1,
            'four_of_a_kind': -1,
            'full_house': -1,
            'small_straight': -1,
            'large_straight': -1,
            'yahtzee': -1,
            'chance': -1,
            'total_score': 0
        }
    
    def get_state(self):
        return self.hand + [self.rolls_left] + list(self.score_card.values())

    def roll(self, keepers_idx):
        for i in range(5):
            if i not in keepers_idx:
                self.hand[i] = random.randint(1, 6)
        self.rolls_left -= 1
        return self.hand
    
    def get_potential_scores(self):
        potential_scores = {
            'ones': sum([x for x in self.hand if x == 1]),
            'twos': sum([x for x in self.hand if x == 2]),
            'threes': sum([x for x in self.hand if x == 3]),
            'fours': sum([x for x in self.hand if x == 4]),
            'fives': sum([x for x in self.hand if x == 5]),
            'sixes': sum([x for x in self.hand if x == 6]),
            'three_of_a_kind': sum(self.hand) if (
                np.unique(self.hand, return_counts = True)[1].max() >= 3
            ) else 0,
            'four_of_a_kind': sum(self.hand) if (
                np.unique(self.hand, return_counts = True)[1].max() >= 3
            ) else 0,
            'full_house': 25 if (
                (np.unique(self.hand, return_counts = True)[1].max() == 3) &
                (np.unique(self.hand, return_counts = True)[1].min() == 2)
            ) else 0,
            'small_straight': 30 if (
                all(number in self.hand for number in [1,2,3,4]) or
                all(number in self.hand for number in [2,3,4,5]) or
                all(number in self.hand for number in [3,4,5,6])
            ) else 0,
            'large_straight': 40 if (
                all(number in self.hand for number in [1,2,3,4,5]) or
                all(number in self.hand for number in [2,3,4,5,6])
            ) else 0,
            'yahtzee': 50 if len(set(self.hand)) == 1 else 0,
            'chance': sum(self.hand)
        }
        return potential_scores

    def mark_score(self, chosen_score_type, chosen_score):
        self.score_card[chosen_score_type] = chosen_score
        self.score_card['total_score'] += chosen_score

    def reset(self):
        self.__init__()
        return self.get_state()
    
class DiceModel(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim):
        super(DiceModel, self).__init__()
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.fc2 = nn.Linear(hidden_dim, hidden_dim)
        self.fc3 = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        x = torch.sigmoid(self.fc3(x))
        return x
    
class ScoreModel(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim):
        super(ScoreModel, self).__init__()
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.fc2 = nn.Linear(hidden_dim, hidden_dim)
        self.fc3 = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        x = self.fc3(x)
        return x

In [3]:
def select_dice(state, dice_model, EPSILON, THREHOLD = 0.5):
    # Ensure state is a tensor
    state = torch.FloatTensor(state)
    # Exploration
    if random.random() < EPSILON:
        return torch.randint(1, 2, (5,))
    # Exploitation
    else:
        probs = dice_model(state)
        return (probs > THREHOLD).float()
        
def select_score(state, score_model, EPSILON):
    # Ensure state is a tensor
    state = torch.FloatTensor(state)
    # Exploration
    if random.random() < EPSILON:
        return torch.randint(0, 13, (13,))
    # Exploitation
    else:
        logits = score_model(state)
        probs = torch.softmax(logits, dim=0)
        invalid_actions = [x for x in range(13) if state[6:-1][x] != -1]
        mask = torch.ones(13)
        mask[invalid_actions] = 0
        probs = probs * mask
        probs = probs / probs.sum()
        return torch.multinomial(probs, 1).item()

def simulate_game(model, EPSILON):
    env = GameEnvironment()
    state = torch.FloatTensor(env.reset())
    total_reward = 0
    
    while all(~env.score_card.values()):
        # Roll dice
        num_dice = 5 - env.hand
        env.hand = [random.randint(1, 7) for _ in range(num_dice)]
        # Select dice to keep
        keepers = select_dice(state, model, EPSILON)
        keepers_idx = [i for i in range(5) if keepers[i] == 1]
        # Update hand
        env.hand = env.hand[keepers_idx]
        env.rolls_left -= 1

In [4]:
score_label_dict = {
    0: 'ones',
    1: 'twos',
    2: 'threes',
    3: 'fours',
    4: 'fives',
    5: 'sixes',
    6: 'three_of_a_kind',
    7: 'four_of_a_kind',
    8: 'full_house',
    9: 'small_straight',
    10: 'large_straight',
    11: 'yahtzee',
    12: 'chance'
}

In [5]:
dice_model = DiceModel(20, 64, 5)
score_model = ScoreModel(20, 64, 13)
EPSILON = 0.0

env = GameEnvironment()
state = torch.FloatTensor(env.reset())
total_reward = 0

while any(value < 0 for value in env.score_card.values()):

    # First two rolls
    for _ in range(2):
        # Roll dice and add to hand
        num_dice_to_roll = 5 - len(env.hand)
        rolled_dice = [random.randint(1, 7) for _ in range(num_dice_to_roll)]
        env.hand = env.hand + rolled_dice
        # Select dice to keep
        state = torch.FloatTensor(env.get_state())
        dice_decision = select_dice(state, dice_model, EPSILON)
        env.hand = [env.hand[x] for x in range(5) if dice_decision[x] == 1]
        # Calculate reward
        env.rolls_left -= 1
        next_state = env.get_state()

    # Final roll
    num_dice_to_roll = 5 - len(env.hand)
    rolled_dice = [random.randint(1, 7) for _ in range(num_dice_to_roll)]
    env.hand = env.hand + rolled_dice

    # Select score
    state = torch.FloatTensor(env.get_state())
    score_decision = score_label_dict[select_score(state, score_model, EPSILON)]
    score_amount = env.get_potential_scores()[score_decision]
    env.mark_score(score_decision, score_amount)

    print(
        f'Hand: {env.hand}\n'
        f'Chosen Score: {score_decision}\n'
        f'Score Amount: {score_amount}\n'
        f'Total score: {env.score_card["total_score"]}\n'
    )

Hand: [6, 2, 6, 5, 2]
Chosen Score: small_straight
Score Amount: 0
Total score: 0

Hand: [6, 5, 2, 1, 2]
Chosen Score: three_of_a_kind
Score Amount: 0
Total score: 0

Hand: [6, 5, 1, 5, 3]
Chosen Score: four_of_a_kind
Score Amount: 0
Total score: 0

Hand: [6, 5, 5, 3, 7]
Chosen Score: threes
Score Amount: 3
Total score: 3

Hand: [3, 3, 1, 6, 5]
Chosen Score: ones
Score Amount: 1
Total score: 4

Hand: [3, 6, 3, 1, 7]
Chosen Score: sixes
Score Amount: 6
Total score: 10

Hand: [6, 7, 6, 1, 3]
Chosen Score: twos
Score Amount: 0
Total score: 10

Hand: [7, 5, 1, 1, 1]
Chosen Score: fives
Score Amount: 5
Total score: 15

Hand: [1, 3, 1, 7, 6]
Chosen Score: chance
Score Amount: 18
Total score: 33

Hand: [7, 1, 2, 5, 3]
Chosen Score: yahtzee
Score Amount: 0
Total score: 33

Hand: [6, 1, 4, 7, 6]
Chosen Score: large_straight
Score Amount: 0
Total score: 33

Hand: [2, 6, 1, 5, 4]
Chosen Score: full_house
Score Amount: 0
Total score: 33

Hand: [5, 5, 6, 7, 7]
Chosen Score: fours
Score Amount: 0
To

In [6]:
env.score_card

{'ones': 1,
 'twos': 0,
 'threes': 3,
 'fours': 0,
 'fives': 5,
 'sixes': 6,
 'three_of_a_kind': 0,
 'four_of_a_kind': 0,
 'full_house': 0,
 'small_straight': 0,
 'large_straight': 0,
 'yahtzee': 0,
 'chance': 18,
 'total_score': 33}