In [33]:
from collections import defaultdict
from dataclasses import dataclass
from enum import Enum
from typing import AbstractSet, Dict, List, Optional, Set, Tuple

In [38]:
class Box(Enum):
    """The thirteen scoring boxes of a scorecard.

    Each member's value is the name of the matching ``ScoreCard`` field.
    """

    # Number boxes (the ones summed by ``ScoreCard.top_score``).
    Ones = "ones"
    Twos = "twos"
    Threes = "threes"
    Fours = "fours"
    Fives = "fives"
    Sixes = "sixes"
    # Combination boxes (the ones summed by ``ScoreCard.bottom_score``).
    ThreeOfAKind = "three_of_a_kind"
    FourOfAKind = "four_of_a_kind"
    FullHouse = "full_house"
    SmallStraight = "small_straight"
    LargeStraight = "large_straight"
    Yahtzee = "yahtzee"
    Chance = "chance"

In [23]:
class Die:
    """A single six-sided die showing a fixed face value.

    Attributes:
        value: The face showing, an ``int`` from 1 to 6.
    """

    def __init__(self, value: int):
        """Validate and store the face value.

        Args:
            value: The face showing on the die.

        Raises:
            ValueError: If ``value`` is not an ``int`` from 1 to 6.
        """
        # ``value in range(1, 7)`` on its own compares by equality, so it
        # would also let 3.0 and True through; check the type explicitly.
        is_plain_int = isinstance(value, int) and not isinstance(value, bool)
        if not is_plain_int or value not in range(1, 7):
            raise ValueError("Value must be an integer from 1 to 6.")
        self.value = value

    def __repr__(self):
        return f"Value = {self.value}"

In [31]:
class Roll:
    """The face values of the dice on the table at one point in a turn.

    Attributes:
        values: Tuple of the raw face values, in the order given.
        dice: One ``Die`` per value; building them validates each value.
    """

    # Every run of four consecutive faces that makes a small straight.
    _SMALL_STRAIGHT_RUNS = ({1, 2, 3, 4}, {2, 3, 4, 5}, {3, 4, 5, 6})

    def __init__(self, *values):
        """Store the face values and build a ``Die`` for each of them."""
        self.values = values
        self.dice = [Die(v) for v in values]

    def __repr__(self):
        return f"Values = {', '.join(str(d.value) for d in self.dice)}"

    @property
    def value_counts(self):
        """Return a ``defaultdict`` mapping each face value to its count."""
        result = defaultdict(int)
        for v in self.values:
            result[v] += 1
        return result

    def is_three_of_a_kind(self) -> bool:
        """Return True if at least three dice show the same face.

        Four or five matching dice also qualify, so the test is ``>= 3``
        rather than an exact match.
        """
        return any(count >= 3 for count in self.value_counts.values())

    def is_four_of_a_kind(self) -> bool:
        """Return True if at least four dice show the same face."""
        return any(count >= 4 for count in self.value_counts.values())

    def is_full_house(self) -> bool:
        """Return True if one face appears exactly three times and another exactly twice."""
        counts = list(self.value_counts.values())
        return 3 in counts and 2 in counts

    def is_large_straight(self) -> bool:
        """Return True if the distinct faces are exactly 1-5 or 2-6."""
        return tuple(sorted(set(self.values))) in {(1, 2, 3, 4, 5), (2, 3, 4, 5, 6)}

    def is_small_straight(self) -> bool:
        """Return True if the roll contains four consecutive faces.

        The remaining die may be anything, including a duplicate, so this
        is a subset test on the distinct faces. A large straight always
        contains a small one.
        """
        faces = set(self.values)
        return any(run <= faces for run in self._SMALL_STRAIGHT_RUNS)

    def is_yahtzee(self) -> bool:
        """Return True if every die shows the same face."""
        return len(set(self.values)) == 1
    

In [25]:
@dataclass
class ScoreCard:
    """Scores recorded so far, one optional entry per scoring box.

    A box holds ``None`` until it has been scored; 0 is a legitimate
    recorded score and counts as filled.
    """

    # Number boxes.
    ones: Optional[int] = None
    twos: Optional[int] = None
    threes: Optional[int] = None
    fours: Optional[int] = None
    fives: Optional[int] = None
    sixes: Optional[int] = None

    # Combination boxes.
    three_of_a_kind: Optional[int] = None
    four_of_a_kind: Optional[int] = None
    full_house: Optional[int] = None
    small_straight: Optional[int] = None
    large_straight: Optional[int] = None
    yahtzee: Optional[int] = None
    chance: Optional[int] = None

    @property
    def top_scores_dict(self) -> Dict[str, Optional[int]]:
        """Return the six number boxes keyed by field name."""
        return {
            "ones": self.ones,
            "twos": self.twos,
            "threes": self.threes,
            "fours": self.fours,
            "fives": self.fives,
            "sixes": self.sixes,
        }

    @property
    def bottom_scores_dict(self) -> Dict[str, Optional[int]]:
        """Return the seven combination boxes keyed by field name."""
        return {
            "three_of_a_kind": self.three_of_a_kind,
            "four_of_a_kind": self.four_of_a_kind,
            "full_house": self.full_house,
            "small_straight": self.small_straight,
            "large_straight": self.large_straight,
            "yahtzee": self.yahtzee,
            "chance": self.chance,
        }

    @property
    def all_scores_dict(self) -> Dict[str, Optional[int]]:
        """Return every box keyed by field name, number boxes first."""
        return {**self.top_scores_dict, **self.bottom_scores_dict}

    @property
    def top_score(self) -> int:
        """Return the number-box total, plus a 35-point bonus at 63 or more.

        Unfilled boxes contribute nothing.
        """
        partial_sum = sum(s for s in self.top_scores_dict.values() if s is not None)
        return partial_sum + 35 if partial_sum >= 63 else partial_sum

    @property
    def bottom_score(self) -> int:
        """Return the combination-box total; unfilled boxes contribute nothing."""
        return sum(s for s in self.bottom_scores_dict.values() if s is not None)

    @property
    def score(self) -> int:
        """Return the overall total: ``top_score`` plus ``bottom_score``."""
        return self.top_score + self.bottom_score

    @property
    def game_finished(self) -> bool:
        """Return True once every box has been filled.

        Compares against ``None`` rather than relying on truthiness, so a
        box scored as 0 still counts as filled.
        """
        return all(s is not None for s in self.all_scores_dict.values())

In [41]:
@dataclass(frozen=True)
class RollAction:
    """Re-roll action identified by the dice positions to keep.

    Attributes:
        dice_positions_to_keep: Positions of the dice to keep. Any set
            is accepted and stored as a ``frozenset``.
    """

    dice_positions_to_keep: AbstractSet[int]

    def __post_init__(self):
        # A frozen dataclass hashes its fields, and a plain ``set`` is
        # unhashable, so freeze the field to make instances usable in sets
        # and as dict keys. ``object.__setattr__`` is required because the
        # dataclass itself is frozen.
        object.__setattr__(
            self, "dice_positions_to_keep", frozenset(self.dice_positions_to_keep)
        )

@dataclass(frozen=True)
class ScoreAction:
    """Scoring action: mark a score in a box using particular dice.

    Attributes:
        score: Points recorded by this action.
        dice_positions: Positions of the dice that give the score. Any
            set is accepted and stored as a ``frozenset``.
        box: The scorecard box the score is recorded in.
    """

    score: int
    dice_positions: AbstractSet[int]
    box: Box

    def __post_init__(self):
        # A frozen dataclass hashes its fields, and a plain ``set`` is
        # unhashable, so freeze the field to make instances usable in sets
        # and as dict keys. ``object.__setattr__`` is required because the
        # dataclass itself is frozen.
        object.__setattr__(self, "dice_positions", frozenset(self.dice_positions))

In [43]:
class GameState:
    """A point in the game: the scorecard, the current roll, and the roll count.

    Attributes:
        scorecard: Scores recorded on previous turns.
        roll: The dice currently on the table.
        rolls_completed: How many rolls have been made this turn (1, 2, or 3).
    """

    def __init__(self, scorecard: ScoreCard, roll: Roll, rolls_completed: int):
        """Store the state after validating ``rolls_completed``.

        Raises:
            ValueError: If ``rolls_completed`` is not 1, 2, or 3.
        """
        if rolls_completed not in {1, 2, 3}:
            raise ValueError("`rolls_completed` must be 1, 2, or 3")
        self.scorecard = scorecard
        self.roll = roll
        self.rolls_completed = rolls_completed

    @property
    def possible_scores(self) -> Set[ScoreAction]:
        """
        Takes scorecard and roll and returns all possible scores for the roll.
        Also returns the positions of the dice and the box (on the scorecard)
        that give that score.

        Currently a hard-coded placeholder that ignores both.
        """
        # maybe structure this as a DataFrame?
        # The positions are passed as a frozenset: ScoreAction is a frozen
        # dataclass that hashes its fields, so a plain set here would make
        # building the result set raise TypeError.
        return {ScoreAction(18, frozenset({1, 2, 3}), Box.Sixes)}  # placeholder until real implementation

    @property
    def possible_actions(self):
        """
        Takes scorecard and rolls_completed and returns the possible actions.
        """
        # TODO: not implemented yet; this property currently evaluates to None.

In [30]:
# Display a sample roll to check Roll.__repr__.
Roll(1, 2, 3, 4, 5)

Values = 1, 2, 3, 4, 5

A state in the game consists of a `ScoreCard`, `Roll`, and the `roll_number` from 1 to 3 within the turn. The `ScoreCard` contains all information from the previous turns. The `Roll` just contains the values of the five dice that have been rolled at a given point.

I need to write a function that takes a `ScoreCard` and a `Roll` and returns the possible scores for the `Roll` (given what's already been marked on the `ScoreCard`).

I also need to write a function that returns the possible actions given a `Roll` and a `roll_number`. One of these actions is "score", which a player can do given any `Roll` and `roll_number`, and is required to do if the `roll_number` is 3. Actually "score" normally allows one of several actions, given by the outputs of the scoring function described above. The other actions involve leaving some number of the five dice on the table, so the others can be re-rolled if the turn is not over. These last actions are easy to encode: I can just write them as a subset of $\{1, 2, 3, 4, 5\}$, denoting the positions of dice to leave alone; there are 31 possibilities (the 32 subsets minus the full set: leaving all five dice alone re-rolls nothing, so in that case some "score" action is chosen instead of re-rolling).

Finally, I need to write code that actually plays the game by executing the rolls sequentially and calculates scores given the player's actions; i.e., I need to write the reward function.

In [10]:
# Start from an empty scorecard: every box defaults to None.
scorecard = ScoreCard()

In [11]:
# No box has been filled yet, so the game is not finished.
scorecard.game_finished

False