In [1]:
from collections import defaultdict
from dataclasses import dataclass
from enum import Enum
from itertools import chain, combinations
from typing import Dict, List, Optional, Set, Tuple, Union

A state in the game consists of a `ScoreCard`, a `Roll`, and the `roll_number` (from 1 to 3) within the turn; the code below calls this number `rolls_completed`. The `ScoreCard` contains all information from the previous turns. The `Roll` just contains the values of the five dice that have been rolled at a given point.

I need to write a function that takes a `ScoreCard` and a `Roll` and returns the possible scores for the `Roll` (given what's already been marked on the `ScoreCard`).

I also need to write a function that returns the possible actions given a `Roll` and a `roll_number`. One of these actions is "score", which a player can do given any `Roll` and `roll_number`, and is required to do if the `roll_number` is 3. In practice "score" stands for one of several actions, namely the outputs of the scoring function described above. The other actions involve leaving some of the five dice on the table so that the rest can be re-rolled, provided the turn is not over. These actions are easy to encode: each is a subset of $\{1, 2, 3, 4, 5\}$ denoting the positions of the dice to leave alone (the code below uses zero-based positions 0–4). There are $2^5 - 1 = 31$ such subsets, because keeping all five dice is excluded: it means some "score" action is chosen instead of re-rolling.

Finally, I need to write code that actually plays the game by executing the rolls sequentially and calculates scores given the player's actions; i.e., I need to write the reward function.

In [2]:
class Box(Enum):
    """
    The thirteen scoring boxes of the score card.

    Each member's value is the snake_case name of the box; these strings are
    the same as the field names declared on `ScoreCard`.
    """

    # Upper section: one box per die face.
    Ones = "ones"
    Twos = "twos"
    Threes = "threes"
    Fours = "fours"
    Fives = "fives"
    Sixes = "sixes"
    # Lower section: pattern-based boxes.
    ThreeOfAKind = "three_of_a_kind"
    FourOfAKind = "four_of_a_kind"
    FullHouse = "full_house"
    SmallStraight = "small_straight"
    LargeStraight = "large_straight"
    Yahtzee = "yahtzee"
    Chance = "chance"

In [3]:
class Die:
    """
    A single six-sided die showing one face.

    Raises:
        ValueError: if `value` is not found in `range(1, 7)`.
    """

    def __init__(self, value: int):
        # Guard clause: reject anything that is not one of the six faces.
        if value not in range(1, 7):
            raise ValueError("Value must be an integer from 1 to 6.")
        self.value = value

    def __repr__(self):
        return "Value = {}".format(self.value)

In [4]:
@dataclass(frozen=True)
class RollAction:
    """
    A re-roll action, identified by the dice that are left on the table.

    Frozen, so instances are hashable and can be stored in sets.
    """

    # Positions of the dice to keep; `GameState` builds these from
    # `combinations(range(5), i)`, i.e. zero-based positions 0-4.
    dice_positions_to_keep: Tuple

@dataclass(frozen=True)
class ScoreAction:
    """
    A scoring action: marking one box of the score card with a score.

    Frozen, so instances are hashable and can be stored in sets.
    """

    # Points written into the box (0 when the roll does not fit the box).
    score: int
    # Zero-based positions of the dice associated with the score; empty for
    # the zero-score actions produced by `Roll.score_actions`.
    dice_positions: Tuple
    # The box of the score card that is marked.
    box: Box

In [5]:
class Roll:
    """
    The dice showing on the table at one point in a turn.

    `values` keeps the face values, in table order, as the tuple passed to the
    constructor. `dice` holds one `Die` per value, so an out-of-range value
    raises `ValueError` from `Die`. Dice positions used throughout are
    zero-based indexes into `values`; the scoring code assumes five dice.
    """

    # The three runs of four consecutive faces that make a small straight.
    _SMALL_STRAIGHTS = ({1, 2, 3, 4}, {2, 3, 4, 5}, {3, 4, 5, 6})

    def __init__(self, *values):
        self.values = values
        self.dice = [Die(v) for v in values]

    def __repr__(self):
        return f"Values = {', '.join([str(d.value) for d in self.dice])}"

    def dice_positions(self, value) -> Tuple:
        """Return the zero-based positions of every die showing `value`."""
        return tuple(pos for pos, v in enumerate(self.values) if v == value)

    @property
    def value_counts(self):
        """Map each face value present in the roll to how many dice show it."""
        result = defaultdict(int)
        for v in self.values:
            result[v] += 1
        return result

    def is_three_of_a_kind(self) -> bool:
        """True when at least three dice show the same value."""
        # ">=" rather than "==": four of a kind and Yahtzee also qualify.
        return any(c >= 3 for c in self.value_counts.values())

    def is_four_of_a_kind(self) -> bool:
        """True when at least four dice show the same value."""
        # ">=" rather than "==": a Yahtzee also qualifies.
        return any(c >= 4 for c in self.value_counts.values())

    def is_full_house(self) -> bool:
        """True when the roll is three of one value plus a pair of another."""
        # With five dice, a pair next to "at least three" means exactly 3 + 2.
        return self.is_three_of_a_kind() and any(c == 2 for c in self.value_counts.values())

    def is_small_straight(self) -> bool:
        """True when the roll contains four consecutive values."""
        values = set(self.values)
        return any(run.issubset(values) for run in self._SMALL_STRAIGHTS)

    def is_large_straight(self) -> bool:
        """True when the roll is five consecutive values."""
        return tuple(sorted(set(self.values))) in {(1, 2, 3, 4, 5), (2, 3, 4, 5, 6)}

    def is_yahtzee(self) -> bool:
        """True when every die shows the same value."""
        return len(set(self.values)) == 1

    @property
    def score_actions(self) -> List[ScoreAction]:
        """
        Gives the possible score actions for a roll, acting as if the Score Card is empty.

        Every box is represented: a box the roll does not fit gets a
        zero-score action with no dice positions. The order is the six upper
        boxes, Three of a Kind, Four of a Kind, Full House, Small Straight,
        Large Straight, Yahtzee, Chance. A small straight made of four distinct
        values plus one repeated value yields two Small Straight actions, one
        per choice of which repeated die is left out.

        Three and Four of a Kind score the total of all five dice; their
        `dice_positions` are the dice showing the repeated value.
        """
        results = []
        all_positions = tuple(range(5))
        counts = self.value_counts

        # Upper section: each box scores the sum of the dice showing its face.
        upper_boxes = [Box.Ones, Box.Twos, Box.Threes, Box.Fours, Box.Fives, Box.Sixes]
        for value, box in enumerate(upper_boxes, start=1):
            positions = self.dice_positions(value)
            results.append(ScoreAction(value * len(positions), positions, box))

        # Three / Four of a Kind: at most one value can reach the required count
        # with five dice, so the first match is the only match.
        total = sum(self.values)
        for box, needed in ((Box.ThreeOfAKind, 3), (Box.FourOfAKind, 4)):
            matching = [v for v, c in counts.items() if c >= needed]
            if matching:
                results.append(ScoreAction(total, self.dice_positions(matching[0]), box))
            else:
                results.append(ScoreAction(0, (), box))

        if self.is_full_house():
            results.append(ScoreAction(25, all_positions, Box.FullHouse))
        else:
            results.append(ScoreAction(0, (), Box.FullHouse))

        if self.is_small_straight():
            if self.is_large_straight():
                results.append(ScoreAction(30, all_positions, Box.SmallStraight))
            else:
                distinct = set(self.values)
                run = next(s for s in self._SMALL_STRAIGHTS if s.issubset(distinct))
                leftover = distinct - run
                if leftover:
                    # Five distinct values: exactly one die lies outside the run.
                    spare_positions = self.dice_positions(next(iter(leftover)))
                else:
                    # If there is a repeated value, then return two SmallStraight actions.
                    # These two actions contain different dice positions. This may be
                    # convenient later for choosing which dice to reroll.
                    repeated_value = next(v for v, c in counts.items() if c == 2)
                    spare_positions = self.dice_positions(repeated_value)
                for spare in spare_positions:
                    kept = tuple(p for p in all_positions if p != spare)
                    results.append(ScoreAction(30, kept, Box.SmallStraight))
        else:
            # The zero-score fallback must be recorded under SmallStraight.
            results.append(ScoreAction(0, (), Box.SmallStraight))

        if self.is_large_straight():
            results.append(ScoreAction(40, all_positions, Box.LargeStraight))
        else:
            results.append(ScoreAction(0, (), Box.LargeStraight))

        if self.is_yahtzee():
            results.append(ScoreAction(50, all_positions, Box.Yahtzee))
        else:
            results.append(ScoreAction(0, (), Box.Yahtzee))

        # Chance: any roll qualifies and scores the total of all dice.
        results.append(ScoreAction(total, all_positions, Box.Chance))

        return results
    

In [6]:
# Example roll: positions 1-4 hold 3, 6, 4, 5 (a small straight); position 0 holds the odd 1.
roll = Roll(1, 3, 6, 4, 5)

In [7]:
# Display the score actions for the example roll, as if the score card were empty.
roll.score_actions

[ScoreAction(score=1, dice_positions=(0,), box=<Box.Ones: 'ones'>),
 ScoreAction(score=0, dice_positions=(), box=<Box.Twos: 'twos'>),
 ScoreAction(score=3, dice_positions=(1,), box=<Box.Threes: 'threes'>),
 ScoreAction(score=4, dice_positions=(3,), box=<Box.Fours: 'fours'>),
 ScoreAction(score=5, dice_positions=(4,), box=<Box.Fives: 'fives'>),
 ScoreAction(score=6, dice_positions=(2,), box=<Box.Sixes: 'sixes'>),
 ScoreAction(score=0, dice_positions=(), box=<Box.ThreeOfAKind: 'three_of_a_kind'>),
 ScoreAction(score=0, dice_positions=(), box=<Box.FourOfAKind: 'four_of_a_kind'>),
 ScoreAction(score=0, dice_positions=(), box=<Box.FullHouse: 'full_house'>),
 ScoreAction(score=30, dice_positions=(1, 2, 3, 4), box=<Box.SmallStraight: 'small_straight'>),
 ScoreAction(score=0, dice_positions=(), box=<Box.LargeStraight: 'large_straight'>),
 ScoreAction(score=0, dice_positions=(), box=<Box.Yahtzee: 'yahtzee'>),
 ScoreAction(score=19, dice_positions=(0, 1, 2, 3, 4), box=<Box.Chance: 'chance'>)]

In [8]:
# Scratch check of the small-straight lookup used inside `Roll.score_actions`:
# find the first run of four contained in the roll and collect the values outside it.
small_straights = [{1, 2, 3, 4}, {2, 3, 4, 5}, {3, 4, 5, 6}]
values = set(roll.values)
# Bind the result up front so the name is never left unbound (or stale from an
# earlier execution of this cell) when the roll holds no small straight.
not_in_small_straight = []
for s in small_straights:
    if s.issubset(values):
        not_in_small_straight = list(values - s)
        break


In [9]:
@dataclass
class ScoreCard:
    """
    The score card for one player.

    Each field holds the score marked in the corresponding box, or `None`
    while the box is still open. A marked score of 0 is a filled box, not an
    open one.
    """

    # Upper-section bonus rule: reaching the threshold adds the bonus.
    # Left unannotated on purpose so the dataclass does not treat them as fields.
    UPPER_BONUS_THRESHOLD = 63
    UPPER_BONUS = 35

    ones: Optional[int] = None
    twos: Optional[int] = None
    threes: Optional[int] = None
    fours: Optional[int] = None
    fives: Optional[int] = None
    sixes: Optional[int] = None

    three_of_a_kind: Optional[int] = None
    four_of_a_kind: Optional[int] = None
    full_house: Optional[int] = None
    small_straight: Optional[int] = None
    large_straight: Optional[int] = None
    yahtzee: Optional[int] = None
    chance: Optional[int] = None

    @property
    def top_scores_dict(self) -> Dict[str, Optional[int]]:
        """Upper-section boxes by field name (`None` for open boxes)."""
        return {"ones": self.ones, "twos": self.twos, "threes": self.threes, "fours": self.fours, "fives": self.fives, "sixes": self.sixes}

    @property
    def bottom_scores_dict(self) -> Dict[str, Optional[int]]:
        """Lower-section boxes by field name (`None` for open boxes)."""
        return {"three_of_a_kind": self.three_of_a_kind, "four_of_a_kind": self.four_of_a_kind, "full_house": self.full_house, "small_straight": self.small_straight, "large_straight": self.large_straight, "yahtzee": self.yahtzee, "chance": self.chance}

    @property
    def all_scores_dict(self) -> Dict[str, Optional[int]]:
        """All thirteen boxes by field name, upper section first."""
        return {**self.top_scores_dict, **self.bottom_scores_dict}

    @property
    def top_score(self) -> int:
        """Upper-section total, including the bonus once the threshold is reached."""
        partial_sum = sum(s for s in self.top_scores_dict.values() if s is not None)
        if partial_sum >= self.UPPER_BONUS_THRESHOLD:
            return partial_sum + self.UPPER_BONUS
        return partial_sum

    @property
    def bottom_score(self) -> int:
        """Lower-section total; open boxes count as 0."""
        return sum(s for s in self.bottom_scores_dict.values() if s is not None)

    @property
    def score(self) -> int:
        """Total score: upper section (with bonus) plus lower section."""
        return self.top_score + self.bottom_score

    @property
    def game_finished(self) -> bool:
        """True once every box has been marked."""
        # Compare against None explicitly: a box scored 0 is filled but falsy,
        # so a plain truthiness test would keep the game from ever finishing.
        return all(s is not None for s in self.all_scores_dict.values())

In [10]:
class GameState:
    """
    One decision point in a turn: the score card so far, the dice on the
    table, and how many rolls of the current turn have been made.

    Raises:
        ValueError: if `rolls_completed` is not 1, 2, or 3.
    """

    def __init__(self, scorecard: ScoreCard, roll: Roll, rolls_completed: int):
        if rolls_completed not in {1, 2, 3}:
            raise ValueError("`rolls_completed` must be 1, 2, or 3")
        self.scorecard = scorecard
        self.roll = roll
        self.rolls_completed = rolls_completed
        # Every way of keeping 0 to 4 of the five dice (31 subsets). Keeping all
        # five is not a re-roll, so the full set is left out.
        self.possible_roll_actions = {
            RollAction(kept)
            for size in range(5)
            for kept in combinations(range(5), size)
        }

    @property
    def possible_scores(self) -> Set[ScoreAction]:
        """
        Takes scorecard and roll and returns all possible scores for the roll.
        Also returns the positions of the dice (from 0 to 4) and the box
        (on the scorecard) that give that score.

        Only boxes that are still open (`None`) on the score card are offered.
        """
        # `Box` values are the `ScoreCard` field names, so the box's current
        # mark can be looked up by attribute name.
        return {
            action
            for action in self.roll.score_actions
            if getattr(self.scorecard, action.box.value) is None
        }

    @property
    def possible_actions(self) -> Set[Union[RollAction, ScoreAction]]:
        """
        Takes scorecard and roll_number and returns the possible actions.

        Scoring is always allowed; re-rolling is allowed only while fewer than
        three rolls have been made this turn.
        """
        actions: Set[Union[RollAction, ScoreAction]] = set(self.possible_scores)
        if self.rolls_completed < 3:
            actions |= self.possible_roll_actions
        return actions
        

In [11]:
# Build an example state: an empty score card, the roll 1-2-3-4-5,
# and one roll completed in the current turn.
scorecard = ScoreCard()
roll = Roll(1, 2, 3, 4, 5)
rolls_completed = 1

gamestate = GameState(scorecard, roll, rolls_completed)