In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from plotnine import *

### Utility Functions

In [20]:
def roll_die(ndie = 5) :
    "This function sample n dice with replacement"
    rolls = np.random.choice(range(1,7), ndie, replace=True)
    rolls.sort()
    return(rolls)
    
roll_die()

array([2, 2, 2, 3, 3])

### Exploratory Data Analysis

In [21]:
num_yahtzee = 0
for i in range(10000):
    roll = roll_die()
    unique = np.unique(roll)
    if len(unique) == 1:
        num_yahtzee += 1
print('# yahtzees', num_yahtzee)

# yahtzees 5


### Yahtzee Game

In [60]:
import random

def roll_dice(num_dice=5):
  return [random.randint(1, 6) for _ in range(num_dice)]

class Yahtzee:
  def __init__(self):
    self.scores = {
      "Ones": None,
      "Twos": None,
      "Threes": None,
      "Fours": None,
      "Fives": None,
      "Sixes": None,
      "Three of a Kind": None,
      "Four of a Kind": None,
      "Full House": None,
      "Small Straight": None,
      "Large Straight": None,
      "Yahtzee": None,
      "Chance": None,
    }

  def is_full(self):
    return all(value is not None for value in self.scores.values())

  def check_potential_scores(self, roll):
    scores = {}
    if sum([dice for dice in roll if dice == 1]) > 0:
      scores["Ones"] = sum([dice for dice in roll if dice == 1])
    if sum([dice for dice in roll if dice == 2]) > 0:
      scores["Twos"] = sum([dice for dice in roll if dice == 2])
    if sum([dice for dice in roll if dice == 3]) > 0:
      scores["Threes"] = sum([dice for dice in roll if dice == 3])
    if sum([dice for dice in roll if dice == 4]) > 0:
      scores["Fours"] = sum([dice for dice in roll if dice == 4])
    if sum([dice for dice in roll if dice == 5]) > 0:
      scores["Fives"] = sum([dice for dice in roll if dice == 5])
    if sum([dice for dice in roll if dice == 6]) > 0:
      scores["Sixes"] = sum([dice for dice in roll if dice == 6])
    if self._check_n_of_a_kind(roll, 3):
      scores["Three of a Kind"] = sum(roll) if self._check_n_of_a_kind(roll, 3) else 0
    if self._check_n_of_a_kind(roll, 4):
      scores["Four of a Kind"] = sum(roll)
    if self._check_full_house(roll):
      scores["Full House"] = 25
    if self._check_straight(roll, 4):
      scores["Small Straight"] = 30
    if self._check_straight(roll, 5):
      scores["Large Straight"] = 40
    if self._check_n_of_a_kind(roll, 5):
      scores["Yahtzee"] = 50
    scores["Chance"] = sum(roll)
    return scores

  def update_score(self, roll, category):
    if category == "Ones":
      self.scores["Ones"] = sum([dice for dice in roll if dice == 1])
    elif category == "Twos":
      self.scores["Twos"] = sum([dice for dice in roll if dice == 2])
    elif category == "Threes":
      self.scores["Threes"] = sum([dice for dice in roll if dice == 3])
    elif category == "Fours":
      self.scores["Fours"] = sum([dice for dice in roll if dice == 4])
    elif category == "Fives":
      self.scores["Fives"] = sum([dice for dice in roll if dice == 5])
    elif category == "Sixes":
      self.scores["Sixes"] = sum([dice for dice in roll if dice == 6])
    elif category == "Three of a Kind":
      self.scores["Three of a Kind"] = sum(roll) if self._check_n_of_a_kind(roll, 3) else 0
    elif category == "Four of a Kind":
      self.scores["Four of a Kind"] = sum(roll) if self._check_n_of_a_kind(roll, 4) else 0
    elif category == "Full House":
      self.scores["Full House"] = 25 if self._check_full_house(roll) else 0
    elif category == "Small Straight":
      self.scores["Small Straight"] = 30 if self._check_straight(roll, 4) else 0
    elif category == "Large Straight":
      self.scores["Large Straight"] = 40 if self._check_straight(roll, 5) else 0
    elif category == "Yahtzee":
      self.scores["Yahtzee"] = 50 if self._check_n_of_a_kind(roll, 5) else 0
    elif category == "Chance":
      self.scores["Chance"] = sum(roll)

  def _check_n_of_a_kind(self, roll, n):
    return any(roll.count(value) >= n for value in set(roll))

  def _check_full_house(self, roll):
    unique_values, counts = np.unique(roll, return_counts=True)
    # Check if there are exactly two unique values (three of one number and two of another)
    if len(unique_values) == 2:
        # Check if counts are either 2 and 3, or 3 and 2
        if (counts == 2).sum() == 1 and (counts == 3).sum() == 1:
            return True
    return False

  def _check_straight(self, roll, length):
    return sorted(set(roll)) == list(range(min(roll), min(roll) + length))

### Scoring Helper Functions

In [49]:
def choose_available_category_naive(card):
  """
  A helper function which adds points to the scorecard based on the first unfilled category in the scorecard
  """
  for category, value in card.scores.items():
    if value is None:
      return category
  return None
  

### Strategy Functions

In [64]:

def naive_sequential_strategy(card, verbose=False):
  """
  This strategy just rolls the dice once and selects the first available option in the scorecard,
  effectively discarding the 2nd and 3rd rolls as opportunities to maximize points.
  """
  while not card.is_full():
    roll = roll_dice()
    if verbose:
      print(roll, end="  ")
    available_category = choose_available_category_naive(card)
    if verbose:
      print(f'Category Selection: {available_category}')
    card.update_score(roll, available_category)
  return card

def naive_point_aware_strategy(card, verbose=False):
  """
  This strategy just rolls the dice once and selects the option in the scorecard that yields maximum points.
  It discards the 2nd and 3rd roll as opportunities to maximize points.
  """
  while not card.is_full():
    roll = roll_dice()
    if verbose:
      print(roll, end="  ")
    scores = card.check_potential_scores(roll)
    sorted_scores = sorted(scores.items(), key=lambda x: x[1], reverse=True)
    flag = True
    # iterate through possible point-scoring categories
    for category, category_score in sorted_scores:
      if card.scores[category] is None:
        card.scores[category] = category_score
        flag = False
        if verbose:
          print(f'Category Selection: {category} ({category_score})')
        break
    # if no opportunity to score points, take a 0 in the first available category on the scorecard
    if flag:
      available_category = choose_available_category_naive(card)
      if verbose:
        print(f'Category Selection: {available_category} (0)')
      card.update_score(roll, available_category)
  return card

In [65]:
# Create a Yahtzee card object
card = Yahtzee()

# Run the simple strategy on the card
naive_sequential_strategy(card, verbose=True)

# Print the final scores
print("Yahtzee Scorecard:")
for category, score in card.scores.items():
  print(f"{category}: {score}")

[6, 5, 4, 3, 1]  Category Selection: Ones
[3, 4, 5, 4, 2]  Category Selection: Twos
[2, 4, 3, 6, 1]  Category Selection: Threes
[6, 1, 1, 3, 6]  Category Selection: Fours
[1, 4, 5, 6, 2]  Category Selection: Fives
[2, 3, 3, 3, 3]  Category Selection: Sixes
[5, 3, 1, 6, 6]  Category Selection: Three of a Kind
[6, 4, 3, 4, 5]  Category Selection: Four of a Kind
[6, 4, 3, 2, 3]  Category Selection: Full House
[1, 3, 4, 2, 1]  Category Selection: Small Straight
[5, 5, 1, 4, 4]  Category Selection: Large Straight
[3, 6, 6, 1, 6]  Category Selection: Yahtzee
[2, 5, 6, 2, 1]  Category Selection: Chance
Yahtzee Scorecard:
Ones: 1
Twos: 2
Threes: 3
Fours: 0
Fives: 5
Sixes: 0
Three of a Kind: 0
Four of a Kind: 0
Full House: 0
Small Straight: 30
Large Straight: 0
Yahtzee: 0
Chance: 16


In [66]:
# Create a Yahtzee card object
card = Yahtzee()

# Run the simple strategy on the card
naive_point_aware_strategy(card, verbose=True)

# Print the final scores
print("Yahtzee Scorecard:")
for category, score in card.scores.items():
  print(f"{category}: {score}")

[5, 4, 2, 3, 5]  Category Selection: Small Straight (30)
[3, 1, 6, 6, 3]  Category Selection: Chance (19)
[3, 2, 3, 6, 4]  Category Selection: Threes (6)
[5, 3, 2, 4, 1]  Category Selection: Large Straight (40)
[1, 3, 2, 3, 5]  Category Selection: Fives (5)
[1, 6, 3, 3, 4]  Category Selection: Sixes (6)
[3, 6, 6, 1, 4]  Category Selection: Fours (4)
[2, 2, 4, 4, 5]  Category Selection: Twos (4)
[1, 4, 6, 4, 3]  Category Selection: Ones (1)
[2, 2, 4, 1, 5]  Category Selection: Three of a Kind (0)
[1, 3, 6, 2, 4]  Category Selection: Four of a Kind (0)
[3, 6, 4, 6, 5]  Category Selection: Full House (0)
[3, 1, 1, 3, 2]  Category Selection: Yahtzee (0)
Yahtzee Scorecard:
Ones: 1
Twos: 4
Threes: 6
Fours: 4
Fives: 5
Sixes: 6
Three of a Kind: 0
Four of a Kind: 0
Full House: 0
Small Straight: 30
Large Straight: 40
Yahtzee: 0
Chance: 19


In [53]:
def simple_strategy(card):
  while not card.is_full():
    roll = roll_dice()
    card.update_score(roll, choose_available_category(card))
  return card

def fill_down_strategy(card):
  while not card.is_full():
    empty_categories = [category for category, value in card.scores.items() if value is None]
    roll = roll_dice()
    card.update_score(roll, empty_categories[0])
  return card



def keep_mode_strategy(card):
  while not card.is_full():
    roll = roll_dice()
    for _ in range(2):
      roll = reroll_dice(roll, keep_most_frequent

SyntaxError: unexpected EOF while parsing (3253692562.py, line 20)

In [4]:

# sample die 1000 times
walk5 = []
for i in range(1,1000) :
    roll = roll_die()
    walk5 = np.concatenate((walk5, roll))

#plt.hist(walk5)

# histogram of results 
walks = pd.DataFrame({'Face': walk5[:]}) # must be pandas df
ggplot(walks) +\
 aes(x = 'Face') +\
 geom_histogram(color = 'white', bins = 6) +\
 scale_x_continuous(breaks = range(1,7))

#run the function 1000 times for rolling 1,2,3,4,5 dice
walk1 = []; walk2 = []; walk3 = []; walk4 = []; walk5 = []
for i in range(1,1000) :
    roll = roll_die()
    walk5 = np.concatenate((walk5, roll))
    
    roll = roll_die(ndie = 4)
    walk4 = np.concatenate((walk4, roll))
    
    roll = roll_die(ndie = 3)
    walk3 = np.concatenate((walk3, roll))
    
    roll = roll_die(ndie = 2)
    walk2 = np.concatenate((walk2, roll))
    
    roll = roll_die(ndie = 1)
    walk1 = np.concatenate((walk1, roll))


# build dataframe of each series of rolls and stack them together
walks = pd.concat([pd.DataFrame({'Rolls': walk5, 'Type': "five"}),
                   pd.DataFrame({'Rolls': walk4, 'Type': "four"}),
                   pd.DataFrame({'Rolls': walk3, 'Type': "three"}),
                   pd.DataFrame({'Rolls': walk2, 'Type': "two"}),
                   pd.DataFrame({'Rolls': walk1, 'Type': "one"})])



In [5]:
walks

Unnamed: 0,Rolls,Type
0,2.0,five
1,2.0,five
2,6.0,five
3,6.0,five
4,6.0,five
...,...,...
994,2.0,one
995,4.0,one
996,4.0,one
997,6.0,one


In [6]:
# histogram of the results across different sized rolls
ggplot(walks) +\
 aes(x = 'Rolls') +\
 geom_histogram(color = 'white', bins = 6) +\
 scale_x_continuous(breaks = range(1,7)) +\
 facet_wrap('Type')

del(walk5, walk4, walk3, walk2, walk1, walks, roll, i)

# =============================================================================
# define function to calculate box score
# =============================================================================

def calculate_score(roll_results, verbose = False) :
    #function returns the maximum score for a dice throw
    #verbose argument prints out the score sheet
     
    #sort roll_results for readability
    roll_results.sort()
    
    #grab the number of die per face
    Ones = roll_results[roll_results == 1]
    Twos = roll_results[roll_results == 2]
    Threes = roll_results[roll_results == 3]
    Fours = roll_results[roll_results == 4]
    Fives = roll_results[roll_results == 5]
    Sixes = roll_results[roll_results == 6]
    
    #calculate frequency of each die  
    counts = []
    for count in [Ones, Twos, Threes, Fours, Fives, Sixes]:
        counts.append(len(count))
    
    #convert to numpy array
    counts = np.array(counts)
    
    #find unique number of faces then check for kinds
    face_sum = sum(roll_results)
    two_of_kind = any(counts == 2) #exactly two of a kind
    three_of_kind = any(counts == 3) * face_sum #exactly three of a kind
    four_of_kind = any(counts == 4) * face_sum#exactly four of a kind
    yahtzee = any(counts == 5) * 50 #exactly five of a kind
    
    #full house
    full_house = (two_of_kind & (three_of_kind > 0)) * 25
    
    #calculate if there is a small straight by checking if the die match either 1:4 or 2:5 or 3:6
    straight4 = (all(x in roll_results for x in range(1,5)) or
                 all(x in roll_results for x in range(2,6)) or
                 all(x in roll_results for x in range(3,7))) * 30
    
    #calculate if there is a large straight by checking if the die match 1:5 or 2:6
    straight5 = (all(x in roll_results for x in range(1,6)) or
                 all(x in roll_results for x in range(2,7))) * 40
    
    #https://stackoverflow.com/questions/54368328/is-there-a-panda-equivalent-to-tribble
    def tribble(columns, *data):
        return pd.DataFrame(
            data=list(zip(*[iter(data)]*len(columns))),
            columns=columns
        )
    
    #dataframe of the results
    results = tribble(
            ["Result", "Score"],
            "Roll results", np.array2string(roll_results, separator = "-"),
            "Ones", sum(Ones),
            "Twos", sum(Twos),
            "Threes", sum(Threes),
            "Fours", sum(Fours),
            "Fives", sum(Fives),
            "Sixes", sum(Sixes),
            "3 of a kind", three_of_kind,
            "4 of a kind", four_of_kind,
            "Full house", full_house,
            "Small straight", straight4,
            "Large straight", straight5,
            "Chance", sum(roll_results),
            "YAHTZEE", yahtzee
      )
    
    if verbose : print(results)
    best_result = max(results.iloc[range(1,14),1])
    return(best_result)


roll_results = roll_die()
roll_results = np.array([3,3,3,4,3])
print(roll_results)
score = calculate_score(roll_results = roll_results, verbose = True)
print(score)

# =============================================================================
# simulate scores
# =============================================================================

# play 10000 rolls of yahtzee
scores = []
for i in range(10000) :
    score = calculate_score(roll_results = roll_die())
    scores.append(score)

# density plot of the scores
scores = pd.DataFrame({'Score': scores}) # must be pandas df
ggplot(scores) +\
 aes(x = 'Score') +\
 geom_density()
print(scores)

[3 3 3 4 3]
            Result        Score
0     Roll results  [3-3-3-3-4]
1             Ones            0
2             Twos            0
3           Threes           12
4            Fours            4
5            Fives            0
6            Sixes            0
7      3 of a kind            0
8      4 of a kind           16
9       Full house            0
10  Small straight            0
11  Large straight            0
12          Chance           16
13         YAHTZEE            0
16
      Score
0        16
1        17
2        17
3        21
4        19
...     ...
9995     19
9996     20
9997     16
9998     20
9999     40

[10000 rows x 1 columns]
