In [6]:
import numpy as np
import pandas as pd

from __future__ import division

### Problem Description
The game DieN is played in the following way. Consider a die with N sides ͈where N is an integer greater than 1͉ and a nonempty set B of integers. 

<br>
The rules of the game are: 
<br>1. You start with 0 dollars. 
<br>2. Roll an N-sided die with a different number from 1 to N printed on each side. 
     <br>&nbsp;&nbsp; a. If you roll a number not in B, you receive that many dollars. ͈eg. if you roll the number 2 and 2 is not in B, then you receive 2 dollars.͉ 
    <br>&nbsp;&nbsp; b. If you roll a number in B, then you lose all of your obtained money and the game ends. 
<br>3. After you roll the die ͈and don’t roll a number in B͉, you have the option to quit the game. If you quit, you keep all the money you’ve earned to that point. If you continue to roll, go back to step 2. 

### Goal
For this problem, determine an optimal policy for playing the game DieN with N sides. You will be given N and an array B isBadSide which indicates which sides are bad. The policy should depend on your current bankroll.

<br>
Also, let's compute the expected value if we follow an optimal policy?

In [63]:
class NSidedDice:
    def __init__(self, B):
        '''
        INPUT
            N: integer. Represents number of sides on die.
            B: tuple of 0's and 1's. 1 indices represent numbers s.t. the game ends.
        '''
        self.N = len(B)
        self.B = B
        self.fail_prob = np.sum(B) / len(B)
        # state is amount of money in bank account. We start with none.
        self.state = 0
        # Let's create a list of numbers where we will win money.
        self.win_indices = [n for n in range(self.N) if self.B[n] == 0]
        # We also want a corresponding list of paths' probabilities and those
        # paths' end values.
        self.path_probs = []
        self.path_values = []
        # current_prob keeps track of the probability of our current path
        self.current_prob = 1 
    
    
    def calculate_expected_values(self):
        '''
        calculates expected value of rolling dice.
        '''
        expected_gains = np.sum([(1 / self.N) * (val + 1) for val in self.win_indices])
        expected_loss = self.fail_prob * self.state
        
        self.expected_value = expected_gains - expected_loss
        

    def run_bellman(self):
        '''
        Run simulation of all possible outcomes to calculate Bellman Equation
        '''
        for i in range(self.N):
            self.current_prob *= 1 / self.N
            # case where we rolled a winning number
            if i in self.win_indices:
                self.state += (i + 1)
                self.calculate_expected_values()
                if self.expected_value < 0:
                    self.path_probs.append(self.current_prob)
                    self.path_values.append(self.state)
                # if expected value is positive, we roll again
                else:
                    self.run_bellman()
            # case where we rolled a number in B
            else:
                self.path_probs.append(self.current_prob)
                self.path_values.append(0)
        
    def get_expected_value(self):
        self.run_bellman()
        self.expected_value = np.sum(
            [prob * val for prob, val in zip(self.path_probs, self.path_values)]
        )

In [69]:
dice = NSidedDice([1,1,1,0,0,0])
dice.get_expected_value()

In [70]:
dice.path_values

[0, 0, 0, 0, 0, 0, 8, 13, 19, 24, 30]

In [71]:
dice.expected_value

6.4471305963408902e-06

In [72]:
np.sum(dice.path_probs)

0.19922839496984965