# Setup

In [None]:
# Imports
import mdptoolbox
import numpy as np
import scipy
import pandas as pd
import random

In [None]:
# Load the establishment table from disk. Later cells read its
# 'Establishment', 'Dice Roll', 'Cost', 'Earnings' and 'Type' columns.
cards = pd.read_csv(filepath_or_buffer='cards.csv', header=0)
cards  # echo the table as the cell output

# Background functions for card likelihood and value calculations

In [None]:
# Function to calculate how good a card is to buy

# Probability of each dice total, keyed by the number of dice rolled.
_ROLL_PROBABILITIES = {
    1: {1: 1/6, 2: 1/6, 3: 1/6, 4: 1/6, 5: 1/6, 6: 1/6, 7: 0/36,
        8: 0/36, 9: 0/36, 10: 0/36, 11: 0/36, 12: 0/36},
    2: {1: 0/36, 2: 1/36, 3: 2/36, 4: 3/36, 5: 4/36, 6: 5/36,
        7: 6/36, 8: 5/36, 9: 4/36, 10: 3/36, 11: 2/36, 12: 1/36},
}

# Per-unit earnings used for the cards whose payout is multiplied by the
# caller according to other holdings; their 'Earnings' column is not read.
_FIXED_BASE_EARNINGS = {
    'Cheese Factory': 3,
    'Furniture Factory': 3,
    'Fruit and Vegetable Market': 2,
}


def card_value(card, dice_count=None, card_table=None):
    """Return the activation likelihood, earnings and cost of a card.

    Parameters
    ----------
    card : str
        Establishment name, matched against the 'Establishment' column.
    dice_count : int, optional
        Number of dice being rolled (1 or 2). When omitted, the
        module-level ``num_dice`` set by the game loop is used.
    card_table : pandas.DataFrame, optional
        Table with 'Establishment', 'Dice Roll', 'Cost' and 'Earnings'
        columns. When omitted, the module-level ``cards`` table is used.

    Returns
    -------
    tuple
        ``(likelihood, earnings, cost)``. ``likelihood`` is the summed
        probability of every 'Dice Roll' value listed for the card;
        ``earnings`` and ``cost`` are taken from the card's last row.

    Raises
    ------
    ValueError
        If ``dice_count`` is not 1 or 2, or the card is not in the table.
    """
    if dice_count is None:
        dice_count = num_dice
    if card_table is None:
        card_table = cards

    if dice_count not in _ROLL_PROBABILITIES:
        raise ValueError("dice count must be 1 or 2, got " + repr(dice_count))
    probs = _ROLL_PROBABILITIES[dice_count]

    rows = card_table[card_table['Establishment'] == card]
    if rows.empty:
        raise ValueError("unknown establishment: " + repr(card))

    # A card may be listed once per activating roll; add up all of them.
    likelihood = sum(probs[int(roll)] for roll in rows['Dice Roll'])

    # Cost and earnings come from the final matching row.
    last_row = rows.iloc[-1]
    cost = last_row['Cost']
    if card in _FIXED_BASE_EARNINGS:
        earnings = _FIXED_BASE_EARNINGS[card]
    else:
        earnings = int(last_row['Earnings'])

    return likelihood, earnings, cost

In [None]:
# Function to determine how many dice to use
def choose_dice(num_dice, current_cards, card_table=None):
    """Decide whether to roll one die or two.

    With a single die available there is no choice. With two available,
    every activating roll of every held card casts one vote: for one die
    when that roll is strictly more likely with one die than with two,
    otherwise for two dice. Ties (including no votes) go to one die.

    Parameters
    ----------
    num_dice : int
        Number of dice the roller may use (1 or 2).
    current_cards : list of str
        Establishment names held by the roller; duplicates vote again.
    card_table : pandas.DataFrame, optional
        Table with 'Establishment' and 'Dice Roll' columns. When omitted,
        the module-level ``cards`` table is used.

    Returns
    -------
    int
        1 or 2, the number of dice to roll.

    Raises
    ------
    ValueError
        If ``num_dice`` is neither 1 nor 2.
    """
    probs1 = {1: 1/6, 2: 1/6, 3: 1/6, 4: 1/6, 5: 1/6, 6: 1/6,
              7: 0/36, 8: 0/36, 9: 0/36, 10: 0/36, 11: 0/36, 12: 0/36}
    probs2 = {1: 0/36, 2: 1/36, 3: 2/36, 4: 3/36, 5: 4/36, 6: 5/36,
              7: 6/36, 8: 5/36, 9: 4/36, 10: 3/36, 11: 2/36, 12: 1/36}

    if num_dice == 1:
        return num_dice
    if num_dice != 2:
        raise ValueError("num_dice must be 1 or 2, got " + repr(num_dice))

    if card_table is None:
        card_table = cards

    # The decision depends only on the roller's own cards; it must not
    # consult the other side's dice count.
    votes_one = 0
    votes_two = 0
    for card in current_cards:
        matches = card_table[card_table['Establishment'] == card]
        for roll in matches['Dice Roll']:
            if probs1[roll] > probs2[roll]:
                votes_one += 1
            else:
                votes_two += 1

    return 1 if votes_one >= votes_two else 2

# Game Initialization

In [None]:
# Generate middle pool of cards
# Six copies of each establishment, grouped type by type.
_establishment_names = [
    'Wheat Field', 'Ranch', 'Forest', 'Mine', 'Apple Orchard', 'Bakery',
    'Convenience Store', 'Cheese Factory', 'Furniture Factory',
    'Fruit and Vegetable Market',
]
all_cards = [name for name in _establishment_names for _ in range(6)]

# Shuffle the 60 cards into a draw pile, then deal the first six face up
# as the pool that cards are bought from.
card_stack = random.sample(all_cards, 60)
current_pool, card_stack = card_stack[:6], card_stack[6:]

In [None]:
# Initialize the Game
# Both sides start from the same position: two cards, three coins,
# no landmarks and a single die. Each side gets its own list object.
player_cards, computer_cards = ['Wheat Field', 'Bakery'], ['Wheat Field', 'Bakery']
player_coins = computer_coins = 3
player_landmarks = computer_landmarks = 0
player_dice = computer_dice = 1

# Coin cost of the next landmark, indexed by how many are already built.
upgrade_costs = [4, 10, 16, 22]

# 'player' takes the first turn.
current_player = 'player'

# Play a game until someone wins with the reward vectors:
## (0, 1, 2, 3, 4) for purchases and passing

In [None]:
# Play until someone wins
#
# This method uses fixed rewards for all decisions, with increasing rewards
# for additional landmarks: (0, 1, 2, 3, 4).
#
# The loop reads and updates the module-level game state (player_* and
# computer_* variables, current_pool, card_stack, current_player). If that
# state already describes a finished game, the loop body never runs.

# Cards whose payout is multiplied by the number of supporting cards held.
_COMBO_SUPPORT = {
    'Cheese Factory': ('Ranch',),
    'Furniture Factory': ('Forest', 'Mine'),
    'Fruit and Vegetable Market': ('Wheat Field', 'Apple Orchard'),
}

# Base payout per combo card per supporting card.
_COMBO_BASE = {
    'Cheese Factory': 3,
    'Furniture Factory': 3,
    'Fruit and Vegetable Market': 2,
}

# When rolling two dice, buying a supporting card is credited with the combo
# card it feeds: (combo card, payout per pairing, extra likelihood).
# NOTE(review): 7/36 for the Furniture Factory does not match any single
# two-dice total probability; kept as in the original, confirm against
# cards.csv.
_SUPPORT_SYNERGY = {
    'Ranch': ('Cheese Factory', 3, 6/36),
    'Forest': ('Furniture Factory', 3, 7/36),
    'Mine': ('Furniture Factory', 3, 7/36),
    'Wheat Field': ('Fruit and Vegetable Market', 2, 3/36),
    'Apple Orchard': ('Fruit and Vegetable Market', 2, 3/36),
}


def _support_count(card, holdings):
    """Count the held cards that multiply the payout of combo card `card`."""
    return sum(holdings.count(name) for name in _COMBO_SUPPORT[card])


def _establishment_payout(row, holdings):
    """Return the coins `holdings` earns when the card in `row` activates."""
    name = row['Establishment']
    if name in _COMBO_SUPPORT:
        return _COMBO_BASE[name] * holdings.count(name) * _support_count(name, holdings)
    return int(row['Earnings']) * holdings.count(name)


def _collect_income(dice_roll, roller, holdings_by_side):
    """Announce and total each side's income for a roll.

    Type 1 cards pay every side that holds them; type 2 cards pay only the
    side that rolled. Returns a dict mapping side name to coins earned.
    """
    income = {side: 0 for side in holdings_by_side}
    for _, row in cards[cards['Dice Roll'] == dice_roll].iterrows():
        if row['Type'] == 1:
            earners = ('player', 'computer')
        elif row['Type'] == 2:
            earners = (roller,)
        else:
            continue
        for side in earners:
            if row['Establishment'] in holdings_by_side[side]:
                earnings = _establishment_payout(row, holdings_by_side[side])
                income[side] += int(earnings)
                print(side + " earned " + str(earnings) + " for having a " + row['Establishment'])
    return income


def _upgrade_progress(holdings):
    """Estimate per-turn advance probabilities from the cards in `holdings`.

    Each held card adds likelihood * earnings, divided by each landmark cost.
    """
    progress = np.zeros(4)
    for held in holdings:
        likelihood, earnings, _ = card_value(held)
        if held in _COMBO_SUPPORT:
            earnings = earnings * _support_count(held, holdings)
        progress += ((likelihood * earnings) / np.array(upgrade_costs))
    return progress


def _purchase_outlook(candidate, holdings, dice):
    """Return (likelihood, earnings) credited for buying `candidate`."""
    likelihood, earnings, _ = card_value(candidate)
    if candidate in _COMBO_SUPPORT:
        earnings = earnings * _support_count(candidate, holdings)
    elif dice == 2 and candidate in _SUPPORT_SYNERGY:
        combo_card, payout, extra_likelihood = _SUPPORT_SYNERGY[candidate]
        earnings += (holdings.count(candidate) * holdings.count(combo_card) * payout)
        likelihood += extra_likelihood
    return likelihood, earnings


def _transition_matrix(advance_probs):
    """Build the 5x5 landmark-state transition matrix for one action.

    State i stays put with probability 1 - p[i] and moves to i + 1 with
    probability p[i]; state 4 is absorbing. Probabilities are clipped to
    [0, 1] so every row remains a valid distribution.
    """
    clipped = np.clip(np.asarray(advance_probs, dtype=float), 0., 1.)
    matrix = np.zeros((5, 5))
    for level, prob in enumerate(clipped):
        matrix[level, level] = 1 - prob
        matrix[level, level + 1] = prob
    matrix[4, 4] = 1
    return matrix


number_of_turns = 0
while (player_landmarks < 4) and (computer_landmarks < 4):
    number_of_turns += 1

    is_player = current_player == 'player'
    holdings_by_side = {'player': player_cards, 'computer': computer_cards}
    active_cards = holdings_by_side[current_player]

    num_dice = choose_dice(player_dice if is_player else computer_dice, active_cards)

    print(current_player + " is rolling " + str(num_dice) + " dice.")
    # Roll the dice - either 1 or 2 depending on whether the Train Station is unlocked
    if num_dice == 1:
        # A single die has six equally likely faces.
        dice_roll = np.random.choice([1, 2, 3, 4, 5, 6])
    elif num_dice == 2:
        dice_roll = np.random.choice([2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12],
                                     p=[1/36, 2/36, 3/36, 4/36, 5/36, 6/36,
                                        5/36, 4/36, 3/36, 2/36, 1/36])

    print(current_player + " rolled a " + str(dice_roll))

    # Add coins if a side has that Establishment, according to the card types
    income = _collect_income(dice_roll, current_player, holdings_by_side)
    player_coins += income['player']
    computer_coins += income['computer']

    # The side whose turn it is receives one coin if it has none
    if is_player and player_coins == 0:
        player_coins += 1
        print("player earned 1 coin for having none")
    elif (not is_player) and computer_coins == 0:
        computer_coins += 1
        print("computer earned 1 coin for having none")

    print("\nCurrent Score:")
    print("Player:")
    print("Player Cards: " + str(player_cards))
    print("Player Coins: " + str(player_coins))
    print("Player Landmarks: " + str(player_landmarks))

    print("Computer:")
    print("Computer Cards: " + str(computer_cards))
    print("Computer Coins: " + str(computer_coins))
    print("Computer Landmarks: " + str(computer_landmarks))

    # Work on local copies of the active side's counters; they are written
    # back to the module-level variables once the decision has been applied.
    active_coins = player_coins if is_player else computer_coins
    active_landmarks = player_landmarks if is_player else computer_landmarks

    # Choice of construction (or pass)
    # 1) Construct options based on current coin bank
    can_buy = cards[(cards['Cost'] <= active_coins) & (cards['Establishment'].isin(current_pool))]
    can_upgrade = upgrade_costs[active_landmarks] <= active_coins

    # 2) Create probability distributions and reward vectors
    # Action 0 (pass) uses the advance probabilities of the current holdings
    transition_probabilities_current = _upgrade_progress(active_cards)

    print("\nCurrent transition probabilities:")
    print(transition_probabilities_current)

    Ps = [_transition_matrix(transition_probabilities_current)]

    # One further action per affordable card, evaluated for that card itself
    for _, row in can_buy.iterrows():
        likelihood, earnings = _purchase_outlook(row['Establishment'], active_cards, num_dice)
        print("Buying the card: " + row['Establishment'] + " results in the following improvement")
        possible_new_probabilities = transition_probabilities_current + ((likelihood * earnings) / np.array(upgrade_costs))
        print(possible_new_probabilities)
        print("The one time cost is " + str(row['Cost']))
        Ps.append(_transition_matrix(possible_new_probabilities))

    # 3) MDP of choices with a discount factor of 0.9
    # Compare the policies of:
    # 1) Doing nothing
    # 2) Buy one of the cards in the central pool
    Ps = np.array(Ps)
    # Reward of landmark state s is s for every action (states x actions).
    Rs = np.array([[float(level)] * len(Ps) for level in range(5)])

    vi = mdptoolbox.mdp.ValueIteration(Ps, Rs, 0.9)  # discount value of 0.9
    vi.run()

    policy = vi.policy[active_landmarks]

    if can_upgrade:
        # An affordable landmark is always built, whatever the policy says
        print("\n" + current_player + " will upgrade.")
        active_coins -= upgrade_costs[active_landmarks]
        active_landmarks += 1
    else:
        print("\nThe best policy is: " + str(policy))
        if policy == 0:
            print(current_player + " will pass.")
        else:
            # 4) Subtract from coin bank and add to holdings
            # Action k (k >= 1) corresponds to the k-th row of can_buy
            print(current_player + " will buy:")
            purchase = can_buy.iloc[policy - 1]
            to_buy = purchase['Establishment']
            print(to_buy)
            active_coins -= int(purchase['Cost'])
            active_cards.append(to_buy)

            # Remove the card from the pool and refill the pool from the
            # draw pile, unless the pile has run out
            current_pool.remove(to_buy)
            if card_stack:
                current_pool.append(card_stack.pop(0))

    # Write the active side's counters back to the module-level state
    if is_player:
        player_coins, player_landmarks = active_coins, active_landmarks
    else:
        computer_coins, computer_landmarks = active_coins, active_landmarks

    # Increase the number of dice if someone has unlocked the first landmark
    if player_landmarks >= 1:
        player_dice = 2
    if computer_landmarks >= 1:
        computer_dice = 2

    # Switch players
    current_player = 'computer' if is_player else 'player'

    # Determine when someone has won
    if player_landmarks == 4:
        print("Game over. player has won!")
    elif computer_landmarks == 4:
        print("Game over. computer has won!")

    print("\n\n")

print("This took " + str(number_of_turns) + " turns.")

# Play a game until someone wins with the reward vectors:
## (0, 1, 2, 3, 4) for purchases 
## (0, 2, 4, 6, 8) for passing

In [None]:
# Play until someone wins

# This method uses fixed rewards for all decisions, with increasing rewards for additional landmarks 0, 1, 2, 3, 4
# *except* for the reward for doing nothing, which has values 0, 2, 4, 6, 8

number_of_turns = 0
while (player_landmarks < 4) and (computer_landmarks < 4): 
    number_of_turns += 1
    
    if current_player == 'player':
        num_dice = choose_dice(player_dice, player_cards)
    elif current_player == 'computer':
        num_dice = choose_dice(computer_dice, computer_cards)

    print(current_player + " is rolling " + str(num_dice) + " dice.")
    # Roll the dice - either 1 or 2 depending on whether the Train Station is unlocked
    if num_dice == 1:
        dice_roll = np.random.choice([1,2,3,4,5])
    elif num_dice == 2:
        dice_roll = np.random.choice([2,3,4,5,6,7,8,9,10,11,12], 
                                     p=[1/36, 2/36, 3/36, 4/36, 5/36, 6/36, 
                                        5/36, 4/36, 3/36, 2/36, 1/36])

    print(current_player + " rolled a " + str(dice_roll))

    # Add coins if the player has that Establishment, according to the card types
    if current_player == 'player':
        # If the player rolled
        for index, row in cards[cards['Dice Roll']==dice_roll].iterrows():
            if row['Type'] == 1:
                # Everyone with the Establishment earns coins
                if row['Establishment'] in player_cards:
                    if row['Establishment'] == 'Cheese Factory':
                        earnings = 3*player_cards.count('Cheese Factory')*player_cards.count('Ranch')
                    elif row['Establishment'] == 'Furniture Factory':
                        earnings = 3*player_cards.count('Furniture Factory')*(player_cards.count('Forest')+player_cards.count('Mine'))
                    elif row['Establishment'] == 'Fruit and Vegetable Market':
                        earnings = 2*player_cards.count('Fruit and Vegetable Market')*(player_cards.count('Wheat Field')+player_cards.count('Apple Orchard'))
                    else:
                        earnings = int(row['Earnings'])*player_cards.count(row['Establishment'])
                    player_coins += int(earnings)
                    print("player earned " + str(earnings) + " for having a " + row['Establishment'])
                if row['Establishment'] in computer_cards:
                    if row['Establishment'] == 'Cheese Factory':
                        earnings = 3*computer_cards.count('Cheese Factory')*computer_cards.count('Ranch')
                    elif row['Establishment'] == 'Furniture Factory':
                        earnings = 3*computer_cards.count('Furniture Factory')*(computer_cards.count('Forest')+computer_cards.count('Mine'))
                    elif row['Establishment'] == 'Fruit and Vegetable Market':
                        earnings = 2*computer_cards.count('Fruit and Vegetable Market')*(computer_cards.count('Wheat Field')+computer_cards.count('Apple Orchard'))
                    else:
                        earnings = int(row['Earnings'])*computer_cards.count(row['Establishment'])
                    computer_coins += int(earnings)
                    print("computer earned " + str(earnings) + " for having a " + row['Establishment'])
            elif row['Type'] == 2:
                # Only the person who rolled earns coins
                if row['Establishment'] in player_cards:
                    if row['Establishment'] == 'Cheese Factory':
                        earnings = 3*player_cards.count('Cheese Factory')*player_cards.count('Ranch')
                    elif row['Establishment'] == 'Furniture Factory':
                        earnings = 3*player_cards.count('Furniture Factory')*(player_cards.count('Forest')+player_cards.count('Mine'))
                    elif row['Establishment'] == 'Fruit and Vegetable Market':
                        earnings = 2*player_cards.count('Fruit and Vegetable Market')*(player_cards.count('Wheat Field')+player_cards.count('Apple Orchard'))
                    else:
                        earnings = int(row['Earnings'])*player_cards.count(row['Establishment'])
                    player_coins += int(earnings)
                    print("player earned " + str(earnings) + " for having a " + row['Establishment'])
    elif current_player == 'computer':
        # If the computer rolled
        for index, row in cards[cards['Dice Roll']==dice_roll].iterrows():
            if row['Type'] == 1:
                # Everyone with the Establishment earns coins
                if row['Establishment'] in player_cards:
                    if row['Establishment'] == 'Cheese Factory':
                        earnings = 3*player_cards.count('Cheese Factory')*player_cards.count('Ranch')
                    elif row['Establishment'] == 'Furniture Factory':
                        earnings = 3*player_cards.count('Furniture Factory')*(player_cards.count('Forest')+player_cards.count('Mine'))
                    elif row['Establishment'] == 'Fruit and Vegetable Market':
                        earnings = 2*player_cards.count('Fruit and Vegetable Market')*(player_cards.count('Wheat Field')+player_cards.count('Apple Orchard'))
                    else:
                        earnings = int(row['Earnings'])*player_cards.count(row['Establishment'])
                    player_coins += int(earnings)
                    print("player earned " + str(earnings) + " for having a " + row['Establishment'])
                if row['Establishment'] in computer_cards: 
                    if row['Establishment'] == 'Cheese Factory':
                        earnings = 3*computer_cards.count('Cheese Factory')*computer_cards.count('Ranch')
                    elif row['Establishment'] == 'Furniture Factory':
                        earnings = 3*computer_cards.count('Furniture Factory')*(computer_cards.count('Forest')+computer_cards.count('Mine'))
                    elif row['Establishment'] == 'Fruit and Vegetable Market':
                        earnings = 2*computer_cards.count('Fruit and Vegetable Market')*(computer_cards.count('Wheat Field')+computer_cards.count('Apple Orchard'))
                    else:
                        earnings = int(row['Earnings'])*computer_cards.count(row['Establishment'])
                    computer_coins += int(earnings)
                    print("computer earned " + str(earnings) + " for having a " + row['Establishment'])
            elif row['Type'] == 2:
                if row['Establishment'] in computer_cards:
                    if row['Establishment'] == 'Cheese Factory':
                        earnings = 3*computer_cards.count('Cheese Factory')*computer_cards.count('Ranch')
                    elif row['Establishment'] == 'Furniture Factory':
                        earnings = 3*computer_cards.count('Furniture Factory')*(computer_cards.count('Forest')+computer_cards.count('Mine'))
                    elif row['Establishment'] == 'Fruit and Vegetable Market':
                        earnings = 2*computer_cards.count('Fruit and Vegetable Market')*(computer_cards.count('Wheat Field')+computer_cards.count('Apple Orchard'))
                    else:
                        earnings = int(row['Earnings'])*computer_cards.count(row['Establishment'])
                    computer_coins += int(earnings)
                    print("computer earned " + str(earnings) + " for having a " + row['Establishment'])

    if current_player == 'player':
        if player_coins == 0:
            player_coins += 1
            print("player earned 1 coin for having none")
    elif current_player == 'computer':
        if computer_coins == 0:
            computer_coins += 1
            print("computer earned 1 coin for having none")

    print("\nCurrent Score:")
    print("Player:")
    print("Player Cards: " + str(player_cards))
    print("Player Coins: " + str(player_coins))
    print("Player Landmarks: " + str(player_landmarks))

    print("Computer:")
    print("Computer Cards: " + str(computer_cards))
    print("Computer Coins: " + str(computer_coins))
    print("Computer Landmarks: " + str(computer_landmarks))


    # Choice of construction (or pass)
    # 1) Construct options based on current coin bank
    if current_player == 'player':
        can_buy = cards[(cards['Cost']<=player_coins) & (cards['Establishment'].isin(current_pool))]
        can_upgrade = upgrade_costs[player_landmarks] <= player_coins
    elif current_player == 'computer':
        can_buy = cards[(cards['Cost']<=computer_coins) & (cards['Establishment'].isin(current_pool))]
        can_upgrade = upgrade_costs[computer_landmarks] <= computer_coins


    # 2) Create probability distributions and reward vectors
    # Calculate how likely transition to next state is based on current card holdings
    transition_probabilities_current = np.zeros(4)
    if current_player == 'player':
        for card in player_cards:
            likelihood, earnings, cost = card_value(card)
            if card == 'Cheese Factory':
                earnings = earnings*player_cards.count('Ranch')
            elif card == 'Furniture Factory':
                earnings = earnings*(player_cards.count('Forest')+player_cards.count('Mine'))
            elif card == 'Fruit and Vegetable Market':
                earnings = earnings*(player_cards.count('Wheat Field')+player_cards.count('Apple Orchard'))
            transition_probabilities_current += ((likelihood*earnings)/np.array(upgrade_costs))
    elif current_player == 'computer':
        for card in computer_cards:
            likelihood, earnings, cost = card_value(card)
            if card == 'Cheese Factory':
                earnings = earnings*computer_cards.count('Ranch')
            elif card == 'Furniture Factory':
                earnings = earnings*(computer_cards.count('Forest')+computer_cards.count('Mine'))
            elif card == 'Fruit and Vegetable Market':
                earnings = earnings*(computer_cards.count('Wheat Field')+computer_cards.count('Apple Orchard'))
            transition_probabilities_current += ((likelihood*earnings)/np.array(upgrade_costs))

    print("\nCurrent transition probabilities:")
    print(transition_probabilities_current)

    # Get the probabilities into the expected matrix form 
    Ps = []
    Pmat = []
    for i in range(0,4):
        Pmat_row = []
        for j in range(0,5):
            if j == i:
                Pmat_row.append(1-transition_probabilities_current[i])
            elif j == i+1:
                Pmat_row.append(transition_probabilities_current[i])
            else: 
                Pmat_row.append(0.)
        Pmat.append(Pmat_row)
    Pmat.append([0.,0.,0.,0.,1])
    Ps.append(Pmat)


    # Calculate how likely transition to next state is for each card that can be purchased
    for index, row in can_buy.iterrows():
        likelihood, earnings, cost = card_value(row['Establishment'])
        if current_player == 'player':
            if card == 'Cheese Factory':
                earnings = earnings*player_cards.count('Ranch')
            elif card == 'Furniture Factory':
                earnings = earnings*(player_cards.count('Forest')+player_cards.count('Mine'))
            elif card == 'Fruit and Vegetable Market':
                earnings = earnings*(player_cards.count('Wheat Field')+player_cards.count('Apple Orchard'))
            elif card == 'Ranch' and num_dice == 2:
                earnings += (player_cards.count(card)*player_cards.count('Cheese Factory')*3)
                likelihood += 6/36
            elif (card == 'Forest' or card == 'Mine') and num_dice == 2:
                earnings += (player_cards.count(card)*player_cards.count('Furniture Factory')*3)
                likelihood += 7/36
            elif (card == 'Wheat Field' or card == 'Apple Orchard') and num_dice == 2:
                earnings += (player_cards.count(card)*player_cards.count('Fruit and Vegetable Market')*2)
                likelihood += 3/36
        elif current_player == 'computer':
            if card == 'Cheese Factory':
                earnings = earnings*computer_cards.count('Ranch')
            elif card == 'Furniture Factory':
                earnings = earnings*(computer_cards.count('Forest')+computer_cards.count('Mine'))
            elif card == 'Fruit and Vegetable Market':
                earnings = earnings*(computer_cards.count('Wheat Field')+computer_cards.count('Apple Orchard'))
            elif card == 'Ranch' and num_dice == 2:
                earnings += (computer_cards.count(card)*computer_cards.count('Cheese Factory')*3)
                likelihood += 6/36
            elif (card == 'Forest' or card == 'Mine') and num_dice == 2:
                earnings += (computer_cards.count(card)*computer_cards.count('Furniture Factory')*3)
                likelihood += 7/36
            elif (card == 'Wheat Field' or card == 'Apple Orchard') and num_dice == 2:
                earnings += (computer_cards.count(card)*computer_cards.count('Fruit and Vegetable Market')*2)
                likelihood += 3/36
        print("Buying the card: " + row['Establishment'] + " results in the following improvement")
        possible_new_probabilities = transition_probabilities_current + ((likelihood*earnings)/np.array(upgrade_costs))
        print(possible_new_probabilities)
        print("The one time cost is " + str(row['Cost']))

        Pmat = []
        for i in range(0,4):
            Pmat_row = []
            for j in range(0,5):
                if j == i:
                    Pmat_row.append(1-possible_new_probabilities[i])
                elif j == i+1:
                    Pmat_row.append(possible_new_probabilities[i])
                else: 
                    Pmat_row.append(0.)
            Pmat.append(Pmat_row)
        Pmat.append([0.,0.,0.,0.,1])
        Ps.append(Pmat)

    # 3) MDP of choices with a discount factor of 0.9

    # Compare the policies of:
    # 1) Doing nothing
    # 2) Buy one of the cards in the central pool
    Ps = np.array(Ps)
    Rs = np.array([[0.]+[0.]*(len(Ps)-1),[2.]+[1.]*(len(Ps)-1),[4.]+[2.]*(len(Ps)-1),[6.]+[3.]*(len(Ps)-1),[8.]+[4.]*(len(Ps)-1)])

    vi = mdptoolbox.mdp.ValueIteration(Ps, Rs, 0.9) #discount value of 0.9
    vi.run()

    if current_player == 'player':
        policy = vi.policy[player_landmarks]
    elif current_player == 'computer':
        policy = vi.policy[computer_landmarks]

    if can_upgrade:
        print("\n" + current_player + " will upgrade.")
        # Increase landmarks
        # Decrease coins
        if current_player == 'player':
            player_coins -= upgrade_costs[player_landmarks]
            player_landmarks += 1
        elif current_player == 'computer':
            computer_coins -= upgrade_costs[computer_landmarks]
            computer_landmarks += 1
    else:
        print("\nThe best policy is: " + str(policy))
        if policy==0:
            print(current_player + " will pass.")
        else:
            # 4) Subtract from coin bank and add to holdings if relevant
            # subtract coins from the player's bank
            # add the card to the player's holdings
            print(current_player + " will buy:")
            to_buy = list(can_buy['Establishment'])[policy-1]
            print(to_buy)
            if current_player == 'player':
                player_coins -= int(list(can_buy[can_buy['Establishment']==to_buy]['Cost'])[0])
                player_cards.append(to_buy)
            elif current_player == 'computer':
                computer_coins -= int(list(can_buy[can_buy['Establishment']==to_buy]['Cost'])[0])
                computer_cards.append(to_buy)

            # pop the card from the current pool
            current_pool.pop(current_pool.index(to_buy))

            # add a new card to the current pool and pop it from the full list of remaining cards
            current_pool.append(card_stack[0])
            card_stack.pop(0)

    # Increase the number of dice if someone has unlocked the first landmark
    if player_landmarks>=1:
        player_dice = 2
    if computer_landmarks>=1:
        computer_dice = 2


    # Switch players
    if current_player == 'player':
        current_player = 'computer'
    elif current_player == 'computer':
        current_player = 'player'


    # Determine when someone has won
    if player_landmarks==4:
        print("Game over. player has won!")
    elif computer_landmarks==4:
        print("Game over. computer has won!")
        
    print("\n\n")

print("This took " + str(number_of_turns) + " turns.")

# Play a game until someone wins with the reward vectors:
## (1, 2, 3, 4, 5) for purchases 
## (1, 2, 6, 8, 10) for passing

In [None]:
# Play until someone wins

# This method uses fixed rewards for all decisions, with increasing rewards for additional landmarks 1, 2, 3, 4, 5
# *except* for the reward for doing nothing, which has values 1, 2, 6, 8, 10


def _activation_payout(row, holdings):
    """Return the coins `holdings` earns when the establishment in `row` activates.

    row      -- one row of the `cards` table (reads 'Establishment' and 'Earnings')
    holdings -- list of establishment names owned by the earning side
    """
    name = row['Establishment']
    if name == 'Cheese Factory':
        return 3*holdings.count('Cheese Factory')*holdings.count('Ranch')
    if name == 'Furniture Factory':
        return 3*holdings.count('Furniture Factory')*(holdings.count('Forest')+holdings.count('Mine'))
    if name == 'Fruit and Vegetable Market':
        return 2*holdings.count('Fruit and Vegetable Market')*(holdings.count('Wheat Field')+holdings.count('Apple Orchard'))
    return int(row['Earnings'])*holdings.count(name)


def _feeder_scaled_earnings(name, earnings, holdings):
    """Scale a factory/market's base earnings by the feeder cards in `holdings`.

    Any other establishment keeps its base earnings unchanged.
    """
    if name == 'Cheese Factory':
        return earnings*holdings.count('Ranch')
    if name == 'Furniture Factory':
        return earnings*(holdings.count('Forest')+holdings.count('Mine'))
    if name == 'Fruit and Vegetable Market':
        return earnings*(holdings.count('Wheat Field')+holdings.count('Apple Orchard'))
    return earnings


def _candidate_value(name, likelihood, earnings, holdings, dice):
    """Adjust a purchase candidate's (likelihood, earnings) for synergy with `holdings`.

    Factories/markets are scaled by the feeder cards already held. Feeder cards
    get a bonus for the factories/markets already held, but only when two dice
    are being rolled.
    """
    if name in ('Cheese Factory', 'Furniture Factory', 'Fruit and Vegetable Market'):
        return likelihood, _feeder_scaled_earnings(name, earnings, holdings)
    if dice == 2:
        if name == 'Ranch':
            earnings += (holdings.count(name)*holdings.count('Cheese Factory')*3)
            likelihood += 6/36
        elif name in ('Forest', 'Mine'):
            earnings += (holdings.count(name)*holdings.count('Furniture Factory')*3)
            # NOTE(review): 7/36 does not match any single two-dice total;
            # confirm against the Furniture Factory's 'Dice Roll' in cards.csv.
            likelihood += 7/36
        elif name in ('Wheat Field', 'Apple Orchard'):
            earnings += (holdings.count(name)*holdings.count('Fruit and Vegetable Market')*2)
            likelihood += 3/36
    return likelihood, earnings


def _progress_matrix(step_probs):
    """Build the 5x5 landmark-count transition matrix for one action.

    State i (0-3 landmarks) moves to i+1 with probability step_probs[i] and
    otherwise stays put; state 4 is absorbing.
    """
    # The heuristic scores are unbounded sums, so clip them to [0, 1]; otherwise
    # a score above 1 would put a negative entry in the matrix.
    bounded = np.clip(step_probs, 0., 1.)
    matrix = []
    for i in range(0, 4):
        matrix_row = [0.]*5
        matrix_row[i] = 1-bounded[i]
        matrix_row[i+1] = bounded[i]
        matrix.append(matrix_row)
    matrix.append([0., 0., 0., 0., 1])
    return matrix


number_of_turns = 0
while (player_landmarks < 4) and (computer_landmarks < 4):
    number_of_turns += 1

    # Whose turn it is, and that side's card list (same list object, so appends stick)
    is_player = current_player == 'player'
    holdings = player_cards if is_player else computer_cards

    # num_dice must stay a module-level name: card_value() reads it as a global
    num_dice = choose_dice(player_dice if is_player else computer_dice, holdings)

    print(current_player + " is rolling " + str(num_dice) + " dice.")
    # Roll the dice - either 1 or 2 (two become available after the first landmark)
    if num_dice == 1:
        # A single die has six faces
        dice_roll = np.random.choice([1,2,3,4,5,6])
    elif num_dice == 2:
        dice_roll = np.random.choice([2,3,4,5,6,7,8,9,10,11,12],
                                     p=[1/36, 2/36, 3/36, 4/36, 5/36, 6/36,
                                        5/36, 4/36, 3/36, 2/36, 1/36])

    print(current_player + " rolled a " + str(dice_roll))

    # Add coins if the player has that Establishment, according to the card types
    for index, row in cards[cards['Dice Roll']==dice_roll].iterrows():
        if row['Type'] == 1:
            # Everyone with the Establishment earns coins
            earners = ('player', 'computer')
        elif row['Type'] == 2:
            # Only the person who rolled earns coins
            earners = (current_player,)
        else:
            earners = ()
        for earner in earners:
            earner_cards = player_cards if earner == 'player' else computer_cards
            if row['Establishment'] not in earner_cards:
                continue
            earnings = _activation_payout(row, earner_cards)
            if earner == 'player':
                player_coins += int(earnings)
            else:
                computer_coins += int(earnings)
            print(earner + " earned " + str(earnings) + " for having a " + row['Establishment'])

    # The roller gets one coin if they have none
    if is_player:
        if player_coins == 0:
            player_coins += 1
            print("player earned 1 coin for having none")
    else:
        if computer_coins == 0:
            computer_coins += 1
            print("computer earned 1 coin for having none")

    print("\nCurrent Score:")
    print("Player:")
    print("Player Cards: " + str(player_cards))
    print("Player Coins: " + str(player_coins))
    print("Player Landmarks: " + str(player_landmarks))

    print("Computer:")
    print("Computer Cards: " + str(computer_cards))
    print("Computer Coins: " + str(computer_coins))
    print("Computer Landmarks: " + str(computer_landmarks))


    # Choice of construction (or pass)
    # 1) Construct options based on current coin bank
    coins = player_coins if is_player else computer_coins
    landmarks = player_landmarks if is_player else computer_landmarks
    can_buy = cards[(cards['Cost']<=coins) & (cards['Establishment'].isin(current_pool))]
    can_upgrade = upgrade_costs[landmarks] <= coins


    # 2) Create probability distributions and reward vectors
    # Calculate how likely transition to next state is based on current card holdings
    landmark_costs = np.array(upgrade_costs)
    transition_probabilities_current = np.zeros(4)
    for card in holdings:
        likelihood, earnings, cost = card_value(card)
        earnings = _feeder_scaled_earnings(card, earnings, holdings)
        transition_probabilities_current += ((likelihood*earnings)/landmark_costs)

    print("\nCurrent transition probabilities:")
    print(transition_probabilities_current)

    # Get the probabilities into the expected matrix form
    # Action 0 is "do nothing"; one more action is appended per affordable card
    Ps = [_progress_matrix(transition_probabilities_current)]

    # Calculate how likely transition to next state is for each card that can be purchased
    for index, row in can_buy.iterrows():
        # Score the candidate itself, not the last card seen in the holdings loop
        candidate = row['Establishment']
        likelihood, earnings, cost = card_value(candidate)
        likelihood, earnings = _candidate_value(candidate, likelihood, earnings, holdings, num_dice)
        print("Buying the card: " + row['Establishment'] + " results in the following improvement")
        possible_new_probabilities = transition_probabilities_current + ((likelihood*earnings)/landmark_costs)
        print(possible_new_probabilities)
        print("The one time cost is " + str(row['Cost']))

        Ps.append(_progress_matrix(possible_new_probabilities))

    # 3) MDP of choices with a discount factor of 0.9

    # Compare the policies of:
    # 1) Doing nothing
    # 2) Buy one of the cards in the central pool
    Ps = np.array(Ps)
    # Rewards indexed [state][action]: column 0 is passing, the rest are purchases.
    # The third pass reward is 6 so the vector matches the documented (1, 2, 6, 8, 10).
    pass_rewards = [1., 2., 6., 8., 10.]
    purchase_rewards = [1., 2., 3., 4., 5.]
    Rs = np.array([[pass_r]+[buy_r]*(len(Ps)-1)
                   for pass_r, buy_r in zip(pass_rewards, purchase_rewards)])

    vi = mdptoolbox.mdp.ValueIteration(Ps, Rs, 0.9) #discount value of 0.9
    vi.run()

    # Best action for the state the current player is in
    policy = vi.policy[landmarks]

    if can_upgrade:
        # An affordable landmark is always bought, regardless of the MDP policy
        print("\n" + current_player + " will upgrade.")
        # Increase landmarks
        # Decrease coins
        if is_player:
            player_coins -= upgrade_costs[player_landmarks]
            player_landmarks += 1
        else:
            computer_coins -= upgrade_costs[computer_landmarks]
            computer_landmarks += 1
    else:
        print("\nThe best policy is: " + str(policy))
        if policy==0:
            print(current_player + " will pass.")
        else:
            # 4) Subtract from coin bank and add to holdings if relevant
            # subtract coins from the player's bank
            # add the card to the player's holdings
            print(current_player + " will buy:")
            # Action k (k >= 1) corresponds to row k-1 of can_buy
            to_buy = list(can_buy['Establishment'])[policy-1]
            print(to_buy)
            price = int(list(can_buy[can_buy['Establishment']==to_buy]['Cost'])[0])
            if is_player:
                player_coins -= price
            else:
                computer_coins -= price
            holdings.append(to_buy)

            # pop the card from the current pool
            current_pool.pop(current_pool.index(to_buy))

            # add a new card to the current pool and pop it from the full list of remaining cards
            # (skipped once the draw pile is exhausted, so the pool simply shrinks)
            if len(card_stack) > 0:
                current_pool.append(card_stack[0])
                card_stack.pop(0)

    # Increase the number of dice if someone has unlocked the first landmark
    if player_landmarks>=1:
        player_dice = 2
    if computer_landmarks>=1:
        computer_dice = 2


    # Switch players
    current_player = 'computer' if is_player else 'player'


    # Determine when someone has won
    if player_landmarks==4:
        print("Game over. player has won!")
    elif computer_landmarks==4:
        print("Game over. computer has won!")

    print("\n\n")

print("This took " + str(number_of_turns) + " turns.")