In [1]:
# A Shapley Game with two players to simulate a disease infection.  
import numpy as np
import random

In [2]:
# Define states and actions
states = ['Low', 'Medium', 'High']
actions_p1 = ['Mitigate', 'DoNothing']
actions_p2 = ['Comply', 'Resist']


In [3]:
# Discount factor
gamma = 0.9


In [4]:
# Rewards: (state, action_p1, action_p2) -> (reward_p1, reward_p2)
reward_table = {
    ('Low', 'Mitigate', 'Comply'): (5, 2),
    ('Low', 'Mitigate', 'Resist'): (3, 1),
    ('Low', 'DoNothing', 'Comply'): (2, 3),
    ('Low', 'DoNothing', 'Resist'): (0, 5),

    ('Medium', 'Mitigate', 'Comply'): (6, 1),
    ('Medium', 'Mitigate', 'Resist'): (4, 0),
    ('Medium', 'DoNothing', 'Comply'): (1, 4),
    ('Medium', 'DoNothing', 'Resist'): (-2, 5),

    ('High', 'Mitigate', 'Comply'): (8, -1),
    ('High', 'Mitigate', 'Resist'): (5, -3),
    ('High', 'DoNothing', 'Comply'): (-3, 2),
    ('High', 'DoNothing', 'Resist'): (-5, 0),
}


In [5]:
# Transition probabilities
# From each state and action pair, what are the chances of moving to Low/Medium/High
transition_table = {
    ('Low', 'Mitigate', 'Comply'): [0.8, 0.2, 0.0],
    ('Low', 'Mitigate', 'Resist'): [0.6, 0.4, 0.0],
    ('Low', 'DoNothing', 'Comply'): [0.6, 0.4, 0.0],
    ('Low', 'DoNothing', 'Resist'): [0.4, 0.5, 0.1],

    ('Medium', 'Mitigate', 'Comply'): [0.6, 0.3, 0.1],
    ('Medium', 'Mitigate', 'Resist'): [0.4, 0.4, 0.2],
    ('Medium', 'DoNothing', 'Comply'): [0.3, 0.4, 0.3],
    ('Medium', 'DoNothing', 'Resist'): [0.2, 0.3, 0.5],

    ('High', 'Mitigate', 'Comply'): [0.5, 0.3, 0.2],
    ('High', 'Mitigate', 'Resist'): [0.3, 0.4, 0.3],
    ('High', 'DoNothing', 'Comply'): [0.2, 0.3, 0.5],
    ('High', 'DoNothing', 'Resist'): [0.1, 0.2, 0.7],
}

In [6]:
# Initial state
current_state = 'Medium'


In [7]:
# Tracking rewards
total_reward_p1 = 0
total_reward_p2 = 0

In [8]:
# Policy: random actions (can be replaced with Q-learning or strategic rules)
def sample_actions():
    return random.choice(actions_p1), random.choice(actions_p2)

In [10]:
# Run game
print(f"Initial State: {current_state}")
for t in range(10):
    action1, action2 = sample_actions()
    rewards = reward_table[(current_state, action1, action2)]
    total_reward_p1 += (gamma ** t) * rewards[0]
    total_reward_p2 += (gamma ** t) * rewards[1]

    trans_probs = transition_table[(current_state, action1, action2)]
    current_state = random.choices(states, trans_probs)[0]

    print(f"Round {t+1}: P1-{action1}, P2-{action2} -> State: {current_state}, Rewards: {rewards}")

print(f"\nDiscounted Total Reward:\n  Public Health (P1): {round(total_reward_p1, 2)}\n  Population (P2): {round(total_reward_p2, 2)}")


Initial State: Medium
Round 1: P1-DoNothing, P2-Resist -> State: Medium, Rewards: (-2, 5)
Round 2: P1-Mitigate, P2-Comply -> State: Low, Rewards: (6, 1)
Round 3: P1-DoNothing, P2-Resist -> State: Medium, Rewards: (0, 5)
Round 4: P1-DoNothing, P2-Comply -> State: High, Rewards: (1, 4)
Round 5: P1-DoNothing, P2-Comply -> State: High, Rewards: (-3, 2)
Round 6: P1-Mitigate, P2-Resist -> State: High, Rewards: (5, -3)
Round 7: P1-DoNothing, P2-Resist -> State: Medium, Rewards: (-5, 0)
Round 8: P1-Mitigate, P2-Comply -> State: Low, Rewards: (6, 1)
Round 9: P1-Mitigate, P2-Comply -> State: Low, Rewards: (5, 2)
Round 10: P1-DoNothing, P2-Resist -> State: Medium, Rewards: (0, 5)

Discounted Total Reward:
  Public Health (P1): 20.86
  Population (P2): 34.51
