In [1]:
import sys
sys.path.append("../../RL-book")
from dataclasses import dataclass
from typing import Tuple, Dict
from rl.markov_decision_process import FiniteMarkovDecisionProcess, MarkovDecisionProcess
from rl.markov_decision_process import FinitePolicy, StateActionMapping
from rl.markov_process import FiniteMarkovProcess, FiniteMarkovRewardProcess
from rl.distribution import Categorical, Constant
from scipy.stats import poisson
import numpy as np
from more_itertools import distinct_permutations
import matplotlib.pyplot as plt
from rl.dynamic_programming import value_iteration, policy_iteration, policy_iteration_result
from rl.markov_decision_process import FinitePolicy, StateActionMapping

In [2]:
#Create Class:
@dataclass(frozen=True)
class Village_State:
    '''This class is the lilypad that the frog is on'''
    
    num_villagers_alive: int
    Vampire_is_alive : bool
        
VampireMapping = StateActionMapping[Village_State, int]

In [3]:
class Vampire_MDP_Finite(FiniteMarkovDecisionProcess[Village_State, int]):
    def __init__(self,num_villagers_alive:int):
        self.num_villagers_alive = num_villagers_alive
        super().__init__(self.get_action_transition_reward_map())
    
    def get_action_transition_reward_map(self) -> VampireMapping:
        d: Dict[Village_State, Dict[str, Categorical[Tuple[Village_State,float]]]] = {}
        #{State:{Action 1:{(Next_State,Reward):Probability},{Action 2:{(Next_State,Reward):Probability}}}}

        for villagers in range(1,self.num_villagers_alive+1):
            START_STATE = Village_State(num_villagers_alive = villagers,Vampire_is_alive=True)
            action_dict = {}
            
            for poison_amount in range(villagers+1):
                action_str = "Poison " + str(poison_amount) + " villigers"
                END_STATE_Murdered = Village_State(num_villagers_alive = villagers-poison_amount,Vampire_is_alive=False)
                END_STATE_Survived = Village_State(num_villagers_alive = villagers-poison_amount-1,Vampire_is_alive=True)
                
                prob_dict = {}
                
                vampire_murdered_prob = poison_amount/villagers
                vampire_survive_prob = 1 - poison_amount/villagers
                
                Reward_vampire_survives = 0
                Reward_vampire_is_murdered = villagers - poison_amount
                
                #Vampire is murdered:
                prob_dict[(END_STATE_Murdered,Reward_vampire_is_murdered)] = vampire_murdered_prob
                
                #Vampire survives
                prob_dict[(END_STATE_Survived,Reward_vampire_survives)] = vampire_survive_prob
                
                action_dict[action_str] =Categorical(prob_dict)
                
                d[END_STATE_Murdered] = None
                
            d[START_STATE] = action_dict
            
            #Create Terminal States:
            
        return d
        

In [4]:
Vampire_MDP = Vampire_MDP_Finite(num_villagers_alive=10)

print("MDP Transition Map")
print("------------------")
print(Vampire_MDP)

MDP Transition Map
------------------
Village_State(num_villagers_alive=1, Vampire_is_alive=False) is a Terminal State
Village_State(num_villagers_alive=0, Vampire_is_alive=False) is a Terminal State
From State Village_State(num_villagers_alive=1, Vampire_is_alive=True):
  With Action Poison 0 villigers:
    To [State Village_State(num_villagers_alive=1, Vampire_is_alive=False) and Reward 1.000] with Probability 0.000
    To [State Village_State(num_villagers_alive=0, Vampire_is_alive=True) and Reward 0.000] with Probability 1.000
  With Action Poison 1 villigers:
    To [State Village_State(num_villagers_alive=0, Vampire_is_alive=False) and Reward 0.000] with Probability 1.000
    To [State Village_State(num_villagers_alive=-1, Vampire_is_alive=True) and Reward 0.000] with Probability 0.000
Village_State(num_villagers_alive=2, Vampire_is_alive=False) is a Terminal State
From State Village_State(num_villagers_alive=2, Vampire_is_alive=True):
  With Action Poison 0 villigers:
    To [St

In [5]:
from pprint import pprint
VF,Policies = policy_iteration_result(Vampire_MDP_Finite(50),1)

pprint(VF)

pprint(Policies)

# for i,policy in enumerate(Policies):
#     if i > 6:
#         break
#     else:
#         print (policy)

{Village_State(num_villagers_alive=6, Vampire_is_alive=True): 1.7777777777777777,
 Village_State(num_villagers_alive=5, Vampire_is_alive=True): 1.5000000000000002,
 Village_State(num_villagers_alive=1, Vampire_is_alive=True): 0.0,
 Village_State(num_villagers_alive=2, Vampire_is_alive=True): 0.5,
 Village_State(num_villagers_alive=3, Vampire_is_alive=True): 0.6666666666666666,
 Village_State(num_villagers_alive=4, Vampire_is_alive=True): 1.125,
 Village_State(num_villagers_alive=7, Vampire_is_alive=True): 2.2321428571428568,
 Village_State(num_villagers_alive=8, Vampire_is_alive=True): 2.625,
 Village_State(num_villagers_alive=9, Vampire_is_alive=True): 3.0,
 Village_State(num_villagers_alive=10, Vampire_is_alive=True): 3.3857142857142857,
 Village_State(num_villagers_alive=11, Vampire_is_alive=True): 3.805194805194805,
 Village_State(num_villagers_alive=12, Vampire_is_alive=True): 4.21875,
 Village_State(num_villagers_alive=13, Vampire_is_alive=True): 4.615384615384615,
 Village_State

In [6]:
#Generate Starting Distribution and Finite Policy
num_villagers = 10
def generate_policy_and_start_distribution(num_villagers):
    policy = {}
    start_distribution = {}
    for villagers in range(1,num_villagers+1):
        START_STATE = Village_State(num_villagers_alive = villagers,Vampire_is_alive=True)
        action_dict = {}
        start_distribution[START_STATE] = 1/num_villagers

        for poison_amount in range(villagers+1):
            action_str = "Poison " + str(poison_amount) + " villigers"

            action_dict[action_str] = 1/(villagers+1)

            END_STATE_Murdered = Village_State(num_villagers_alive = villagers-poison_amount,Vampire_is_alive=False)
            policy[END_STATE_Murdered] = None

        policy[START_STATE] = Categorical(action_dict)
    policy[Village_State(num_villagers_alive = 0,Vampire_is_alive=True)]=None
    policy[Village_State(num_villagers_alive = 0,Vampire_is_alive=False)]=None
    policy = FinitePolicy(policy)
    start_distribution = Categorical(start_distribution)
    return policy,start_distribution

policy,start_distribution = generate_policy_and_start_distribution(10)

In [7]:

for i, trace in enumerate(Vampire_MDP.action_traces(start_distribution,policy)):
    if i >= 1:
        break
    print ("New Trace")
    
    for j,x in enumerate(trace):
        print ("Night:",j,x.state)
        help(x)
    print ("")

New Trace
Night: 0 Village_State(num_villagers_alive=7, Vampire_is_alive=True)
Help on TransitionStep in module rl.markov_decision_process object:

class TransitionStep(typing.Generic)
 |  TransitionStep(*args, **kwds)
 |  
 |  A single step in the simulation of an MDP, containing:
 |  
 |  state -- the state we start from
 |  action -- the action we took at that state
 |  next_state -- the state we ended up in after the action
 |  reward -- the instantaneous reward we got for this transition
 |  
 |  Method resolution order:
 |      TransitionStep
 |      typing.Generic
 |      builtins.object
 |  
 |  Methods defined here:
 |  
 |  __delattr__(self, name)
 |  
 |  __eq__(self, other)
 |  
 |  __hash__(self)
 |  
 |  __init__(self, state: 'S', action: 'A', next_state: 'S', reward: 'float') -> None
 |  
 |  __repr__(self)
 |  
 |  __setattr__(self, name, value)
 |  
 |  add_return(self, γ: 'float', return_: 'float') -> 'ReturnStep[S, A]'
 |      Given a γ and the return from 'next_stat

In [8]:
policy

Village_State(num_villagers_alive=1, Vampire_is_alive=False) is a Terminal State
Village_State(num_villagers_alive=0, Vampire_is_alive=False) is a Terminal State
For State Village_State(num_villagers_alive=1, Vampire_is_alive=True):
  Do Action Poison 0 villigers with Probability 0.500
  Do Action Poison 1 villigers with Probability 0.500
Village_State(num_villagers_alive=2, Vampire_is_alive=False) is a Terminal State
For State Village_State(num_villagers_alive=2, Vampire_is_alive=True):
  Do Action Poison 0 villigers with Probability 0.333
  Do Action Poison 1 villigers with Probability 0.333
  Do Action Poison 2 villigers with Probability 0.333
Village_State(num_villagers_alive=3, Vampire_is_alive=False) is a Terminal State
For State Village_State(num_villagers_alive=3, Vampire_is_alive=True):
  Do Action Poison 0 villigers with Probability 0.250
  Do Action Poison 1 villigers with Probability 0.250
  Do Action Poison 2 villigers with Probability 0.250
  Do Action Poison 3 villigers 

In [9]:
help(Vampire_MDP)

Help on Vampire_MDP_Finite in module __main__ object:

class Vampire_MDP_Finite(rl.markov_decision_process.FiniteMarkovDecisionProcess)
 |  Vampire_MDP_Finite(*args, **kwds)
 |  
 |  A Markov Decision Process with finite state and action spaces.
 |  
 |  Method resolution order:
 |      Vampire_MDP_Finite
 |      rl.markov_decision_process.FiniteMarkovDecisionProcess
 |      rl.markov_decision_process.MarkovDecisionProcess
 |      abc.ABC
 |      typing.Generic
 |      builtins.object
 |  
 |  Methods defined here:
 |  
 |  __init__(self, num_villagers_alive: int)
 |      Initialize self.  See help(type(self)) for accurate signature.
 |  
 |  get_action_transition_reward_map(self) -> Mapping[__main__.Village_State, Union[Mapping[int, rl.distribution.FiniteDistribution[Tuple[__main__.Village_State, float]]], NoneType]]
 |  
 |  ----------------------------------------------------------------------
 |  Data and other attributes defined here:
 |  
 |  __abstractmethods__ = frozenset()
 | 