# Assignment 3

## Question 3

In [45]:
from dataclasses import dataclass
from rl.markov_decision_process import FiniteMarkovDecisionProcess
from rl.markov_decision_process import FinitePolicy, StateActionMapping
from rl.markov_process import FiniteMarkovProcess, FiniteMarkovRewardProcess
from rl.distribution import Categorical, Constant
from typing import Dict, Tuple

from pprint import pprint

In [46]:
@dataclass(frozen=True)
class FrogState:
    position: int 
        
        
FrogMapping = StateActionMapping[FrogState, int]
        
        
class FrogCroakMDP(FiniteMarkovDecisionProcess[FrogState, int]):
    
    def __init__(
        self,
        num_lilypads: int,
    
    ):
        self.num_lilypads: int = num_lilypads
        super().__init__(self.get_action_transition_reward_map())
        
    def get_action_transition_reward_map(self) -> FrogMapping:
        d: Dict[FrogState, Dict[int,Constant[Tuple[FrogState, int]]]] = {}
        
        # set the terminal states
        d[0] = None
        d[self.num_lilypads] = None
        
        # loop through all non-terminal states and generate the mapping
        for s in range(1, self.num_lilypads):
            d1: Dict[int,Categorical[Tuple[FrogState, int]]] = {}
                            
            # Action A is represented by True
            # Action B is represented by False
            d_actionA: Dict[Tuple[FrogState, int]] = {}
            d_actionB: Dict[Tuple[FrogState, int]] = {}
            for s_prime in range(0, self.num_lilypads+1):
                # set the reward variable
                if (s_prime == 0):
                    reward = -1
                elif (s_prime == self.num_lilypads):
                    reward = 1
                else:
                    reward = 0
                
                # set the dict for action A
                if (s == s_prime-1):
                    d_actionA[(FrogState(s_prime),reward)] = s/self.num_lilypads
                elif (s == s_prime+1):
                    d_actionA[(FrogState(s_prime),reward)] = 1-s/self.num_lilypads
                else:
                    d_actionA[(FrogState(s_prime),reward)] = 0 
                
                # set the dict for action B
                if (s != s_prime):
                    d_actionB[(FrogState(s_prime),reward)] = 1/self.num_lilypads
                else:
                    d_actionB[(FrogState(s_prime),reward)] = 0
                
            d1[1]  = Categorical(d_actionA)
            d1[0] = Categorical(d_actionB)
            
            d[s] = d1
            
        return d
            


In [50]:
if __name__ == "__main__":
    user_num_lilypads = 3 # or 6 or 9
    
    user_gamma = 0.5
    
    frog_mdp: FiniteMarkovDecisionProcess[FrogState, int] =\
        FrogCroakMDP(user_num_lilypads)
    
    print("MDP Transition Map")
    print("------------------")
    print(frog_mdp)

    fdp: FinitePolicy[FrogState, int] = FinitePolicy(
        {FrogState(position):
         Constant(0) for position in
         range(0, user_num_lilypads+1)}
    )
 
    print("Policy Map")
    print("----------")
    print(fdp)

    implied_mrp: FiniteMarkovRewardProcess[FrogState] =\
        frog_mdp.apply_finite_policy(fdp)
    print("Implied MP Transition Map")
    print("--------------")
    print(FiniteMarkovProcess(implied_mrp.transition_map))

    print("Implied MRP Transition Reward Map")
    print("---------------------")
    print(implied_mrp)

    print("Implied MP Stationary Distribution")
    print("-----------------------")
    implied_mrp.display_stationary_distribution()
    print()

    print("Implied MRP Reward Function")
    print("---------------")
    implied_mrp.display_reward_function()
    print()

    print("Implied MRP Value Function")
    print("--------------")
    implied_mrp.display_value_function(gamma=user_gamma)
    print()

MDP Transition Map
------------------
0 is a Terminal State
3 is a Terminal State
From State 1:
  With Action 1:
    To [State FrogState(position=0) and Reward -1.000] with Probability 0.667
    To [State FrogState(position=1) and Reward 0.000] with Probability 0.000
    To [State FrogState(position=2) and Reward 0.000] with Probability 0.333
    To [State FrogState(position=3) and Reward 1.000] with Probability 0.000
  With Action 0:
    To [State FrogState(position=0) and Reward -1.000] with Probability 0.333
    To [State FrogState(position=1) and Reward 0.000] with Probability 0.000
    To [State FrogState(position=2) and Reward 0.000] with Probability 0.333
    To [State FrogState(position=3) and Reward 1.000] with Probability 0.333
From State 2:
  With Action 1:
    To [State FrogState(position=0) and Reward -1.000] with Probability 0.000
    To [State FrogState(position=1) and Reward 0.000] with Probability 0.333
    To [State FrogState(position=2) and Reward 0.000] with Probabi

KeyError: 1

## Question 4