In [1]:
from rl.markov_process import (
    MarkovProcess,
    FiniteMarkovRewardProcess,
    NonTerminal,
    State,
    Terminal,
)

from rl.distribution import Categorical, Constant, FiniteDistribution


import matplotlib.pyplot as plt
import numpy as np
from numpy.random import randint

import pandas as pd

from dataclasses import dataclass

from typing import Dict, Mapping, Tuple

#### Frog Problem

In [2]:
@dataclass
class River:
    """A river described by the number of lily pads froggie can land on.

    Attributes:
        num_lily_pads: How many lily pads lie in the river.
    """

    num_lily_pads: int

    @property
    def river_bank_idx(self) -> int:
        """Return the position index used for the riverbank (the terminal spot).

        The bank's index is the lily-pad count itself.
        """
        bank_position = self.num_lily_pads
        return bank_position


@dataclass
class Froggie_State(int):
    """Froggie's position: an ``int`` that also exposes its value by name.

    The single constructor argument becomes both the integer value of the
    instance and its ``lily_pad_idx`` field.
    """

    lily_pad_idx: int

    def __hash__(self):
        # @dataclass generates __eq__, which would otherwise leave the class
        # unhashable; defining __hash__ explicitly keeps the integer hash so
        # instances can be used as dict keys.
        return int.__hash__(self)


class Froggie_Mapping(dict):
    """``dict`` subclass that carries a class-level ``state_map`` attribute."""

    # NOTE(review): this dict is created once in the class body, so it is
    # shared by the class and every instance rather than being per-instance.
    # Nothing in the visible notebook cells reads it — confirm it is needed.
    state_map: Dict[State, float] = {}


@dataclass
class Froggie_Position(MarkovProcess[Froggie_State]):
    """Markov process tracking froggie's position while crossing ``river``.

    Attributes:
        river: The river being crossed; supplies the number of lily pads and
            the index used for the riverbank.
    """

    river: River

    def transition(
        self, state: NonTerminal[Froggie_State]
    ) -> Categorical[State[Froggie_State]]:
        """Return the distribution over froggie's next position.

        Every lily pad ahead of the current one, plus the riverbank, is given
        the same probability ``1 / (num_lily_pads - current index)``.

        Args:
            state: The current position, wrapped in ``NonTerminal``. A bare
                ``Froggie_State`` is tolerated as well.

        Returns:
            A ``Categorical`` whose outcomes are the ``NonTerminal`` pads
            ahead and the ``Terminal`` riverbank.
        """
        # NonTerminal keeps the wrapped value in `.state`; reading
        # `lily_pad_idx` straight off the wrapper raises AttributeError.
        current = state.state if isinstance(state, NonTerminal) else state
        num_pads = self.river.num_lily_pads
        prob = 1 / (num_pads - current.lily_pad_idx)

        state_map: Dict[State, float] = {
            NonTerminal(Froggie_State(pad)): prob
            for pad in range(current.lily_pad_idx + 1, num_pads)
        }
        # The riverbank is the only terminal outcome.
        state_map[Terminal(Froggie_State(self.river.river_bank_idx))] = prob

        return Categorical(state_map)


#### Modeling the Froggie Puzzle as a Finite Markov Reward Process

In [3]:
class FroggieMRPFinite(FiniteMarkovRewardProcess[Froggie_State]):
    """Finite Markov reward process for the froggie puzzle.

    Every hop earns a reward of 1.0, and from each lily pad froggie moves
    with equal probability to any pad ahead of it or to the riverbank.
    """

    def __init__(self, river: River):
        """Build the process for ``river``.

        Args:
            river: The river to cross; supplies the lily-pad count and the
                riverbank index.
        """
        # `river` has to be stored first: get_transition_reward_map() reads it
        # before the base-class constructor runs.
        self.river = river
        super().__init__(self.get_transition_reward_map())

    def get_transition_reward_map(
        self,
    ) -> Mapping[Froggie_State, FiniteDistribution[Tuple[Froggie_State, float]]]:
        """Return, for each lily pad, the distribution of (next state, reward).

        Returns:
            A mapping keyed by the ``Froggie_State`` of every lily pad, in
            ascending pad order. Each value is a ``Categorical`` over
            ``(next Froggie_State, reward)`` pairs in which every pad ahead
            and the riverbank share the probability
            ``1 / (num_lily_pads - pad index)`` and the reward is 1.0.
        """
        reward = 1.0
        num_pads = self.river.num_lily_pads
        d: Dict[Froggie_State, Categorical[Tuple[Froggie_State, float]]] = {}

        # range() (rather than np.arange) keeps `lily_pad_idx` a plain int for
        # source states, matching the successor states built below.
        for starting_pad in range(num_pads):
            prob = 1 / (num_pads - starting_pad)

            sr_probs_map: Dict[Tuple[Froggie_State, float], float] = {
                (Froggie_State(pad_idx), reward): prob
                for pad_idx in range(starting_pad + 1, num_pads)
            }
            # The riverbank never appears as a key of `d`; it is only ever a
            # destination.
            sr_probs_map[(Froggie_State(self.river.river_bank_idx), reward)] = prob

            d[Froggie_State(starting_pad)] = Categorical(sr_probs_map)

        return d


In [4]:
# Expected number of hops to reach the riverbank from each starting lily pad.
# Every hop earns a reward of 1, so with no discounting (gamma = 1.0) the
# value function of a state equals the expected hop count from that state.

river = River(100)

fmrp = FroggieMRPFinite(river=river)
fmrp.get_value_function_vec(gamma=1.0)


array([5.18737752, 5.17737752, 5.16727651, 5.15707243, 5.14676315,
       5.13634648, 5.12582017, 5.11518187, 5.10442918, 5.09355961,
       5.0825706 , 5.07145949, 5.06022354, 5.0488599 , 5.03736565,
       5.02573774, 5.01397303, 5.00206827, 4.99002008, 4.97782496,
       4.96547928, 4.95297928, 4.94032105, 4.92750054, 4.91451353,
       4.90135563, 4.8880223 , 4.87450878, 4.86081015, 4.84692126,
       4.83283676, 4.81855104, 4.80405829, 4.78935241, 4.77442703,
       4.75927552, 4.7438909 , 4.7282659 , 4.71239289, 4.69626386,
       4.67987041, 4.66320375, 4.64625459, 4.62901321, 4.61146935,
       4.59361221, 4.57543039, 4.55691188, 4.53804395, 4.51881318,
       4.49920534, 4.47920534, 4.45879718, 4.43796384, 4.41668725,
       4.39494812, 4.37272589, 4.34999862, 4.32674281, 4.30293328,
       4.27854304, 4.25354304, 4.22790201, 4.20158622, 4.1745592 ,
       4.14678142, 4.11820999, 4.08879823, 4.0584952 , 4.0272452 ,
       3.99498713, 3.9616538 , 3.92717104, 3.89145675, 3.85441