In [192]:
%load_ext autoreload
%autoreload 2
import pandas as pd
import numpy as np
from chapter04.plot import plot_animated_mesh
from mdp import policy_iteration
from rental import JacksCarRental, NonlinearJacksCarRental
import plotly.express as px

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [194]:
# Roll out one simulated year under a uniformly random policy and record,
# for every day, the sampled action, the reward, and the resulting state.
mdp = JacksCarRental()

days = 365
# One column per entry of the observation space's `nvec`.
states = np.zeros((days, len(mdp.observation_space.nvec)), dtype=int)
actions = np.zeros(days, dtype=int)
# NOTE(review): integer storage; a non-integer reward would be truncated
# on assignment -- confirm that rewards are always whole numbers.
rewards = np.zeros(days, dtype=int)

mdp.reset()
for day in range(days):
    action = mdp.action_space.sample()
    # `done` and `info` are returned by the environment but not used here.
    state, reward, done, info = mdp.step(action)
    actions[day], rewards[day], states[day] = action, reward, state

In [195]:
# Wrap the recorded states in a frame: one row per day, one column per
# state component (labelled 'A' and 'B').
states_df = pd.DataFrame(data=states, columns=list('AB'))

In [196]:
# Overlaid bar chart of the recorded state columns, indexed by day.
fig = px.bar(states_df, barmode='overlay').update_layout(
    template='plotly_white',
    xaxis_title='day',
    yaxis_title='no. cars',
    legend_title='location',
)
fig.show()

In [197]:
# Pair each day's action with its reward. Subtracting `mdp.max_transfer`
# shifts the stored action values before they are tabulated.
# NOTE(review): presumably this recentres the sampled action index onto a
# signed transfer count in [-max_transfer, +max_transfer] -- confirm against
# the environment's action space.
action_rew_df = pd.DataFrame(
    np.column_stack([actions - mdp.max_transfer, rewards]),
    columns=['action', 'reward'],
)

In [198]:
# 2-D histogram of reward against the shifted action value. The x-range is
# padded by half a unit beyond +/- max_transfer on each side.
fig = px.density_heatmap(
    action_rew_df,
    x="action",
    y="reward",
    nbinsx=4 * mdp.max_transfer,
    range_x=[-mdp.max_transfer - .5, mdp.max_transfer + .5],
).update_layout(
    template='plotly_white',
    title="Histogram of Revenue vs Transfer",
    yaxis_title='Revenue',
    xaxis_title="Transfer (1 -> 2)",
)
fig.show()

In [199]:
mdp = JacksCarRental(max_poisson=30)
%timeit mdp.step(mdp.action_space.sample())

mdp = JacksCarRental(max_poisson=20)
%timeit mdp.step(mdp.action_space.sample())

mdp = JacksCarRental(max_poisson=10)
%timeit mdp.step(mdp.action_space.sample())

42.4 ms ± 1.03 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
8.81 ms ± 194 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
450 µs ± 21.4 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


In [222]:
# Solve the max_poisson=10 environment with policy iteration, keeping the
# per-iteration history for the animations. This is a long-running cell:
# the recorded run took roughly 49 s for 25 iterations.
mdp = JacksCarRental(max_poisson=10)
# Chained assignment: unpack the result and also keep the full tuple
# available under `results`.
value, policy, history = results = policy_iteration(mdp, save_history=True)

Convergence=100.0% : : 25it [00:49,  1.99s/it]


In [245]:
# Animate how the policy evolved across iterations. Each stored policy
# frame is transposed and shifted by `mdp.max_transfer` before plotting.
hist = [
    policy_frame.T - mdp.max_transfer
    for policy_frame in history['policy']
]
plot_animated_mesh(hist)

In [244]:
# Animate how the value estimates evolved across iterations.
# The previous version of this cell also rebuilt `hist` from the policy
# history, but that list was never used here (copy-paste leftover), so the
# redundant computation has been dropped.
# NOTE(review): the policy animation transposes each frame (`h.T`) while the
# value frames are plotted as stored -- confirm whether the value frames
# should be transposed too so both animations share the same axis order.
plot_animated_mesh(history['value'])