Skip to content
Permalink
Branch: master
Find file Copy path
Fetching contributors…
Cannot retrieve contributors at this time
102 lines (84 sloc) 3.36 KB
#######################################################################
# Copyright (C) #
# 2016-2018 Shangtong Zhang(zhangshangtong.cpp@gmail.com) #
# 2016 Kenta Shimada(hyperkentakun@gmail.com) #
# Permission given to modify the code as long as you keep this #
# declaration at the top #
#######################################################################
import numpy as np
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
from matplotlib.table import Table
# Side length of the square grid world.
WORLD_SIZE = 4

# Moves expressed as (row offset, column offset), in the order:
# left, up, right, down
ACTIONS = [np.array(offset) for offset in ((0, -1), (-1, 0), (0, 1), (1, 0))]

# Probability of each of the four moves under the evaluated policy.
ACTION_PROB = 0.25
def is_terminal(state):
    """Tell whether `state` is one of the two terminal corner cells.

    `state` is a two-element sequence of grid coordinates. The terminal
    cells are (0, 0) and (WORLD_SIZE - 1, WORLD_SIZE - 1).
    """
    row, col = state
    last = WORLD_SIZE - 1
    at_first_corner = row == 0 and col == 0
    return at_first_corner or (row == last and col == last)
def step(state, action):
    """Apply `action` to `state` and return ``(next_state, reward)``.

    `state` is a two-element coordinate sequence and `action` a
    two-element offset. A move that would leave the
    WORLD_SIZE x WORLD_SIZE grid keeps the agent where it is. The reward
    is -1 for every move. `next_state` is returned as a plain list.
    """
    origin = np.array(state)
    candidate = (origin + action).tolist()
    row, col = candidate
    on_grid = 0 <= row < WORLD_SIZE and 0 <= col < WORLD_SIZE
    # Off-grid moves bounce back to the starting cell.
    next_state = candidate if on_grid else origin.tolist()
    return next_state, -1
def draw_image(image):
    """Draw a 2-D array as a table of values on a new matplotlib figure.

    Every entry of `image` becomes one white cell showing that entry's
    value. An extra column of row labels (1..nrows) and an extra row of
    column labels (1..ncols) are added at table index -1.

    Args:
        image: 2-D numpy array; its ``shape`` gives the table dimensions.

    Returns:
        None. The figure created here is not saved or closed; the caller
        is expected to do that through pyplot.
    """
    _, ax = plt.subplots()
    ax.set_axis_off()
    tb = Table(ax, bbox=[0, 0, 1, 1])

    nrows, ncols = image.shape
    width, height = 1.0 / ncols, 1.0 / nrows

    # Value cells
    for (i, j), val in np.ndenumerate(image):
        tb.add_cell(i, j, width, height, text=val,
                    loc='center', facecolor='white')

    # Row labels: one per row, placed in column -1.
    for i in range(nrows):
        tb.add_cell(i, -1, width, height, text=i + 1, loc='right',
                    edgecolor='none', facecolor='none')

    # Column labels: one per column, placed in row -1. This must iterate
    # over ncols (not the row count) so non-square arrays are labelled
    # correctly.
    for j in range(ncols):
        tb.add_cell(-1, j, width, height / 2, text=j + 1, loc='center',
                    edgecolor='none', facecolor='none')

    ax.add_table(tb)
def compute_state_value(in_place=False, discount=1.0, theta=1e-4):
    """Evaluate the equiprobable policy on the grid by repeated sweeps.

    Each sweep replaces the value of every non-terminal cell with the
    expected one-step return over ACTIONS, each taken with probability
    ACTION_PROB. Terminal cells keep the value 0.

    Args:
        in_place: If True, a sweep reads from the array it is writing, so
            cells updated earlier in the sweep are used immediately. If
            False, a sweep reads only the values from the previous sweep.
        discount: Factor applied to the successor state's value. The
            default of 1.0 reproduces the undiscounted behaviour.
        theta: Convergence threshold. Sweeping stops once the summed
            absolute change over all cells in a sweep is below it.

    Returns:
        A tuple ``(state_values, iteration)``: the WORLD_SIZE x WORLD_SIZE
        array of values and the number of sweeps performed.
    """
    new_state_values = np.zeros((WORLD_SIZE, WORLD_SIZE))
    state_values = new_state_values.copy()
    iteration = 1
    while True:
        # In-place mode reads the array being written; otherwise read the
        # snapshot taken at the end of the previous sweep.
        src = new_state_values if in_place else state_values
        for i in range(WORLD_SIZE):
            for j in range(WORLD_SIZE):
                if is_terminal([i, j]):
                    continue
                value = 0
                for action in ACTIONS:
                    (next_i, next_j), reward = step([i, j], action)
                    value += ACTION_PROB * (
                        reward + discount * src[next_i, next_j])
                new_state_values[i, j] = value

        # Total change versus the previous sweep's snapshot.
        delta = np.sum(np.abs(new_state_values - state_values))
        state_values = new_state_values.copy()
        if delta < theta:
            break
        iteration += 1
    return state_values, iteration
def figure_4_1():
    """Compute the state values, report sweep counts and save the figure.

    Runs the evaluation once with synchronous updates and once in place,
    draws the synchronous values rounded to two decimals, prints both
    sweep counts and writes the image to ../images/figure_4_1.png.
    """
    sync_values, sync_sweeps = compute_state_value(in_place=False)
    in_place_sweeps = compute_state_value(in_place=True)[1]

    rounded = np.round(sync_values, decimals=2)
    draw_image(rounded)

    print('In-place: %d iterations' % (in_place_sweeps))
    print('Synchronous: %d iterations' % (sync_sweeps))

    plt.savefig('../images/figure_4_1.png')
    plt.close()
# Script entry point: produce the figure only when this file is executed
# directly, not when it is imported as a module.
if __name__ == '__main__':
    figure_4_1()
You can’t perform that action at this time.
You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session.