In [3]:
%run RaceCar.py
import numpy as np
import pygame

In [5]:
def generate_map(w_cells, h_cells, min_lim, max_lim):
    map = np.zeros((w_cells, h_cells), dtype=np.int32)
    # Set boundaries
    map[w_cells // 2:, h_cells // 2:] = -1
    lims = np.random.randint(min_lim, max_lim, size=4)
    for h in range(h_cells):
        lims[0] = np.amax([min_lim, np.amin([max_lim, lims[0] + np.random.choice([-1, 0, 1])])])
        map[h, :lims[0]] = -1
        if h > h_cells // 2:
            lims[1] = np.amax([min_lim, np.amin([max_lim, lims[1] + np.random.choice([-1, 0, 1])])])
            map[h, h_cells // 2 - lims[1]:] = -1
    for w in range(w_cells):
        lims[2] = np.amax([min_lim, np.amin([max_lim, lims[2] + np.random.choice([-1, 0, 1])])])
        map[:lims[2], w] = -1
        if w > w_cells // 2:
            lims[3] = np.amax([min_lim, np.amin([max_lim, lims[3] + np.random.choice([-1, 0, 1])])])
            map[w_cells // 2 - lims[3]:, w] = -1
    # Set start line
    map[-1, np.argwhere(map[-1, :] == 0)] = 1
    # Set finish line
    map[np.argwhere(map[:, -1] == 0), -1] = 2
    return map

def draw_map(screen, map):
    w_size = width // map.shape[0]
    h_size = height // map.shape[1]
    for h in range(map.shape[1]):
        for w in range(map.shape[0]):
            rect = pygame.Rect(w * w_size, h * h_size, w_size, h_size)
            if map[h, w] == -1:
                pygame.draw.rect(screen, (50, 50, 50), rect, 0)
            elif map[h, w] == 1:
                pygame.draw.rect(screen, (0, 200, 0), rect, 0)
            elif map[h, w] == 2:
                pygame.draw.rect(screen, (200, 0, 0), rect, 0)
            else:
                pygame.draw.rect(screen, (200, 200, 200), rect, 0)
            pygame.draw.rect(screen, (0, 0, 0), rect, 1)

#
width = 850
height = 850
w_cells = 30
h_cells = 30
min_lim = 1
max_lim = 6

pygame.init()
screen = pygame.display.set_mode((width, height))
screen.fill(WHITE)
clock = pygame.time.Clock()
map = generate_map(w_cells, h_cells, min_lim, max_lim)
while True:
    draw_map(screen, map)
    for event in pygame.event.get():
        if event.type == pygame.QUIT:
            pygame.quit()
            sys.exit()
    pygame.display.update()

SystemExit: 

In [None]:
map = generate_map()
# Number of states.
S = W_CELLS * H_CELLS
# Number of actions.
A = 66
# Init a randomic policy.
pi = np.random.randint(66, size=S)
# Discount factor.
gamma = 1.0
# Exploration degree.
epsilon = 0.1
# Number of episodes.
episodes = 1000
# Inizialization.
N = np.zeros((S, A), dtype=np.int32)
Q = np.zeros((S, A), dtype=np.float64)

# Loop on episodes.
for e in range(episodes):
    # Exploring start.
    init_s = np.random.randint(S)
    # Run a game.
    bj = Blackjack(init_s, pi, epsilon)
    bj.play()
    states = bj.get_states()
    actions = bj.get_actions()
    rewards = bj.get_rewards()
    G = 0.0
    for t, s_t in reversed(list(enumerate(states))):
        G = rewards[t] + gamma * G
        N[s_t, actions[t]] += 1
        Q[s_t, actions[t]] += (1.0 / N[s_t, actions[t]]) * (G - Q[s_t, actions[t]])
        pi[s_t] = np.argmax(Q[s_t, :])