In [59]:
import numpy as np
import gym
from gym import wrappers
from collections import defaultdict
import sys

#env source https://github.com/dennybritz/reinforcement-learning/
from blackjack import BlackjackEnv

In [60]:
env = BlackjackEnv()

In [61]:
env.reset()

(14, 9, False)

In [62]:
def get_probs(Q_s, epsilon, nA):

    
    policy_s = np.ones(nA) * epsilon / nA
    best_a = np.argmax(Q_s)
    policy_s[best_a] = 1 - epsilon + (epsilon / nA)
    return policy_s

In [63]:
def play(env, Q, epsilon, nA):
    
    episode = []
    state = env.reset()
    while True:
        probs = [0.8, 0.2] if state[0] > 18 else [0.2, 0.8]
        action = np.random.choice(np.arange(2), p=probs)
        next_state, reward, done, info = env.step(action)
        episode.append((state, action, reward))
        state = next_state
        if done:
            break
    return episode

In [64]:


def update_Q(env, episode, Q, alpha, gamma):

    
    for s, a, r in episode:
        first_occurence_idx = next(i for i,x in enumerate(episode) if x[0] == s)
        G = sum([x[2]*(gamma**i) for i,x in enumerate(episode[first_occurence_idx:])])
        Q[s][a] = Q[s][a] + alpha*(G - Q[s][a])
    
    return Q




In [65]:
def mc_control(env, num_episodes):

    
    epsilon = 1.0
    eps_min = 0.01
    decay = 0.9999
    alpha = 0.001
    gamma = 1.0
    
    nA = env.action_space.n
    Q = defaultdict(lambda: np.zeros(nA))
    
    
    for i_episode in range(1, num_episodes+1):
        if i_episode % 1000 == 0:
            print("\rEpisode {}/{}.".format(i_episode, num_episodes), end="")
            sys.stdout.flush()
        
        epislon = max(epsilon*decay, eps_min)
        episode = play(env, Q, epsilon, nA)
        print(episode)
        Q = update_Q(env, episode, Q, alpha, gamma)
    
    policy = dict((k,np.argmax(v)) for k, v in Q.items())
    
    return policy, Q

In [66]:
policy,Q = mc_control(env,10000)

[((15, 5, False), 1, -1)]
[((20, 4, False), 0, 1)]
[((20, 1, False), 0, -1)]
[((21, 2, True), 0, 1)]
[((12, 10, False), 1, 0), ((21, 10, False), 1, -1)]
[((21, 1, True), 0, 1)]
[((14, 10, False), 1, -1)]
[((21, 2, True), 0, 1)]
[((20, 3, False), 0, 1)]
[((19, 7, False), 0, 1)]
[((19, 10, False), 0, -1)]
[((15, 8, False), 1, -1)]
[((19, 10, False), 0, 0)]
[((18, 4, False), 1, -1)]
[((14, 5, False), 0, -1)]
[((18, 6, False), 1, -1)]
[((18, 5, False), 1, 0), ((20, 5, False), 0, 0)]
[((12, 9, False), 1, 0), ((15, 9, False), 0, 1)]
[((15, 5, False), 1, -1)]
[((19, 4, False), 0, -1)]
[((18, 1, False), 1, -1)]
[((20, 1, False), 0, -1)]
[((14, 4, False), 1, 0), ((16, 4, False), 1, 0), ((20, 4, False), 0, -1)]
[((16, 6, False), 1, 0), ((17, 6, False), 1, 0), ((18, 6, False), 1, -1)]
[((17, 10, False), 0, -1)]
[((19, 1, False), 0, 1)]
[((18, 10, False), 1, -1)]
[((20, 10, False), 0, 1)]
[((19, 9, False), 0, 1)]
[((21, 10, True), 1, 0), ((19, 10, False), 0, 0)]
[((17, 10, False), 1, 0), ((21, 10,

[((13, 4, False), 1, 0), ((18, 4, False), 1, 0), ((19, 4, False), 0, 1)]
[((18, 8, False), 1, -1)]
[((19, 10, False), 0, -1)]
[((13, 10, False), 0, 1)]
[((14, 1, False), 1, -1)]
[((19, 5, False), 0, 1)]
[((13, 5, False), 1, -1)]
[((14, 1, False), 0, 1)]
[((18, 8, False), 0, 1)]
[((17, 10, False), 0, -1)]
[((21, 10, True), 0, 1)]
[((15, 10, False), 1, -1)]
[((21, 1, True), 0, 0)]
[((13, 3, False), 0, 1)]
[((12, 4, False), 1, 0), ((20, 4, False), 0, 1)]
[((12, 5, False), 1, -1)]
[((18, 10, False), 0, 0)]
[((20, 1, True), 0, 1)]
[((17, 1, False), 0, -1)]
[((20, 10, False), 0, 0)]
[((14, 5, False), 1, 0), ((15, 5, False), 1, 0), ((18, 5, False), 1, 0), ((19, 5, False), 0, 0)]
[((21, 7, True), 0, 1)]
[((21, 5, True), 0, 1)]
[((18, 10, True), 1, 0), ((20, 10, True), 0, 1)]
[((16, 8, False), 1, 0), ((17, 8, False), 1, 0), ((19, 8, False), 0, 1)]
[((15, 3, False), 1, 0), ((20, 3, False), 0, -1)]
[((13, 1, False), 1, 0), ((16, 1, False), 1, -1)]
[((16, 2, False), 1, 0), ((17, 2, False), 1, -1)]

[((16, 9, False), 1, 0), ((21, 9, False), 0, 1)]
[((16, 8, False), 0, -1)]
[((12, 10, False), 1, 0), ((18, 10, False), 1, 0), ((19, 10, False), 0, -1)]
[((20, 8, True), 0, 0)]
[((21, 7, False), 1, -1)]
[((16, 1, False), 0, -1)]
[((18, 4, True), 1, 0), ((17, 4, False), 1, 0), ((18, 4, False), 1, -1)]
[((19, 3, False), 1, -1)]
[((17, 7, False), 1, -1)]
[((13, 2, False), 1, -1)]
[((19, 10, True), 0, -1)]
[((18, 6, False), 0, 1)]
[((16, 10, False), 1, 0), ((20, 10, False), 0, 1)]
[((13, 5, False), 1, 0), ((21, 5, False), 0, 1)]
[((15, 2, False), 0, 1)]
[((20, 10, False), 0, 1)]
[((12, 10, False), 1, 0), ((15, 10, False), 1, -1)]
[((14, 2, False), 1, 0), ((17, 2, False), 1, -1)]
[((13, 10, False), 1, 0), ((16, 10, False), 0, -1)]
[((20, 10, False), 0, 1)]
[((14, 10, False), 0, 1)]
[((14, 1, False), 0, -1)]
[((20, 4, False), 0, 0)]
[((17, 10, False), 1, -1)]
[((13, 7, False), 1, -1)]
[((18, 9, False), 1, 0), ((20, 9, False), 0, 0)]
[((19, 6, False), 0, -1)]
[((12, 10, False), 1, 0), ((20, 10

[((13, 6, False), 1, 0), ((20, 6, False), 0, 1)]
[((15, 7, False), 1, 0), ((18, 7, False), 1, 0), ((19, 7, False), 0, -1)]
[((18, 3, False), 1, -1)]
[((12, 1, True), 1, 0), ((17, 1, True), 1, 0), ((17, 1, False), 1, 0), ((21, 1, False), 0, 0)]
[((12, 10, False), 0, -1)]
[((15, 1, True), 1, 0), ((15, 1, False), 1, -1)]
[((19, 5, False), 0, -1)]
[((18, 10, False), 1, -1)]
[((19, 10, False), 0, -1)]
[((15, 7, False), 1, -1)]
[((16, 3, False), 1, -1)]
[((21, 10, True), 0, 1)]
[((13, 3, False), 1, 0), ((15, 3, False), 1, -1)]
[((13, 4, False), 1, 0), ((16, 4, False), 0, -1)]
[((17, 9, False), 1, -1)]
[((20, 10, False), 0, 1)]
[((20, 10, False), 0, 1)]
[((14, 10, False), 1, 0), ((20, 10, False), 0, 0)]
[((19, 4, False), 0, 1)]
[((19, 8, False), 0, 1)]
[((21, 2, True), 0, 1)]
[((17, 1, True), 1, 0), ((19, 1, True), 0, -1)]
[((15, 5, False), 1, 0), ((21, 5, False), 1, -1)]
[((17, 9, False), 1, 0), ((21, 9, False), 0, 1)]
[((13, 4, False), 1, 0), ((15, 4, False), 1, -1)]
[((21, 4, False), 0, 1)

Episode 2000/10000.[((21, 2, False), 0, 1)]
[((20, 10, False), 0, 1)]
[((18, 8, False), 1, -1)]
[((20, 8, False), 0, -1)]
[((17, 9, False), 1, -1)]
[((19, 6, False), 0, 1)]
[((16, 5, False), 1, 0), ((20, 5, False), 0, 1)]
[((20, 6, False), 0, 1)]
[((13, 10, False), 1, -1)]
[((18, 8, False), 1, -1)]
[((20, 5, False), 0, 1)]
[((18, 7, False), 1, -1)]
[((21, 1, True), 0, 1)]
[((16, 9, False), 1, -1)]
[((18, 2, True), 1, 0), ((17, 2, False), 0, 1)]
[((15, 10, False), 1, 0), ((19, 10, False), 0, 1)]
[((20, 10, True), 0, 1)]
[((12, 7, False), 0, -1)]
[((14, 10, False), 1, 0), ((20, 10, False), 0, 1)]
[((14, 10, False), 1, 0), ((17, 10, False), 1, 0), ((20, 10, False), 0, -1)]
[((19, 7, False), 0, 1)]
[((21, 3, True), 0, 1)]
[((12, 2, False), 1, 0), ((21, 2, False), 0, 1)]
[((14, 2, False), 1, 0), ((18, 2, False), 1, -1)]
[((21, 10, True), 0, 1)]
[((13, 10, False), 1, 0), ((20, 10, False), 0, 1)]
[((16, 10, True), 1, 0), ((15, 10, False), 1, -1)]
[((21, 7, True), 0, 1)]
[((16, 10, False), 1,

[((12, 5, False), 0, 1)]
[((14, 2, False), 1, 0), ((20, 2, False), 1, -1)]
[((18, 10, False), 1, -1)]
[((12, 8, False), 1, -1)]
[((18, 2, False), 0, -1)]
[((19, 10, False), 1, 0), ((21, 10, False), 1, -1)]
[((13, 2, False), 1, 0), ((18, 2, False), 0, 1)]
[((13, 2, False), 1, 0), ((21, 2, False), 0, 1)]
[((19, 10, False), 0, -1)]
[((20, 6, False), 0, 1)]
[((15, 10, False), 1, 0), ((17, 10, False), 1, -1)]
[((18, 8, False), 1, -1)]
[((21, 10, True), 0, 1)]
[((20, 9, False), 0, 1)]
[((17, 10, False), 0, -1)]
[((16, 7, False), 1, 0), ((19, 7, False), 0, 1)]
[((15, 5, False), 1, -1)]
[((13, 10, False), 0, 1)]
[((16, 10, False), 1, 0), ((19, 10, False), 0, 1)]
[((17, 9, True), 0, -1)]
[((12, 10, False), 1, 0), ((15, 10, False), 1, -1)]
[((13, 10, False), 1, -1)]
[((20, 8, False), 0, 1)]
[((13, 4, False), 1, -1)]
[((13, 8, False), 1, 0), ((17, 8, False), 0, -1)]
[((20, 4, False), 0, 1)]
[((14, 7, False), 1, -1)]
[((16, 10, False), 1, -1)]
[((20, 10, False), 0, 1)]
[((15, 10, False), 1, 0), ((

[((20, 4, False), 1, -1)]
[((12, 7, False), 1, -1)]
[((12, 5, False), 1, 0), ((18, 5, False), 1, 0), ((20, 5, False), 0, 1)]
[((19, 7, False), 1, -1)]
[((13, 10, False), 1, -1)]
[((21, 1, True), 0, 1)]
[((16, 8, True), 1, 0), ((16, 8, False), 0, 1)]
[((13, 2, False), 1, -1)]
[((21, 1, True), 0, 1)]
[((12, 5, False), 1, 0), ((13, 5, False), 1, 0), ((17, 5, False), 0, 1)]
[((19, 10, False), 0, -1)]
[((14, 1, False), 1, -1)]
[((19, 8, False), 0, 1)]
[((18, 10, False), 1, -1)]
[((20, 10, False), 0, 1)]
[((16, 10, False), 1, -1)]
[((15, 2, False), 0, -1)]
[((13, 1, True), 0, -1)]
[((18, 10, False), 0, 1)]
[((12, 2, False), 1, 0), ((16, 2, False), 1, 0), ((17, 2, False), 1, -1)]
[((13, 9, False), 1, 0), ((16, 9, False), 1, 0), ((20, 9, False), 0, 1)]
[((20, 4, False), 0, 1)]
[((12, 7, False), 1, -1)]
[((20, 10, False), 0, 1)]
[((12, 8, False), 1, 0), ((14, 8, False), 1, -1)]
[((13, 10, False), 1, -1)]
[((15, 10, False), 0, -1)]
[((19, 4, False), 0, 1)]
[((13, 3, False), 1, -1)]
[((13, 1, Fal

[((18, 3, True), 1, 0), ((15, 3, False), 1, -1)]
[((15, 10, False), 1, 0), ((18, 10, False), 1, -1)]
[((21, 1, True), 0, 1)]
[((17, 6, False), 1, -1)]
[((15, 3, False), 0, -1)]
[((17, 9, False), 1, -1)]
[((13, 4, False), 1, 0), ((16, 4, False), 0, 1)]
[((20, 5, False), 0, 1)]
[((20, 5, False), 0, -1)]
[((17, 3, False), 1, -1)]
[((17, 5, True), 1, 0), ((13, 5, False), 1, 0), ((15, 5, False), 0, -1)]
[((14, 2, False), 1, 0), ((17, 2, False), 1, -1)]
[((14, 7, False), 1, 0), ((15, 7, False), 0, 1)]
[((21, 10, True), 0, 1)]
[((12, 10, False), 1, 0), ((21, 10, False), 1, -1)]
[((19, 4, False), 0, 0)]
[((13, 9, False), 1, -1)]
[((13, 5, True), 0, -1)]
[((16, 4, False), 1, 0), ((20, 4, False), 0, 1)]
[((16, 7, False), 1, -1)]
[((19, 1, False), 0, -1)]
[((18, 3, False), 1, 0), ((20, 3, False), 0, 0)]
[((13, 3, False), 0, -1)]
[((18, 10, True), 1, 0), ((12, 10, False), 1, 0), ((18, 10, False), 1, 0), ((20, 10, False), 0, 0)]
[((13, 3, False), 1, 0), ((19, 3, False), 0, 1)]
[((13, 10, False), 1,

[((14, 6, False), 1, 0), ((21, 6, False), 0, 1)]
[((14, 6, False), 1, 0), ((20, 6, False), 0, 1)]
[((16, 8, False), 1, 0), ((19, 8, False), 0, 1)]
[((21, 3, True), 0, 1)]
[((14, 7, False), 0, -1)]
[((21, 4, True), 0, 1)]
[((13, 10, False), 1, -1)]
[((12, 1, False), 1, 0), ((18, 1, False), 1, -1)]
[((14, 1, False), 1, -1)]
[((16, 4, False), 1, -1)]
[((16, 2, False), 1, 0), ((19, 2, False), 1, -1)]
[((16, 5, True), 1, 0), ((15, 5, False), 1, -1)]
[((18, 10, False), 0, 1)]
[((19, 3, False), 0, 0)]
[((15, 10, False), 1, -1)]
[((13, 10, False), 1, 0), ((17, 10, False), 1, -1)]
[((12, 2, False), 1, 0), ((16, 2, False), 1, -1)]
[((17, 9, True), 1, 0), ((20, 9, True), 0, 1)]
[((13, 10, False), 1, 0), ((14, 10, False), 0, -1)]
[((12, 2, False), 0, -1)]
[((16, 3, False), 0, 1)]
[((13, 5, False), 1, 0), ((18, 5, False), 1, -1)]
[((20, 8, False), 0, 1)]
[((18, 2, False), 0, 1)]
[((16, 8, False), 1, -1)]
[((13, 9, False), 1, 0), ((21, 9, False), 0, 1)]
[((16, 5, False), 1, 0), ((20, 5, False), 1, -

[((21, 5, False), 0, 1)]
[((19, 6, False), 0, 1)]
[((17, 7, False), 0, -1)]
[((21, 1, True), 0, 1)]
[((18, 10, False), 1, 0), ((21, 10, False), 0, 1)]
[((19, 3, False), 0, 1)]
[((13, 9, False), 1, -1)]
[((21, 7, True), 0, 0)]
[((12, 9, False), 0, -1)]
[((13, 2, False), 0, 1)]
[((20, 4, False), 1, -1)]
[((20, 7, False), 0, 1)]
[((12, 3, False), 1, -1)]
[((12, 10, False), 1, 0), ((20, 10, False), 0, 1)]
[((13, 8, False), 1, 0), ((16, 8, False), 0, -1)]
[((16, 10, False), 0, -1)]
[((19, 10, True), 0, 1)]
[((21, 7, False), 0, 1)]
[((18, 9, False), 1, -1)]
[((20, 10, False), 0, 0)]
[((19, 1, False), 1, -1)]
[((14, 6, False), 1, 0), ((17, 6, False), 1, 0), ((21, 6, False), 0, 0)]
[((14, 6, False), 1, -1)]
[((12, 7, False), 1, 0), ((20, 7, False), 0, 1)]
[((14, 3, False), 0, -1)]
[((16, 2, False), 0, 1)]
[((20, 5, False), 0, 1)]
[((15, 3, False), 1, 0), ((19, 3, False), 0, -1)]
[((19, 2, False), 0, -1)]
[((18, 7, False), 1, -1)]
[((16, 10, False), 1, 0), ((18, 10, False), 1, -1)]
[((17, 3, Fa

[((14, 1, False), 1, -1)]
[((14, 10, False), 1, -1)]
[((13, 3, False), 1, 0), ((17, 3, False), 1, -1)]
[((15, 10, False), 1, -1)]
[((21, 10, True), 0, 1)]
[((19, 2, True), 1, 0), ((18, 2, False), 1, -1)]
[((17, 10, False), 1, -1)]
[((17, 3, False), 1, -1)]
[((18, 6, False), 1, -1)]
[((15, 10, False), 1, 0), ((18, 10, False), 1, 0), ((19, 10, False), 1, -1)]
[((19, 8, False), 0, 1)]
[((16, 6, False), 1, 0), ((21, 6, False), 0, 1)]
[((21, 10, False), 0, 1)]
[((16, 6, False), 1, -1)]
[((15, 2, False), 1, -1)]
[((17, 10, True), 1, 0), ((21, 10, True), 0, 1)]
[((20, 5, False), 0, 0)]
[((17, 1, False), 0, 0)]
[((16, 5, False), 1, -1)]
[((17, 9, True), 1, 0), ((13, 9, False), 1, -1)]
[((17, 3, False), 1, -1)]
[((13, 6, False), 1, 0), ((17, 6, False), 1, -1)]
[((18, 10, False), 1, 0), ((20, 10, False), 0, 1)]
[((19, 1, False), 0, 1)]
[((12, 10, False), 1, 0), ((16, 10, False), 1, 0), ((21, 10, False), 0, 1)]
[((12, 4, False), 1, 0), ((17, 4, False), 1, -1)]
[((21, 10, False), 0, 0)]
[((14, 10,

Episode 4000/10000.[((18, 1, False), 1, -1)]
[((18, 3, True), 1, 0), ((20, 3, True), 1, 0), ((18, 3, False), 1, -1)]
[((15, 1, False), 1, -1)]
[((17, 2, False), 1, -1)]
[((20, 10, False), 0, -1)]
[((14, 1, False), 1, -1)]
[((12, 9, False), 1, 0), ((17, 9, False), 1, -1)]
[((17, 1, True), 1, 0), ((19, 1, True), 0, -1)]
[((20, 10, False), 1, -1)]
[((19, 10, True), 0, 1)]
[((15, 5, True), 1, 0), ((20, 5, True), 0, 1)]
[((15, 8, False), 0, 1)]
[((13, 1, False), 0, -1)]
[((15, 2, False), 1, 0), ((18, 2, False), 1, -1)]
[((14, 10, False), 1, -1)]
[((18, 10, False), 0, -1)]
[((20, 5, False), 0, -1)]
[((17, 8, False), 1, -1)]
[((17, 10, False), 1, -1)]
[((21, 6, True), 0, 1)]
[((18, 2, False), 1, -1)]
[((14, 10, True), 1, 0), ((21, 10, True), 0, 1)]
[((12, 10, True), 1, 0), ((15, 10, True), 1, 0), ((17, 10, True), 1, 0), ((17, 10, False), 1, -1)]
[((17, 10, False), 0, -1)]
[((14, 3, False), 1, 0), ((16, 3, False), 1, 0), ((20, 3, False), 0, 0)]
[((16, 3, False), 1, -1)]
[((18, 4, False), 1, 0)

[((15, 10, False), 1, -1)]
[((20, 8, False), 0, 1)]
[((13, 10, False), 0, -1)]
[((18, 8, False), 1, -1)]
[((19, 10, False), 0, 0)]
[((12, 8, False), 0, -1)]
[((18, 8, False), 1, -1)]
[((20, 2, False), 0, 0)]
[((18, 4, False), 1, -1)]
[((12, 4, False), 0, -1)]
[((12, 10, False), 1, 0), ((17, 10, False), 1, 0), ((20, 10, False), 0, 1)]
[((13, 8, False), 1, -1)]
[((13, 5, False), 1, 0), ((16, 5, False), 1, -1)]
[((13, 9, False), 1, -1)]
[((18, 5, False), 1, -1)]
[((17, 3, False), 1, 0), ((20, 3, False), 1, -1)]
[((14, 4, False), 1, -1)]
[((20, 7, False), 0, 1)]
[((16, 1, True), 1, 0), ((14, 1, False), 1, 0), ((21, 1, False), 0, 0)]
[((20, 1, True), 0, -1)]
[((12, 10, False), 0, -1)]
[((14, 10, False), 1, -1)]
[((21, 9, True), 0, 0)]
[((14, 10, False), 1, -1)]
[((15, 7, False), 0, -1)]
[((12, 10, False), 1, -1)]
[((20, 10, False), 0, 1)]
[((12, 9, False), 1, -1)]
[((18, 4, False), 0, 1)]
[((12, 4, False), 1, 0), ((19, 4, False), 0, 1)]
[((14, 10, True), 1, 0), ((14, 10, False), 1, -1)]
[((

[((14, 8, False), 1, -1)]
[((20, 5, False), 0, 1)]
[((20, 10, False), 0, 1)]
[((15, 6, True), 0, -1)]
[((17, 8, True), 0, -1)]
[((12, 2, False), 1, 0), ((17, 2, False), 1, -1)]
[((14, 8, False), 0, 1)]
[((13, 1, False), 1, -1)]
[((17, 2, False), 1, 0), ((19, 2, False), 0, -1)]
[((12, 5, False), 0, 1)]
[((21, 6, True), 0, 1)]
[((16, 9, False), 1, -1)]
[((14, 5, False), 1, -1)]
[((17, 4, True), 1, 0), ((12, 4, False), 1, 0), ((20, 4, False), 0, 1)]
[((17, 9, False), 1, -1)]
[((12, 4, False), 1, 0), ((21, 4, False), 0, 1)]
[((19, 5, False), 0, 1)]
[((15, 4, False), 1, -1)]
[((17, 7, False), 1, 0), ((20, 7, False), 1, -1)]
[((20, 10, False), 0, 1)]
[((21, 4, True), 1, 0), ((21, 4, False), 0, 1)]
[((20, 1, False), 1, -1)]
[((16, 9, False), 1, -1)]
[((16, 5, False), 1, -1)]
[((13, 9, False), 1, 0), ((14, 9, False), 1, -1)]
[((15, 10, False), 1, 0), ((21, 10, False), 0, 1)]
[((17, 10, False), 1, -1)]
[((20, 1, False), 1, -1)]
[((14, 10, False), 1, 0), ((17, 10, False), 1, -1)]
[((13, 2, False

[((14, 10, False), 1, 0), ((21, 10, False), 0, 1)]
[((20, 10, False), 0, 1)]
[((12, 3, False), 1, 0), ((18, 3, False), 1, 0), ((19, 3, False), 0, 1)]
[((16, 4, False), 1, -1)]
[((13, 6, False), 1, -1)]
[((12, 1, False), 1, -1)]
[((12, 10, False), 1, 0), ((16, 10, False), 1, 0), ((21, 10, False), 0, 1)]
[((14, 10, False), 1, 0), ((20, 10, False), 1, -1)]
[((21, 10, True), 0, 1)]
[((21, 9, True), 0, 1)]
[((19, 10, True), 0, 1)]
[((15, 10, False), 1, -1)]
[((13, 9, False), 1, -1)]
[((12, 9, False), 1, 0), ((13, 9, False), 1, 0), ((16, 9, False), 0, -1)]
[((17, 4, True), 1, 0), ((17, 4, False), 1, 0), ((18, 4, False), 0, 1)]
[((15, 8, False), 1, -1)]
[((21, 2, False), 0, 1)]
[((20, 9, False), 1, -1)]
[((20, 6, False), 1, -1)]
[((20, 10, False), 0, 0)]
[((17, 10, False), 1, -1)]
[((16, 7, False), 0, -1)]
[((19, 5, False), 0, -1)]
[((15, 10, False), 1, 0), ((20, 10, False), 0, 1)]
[((17, 8, False), 1, -1)]
[((15, 6, False), 1, -1)]
[((16, 6, False), 0, 1)]
[((18, 10, True), 1, 0), ((15, 10, 

[((18, 10, False), 1, -1)]
[((13, 1, False), 0, -1)]
[((17, 6, False), 1, -1)]
[((12, 1, False), 0, -1)]
[((17, 6, False), 1, -1)]
[((18, 3, True), 1, 0), ((19, 3, True), 0, 1)]
[((14, 10, False), 1, 0), ((19, 10, False), 0, 1)]
[((19, 6, False), 1, -1)]
[((15, 7, False), 1, 0), ((20, 7, False), 0, 1)]
[((16, 10, True), 1, 0), ((14, 10, False), 1, 0), ((19, 10, False), 0, 1)]
[((16, 2, False), 1, 0), ((18, 2, False), 1, -1)]
[((19, 9, True), 1, 0), ((16, 9, False), 0, -1)]
[((14, 10, False), 0, -1)]
[((20, 10, False), 1, -1)]
[((19, 10, False), 0, -1)]
[((14, 10, False), 1, 0), ((18, 10, False), 1, 0), ((21, 10, False), 0, 1)]
[((16, 4, False), 1, -1)]
[((16, 10, True), 0, -1)]
[((12, 9, True), 1, 0), ((13, 9, True), 0, 1)]
[((16, 7, False), 1, -1)]
[((21, 10, True), 0, 1)]
[((17, 10, False), 1, -1)]
[((17, 4, False), 1, -1)]
[((20, 6, False), 0, 1)]
[((17, 3, True), 1, 0), ((17, 3, False), 1, 0), ((18, 3, False), 0, 1)]
[((16, 5, False), 1, -1)]
[((13, 4, True), 0, -1)]
[((12, 8, Fals

[((17, 8, False), 0, 1)]
[((12, 1, False), 0, 1)]
[((12, 6, False), 1, 0), ((16, 6, False), 0, 1)]
[((20, 10, False), 0, 0)]
[((18, 2, True), 1, 0), ((15, 2, False), 1, -1)]
[((20, 8, False), 0, 1)]
[((20, 7, False), 0, 1)]
[((19, 3, False), 0, 1)]
[((12, 3, False), 1, -1)]
[((15, 8, False), 1, 0), ((20, 8, False), 0, 1)]
[((14, 10, False), 0, -1)]
[((16, 8, False), 1, -1)]
[((14, 4, False), 1, -1)]
[((20, 6, True), 0, 1)]
[((12, 2, False), 1, 0), ((21, 2, False), 0, 1)]
[((17, 6, False), 1, 0), ((18, 6, False), 1, -1)]
[((14, 6, False), 1, -1)]
[((20, 10, False), 0, -1)]
[((12, 9, False), 1, 0), ((18, 9, False), 1, -1)]
[((16, 2, False), 1, -1)]
[((15, 10, False), 1, 0), ((18, 10, False), 1, -1)]
[((14, 8, False), 1, 0), ((17, 8, False), 1, -1)]
[((13, 10, False), 0, -1)]
[((21, 2, True), 0, 1)]
[((15, 2, False), 1, -1)]
[((19, 7, False), 0, 1)]
[((12, 1, False), 1, 0), ((15, 1, False), 0, -1)]
[((17, 10, True), 1, 0), ((14, 10, False), 1, 0), ((17, 10, False), 0, -1)]
[((14, 10, Fals

[((18, 1, False), 1, 0), ((20, 1, False), 0, -1)]
[((14, 10, False), 1, -1)]
[((19, 8, True), 0, 1)]
[((18, 9, False), 1, -1)]
[((17, 2, True), 1, 0), ((15, 2, False), 1, -1)]
[((21, 10, True), 1, 0), ((17, 10, False), 1, 0), ((18, 10, False), 0, 1)]
[((15, 3, False), 1, 0), ((19, 3, False), 0, 1)]
[((21, 5, True), 0, 1)]
[((12, 1, False), 1, 0), ((20, 1, False), 0, 1)]
[((14, 3, False), 1, 0), ((16, 3, False), 1, 0), ((21, 3, False), 0, 1)]
[((13, 10, False), 0, -1)]
[((16, 3, True), 1, 0), ((16, 3, False), 1, 0), ((20, 3, False), 0, 1)]
[((17, 2, False), 1, 0), ((21, 2, False), 0, 1)]
[((12, 7, False), 1, 0), ((17, 7, False), 0, -1)]
[((20, 9, False), 0, 1)]
[((19, 4, False), 0, 1)]
[((14, 10, False), 1, 0), ((15, 10, False), 1, 0), ((16, 10, False), 1, -1)]
[((20, 7, False), 0, -1)]
[((17, 8, False), 1, -1)]
[((18, 8, False), 1, -1)]
[((20, 10, False), 0, 1)]
[((13, 4, True), 1, 0), ((15, 4, True), 1, 0), ((15, 4, False), 1, -1)]
[((17, 10, True), 1, 0), ((19, 10, True), 0, 1)]
[((2

[((12, 10, False), 1, 0), ((19, 10, False), 0, -1)]
[((12, 3, False), 1, -1)]
[((14, 10, False), 0, -1)]
[((20, 5, False), 1, -1)]
[((13, 5, False), 1, 0), ((18, 5, False), 1, -1)]
[((13, 10, False), 1, -1)]
[((21, 5, True), 0, 1)]
[((12, 3, True), 1, 0), ((12, 3, False), 1, 0), ((16, 3, False), 1, 0), ((19, 3, False), 0, 1)]
[((14, 10, False), 0, -1)]
[((18, 3, True), 1, 0), ((20, 3, True), 0, 1)]
[((14, 8, False), 1, -1)]
[((13, 7, False), 0, -1)]
[((14, 9, False), 1, 0), ((16, 9, False), 1, -1)]
[((20, 4, False), 0, -1)]
[((21, 1, False), 0, 1)]
[((14, 7, False), 1, 0), ((16, 7, False), 1, -1)]
[((16, 10, False), 1, -1)]
[((20, 3, False), 0, 0)]
[((16, 9, False), 1, -1)]
[((21, 5, True), 0, 1)]
[((15, 10, False), 1, -1)]
[((20, 4, False), 0, 1)]
[((21, 3, True), 0, 1)]
[((12, 1, False), 1, 0), ((17, 1, False), 1, 0), ((21, 1, False), 0, 1)]
[((13, 10, True), 1, 0), ((12, 10, False), 0, -1)]
[((16, 6, False), 1, 0), ((21, 6, False), 0, 1)]
[((19, 10, False), 0, -1)]
[((14, 5, False),

[((12, 8, False), 1, 0), ((17, 8, False), 1, 0), ((18, 8, False), 1, 0), ((21, 8, False), 0, 1)]
[((17, 3, False), 0, 1)]
[((20, 9, True), 0, 1)]
[((20, 10, False), 0, 0)]
[((16, 10, False), 1, -1)]
[((14, 3, False), 0, -1)]
[((17, 9, False), 0, -1)]
[((13, 10, False), 1, -1)]
[((19, 6, False), 0, 1)]
[((15, 3, False), 1, -1)]
[((19, 5, False), 0, 1)]
[((12, 3, False), 0, -1)]
[((18, 1, False), 0, -1)]
[((12, 10, False), 1, 0), ((21, 10, False), 0, 1)]
[((16, 3, False), 1, -1)]
[((19, 10, False), 1, -1)]
[((20, 7, False), 0, 1)]
[((16, 10, False), 0, -1)]
[((12, 2, False), 1, -1)]
[((21, 9, True), 1, 0), ((12, 9, False), 1, 0), ((18, 9, False), 1, -1)]
[((16, 8, False), 1, -1)]
[((14, 8, False), 1, -1)]
[((17, 8, False), 1, -1)]
[((15, 8, False), 0, 1)]
[((17, 7, False), 0, -1)]
[((21, 7, True), 0, 1)]
[((21, 4, True), 0, 1)]
[((17, 10, False), 1, -1)]
[((15, 4, False), 0, -1)]
[((20, 4, True), 0, 0)]
[((18, 4, False), 1, -1)]
[((14, 7, False), 1, -1)]
[((13, 5, False), 1, -1)]
[((21, 

[((20, 6, False), 0, 1)]
[((20, 2, False), 1, -1)]
[((21, 1, False), 0, 1)]
[((16, 10, False), 1, -1)]
[((13, 7, False), 1, 0), ((20, 7, False), 0, 1)]
[((20, 10, False), 0, 0)]
[((15, 10, False), 1, 0), ((19, 10, False), 0, 1)]
[((21, 1, True), 1, 0), ((13, 1, False), 1, -1)]
[((20, 3, False), 1, -1)]
[((12, 9, False), 1, -1)]
[((19, 7, False), 0, 1)]
[((19, 3, False), 1, -1)]
[((19, 10, False), 0, -1)]
[((18, 4, False), 1, -1)]
[((12, 6, False), 1, 0), ((20, 6, False), 1, -1)]
[((18, 5, False), 1, -1)]
[((15, 4, False), 1, 0), ((19, 4, False), 1, -1)]
[((19, 3, False), 0, -1)]
[((15, 10, False), 1, -1)]
[((18, 5, False), 1, -1)]
[((13, 10, False), 0, -1)]
[((21, 8, True), 0, 1)]
[((18, 4, False), 1, -1)]
[((16, 9, False), 1, -1)]
[((12, 5, False), 1, 0), ((20, 5, False), 0, 1)]
[((21, 10, False), 0, 1)]
[((13, 1, False), 1, -1)]
[((14, 10, False), 1, 0), ((18, 10, False), 1, -1)]
[((16, 10, True), 0, 1)]
[((12, 10, False), 1, 0), ((13, 10, False), 1, 0), ((20, 10, False), 0, 1)]
[((2

[((18, 2, False), 1, -1)]
[((21, 8, True), 0, 1)]
[((21, 1, True), 1, 0), ((21, 1, False), 0, 1)]
[((19, 5, False), 0, -1)]
[((20, 7, False), 1, -1)]
[((17, 10, False), 1, 0), ((21, 10, False), 0, 1)]
[((12, 7, False), 1, 0), ((15, 7, False), 1, 0), ((18, 7, False), 1, -1)]
[((12, 8, False), 1, 0), ((15, 8, False), 1, 0), ((16, 8, False), 1, 0), ((18, 8, False), 1, -1)]
[((14, 5, True), 1, 0), ((16, 5, True), 1, 0), ((16, 5, False), 1, -1)]
[((18, 10, False), 1, -1)]
[((16, 4, False), 1, -1)]
[((13, 7, False), 0, -1)]
[((16, 10, False), 0, -1)]
[((14, 10, True), 1, 0), ((15, 10, True), 1, 0), ((18, 10, True), 1, 0), ((18, 10, False), 0, 1)]
[((21, 9, True), 0, 1)]
[((21, 8, True), 1, 0), ((21, 8, False), 0, 1)]
[((15, 6, False), 0, -1)]
[((12, 10, False), 0, -1)]
[((16, 9, False), 1, -1)]
[((12, 10, False), 0, -1)]
[((20, 1, False), 0, -1)]
[((12, 1, False), 1, -1)]
[((19, 4, False), 0, 1)]
[((18, 7, False), 1, 0), ((19, 7, False), 0, 1)]
[((15, 2, False), 1, -1)]
[((21, 2, True), 0, 1

Episode 7000/10000.[((12, 10, False), 1, -1)]
[((21, 10, True), 0, 1)]
[((19, 10, False), 1, -1)]
[((14, 1, True), 0, 1)]
[((16, 10, True), 1, 0), ((16, 10, False), 1, -1)]
[((20, 4, False), 0, 1)]
[((16, 1, False), 0, -1)]
[((15, 7, False), 1, -1)]
[((19, 9, False), 0, 1)]
[((14, 5, False), 0, -1)]
[((17, 6, False), 1, 0), ((21, 6, False), 0, 1)]
[((20, 10, False), 0, 0)]
[((20, 10, False), 0, 0)]
[((19, 2, False), 1, -1)]
[((17, 9, False), 1, -1)]
[((20, 3, False), 1, -1)]
[((17, 2, False), 1, 0), ((20, 2, False), 0, 1)]
[((14, 10, False), 1, 0), ((15, 10, False), 1, 0), ((18, 10, False), 1, -1)]
[((18, 10, False), 1, -1)]
[((14, 5, False), 1, 0), ((15, 5, False), 0, -1)]
[((19, 4, False), 1, -1)]
[((15, 9, False), 1, 0), ((17, 9, False), 1, -1)]
[((15, 2, False), 1, -1)]
[((18, 7, False), 1, -1)]
[((12, 1, False), 1, -1)]
[((15, 10, False), 1, 0), ((19, 10, False), 0, 1)]
[((20, 10, False), 0, 1)]
[((12, 1, False), 1, 0), ((14, 1, False), 1, -1)]
[((14, 1, False), 1, -1)]
[((16, 7, 

[((21, 7, True), 0, 1)]
[((14, 4, False), 1, 0), ((18, 4, False), 0, 0)]
[((21, 1, True), 0, 0)]
[((17, 2, False), 1, 0), ((18, 2, False), 1, 0), ((21, 2, False), 0, 1)]
[((20, 2, False), 0, 1)]
[((15, 10, False), 1, -1)]
[((20, 3, False), 0, 1)]
[((15, 10, False), 1, 0), ((21, 10, False), 0, 1)]
[((17, 8, False), 1, -1)]
[((16, 10, False), 1, -1)]
[((14, 3, True), 1, 0), ((16, 3, True), 1, 0), ((16, 3, False), 1, 0), ((20, 3, False), 1, -1)]
[((20, 9, False), 1, -1)]
[((21, 10, True), 0, 0)]
[((13, 8, False), 1, -1)]
[((17, 9, False), 1, -1)]
[((14, 5, False), 1, 0), ((20, 5, False), 1, -1)]
[((20, 2, False), 0, 1)]
[((20, 10, False), 0, 0)]
[((20, 2, False), 0, 1)]
[((15, 10, True), 0, -1)]
[((19, 10, False), 0, -1)]
[((17, 6, False), 0, 1)]
[((19, 5, False), 0, 1)]
[((16, 9, True), 1, 0), ((18, 9, True), 0, -1)]
[((15, 3, False), 1, 0), ((21, 3, False), 0, 1)]
[((14, 5, False), 1, 0), ((16, 5, False), 1, -1)]
[((16, 2, False), 1, -1)]
[((20, 8, False), 0, 1)]
[((19, 3, False), 0, 0)

[((21, 5, False), 0, 1)]
[((18, 1, False), 1, -1)]
[((16, 10, False), 0, -1)]
[((16, 3, True), 1, 0), ((16, 3, False), 1, -1)]
[((15, 10, False), 1, -1)]
[((14, 10, False), 1, -1)]
[((20, 2, False), 1, -1)]
[((12, 10, False), 1, -1)]
[((12, 3, False), 0, 1)]
[((12, 10, False), 1, 0), ((19, 10, False), 0, -1)]
[((19, 6, False), 0, 1)]
[((17, 5, True), 1, 0), ((16, 5, False), 1, -1)]
[((21, 5, True), 0, 1)]
[((19, 9, False), 0, 0)]
[((12, 10, False), 0, -1)]
[((12, 8, False), 1, 0), ((17, 8, False), 1, 0), ((21, 8, False), 0, 1)]
[((18, 5, False), 1, -1)]
[((17, 4, False), 1, 0), ((20, 4, False), 0, 1)]
[((15, 10, False), 1, 0), ((21, 10, False), 0, 0)]
[((18, 7, False), 1, -1)]
[((21, 4, True), 0, 1)]
[((14, 8, False), 1, -1)]
[((12, 10, False), 1, -1)]
[((16, 10, False), 1, -1)]
[((15, 9, False), 1, -1)]
[((15, 7, False), 1, 0), ((20, 7, False), 0, -1)]
[((18, 6, False), 1, -1)]
[((16, 7, False), 0, -1)]
[((21, 9, False), 0, 1)]
[((18, 7, False), 1, -1)]
[((15, 3, False), 1, 0), ((20, 

[((13, 7, False), 1, 0), ((18, 7, False), 1, 0), ((19, 7, False), 1, 0), ((21, 7, False), 0, 1)]
[((12, 9, False), 1, -1)]
[((15, 4, True), 0, 1)]
[((21, 10, True), 1, 0), ((18, 10, False), 1, -1)]
[((13, 10, True), 0, 1)]
[((15, 10, False), 1, 0), ((16, 10, False), 1, -1)]
[((15, 2, False), 0, -1)]
[((12, 7, False), 0, 1)]
[((16, 9, False), 1, -1)]
[((17, 7, True), 1, 0), ((17, 7, False), 1, 0), ((18, 7, False), 1, -1)]
[((12, 6, False), 1, 0), ((17, 6, False), 0, 1)]
[((16, 10, False), 1, -1)]
[((15, 9, False), 1, 0), ((17, 9, False), 1, -1)]
[((14, 10, True), 0, -1)]
[((12, 7, False), 1, 0), ((16, 7, False), 1, 0), ((21, 7, False), 0, 1)]
[((18, 10, False), 1, -1)]
[((14, 8, False), 1, 0), ((18, 8, False), 1, -1)]
[((20, 5, False), 0, 1)]
[((12, 10, False), 1, 0), ((21, 10, False), 1, -1)]
[((15, 10, False), 1, -1)]
[((13, 5, False), 0, 1)]
[((15, 7, False), 1, 0), ((21, 7, False), 1, -1)]
[((15, 10, False), 0, -1)]
[((16, 8, False), 1, -1)]
[((12, 10, True), 1, 0), ((18, 10, True),

[((21, 1, False), 0, 0)]
[((14, 9, False), 0, -1)]
[((18, 9, False), 1, -1)]
[((15, 8, True), 1, 0), ((19, 8, True), 0, -1)]
[((21, 4, True), 0, 1)]
[((12, 7, False), 1, 0), ((17, 7, False), 1, 0), ((19, 7, False), 0, -1)]
[((13, 2, False), 1, -1)]
[((20, 10, False), 1, -1)]
[((19, 8, True), 1, 0), ((19, 8, False), 0, 1)]
[((12, 9, False), 1, -1)]
[((17, 9, False), 1, 0), ((19, 9, False), 0, 1)]
[((21, 8, True), 0, 1)]
[((15, 9, False), 1, 0), ((21, 9, False), 0, 0)]
[((12, 9, False), 0, -1)]
[((16, 10, False), 1, 0), ((18, 10, False), 1, 0), ((19, 10, False), 0, 1)]
[((15, 8, False), 1, -1)]
[((16, 10, False), 1, -1)]
[((14, 8, False), 1, -1)]
[((14, 7, False), 1, 0), ((20, 7, False), 1, -1)]
[((14, 10, False), 0, -1)]
[((20, 5, False), 1, -1)]
[((12, 4, False), 0, 1)]
[((20, 6, False), 0, 1)]
[((14, 5, False), 0, 1)]
[((14, 3, False), 0, 1)]
[((13, 6, False), 1, -1)]
[((21, 10, False), 0, 0)]
[((20, 6, False), 1, -1)]
[((20, 7, False), 0, 1)]
[((17, 6, False), 1, -1)]
[((21, 8, False

[((17, 10, False), 1, -1)]
[((18, 4, False), 1, -1)]
[((15, 6, True), 1, 0), ((17, 6, True), 1, 0), ((19, 6, True), 0, 1)]
[((14, 9, False), 1, 0), ((21, 9, False), 0, 1)]
[((19, 1, False), 0, 1)]
[((15, 8, False), 1, 0), ((18, 8, False), 1, -1)]
[((12, 3, False), 1, 0), ((16, 3, False), 1, 0), ((20, 3, False), 0, 1)]
[((21, 10, True), 1, 0), ((21, 10, False), 0, 1)]
[((16, 9, False), 1, 0), ((21, 9, False), 0, 1)]
[((13, 10, False), 0, -1)]
[((21, 1, False), 0, 0)]
[((19, 10, False), 0, 1)]
[((14, 10, False), 1, 0), ((15, 10, False), 1, 0), ((16, 10, False), 1, -1)]
[((15, 6, False), 1, -1)]
[((16, 10, False), 1, 0), ((20, 10, False), 0, 1)]
[((15, 4, False), 0, -1)]
[((17, 10, False), 0, -1)]
[((14, 8, False), 1, -1)]
[((13, 4, False), 1, -1)]
[((14, 10, False), 1, 0), ((18, 10, False), 1, -1)]
[((14, 4, True), 1, 0), ((18, 4, True), 0, -1)]
[((15, 3, False), 0, 1)]
[((14, 10, False), 1, -1)]
[((18, 5, False), 0, 1)]
[((20, 2, False), 0, 1)]
[((12, 4, False), 1, 0), ((14, 4, False), 

[((14, 1, False), 1, -1)]
[((17, 1, False), 0, 1)]
[((17, 3, False), 1, 0), ((20, 3, False), 1, 0), ((21, 3, False), 0, 1)]
[((15, 6, False), 1, -1)]
[((15, 6, False), 1, -1)]
[((21, 4, True), 0, 1)]
[((18, 7, False), 1, -1)]
[((20, 9, False), 0, 0)]
[((21, 7, True), 0, 1)]
[((12, 6, False), 1, -1)]
[((14, 8, False), 1, -1)]
[((20, 10, False), 1, -1)]
[((17, 8, False), 1, -1)]
[((14, 6, False), 1, 0), ((17, 6, False), 0, 1)]
[((14, 8, False), 1, 0), ((20, 8, False), 0, 1)]
[((14, 7, False), 1, -1)]
[((18, 4, False), 0, 0)]
[((19, 1, False), 0, 1)]
[((12, 3, True), 1, 0), ((12, 3, False), 1, 0), ((16, 3, False), 0, -1)]
[((19, 6, False), 0, -1)]
[((13, 7, False), 0, 1)]
[((14, 8, False), 0, -1)]
[((18, 2, False), 0, 1)]
[((14, 8, False), 1, 0), ((20, 8, False), 0, 1)]
[((12, 3, False), 0, -1)]
[((13, 2, False), 1, -1)]
[((18, 6, False), 1, -1)]
[((16, 10, False), 0, -1)]
[((19, 10, True), 0, -1)]
[((12, 7, False), 1, 0), ((14, 7, False), 1, -1)]
[((18, 2, False), 1, -1)]
[((15, 1, False

[((17, 9, False), 1, -1)]
[((18, 10, False), 1, -1)]
[((12, 2, False), 1, 0), ((18, 2, False), 1, -1)]
[((13, 10, False), 1, 0), ((16, 10, False), 1, -1)]
[((18, 5, False), 0, 1)]
[((13, 10, False), 1, 0), ((16, 10, False), 0, -1)]
[((20, 3, False), 0, 1)]
[((18, 3, False), 1, -1)]
[((13, 10, False), 1, -1)]
[((12, 7, False), 1, -1)]
[((20, 5, False), 0, 1)]
[((12, 10, False), 0, -1)]
[((18, 10, False), 0, -1)]
[((16, 8, False), 1, 0), ((20, 8, False), 1, -1)]
[((17, 1, True), 1, 0), ((17, 1, False), 1, -1)]
[((18, 1, False), 0, -1)]
[((20, 8, False), 0, 1)]
[((21, 7, True), 1, 0), ((14, 7, False), 1, -1)]
[((13, 3, False), 1, -1)]
[((15, 7, True), 1, 0), ((15, 7, False), 1, -1)]
[((20, 10, False), 0, 1)]
[((13, 2, False), 1, 0), ((20, 2, False), 0, 1)]
[((17, 7, False), 1, 0), ((18, 7, False), 0, 1)]
[((13, 2, False), 1, 0), ((19, 2, False), 0, 1)]
[((15, 10, False), 0, -1)]
[((19, 4, False), 0, 1)]
[((19, 10, False), 0, 1)]
[((14, 10, False), 1, -1)]
[((21, 4, True), 1, 0), ((18, 4, 

[((20, 7, False), 0, 1)]
[((12, 10, True), 1, 0), ((12, 10, False), 1, 0), ((15, 10, False), 1, -1)]
[((14, 10, False), 1, 0), ((19, 10, False), 1, -1)]
[((20, 5, False), 0, -1)]
[((21, 2, True), 1, 0), ((21, 2, False), 0, 1)]
[((16, 3, False), 1, 0), ((21, 3, False), 0, 0)]
[((14, 7, False), 0, -1)]
[((15, 10, False), 0, -1)]
[((19, 7, False), 1, 0), ((20, 7, False), 1, -1)]
[((14, 1, True), 1, 0), ((14, 1, False), 0, 1)]
[((16, 6, True), 1, 0), ((16, 6, False), 0, -1)]
[((16, 10, False), 1, -1)]
[((14, 6, True), 0, -1)]
[((13, 10, False), 1, 0), ((15, 10, False), 1, -1)]
[((13, 3, False), 1, -1)]
[((15, 8, False), 1, 0), ((20, 8, False), 0, 0)]
[((12, 6, False), 0, -1)]
[((20, 10, False), 0, 1)]
[((18, 8, False), 1, 0), ((21, 8, False), 0, 1)]
[((19, 3, False), 0, 1)]
[((15, 3, False), 1, -1)]
[((19, 6, False), 0, -1)]
[((20, 6, False), 0, 1)]
[((12, 10, False), 1, 0), ((20, 10, False), 0, 0)]
[((18, 9, False), 0, -1)]
[((19, 5, False), 1, 0), ((20, 5, False), 0, 1)]
[((12, 3, False)

[((15, 10, False), 1, 0), ((19, 10, False), 0, -1)]
[((17, 7, False), 1, -1)]
[((18, 9, False), 1, -1)]
[((20, 10, False), 0, 1)]
[((12, 2, False), 1, 0), ((14, 2, False), 0, 1)]
[((15, 3, False), 1, -1)]
[((12, 8, False), 1, -1)]
[((15, 3, False), 1, 0), ((20, 3, False), 0, 0)]
[((19, 9, False), 1, -1)]
[((14, 10, False), 1, -1)]
[((14, 5, False), 1, 0), ((21, 5, False), 0, 1)]
[((15, 5, False), 1, -1)]
[((17, 10, False), 0, -1)]
[((16, 9, False), 1, -1)]
[((15, 5, False), 1, 0), ((16, 5, False), 0, 1)]
[((20, 6, False), 1, -1)]
[((13, 10, False), 1, 0), ((19, 10, False), 0, -1)]
[((19, 3, True), 0, -1)]
[((17, 3, False), 1, -1)]
[((19, 10, False), 0, -1)]
[((21, 8, True), 0, 1)]
[((18, 7, False), 0, 0)]
[((20, 10, True), 1, 0), ((19, 10, False), 0, 1)]
[((16, 1, False), 0, -1)]
[((12, 1, True), 0, -1)]
[((20, 10, False), 0, 1)]
[((12, 5, False), 1, 0), ((14, 5, False), 1, -1)]
[((18, 1, False), 1, -1)]
[((19, 6, True), 1, 0), ((21, 6, True), 1, 0), ((21, 6, False), 0, 1)]
[((21, 4, F

[((20, 2, False), 0, 1)]
[((18, 8, False), 1, -1)]
[((21, 10, True), 0, 1)]
[((16, 10, False), 0, -1)]
[((21, 10, True), 0, 1)]
[((16, 3, False), 1, 0), ((17, 3, False), 1, 0), ((19, 3, False), 0, 0)]
[((20, 3, False), 0, 0)]
[((15, 1, False), 1, -1)]
[((20, 6, False), 0, 1)]
[((13, 10, False), 1, 0), ((20, 10, False), 0, 0)]
[((17, 6, False), 1, -1)]
[((14, 7, False), 1, 0), ((15, 7, False), 0, -1)]
[((12, 5, False), 1, 0), ((20, 5, False), 1, -1)]
[((18, 10, False), 0, -1)]
[((18, 10, True), 1, 0), ((21, 10, True), 0, 1)]
[((21, 10, True), 0, 1)]
[((16, 9, False), 1, 0), ((19, 9, False), 0, 0)]
[((17, 2, False), 1, 0), ((20, 2, False), 1, -1)]
[((15, 3, False), 1, -1)]
[((16, 5, True), 1, 0), ((21, 5, True), 0, 1)]
[((12, 5, False), 0, -1)]
[((13, 10, False), 1, 0), ((15, 10, False), 1, -1)]
[((21, 7, True), 0, 1)]
[((15, 10, False), 0, -1)]
[((14, 6, False), 1, 0), ((15, 6, False), 0, -1)]
[((21, 10, False), 0, 1)]
[((13, 2, True), 1, 0), ((15, 2, True), 1, 0), ((12, 2, False), 1, 0

Episode 10000/10000.[((14, 10, False), 1, 0), ((21, 10, False), 1, -1)]
