In [1]:
%matplotlib inline

import gym
import matplotlib
import numpy as np
import sys

from collections import defaultdict

if "../" not in sys.path:
  sys.path.append("../") 
from lib.envs.blackjack import BlackjackEnv
from lib import plotting

matplotlib.style.use('ggplot')

In [2]:
env = BlackjackEnv()

In [6]:
def mc_prediction(policy, env, num_episodes, discount_factor=1.0):
    """
    Monte Carlo prediction algorithm. Calculates the value function
    for a given policy using sampling.
    
    Args:
        policy: A function that maps an observation to action probabilities.
        env: OpenAI gym environment.
        num_episodes: Number of episodes to sample.
        discount_factor: Gamma discount factor.
    
    Returns:
        A dictionary that maps from state -> value.
        The state is a tuple and the value is a float.
    """

    # Keeps track of sum and count of returns for each state
    # to calculate an average. We could use an array to save all
    # returns (like in the book) but that's memory inefficient.
    returns_sum = defaultdict(float)
    returns_count = defaultdict(float)
    
    # The final value function
    V = defaultdict(float)
    
    for i_episode in range(1, num_episodes + 1):
        # Print out which episode we're on, useful for debugging.
        if i_episode % 1000 == 0:
            print("\rEpisode {}/{}.".format(i_episode, num_episodes), end="")
            sys.stdout.flush()

        # Generate an episode.
        # An episode is an array of (state, action, reward) tuples
        episode = []
        state = env.reset()
        for t in range(100):
            action = policy(state)
            next_state, reward, done, _ = env.step(action)
            episode.append((state, action, reward))
            if done:
                break
            state = next_state

        print("episode lenght:", len(episode))
        print("episode;", episode)
        # Find all states the we've visited in this episode
        # We convert each state to a tuple so that we can use it as a dict key
        states_in_episode = set([tuple(x[0]) for x in episode])
        for state in states_in_episode:
            # Find the first occurance of the state in the episode
            first_occurence_idx = next(i for i,x in enumerate(episode) if x[0] == state)
            print("find state:", state)
            print('find first-->', first_occurence_idx)
            # Sum up all rewards since the first occurance
            print("find state result", episode[first_occurence_idx:])
            G = sum([x[2]*(discount_factor**i) for i, x in enumerate(episode[first_occurence_idx:])])
            # Calculate average return for this state over all sampled episodes
            returns_sum[state] += G
            returns_count[state] += 1.0
            V[state] = returns_sum[state] / returns_count[state]

    return V    

In [4]:
def sample_policy(observation):
    """
    A policy that sticks if the player score is >= 20 and hits otherwise.
    """
    score, dealer_score, usable_ace = observation
    return 0 if score >= 20 else 1

In [7]:
V_10k = mc_prediction(sample_policy, env, num_episodes=10000)
plotting.plot_value_function(V_10k, title="10,000 Steps")

V_500k = mc_prediction(sample_policy, env, num_episodes=500000)
plotting.plot_value_function(V_500k, title="500,000 Steps")

episode lenght: 1
episode; [((21, 5, True), 0, 1)]
find state: (21, 5, True)
find first--> 0
find state result [((21, 5, True), 0, 1)]
episode lenght: 2
episode; [((14, 10, False), 1, 0), ((21, 10, False), 0, 1)]
find state: (14, 10, False)
find first--> 0
find state result [((14, 10, False), 1, 0), ((21, 10, False), 0, 1)]
find state: (21, 10, False)
find first--> 1
find state result [((21, 10, False), 0, 1)]
episode lenght: 1
episode; [((21, 10, True), 0, 1)]
find state: (21, 10, True)
find first--> 0
find state result [((21, 10, True), 0, 1)]
episode lenght: 1
episode; [((13, 2, False), 1, -1)]
find state: (13, 2, False)
find first--> 0
find state result [((13, 2, False), 1, -1)]
episode lenght: 3
episode; [((12, 2, False), 1, 0), ((17, 2, False), 1, 0), ((21, 2, False), 0, 1)]
find state: (17, 2, False)
find first--> 1
find state result [((17, 2, False), 1, 0), ((21, 2, False), 0, 1)]
find state: (12, 2, False)
find first--> 0
find state result [((12, 2, False), 1, 0), ((17, 2, Fal

 1
episode; [((19, 8, False), 1, -1)]
find state: (19, 8, False)
find first--> 0
find state result [((19, 8, False), 1, -1)]
episode lenght: 3
episode; [((14, 9, True), 1, 0), ((19, 9, True), 1, 0), ((18, 9, False), 1, -1)]
find state: (18, 9, False)
find first--> 2
find state result [((18, 9, False), 1, -1)]
find state: (19, 9, True)
find first--> 1
find state result [((19, 9, True), 1, 0), ((18, 9, False), 1, -1)]
find state: (14, 9, True)
find first--> 0
find state result [((14, 9, True), 1, 0), ((19, 9, True), 1, 0), ((18, 9, False), 1, -1)]
episode lenght: 2
episode; [((15, 10, False), 1, 0), ((16, 10, False), 1, -1)]
find state: (16, 10, False)
find first--> 1
find state result [((16, 10, False), 1, -1)]
find state: (15, 10, False)
find first--> 0
find state result [((15, 10, False), 1, 0), ((16, 10, False), 1, -1)]
episode lenght: 3
episode; [((12, 8, False), 1, 0), ((17, 8, False), 1, 0), ((18, 8, False), 1, -1)]
find state: (17, 8, False)
find first--> 1
find state result [((1


find first--> 0
find state result [((21, 10, False), 0, 0)]
episode lenght: 1
episode; [((14, 6, False), 1, -1)]
find state: (14, 6, False)
find first--> 0
find state result [((14, 6, False), 1, -1)]
episode lenght: 2
episode; [((12, 10, False), 1, 0), ((14, 10, False), 1, -1)]
find state: (14, 10, False)
find first--> 1
find state result [((14, 10, False), 1, -1)]
find state: (12, 10, False)
find first--> 0
find state result [((12, 10, False), 1, 0), ((14, 10, False), 1, -1)]
episode lenght: 2
episode; [((12, 2, False), 1, 0), ((21, 2, False), 0, 1)]
find state: (12, 2, False)
find first--> 0
find state result [((12, 2, False), 1, 0), ((21, 2, False), 0, 1)]
find state: (21, 2, False)
find first--> 1
find state result [((21, 2, False), 0, 1)]
episode lenght: 2
episode; [((19, 2, False), 1, 0), ((20, 2, False), 0, -1)]
find state: (19, 2, False)
find first--> 0
find state result [((19, 2, False), 1, 0), ((20, 2, False), 0, -1)]
find state: (20, 2, False)
find first--> 1
find state res

[((16, 10, False), 1, -1)]
episode lenght: 1
episode; [((20, 9, False), 0, 1)]
find state: (20, 9, False)
find first--> 0
find state result [((20, 9, False), 0, 1)]
episode lenght: 1
episode; [((21, 2, True), 0, 1)]
find state: (21, 2, True)
find first--> 0
find state result [((21, 2, True), 0, 1)]
episode lenght: 1
episode; [((21, 3, True), 0, 1)]
find state: (21, 3, True)
find first--> 0
find state result [((21, 3, True), 0, 1)]
episode lenght: 1
episode; [((16, 10, False), 1, -1)]
find state: (16, 10, False)
find first--> 0
find state result [((16, 10, False), 1, -1)]
episode lenght: 2
episode; [((16, 7, False), 1, 0), ((20, 7, False), 0, 1)]
find state: (20, 7, False)
find first--> 1
find state result [((20, 7, False), 0, 1)]
find state: (16, 7, False)
find first--> 0
find state result [((16, 7, False), 1, 0), ((20, 7, False), 0, 1)]
episode lenght: 2
episode; [((15, 4, False), 1, 0), ((19, 4, False), 1, -1)]
find state: (15, 4, False)
find first--> 0
find state result [((15, 4, Fa

 [((17, 10, True), 1, 0), ((20, 10, True), 0, 0)]
find state: (16, 10, True)
find first--> 0
find state result [((16, 10, True), 1, 0), ((17, 10, True), 1, 0), ((20, 10, True), 0, 0)]
find state: (20, 10, True)
find first--> 2
find state result [((20, 10, True), 0, 0)]
episode lenght: 2
episode; [((19, 8, True), 1, 0), ((16, 8, False), 1, -1)]
find state: (19, 8, True)
find first--> 0
find state result [((19, 8, True), 1, 0), ((16, 8, False), 1, -1)]
find state: (16, 8, False)
find first--> 1
find state result [((16, 8, False), 1, -1)]
episode lenght: 2
episode; [((15, 10, False), 1, 0), ((18, 10, False), 1, -1)]
find state: (15, 10, False)
find first--> 0
find state result [((15, 10, False), 1, 0), ((18, 10, False), 1, -1)]
find state: (18, 10, False)
find first--> 1
find state result [((18, 10, False), 1, -1)]
episode lenght: 2
episode; [((17, 5, False), 1, 0), ((21, 5, False), 0, 0)]
find state: (21, 5, False)
find first--> 1
find state result [((21, 5, False), 0, 0)]
find state: (1

find state result [((12, 10, False), 1, 0), ((14, 10, False), 1, 0), ((19, 10, False), 1, 0), ((20, 10, False), 0, -1)]
find state: (20, 10, False)
find first--> 3
find state result [((20, 10, False), 0, -1)]
episode lenght: 3
episode; [((14, 8, False), 1, 0), ((17, 8, False), 1, 0), ((20, 8, False), 0, 1)]
find state: (17, 8, False)
find first--> 1
find state result [((17, 8, False), 1, 0), ((20, 8, False), 0, 1)]
find state: (14, 8, False)
find first--> 0
find state result [((14, 8, False), 1, 0), ((17, 8, False), 1, 0), ((20, 8, False), 0, 1)]
find state: (20, 8, False)
find first--> 2
find state result [((20, 8, False), 0, 1)]
episode lenght: 1
episode; [((16, 10, False), 1, -1)]
find state: (16, 10, False)
find first--> 0
find state result [((16, 10, False), 1, -1)]
episode lenght: 3
episode; [((19, 9, True), 1, 0), ((12, 9, False), 1, 0), ((20, 9, False), 0, 1)]
find state: (20, 9, False)
find first--> 2
find state result [((20, 9, False), 0, 1)]
find state: (19, 9, True)
find fi


find state result [((17, 10, False), 1, 0), ((20, 10, False), 0, 0)]
find state: (14, 10, True)
find first--> 0
find state result [((14, 10, True), 1, 0), ((17, 10, True), 1, 0), ((17, 10, False), 1, 0), ((20, 10, False), 0, 0)]
find state: (20, 10, False)
find first--> 3
find state result [((20, 10, False), 0, 0)]
episode lenght: 1
episode; [((21, 3, False), 0, 1)]
find state: (21, 3, False)
find first--> 0
find state result [((21, 3, False), 0, 1)]
episode lenght: 4
episode; [((14, 10, False), 1, 0), ((15, 10, False), 1, 0), ((18, 10, False), 1, 0), ((21, 10, False), 0, 1)]
find state: (14, 10, False)
find first--> 0
find state result [((14, 10, False), 1, 0), ((15, 10, False), 1, 0), ((18, 10, False), 1, 0), ((21, 10, False), 0, 1)]
find state: (15, 10, False)
find first--> 1
find state result [((15, 10, False), 1, 0), ((18, 10, False), 1, 0), ((21, 10, False), 0, 1)]
find state: (21, 10, False)
find first--> 3
find state result [((21, 10, False), 0, 1)]
find state: (18, 10, False)

(21, 10, False)
find first--> 2
find state result [((21, 10, False), 0, 1)]
find state: (18, 10, False)
find first--> 1
find state result [((18, 10, False), 1, 0), ((21, 10, False), 0, 1)]
episode lenght: 2
episode; [((14, 6, False), 1, 0), ((20, 6, False), 0, -1)]
find state: (20, 6, False)
find first--> 1
find state result [((20, 6, False), 0, -1)]
find state: (14, 6, False)
find first--> 0
find state result [((14, 6, False), 1, 0), ((20, 6, False), 0, -1)]
episode lenght: 3
episode; [((13, 10, False), 1, 0), ((15, 10, False), 1, 0), ((17, 10, False), 1, -1)]
find state: (17, 10, False)
find first--> 2
find state result [((17, 10, False), 1, -1)]
find state: (15, 10, False)
find first--> 1
find state result [((15, 10, False), 1, 0), ((17, 10, False), 1, -1)]
find state: (13, 10, False)
find first--> 0
find state result [((13, 10, False), 1, 0), ((15, 10, False), 1, 0), ((17, 10, False), 1, -1)]
episode lenght: 2
episode; [((12, 10, False), 1, 0), ((21, 10, False), 0, 1)]
find state: 

 [((15, 6, False), 1, -1)]
find state: (15, 6, False)
find first--> 0
find state result [((15, 6, False), 1, -1)]
episode lenght: 1
episode; [((19, 10, False), 1, -1)]
find state: (19, 10, False)
find first--> 0
find state result [((19, 10, False), 1, -1)]
episode lenght: 1
episode; [((15, 6, False), 1, -1)]
find state: (15, 6, False)
find first--> 0
find state result [((15, 6, False), 1, -1)]
episode lenght: 2
episode; [((17, 1, False), 1, 0), ((21, 1, False), 0, 1)]
find state: (17, 1, False)
find first--> 0
find state result [((17, 1, False), 1, 0), ((21, 1, False), 0, 1)]
find state: (21, 1, False)
find first--> 1
find state result [((21, 1, False), 0, 1)]
episode lenght: 3
episode; [((12, 10, True), 1, 0), ((12, 10, False), 1, 0), ((16, 10, False), 1, -1)]
find state: (16, 10, False)
find first--> 2
find state result [((16, 10, False), 1, -1)]
find state: (12, 10, False)
find first--> 1
find state result [((12, 10, False), 1, 0), ((16, 10, False), 1, -1)]
find state: (12, 10, True

find state result [((21, 1, False), 0, 1)]
find state: (12, 1, False)
find first--> 3
find state result [((12, 1, False), 1, 0), ((16, 1, False), 1, 0), ((21, 1, False), 0, 1)]
find state: (13, 1, True)
find first--> 1
find state result [((13, 1, True), 1, 0), ((17, 1, True), 1, 0), ((12, 1, False), 1, 0), ((16, 1, False), 1, 0), ((21, 1, False), 0, 1)]
find state: (16, 1, False)
find first--> 4
find state result [((16, 1, False), 1, 0), ((21, 1, False), 0, 1)]
find state: (17, 1, True)
find first--> 2
find state result [((17, 1, True), 1, 0), ((12, 1, False), 1, 0), ((16, 1, False), 1, 0), ((21, 1, False), 0, 1)]
find state: (12, 1, True)
find first--> 0
find state result [((12, 1, True), 1, 0), ((13, 1, True), 1, 0), ((17, 1, True), 1, 0), ((12, 1, False), 1, 0), ((16, 1, False), 1, 0), ((21, 1, False), 0, 1)]
episode lenght: 1
episode; [((21, 4, True), 0, 1)]
find state: (21, 4, True)
find first--> 0
find state result [((21, 4, True), 0, 1)]
episode lenght: 1
episode; [((17, 7, Fals


episode lenght: 1
episode; [((19, 10, False), 1, -1)]
find state: (19, 10, False)
find first--> 0
find state result [((19, 10, False), 1, -1)]
episode lenght: 1
episode; [((21, 10, False), 0, 1)]
find state: (21, 10, False)
find first--> 0
find state result [((21, 10, False), 0, 1)]
episode lenght: 1
episode; [((13, 3, False), 1, -1)]
find state: (13, 3, False)
find first--> 0
find state result [((13, 3, False), 1, -1)]
episode lenght: 1
episode; [((14, 8, False), 1, -1)]
find state: (14, 8, False)
find first--> 0
find state result [((14, 8, False), 1, -1)]
episode lenght: 2
episode; [((18, 10, True), 1, 0), ((12, 10, False), 1, -1)]
find state: (18, 10, True)
find first--> 0
find state result [((18, 10, True), 1, 0), ((12, 10, False), 1, -1)]
find state: (12, 10, False)
find first--> 1
find state result [((12, 10, False), 1, -1)]
episode lenght: 2
episode; [((15, 9, False), 1, 0), ((20, 9, False), 0, 1)]
find state: (20, 9, False)
find first--> 1
find state result [((20, 9, False), 0

(13, 10, False)
find first--> 1
find state result [((13, 10, False), 1, 0), ((21, 10, False), 0, 1)]
episode lenght: 1
episode; [((21, 10, True), 0, 1)]
find state: (21, 10, True)
find first--> 0
find state result [((21, 10, True), 0, 1)]
episode lenght: 1
episode; [((18, 10, False), 1, -1)]
find state: (18, 10, False)
find first--> 0
find state result [((18, 10, False), 1, -1)]
episode lenght: 1
episode; [((18, 10, False), 1, -1)]
find state: (18, 10, False)
find first--> 0
find state result [((18, 10, False), 1, -1)]
episode lenght: 3
episode; [((12, 9, False), 1, 0), ((18, 9, False), 1, 0), ((20, 9, False), 0, 1)]
find state: (20, 9, False)
find first--> 2
find state result [((20, 9, False), 0, 1)]
find state: (18, 9, False)
find first--> 1
find state result [((18, 9, False), 1, 0), ((20, 9, False), 0, 1)]
find state: (12, 9, False)
find first--> 0
find state result [((12, 9, False), 1, 0), ((18, 9, False), 1, 0), ((20, 9, False), 0, 1)]
episode lenght: 1
episode; [((16, 10, False),

 (15, 5, False)
find first--> 0
find state result [((15, 5, False), 1, -1)]
episode lenght: 1
episode; [((20, 2, False), 0, 1)]
find state: (20, 2, False)
find first--> 0
find state result [((20, 2, False), 0, 1)]
episode lenght: 1
episode; [((18, 3, False), 1, -1)]
find state: (18, 3, False)
find first--> 0
find state result [((18, 3, False), 1, -1)]
episode lenght: 2
episode; [((15, 7, False), 1, 0), ((20, 7, False), 0, 0)]
find state: (20, 7, False)
find first--> 1
find state result [((20, 7, False), 0, 0)]
find state: (15, 7, False)
find first--> 0
find state result [((15, 7, False), 1, 0), ((20, 7, False), 0, 0)]
episode lenght: 1
episode; [((20, 5, False), 0, 1)]
find state: (20, 5, False)
find first--> 0
find state result [((20, 5, False), 0, 1)]
episode lenght: 2
episode; [((16, 7, False), 1, 0), ((18, 7, False), 1, -1)]
find state: (18, 7, False)
find first--> 1
find state result [((18, 7, False), 1, -1)]
find state: (16, 7, False)
find first--> 0
find state result [((16, 7, F

 1
episode; [((20, 9, True), 0, 1)]
find state: (20, 9, True)
find first--> 0
find state result [((20, 9, True), 0, 1)]
episode lenght: 1
episode; [((20, 2, False), 0, 1)]
find state: (20, 2, False)
find first--> 0
find state result [((20, 2, False), 0, 1)]
episode lenght: 1
episode; [((20, 7, False), 0, 1)]
find state: (20, 7, False)
find first--> 0
find state result [((20, 7, False), 0, 1)]
episode lenght: 3
episode; [((17, 6, True), 1, 0), ((17, 6, False), 1, 0), ((18, 6, False), 1, -1)]
find state: (18, 6, False)
find first--> 2
find state result [((18, 6, False), 1, -1)]
find state: (17, 6, True)
find first--> 0
find state result [((17, 6, True), 1, 0), ((17, 6, False), 1, 0), ((18, 6, False), 1, -1)]
find state: (17, 6, False)
find first--> 1
find state result [((17, 6, False), 1, 0), ((18, 6, False), 1, -1)]
episode lenght: 1
episode; [((13, 1, False), 1, -1)]
find state: (13, 1, False)
find first--> 0
find state result [((13, 1, False), 1, -1)]
episode lenght: 1
episode; [((20,


find first--> 1
find state result [((14, 2, False), 1, -1)]
find state: (14, 2, True)
find first--> 0
find state result [((14, 2, True), 1, 0), ((14, 2, False), 1, -1)]
episode lenght: 1
episode; [((15, 10, False), 1, -1)]
find state: (15, 10, False)
find first--> 0
find state result [((15, 10, False), 1, -1)]
episode lenght: 2
episode; [((14, 3, False), 1, 0), ((18, 3, False), 1, -1)]
find state: (14, 3, False)
find first--> 0
find state result [((14, 3, False), 1, 0), ((18, 3, False), 1, -1)]
find state: (18, 3, False)
find first--> 1
find state result [((18, 3, False), 1, -1)]
episode lenght: 1
episode; [((20, 4, False), 0, 1)]
find state: (20, 4, False)
find first--> 0
find state result [((20, 4, False), 0, 1)]
episode lenght: 1
episode; [((19, 10, False), 1, -1)]
find state: (19, 10, False)
find first--> 0
find state result [((19, 10, False), 1, -1)]
episode lenght: 1
episode; [((17, 6, False), 1, -1)]
find state: (17, 6, False)
find first--> 0
find state result [((17, 6, False),

(17, 10, False)
find first--> 0
find state result [((17, 10, False), 1, -1)]
episode lenght: 1
episode; [((20, 1, False), 0, -1)]
find state: (20, 1, False)
find first--> 0
find state result [((20, 1, False), 0, -1)]
episode lenght: 1
episode; [((15, 10, False), 1, -1)]
find state: (15, 10, False)
find first--> 0
find state result [((15, 10, False), 1, -1)]
episode lenght: 1
episode; [((14, 10, False), 1, -1)]
find state: (14, 10, False)
find first--> 0
find state result [((14, 10, False), 1, -1)]
episode lenght: 1
episode; [((21, 4, True), 0, 1)]
find state: (21, 4, True)
find first--> 0
find state result [((21, 4, True), 0, 1)]
episode lenght: 4
episode; [((13, 5, True), 1, 0), ((15, 5, True), 1, 0), ((12, 5, False), 1, 0), ((20, 5, False), 0, 1)]
find state: (13, 5, True)
find first--> 0
find state result [((13, 5, True), 1, 0), ((15, 5, True), 1, 0), ((12, 5, False), 1, 0), ((20, 5, False), 0, 1)]
find state: (12, 5, False)
find first--> 2
find state result [((12, 5, False), 1, 0),

episode lenght: 1
episode; [((20, 10, False), 0, 1)]
find state: (20, 10, False)
find first--> 0
find state result [((20, 10, False), 0, 1)]
episode lenght: 2
episode; [((17, 8, True), 1, 0), ((17, 8, False), 1, -1)]
find state: (17, 8, False)
find first--> 1
find state result [((17, 8, False), 1, -1)]
find state: (17, 8, True)
find first--> 0
find state result [((17, 8, True), 1, 0), ((17, 8, False), 1, -1)]
episode lenght: 1
episode; [((17, 10, False), 1, -1)]
find state: (17, 10, False)
find first--> 0
find state result [((17, 10, False), 1, -1)]
episode lenght: 1
episode; [((17, 8, False), 1, -1)]
find state: (17, 8, False)
find first--> 0
find state result [((17, 8, False), 1, -1)]
episode lenght: 1
episode; [((16, 10, False), 1, -1)]
find state: (16, 10, False)
find first--> 0
find state result [((16, 10, False), 1, -1)]
episode lenght: 2
episode; [((13, 10, False), 1, 0), ((17, 10, False), 1, -1)]
find state: (17, 10, False)
find first--> 1
find state result [((17, 10, False), 1

find state result [((17, 3, True), 1, 0), ((21, 3, True), 0, 1)]
episode lenght: 2
episode; [((15, 6, True), 1, 0), ((20, 6, True), 0, 1)]
find state: (20, 6, True)
find first--> 1
find state result [((20, 6, True), 0, 1)]
find state: (15, 6, True)
find first--> 0
find state result [((15, 6, True), 1, 0), ((20, 6, True), 0, 1)]
episode lenght: 1
episode; [((20, 3, True), 0, 1)]
find state: (20, 3, True)
find first--> 0
find state result [((20, 3, True), 0, 1)]
episode lenght: 1
episode; [((20, 9, True), 0, 1)]
find state: (20, 9, True)
find first--> 0
find state result [((20, 9, True), 0, 1)]
episode lenght: 1
episode; [((19, 9, False), 1, -1)]
find state: (19, 9, False)
find first--> 0
find state result [((19, 9, False), 1, -1)]
episode lenght: 2
episode; [((15, 8, False), 1, 0), ((17, 8, False), 1, -1)]
find state: (17, 8, False)
find first--> 1
find state result [((17, 8, False), 1, -1)]
find state: (15, 8, False)
find first--> 0
find state result [((15, 8, False), 1, 0), ((17, 8, F

 1
episode; [((21, 6, True), 0, 0)]
find state: (21, 6, True)
find first--> 0
find state result [((21, 6, True), 0, 0)]
episode lenght: 2
episode; [((18, 6, False), 1, 0), ((19, 6, False), 1, -1)]
find state: (18, 6, False)
find first--> 0
find state result [((18, 6, False), 1, 0), ((19, 6, False), 1, -1)]
find state: (19, 6, False)
find first--> 1
find state result [((19, 6, False), 1, -1)]
episode lenght: 2
episode; [((17, 8, False), 1, 0), ((20, 8, False), 0, 1)]
find state: (17, 8, False)
find first--> 0
find state result [((17, 8, False), 1, 0), ((20, 8, False), 0, 1)]
find state: (20, 8, False)
find first--> 1
find state result [((20, 8, False), 0, 1)]
episode lenght: 2
episode; [((13, 9, False), 1, 0), ((20, 9, False), 0, 0)]
find state: (13, 9, False)
find first--> 0
find state result [((13, 9, False), 1, 0), ((20, 9, False), 0, 0)]
find state: (20, 9, False)
find first--> 1
find state result [((20, 9, False), 0, 0)]
episode lenght: 1
episode; [((16, 5, False), 1, -1)]
find sta

[((12, 2, False), 1, 0), ((14, 2, False), 1, -1)]
find state: (14, 2, False)
find first--> 1
find state result [((14, 2, False), 1, -1)]
find state: (12, 2, False)
find first--> 0
find state result [((12, 2, False), 1, 0), ((14, 2, False), 1, -1)]
episode lenght: 1
episode; [((15, 9, False), 1, -1)]
find state: (15, 9, False)
find first--> 0
find state result [((15, 9, False), 1, -1)]
episode lenght: 1
episode; [((17, 4, False), 1, -1)]
find state: (17, 4, False)
find first--> 0
find state result [((17, 4, False), 1, -1)]
episode lenght: 1
episode; [((19, 2, False), 1, -1)]
find state: (19, 2, False)
find first--> 0
find state result [((19, 2, False), 1, -1)]
episode lenght: 1
episode; [((21, 3, True), 0, 1)]
find state: (21, 3, True)
find first--> 0
find state result [((21, 3, True), 0, 1)]
episode lenght: 2
episode; [((12, 4, False), 1, 0), ((19, 4, False), 1, -1)]
find state: (12, 4, False)
find first--> 0
find state result [((12, 4, False), 1, 0), ((19, 4, False), 1, -1)]
find stat

 (20, 3, True)
find first--> 0
find state result [((20, 3, True), 0, 1)]
episode lenght: 1
episode; [((14, 1, False), 1, -1)]
find state: (14, 1, False)
find first--> 0
find state result [((14, 1, False), 1, -1)]
episode lenght: 1
episode; [((14, 3, False), 1, -1)]
find state: (14, 3, False)
find first--> 0
find state result [((14, 3, False), 1, -1)]
episode lenght: 1
episode; [((20, 1, True), 0, 1)]
find state: (20, 1, True)
find first--> 0
find state result [((20, 1, True), 0, 1)]
episode lenght: 1
episode; [((17, 3, False), 1, -1)]
find state: (17, 3, False)
find first--> 0
find state result [((17, 3, False), 1, -1)]
episode lenght: 2
episode; [((15, 7, False), 1, 0), ((16, 7, False), 1, -1)]
find state: (15, 7, False)
find first--> 0
find state result [((15, 7, False), 1, 0), ((16, 7, False), 1, -1)]
find state: (16, 7, False)
find first--> 1
find state result [((16, 7, False), 1, -1)]
episode lenght: 1
episode; [((21, 7, True), 0, 1)]
find state: (21, 7, True)
find first--> 0
find

find state: (16, 10, False)
find first--> 0
find state result [((16, 10, False), 1, 0), ((18, 10, False), 1, -1)]
find state: (18, 10, False)
find first--> 1
find state result [((18, 10, False), 1, -1)]
episode lenght: 1
episode; [((15, 6, False), 1, -1)]
find state: (15, 6, False)
find first--> 0
find state result [((15, 6, False), 1, -1)]
episode lenght: 2
episode; [((12, 10, False), 1, 0), ((19, 10, False), 1, -1)]
find state: (19, 10, False)
find first--> 1
find state result [((19, 10, False), 1, -1)]
find state: (12, 10, False)
find first--> 0
find state result [((12, 10, False), 1, 0), ((19, 10, False), 1, -1)]
episode lenght: 3
episode; [((12, 10, False), 1, 0), ((13, 10, False), 1, 0), ((16, 10, False), 1, -1)]
find state: (16, 10, False)
find first--> 2
find state result [((16, 10, False), 1, -1)]
find state: (12, 10, False)
find first--> 0
find state result [((12, 10, False), 1, 0), ((13, 10, False), 1, 0), ((16, 10, False), 1, -1)]
find state: (13, 10, False)
find first--> 1


episode lenght: 1
episode; [((17, 2, False), 1, -1)]
find state: (17, 2, False)
find first--> 0
find state result [((17, 2, False), 1, -1)]
episode lenght: 1
episode; [((16, 1, False), 1, -1)]
find state: (16, 1, False)
find first--> 0
find state result [((16, 1, False), 1, -1)]
episode lenght: 2
episode; [((14, 7, False), 1, 0), ((17, 7, False), 1, -1)]
find state: (14, 7, False)
find first--> 0
find state result [((14, 7, False), 1, 0), ((17, 7, False), 1, -1)]
find state: (17, 7, False)
find first--> 1
find state result [((17, 7, False), 1, -1)]
episode lenght: 2
episode; [((12, 10, False), 1, 0), ((21, 10, False), 0, 1)]
find state: (12, 10, False)
find first--> 0
find state result [((12, 10, False), 1, 0), ((21, 10, False), 0, 1)]
find state: (21, 10, False)
find first--> 1
find state result [((21, 10, False), 0, 1)]
episode lenght: 1
episode; [((18, 2, False), 1, -1)]
find state: (18, 2, False)
find first--> 0
find state result [((18, 2, False), 1, -1)]
episode lenght: 1
episode

[((20, 10, False), 0, 0)]
episode lenght: 1
episode; [((20, 8, False), 0, 0)]
find state: (20, 8, False)
find first--> 0
find state result [((20, 8, False), 0, 0)]
episode lenght: 2
episode; [((16, 10, False), 1, 0), ((21, 10, False), 0, 1)]
find state: (16, 10, False)
find first--> 0
find state result [((16, 10, False), 1, 0), ((21, 10, False), 0, 1)]
find state: (21, 10, False)
find first--> 1
find state result [((21, 10, False), 0, 1)]
episode lenght: 2
episode; [((18, 10, True), 1, 0), ((21, 10, True), 0, 0)]
find state: (18, 10, True)
find first--> 0
find state result [((18, 10, True), 1, 0), ((21, 10, True), 0, 0)]
find state: (21, 10, True)
find first--> 1
find state result [((21, 10, True), 0, 0)]
episode lenght: 1
episode; [((14, 3, False), 1, -1)]
find state: (14, 3, False)
find first--> 0
find state result [((14, 3, False), 1, -1)]
episode lenght: 1
episode; [((15, 7, False), 1, -1)]
find state: (15, 7, False)
find first--> 0
find state result [((15, 7, False), 1, -1)]
episo

KeyboardInterrupt: 