In [428]:
import os
import csv
import sys
import re
import pandas as pd

from sklearn.externals import joblib
from surprise import Dataset
from surprise import Reader
from surprise import SVD
from Movielens import Movielens

In [441]:
model = joblib.load("./SVDtuned.pkl")
ml = Movielens()
MovieMatrix, RatingMatrix = ml.loadDataSet()
MovieMatrix.to_pickle("movie_data.pkl")

In [442]:
user_feature = model.pu
movie_feature = model.qi

In [444]:
import numpy as np
import random
from random import randint
from collections import deque
from keras.models import Sequential
from keras.layers import Dense
from keras.optimizers import Adam, SGD

class ENV:
    def __init__(self, User_Feature, Movie_Feature, User_Matrix, Movie_Matrix, model):
        self.User_Feature = User_Feature
        self.Movie_Feature = Movie_Feature
        self.User_Matrix = User_Matrix
        self.Movie_Matrix = Movie_Matrix
        self.UserMovie = self.combine_matrix().fillna(0)
        self.state_size = self.UserMovie.shape[1] #user feature
        self.action_size = self.UserMovie.shape[1] #num of movie (actions)
        self.SVDmodel = model
    
    def combine_matrix(self):
        merged=pd.merge(self.User_Matrix,self.Movie_Matrix,left_on='movieId',right_on="movieId")
        return pd.pivot_table(merged, values='rating',
                                   index=['userId'], columns=['movieId'])
        
    def random_select_user(self):
        num = random.randint(1,610) #random pick from 610 user
        self.selectedUser = self.UserMovie.loc[[num]].to_numpy()
        self.selectedUser_num = num
        return self.selectedUser
    
#     def prediction_model(self):
        
    def current_state(self):
        return self.selectedUser
    
    def step(self, action): #given an action to return reward + next_state
        #action in an item recommended by the agent(recommender)
        #real is for real user
#         randomRating = random.randint(0,5)
#         reward = -1 if randomRating == 0 else randomRating
        
#         if(randomRating == 0):
#             reward = -1 #we give a reward of 0 if user not rate (i.e. = 0)
#         else:
#             reward = randomRating #reward just equal to the rating
        #action is in raw id
        to_real_movieID = int(env.Movie_Matrix.iloc[action]['movieId'])
        
        prediction = self.SVDmodel.predict(self.selectedUser_num, to_real_movieID)
        estimatedRating = prediction[3]
        
        reward = estimatedRating
        self.selectedUser[0,action] = reward #update the user's state(history)
        next_state = self.selectedUser
        
        return next_state, reward     

In [445]:
env = ENV(user_feature, movie_feature, RatingMatrix, MovieMatrix, SVDmodel)

In [375]:
env.random_select_user()

array([[3.5, 0. , 0. , ..., 0. , 0. , 0. ]])

In [372]:
env.state_size

9724

In [373]:
env.action_size

9724

In [376]:
env.selectedUser

array([[3.5, 0. , 0. , ..., 0. , 0. , 0. ]])

In [446]:
class DQNAgent:
    def __init__(self, state_size, action_size):
        self.state_size = state_size
        self.action_size = action_size
        self.memory = deque(maxlen=2000) #use it for storing history
        self.GAMMA = 0.9
        self.EPSILON = 1.0 #let user to explore at first, it will decay
        self.EPSILON_DECAY = 0.995
        self.EPSILON_MIN = 0.01
        self.learning_rate = 0.001 #for model
        self.model = self._build_model()
    
    def _build_model(self):
        model = Sequential()
        model.add(Dense(24, input_dim=self.state_size, activation='relu'))
        model.add(Dense(24, activation='relu'))
        model.add(Dense(self.action_size, activation='linear'))
        model.compile(loss='mse', optimizer=SGD(lr=self.learning_rate)
                      , metrics=['accuracy'])
        
        return model
    
    def remember_memory(self, state, action, reward, next_state):
        self.memory.append((state, action, reward, next_state))
  
    def make_act(self, state):
        if np.random.rand() <= 0.3: #exploration
            restart = True
            while restart:
                restart = False
                action = random.randrange(self.action_size)
                for i in range(len(self.memory)):
                    if self.memory[i][1] == action:
                        restart = True
                        break             
            return action
        action_values = self.model.predict(state)
        return np.argmax(action_values[0]) #exploitation
    
    def experience_replay(self, batch_size):
        minibatch = random.sample(self.memory, batch_size)
        for state, action, reward, next_state in minibatch:
            target = reward + self.GAMMA * np.amax(self.model.predict(next_state)[0])
            target_f = self.model.predict(state)
            target_f[0][action] = target
            self.model.fit(state, target_f, epochs=1, verbose=0)
#         if self.EPSILON > self.EPSILON_MIN:
#             self.EPSILON *= self.EPSILON_DECAY


In [447]:
agent = DQNAgent(env.state_size, env.action_size) #intialise agent
batch_size = 32

In [448]:
EPISODE = 200
TIME_STEPS = 40
for e in range(500):
    state = env.random_select_user()
    #print(state.shape)
    for time in range(TIME_STEPS):
        action = agent.make_act(state)
        print("action: {}".format(action))
        next_state, reward = env.step(action)
        print("action: {}, reward: {}, e:{:.2}".format(action, reward, agent.EPSILON))
        agent.remember_memory(state,action,reward,next_state)
        state = next_state
    if len(agent.memory) > batch_size:
        agent.experience_replay(batch_size)

action: 7510
action: 7510, reward: 3.843670213355653, e:1.0
action: 7510
action: 7510, reward: 3.843670213355653, e:1.0
action: 7510
action: 7510, reward: 3.843670213355653, e:1.0
action: 7510
action: 7510, reward: 3.843670213355653, e:1.0
action: 7510
action: 7510, reward: 3.843670213355653, e:1.0
action: 7510
action: 7510, reward: 3.843670213355653, e:1.0
action: 7510
action: 7510, reward: 3.843670213355653, e:1.0
action: 7510
action: 7510, reward: 3.843670213355653, e:1.0
action: 4714
action: 4714, reward: 3.5108978073442048, e:1.0
action: 7510
action: 7510, reward: 3.843670213355653, e:1.0
action: 7359
action: 7359, reward: 3.4982543405494955, e:1.0
action: 7510
action: 7510, reward: 3.843670213355653, e:1.0
action: 7076
action: 7076, reward: 3.5902041850540725, e:1.0
action: 7510
action: 7510, reward: 3.843670213355653, e:1.0
action: 9252
action: 9252, reward: 3.590603534019707, e:1.0
action: 7626
action: 7626, reward: 3.2190783193789674, e:1.0
action: 8512
action: 8512, reward: 3

action: 2999
action: 2999, reward: 2.836543199598817, e:1.0
action: 6263
action: 6263, reward: 3.48341347359965, e:1.0
action: 7510
action: 7510, reward: 3.843670213355653, e:1.0
action: 7510
action: 7510, reward: 3.843670213355653, e:1.0
action: 7510
action: 7510, reward: 3.843670213355653, e:1.0
action: 7510
action: 7510, reward: 3.843670213355653, e:1.0
action: 8136
action: 8136, reward: 3.5660543656563677, e:1.0
action: 3243
action: 3243, reward: 3.385484866567039, e:1.0
action: 4929
action: 4929, reward: 3.434076188318906, e:1.0
action: 2306
action: 2306, reward: 3.6805802714535467, e:1.0
action: 953
action: 953, reward: 3.7073566089782983, e:1.0
action: 4929
action: 4929, reward: 3.434076188318906, e:1.0
action: 3853
action: 3853, reward: 3.6568136469492623, e:1.0
action: 2670
action: 2670, reward: 3.015812551131464, e:1.0
action: 2670
action: 2670, reward: 3.015812551131464, e:1.0
action: 2670
action: 2670, reward: 3.015812551131464, e:1.0
action: 2670
action: 2670, reward: 3.01

action: 7456
action: 7456, reward: 3.7995452959129983, e:1.0
action: 3807
action: 3807, reward: 3.7272701412705826, e:1.0
action: 3807
action: 3807, reward: 3.7272701412705826, e:1.0
action: 2813
action: 2813, reward: 3.522498288628954, e:1.0
action: 3824
action: 3824, reward: 3.313872104293282, e:1.0
action: 3807
action: 3807, reward: 3.7272701412705826, e:1.0
action: 3807
action: 3807, reward: 3.7272701412705826, e:1.0
action: 3807
action: 3807, reward: 3.7272701412705826, e:1.0
action: 4318
action: 4318, reward: 3.299728592150476, e:1.0
action: 3807
action: 3807, reward: 3.7272701412705826, e:1.0
action: 6114
action: 6114, reward: 3.508640176013213, e:1.0
action: 3807
action: 3807, reward: 3.7272701412705826, e:1.0
action: 3807
action: 3807, reward: 3.7272701412705826, e:1.0
action: 989
action: 989, reward: 3.329716455189647, e:1.0
action: 4186
action: 4186, reward: 3.5995865054200253, e:1.0
action: 3807
action: 3807, reward: 3.7272701412705826, e:1.0
action: 3807
action: 3807, rewa

action: 7531
action: 7531, reward: 3.607388201449843, e:1.0
action: 7531
action: 7531, reward: 3.607388201449843, e:1.0
action: 7531
action: 7531, reward: 3.607388201449843, e:1.0
action: 7531
action: 7531, reward: 3.607388201449843, e:1.0
action: 7531
action: 7531, reward: 3.607388201449843, e:1.0
action: 7531
action: 7531, reward: 3.607388201449843, e:1.0
action: 3563
action: 3563, reward: 3.4202104299091944, e:1.0
action: 8408
action: 8408, reward: 3.623301745467128, e:1.0
action: 951
action: 951, reward: 4.156313566399177, e:1.0
action: 9225
action: 9225, reward: 3.5008761961457227, e:1.0
action: 5187
action: 5187, reward: 3.6056403168443785, e:1.0
action: 7531
action: 7531, reward: 3.607388201449843, e:1.0
action: 7531
action: 7531, reward: 3.607388201449843, e:1.0
action: 1634
action: 1634, reward: 3.514445735115456, e:1.0
action: 7531
action: 7531, reward: 3.607388201449843, e:1.0
action: 7531
action: 7531, reward: 3.607388201449843, e:1.0
action: 7531
action: 7531, reward: 3.60

action: 6791
action: 6791, reward: 3.1487362734306203, e:1.0
action: 5147
action: 5147, reward: 3.5864004637412883, e:1.0
action: 6791
action: 6791, reward: 3.1487362734306203, e:1.0
action: 3254
action: 3254, reward: 3.3211783814168676, e:1.0
action: 6791
action: 6791, reward: 3.1487362734306203, e:1.0
action: 1889
action: 1889, reward: 3.683620536082818, e:1.0
action: 6791
action: 6791, reward: 3.1487362734306203, e:1.0
action: 6791
action: 6791, reward: 3.1487362734306203, e:1.0
action: 6791
action: 6791, reward: 3.1487362734306203, e:1.0
action: 6791
action: 6791, reward: 3.1487362734306203, e:1.0
action: 5765
action: 5765, reward: 3.578549423242836, e:1.0
action: 6791
action: 6791, reward: 3.1487362734306203, e:1.0
action: 6791
action: 6791, reward: 3.1487362734306203, e:1.0
action: 6791
action: 6791, reward: 3.1487362734306203, e:1.0
action: 6791
action: 6791, reward: 3.1487362734306203, e:1.0
action: 9658
action: 9658, reward: 3.5660587813465945, e:1.0
action: 6791
action: 6791,

action: 3807
action: 3807, reward: 3.7272701412705826, e:1.0
action: 6101
action: 6101, reward: 3.6772462417204137, e:1.0
action: 6101
action: 6101, reward: 3.6772462417204137, e:1.0
action: 6101
action: 6101, reward: 3.6772462417204137, e:1.0
action: 8540
action: 8540, reward: 3.547538250971609, e:1.0
action: 9500
action: 9500, reward: 3.55746028187987, e:1.0
action: 278
action: 278, reward: 3.736355454987955, e:1.0
action: 8691
action: 8691, reward: 3.770810242371262, e:1.0
action: 5654
action: 5654, reward: 3.686084418776285, e:1.0
action: 8691
action: 8691, reward: 3.770810242371262, e:1.0
action: 2280
action: 2280, reward: 3.129817316376676, e:1.0
action: 8691
action: 8691, reward: 3.770810242371262, e:1.0
action: 8691
action: 8691, reward: 3.770810242371262, e:1.0
action: 8691
action: 8691, reward: 3.770810242371262, e:1.0
action: 6494
action: 6494, reward: 3.472315882252106, e:1.0
action: 8691
action: 8691, reward: 3.770810242371262, e:1.0
action: 8691
action: 8691, reward: 3.77

action: 1581
action: 1581, reward: 3.0694977731902533, e:1.0
action: 8691
action: 8691, reward: 3.770810242371262, e:1.0
action: 8691
action: 8691, reward: 3.770810242371262, e:1.0
action: 8912
action: 8912, reward: 3.400280270859477, e:1.0
action: 1168
action: 1168, reward: 3.5384632019494684, e:1.0
action: 6791
action: 6791, reward: 3.1487362734306203, e:1.0
action: 6791
action: 6791, reward: 3.1487362734306203, e:1.0
action: 6791
action: 6791, reward: 3.1487362734306203, e:1.0
action: 2568
action: 2568, reward: 4.1900222048062545, e:1.0
action: 6791
action: 6791, reward: 3.1487362734306203, e:1.0
action: 6791
action: 6791, reward: 3.1487362734306203, e:1.0
action: 6791
action: 6791, reward: 3.1487362734306203, e:1.0
action: 6401
action: 6401, reward: 3.4848032631337347, e:1.0
action: 4483
action: 4483, reward: 2.7966472117159507, e:1.0
action: 6791
action: 6791, reward: 3.1487362734306203, e:1.0
action: 6791
action: 6791, reward: 3.1487362734306203, e:1.0
action: 6791
action: 6791, 

action: 5445
action: 5445, reward: 3.619018512769876, e:1.0
action: 7459
action: 7459, reward: 3.312801259206174, e:1.0
action: 4861
action: 4861, reward: 3.567479695436373, e:1.0
action: 876
action: 876, reward: 3.2691877957656073, e:1.0
action: 876
action: 876, reward: 3.2691877957656073, e:1.0
action: 6818
action: 6818, reward: 3.630461459349121, e:1.0
action: 5445
action: 5445, reward: 3.619018512769876, e:1.0
action: 5445
action: 5445, reward: 3.619018512769876, e:1.0
action: 5445
action: 5445, reward: 3.619018512769876, e:1.0
action: 5445
action: 5445, reward: 3.619018512769876, e:1.0
action: 5445
action: 5445, reward: 3.619018512769876, e:1.0
action: 5445
action: 5445, reward: 3.619018512769876, e:1.0
action: 4347
action: 4347, reward: 3.0883987816474145, e:1.0
action: 5445
action: 5445, reward: 3.619018512769876, e:1.0
action: 1738
action: 1738, reward: 3.724552953746911, e:1.0
action: 7821
action: 7821, reward: 3.681624467370038, e:1.0
action: 5445
action: 5445, reward: 3.6190

action: 8315
action: 8315, reward: 3.1598481626974984, e:1.0
action: 8315
action: 8315, reward: 3.1598481626974984, e:1.0
action: 8315
action: 8315, reward: 3.1598481626974984, e:1.0
action: 9127
action: 9127, reward: 3.5718874149625472, e:1.0
action: 8315
action: 8315, reward: 3.1598481626974984, e:1.0
action: 8315
action: 8315, reward: 3.1598481626974984, e:1.0
action: 8315
action: 8315, reward: 3.1598481626974984, e:1.0
action: 8315
action: 8315, reward: 3.1598481626974984, e:1.0
action: 8664
action: 8664, reward: 3.473305012609053, e:1.0
action: 3168
action: 3168, reward: 3.7070385209639167, e:1.0
action: 4811
action: 4811, reward: 3.4508676287463107, e:1.0
action: 8315
action: 8315, reward: 3.1598481626974984, e:1.0
action: 5741
action: 5741, reward: 3.639339456494796, e:1.0
action: 8315
action: 8315, reward: 3.1598481626974984, e:1.0
action: 4863
action: 4863, reward: 3.5445423835965544, e:1.0
action: 8315
action: 8315, reward: 3.1598481626974984, e:1.0
action: 8315
action: 8315,

action: 7941
action: 7941, reward: 3.681241885175076, e:1.0
action: 3621
action: 3621, reward: 3.4494944344234977, e:1.0
action: 5220
action: 5220, reward: 3.1363042127515515, e:1.0
action: 9346
action: 9346, reward: 3.298913452478452, e:1.0
action: 3526
action: 3526, reward: 3.3192584548634403, e:1.0
action: 825
action: 825, reward: 3.573549639569778, e:1.0
action: 9346
action: 9346, reward: 3.298913452478452, e:1.0
action: 9346
action: 9346, reward: 3.298913452478452, e:1.0
action: 9346
action: 9346, reward: 3.298913452478452, e:1.0
action: 3470
action: 3470, reward: 3.402133271763758, e:1.0
action: 1559
action: 1559, reward: 3.569573444401637, e:1.0
action: 9346
action: 9346, reward: 3.298913452478452, e:1.0
action: 9346
action: 9346, reward: 3.298913452478452, e:1.0
action: 9346
action: 9346, reward: 3.298913452478452, e:1.0
action: 9346
action: 9346, reward: 3.298913452478452, e:1.0
action: 9346
action: 9346, reward: 3.298913452478452, e:1.0
action: 19
action: 19, reward: 2.873134

action: 1848
action: 1848, reward: 3.6159490899156146, e:1.0
action: 935
action: 935, reward: 4.1940908343766985, e:1.0
action: 935
action: 935, reward: 4.1940908343766985, e:1.0
action: 935
action: 935, reward: 4.1940908343766985, e:1.0
action: 2015
action: 2015, reward: 3.9048843387468675, e:1.0
action: 935
action: 935, reward: 4.1940908343766985, e:1.0
action: 935
action: 935, reward: 4.1940908343766985, e:1.0
action: 935
action: 935, reward: 4.1940908343766985, e:1.0
action: 935
action: 935, reward: 4.1940908343766985, e:1.0
action: 935
action: 935, reward: 4.1940908343766985, e:1.0
action: 935
action: 935, reward: 4.1940908343766985, e:1.0
action: 935
action: 935, reward: 4.1940908343766985, e:1.0
action: 935
action: 935, reward: 4.1940908343766985, e:1.0
action: 935
action: 935, reward: 4.1940908343766985, e:1.0
action: 935
action: 935, reward: 4.1940908343766985, e:1.0
action: 935
action: 935, reward: 4.1940908343766985, e:1.0
action: 3969
action: 3969, reward: 3.254618490346141

action: 9413
action: 9413, reward: 3.526955179788658, e:1.0
action: 3513
action: 3513, reward: 3.6126825136608276, e:1.0
action: 2617
action: 2617, reward: 3.5946464201868755, e:1.0
action: 2617
action: 2617, reward: 3.5946464201868755, e:1.0
action: 2617
action: 2617, reward: 3.5946464201868755, e:1.0
action: 2617
action: 2617, reward: 3.5946464201868755, e:1.0
action: 4265
action: 4265, reward: 3.468666111778834, e:1.0
action: 2617
action: 2617, reward: 3.5946464201868755, e:1.0
action: 2617
action: 2617, reward: 3.5946464201868755, e:1.0
action: 2617
action: 2617, reward: 3.5946464201868755, e:1.0
action: 2617
action: 2617, reward: 3.5946464201868755, e:1.0
action: 2617
action: 2617, reward: 3.5946464201868755, e:1.0
action: 2617
action: 2617, reward: 3.5946464201868755, e:1.0
action: 2617
action: 2617, reward: 3.5946464201868755, e:1.0
action: 2617
action: 2617, reward: 3.5946464201868755, e:1.0
action: 2617
action: 2617, reward: 3.5946464201868755, e:1.0
action: 7044
action: 7044,

action: 7001
action: 7001, reward: 3.319619495189584, e:1.0
action: 3338
action: 3338, reward: 3.4790508681679917, e:1.0
action: 7001
action: 7001, reward: 3.319619495189584, e:1.0
action: 7001
action: 7001, reward: 3.319619495189584, e:1.0
action: 7001
action: 7001, reward: 3.319619495189584, e:1.0
action: 7001
action: 7001, reward: 3.319619495189584, e:1.0
action: 7001
action: 7001, reward: 3.319619495189584, e:1.0
action: 7001
action: 7001, reward: 3.319619495189584, e:1.0
action: 7001
action: 7001, reward: 3.319619495189584, e:1.0
action: 22
action: 22, reward: 3.2018366842310013, e:1.0
action: 5046
action: 5046, reward: 3.617915134600487, e:1.0
action: 7001
action: 7001, reward: 3.319619495189584, e:1.0
action: 7001
action: 7001, reward: 3.319619495189584, e:1.0
action: 7001
action: 7001, reward: 3.319619495189584, e:1.0
action: 7001
action: 7001, reward: 3.319619495189584, e:1.0
action: 7001
action: 7001, reward: 3.319619495189584, e:1.0
action: 7001
action: 7001, reward: 3.31961

action: 6481
action: 6481, reward: 3.752198996673205, e:1.0
action: 3318
action: 3318, reward: 3.4063828164331293, e:1.0
action: 6481
action: 6481, reward: 3.752198996673205, e:1.0
action: 6481
action: 6481, reward: 3.752198996673205, e:1.0
action: 6481
action: 6481, reward: 3.752198996673205, e:1.0
action: 6481
action: 6481, reward: 3.752198996673205, e:1.0
action: 6481
action: 6481, reward: 3.752198996673205, e:1.0
action: 1631
action: 1631, reward: 3.4237324091053103, e:1.0
action: 6481
action: 6481, reward: 3.752198996673205, e:1.0
action: 6481
action: 6481, reward: 3.752198996673205, e:1.0
action: 6481
action: 6481, reward: 3.752198996673205, e:1.0
action: 1456
action: 1456, reward: 3.481749871597027, e:1.0
action: 6481
action: 6481, reward: 3.752198996673205, e:1.0
action: 1122
action: 1122, reward: 3.456096038579324, e:1.0
action: 5148
action: 5148, reward: 3.4988352803644935, e:1.0
action: 5679
action: 5679, reward: 3.2567248284621746, e:1.0
action: 9152
action: 9152, reward: 3

action: 325
action: 325, reward: 3.2330318152788555, e:1.0
action: 5957
action: 5957, reward: 3.5016164220547403, e:1.0
action: 5957
action: 5957, reward: 3.5016164220547403, e:1.0
action: 5957
action: 5957, reward: 3.5016164220547403, e:1.0
action: 5957
action: 5957, reward: 3.5016164220547403, e:1.0
action: 5957
action: 5957, reward: 3.5016164220547403, e:1.0
action: 5957
action: 5957, reward: 3.5016164220547403, e:1.0
action: 5957
action: 5957, reward: 3.5016164220547403, e:1.0
action: 2549
action: 2549, reward: 3.661687346037323, e:1.0
action: 5957
action: 5957, reward: 3.5016164220547403, e:1.0
action: 5957
action: 5957, reward: 3.5016164220547403, e:1.0
action: 5957
action: 5957, reward: 3.5016164220547403, e:1.0
action: 1685
action: 1685, reward: 3.387173921951489, e:1.0
action: 4504
action: 4504, reward: 3.979137135872419, e:1.0
action: 1501
action: 1501, reward: 3.1764618223540406, e:1.0
action: 5957
action: 5957, reward: 3.5016164220547403, e:1.0
action: 8967
action: 8967, re

action: 8983
action: 8983, reward: 3.552970562894166, e:1.0
action: 8983
action: 8983, reward: 3.552970562894166, e:1.0
action: 8983
action: 8983, reward: 3.552970562894166, e:1.0
action: 8983
action: 8983, reward: 3.552970562894166, e:1.0
action: 7605
action: 7605, reward: 3.817167287608692, e:1.0
action: 8983
action: 8983, reward: 3.552970562894166, e:1.0
action: 8983
action: 8983, reward: 3.552970562894166, e:1.0
action: 8983
action: 8983, reward: 3.552970562894166, e:1.0
action: 5882
action: 5882, reward: 2.8292908159099026, e:1.0
action: 8983
action: 8983, reward: 3.552970562894166, e:1.0
action: 8983
action: 8983, reward: 3.552970562894166, e:1.0
action: 8983
action: 8983, reward: 3.552970562894166, e:1.0
action: 8983
action: 8983, reward: 3.552970562894166, e:1.0
action: 8983
action: 8983, reward: 3.552970562894166, e:1.0
action: 8983
action: 8983, reward: 3.552970562894166, e:1.0
action: 4972
action: 4972, reward: 3.8373338734337796, e:1.0
action: 9616
action: 9616, reward: 3.6

action: 7531, reward: 3.607388201449843, e:1.0
action: 5604
action: 5604, reward: 3.623399795648473, e:1.0
action: 5755
action: 5755, reward: 3.5573960743604047, e:1.0
action: 47
action: 47, reward: 3.547761929721866, e:1.0
action: 1806
action: 1806, reward: 3.5155684349141323, e:1.0
action: 1806
action: 1806, reward: 3.5155684349141323, e:1.0
action: 1806
action: 1806, reward: 3.5155684349141323, e:1.0
action: 1806
action: 1806, reward: 3.5155684349141323, e:1.0
action: 2107
action: 2107, reward: 3.666282087048887, e:1.0
action: 1806
action: 1806, reward: 3.5155684349141323, e:1.0
action: 1325
action: 1325, reward: 3.1502369739271168, e:1.0
action: 1806
action: 1806, reward: 3.5155684349141323, e:1.0
action: 1806
action: 1806, reward: 3.5155684349141323, e:1.0
action: 1806
action: 1806, reward: 3.5155684349141323, e:1.0
action: 1806
action: 1806, reward: 3.5155684349141323, e:1.0
action: 1806
action: 1806, reward: 3.5155684349141323, e:1.0
action: 1806
action: 1806, reward: 3.51556843

action: 3019
action: 3019, reward: 3.6975284389758802, e:1.0
action: 3019
action: 3019, reward: 3.6975284389758802, e:1.0
action: 3873
action: 3873, reward: 3.7164286952206993, e:1.0
action: 3019
action: 3019, reward: 3.6975284389758802, e:1.0
action: 2475
action: 2475, reward: 3.7968969553625294, e:1.0
action: 3019
action: 3019, reward: 3.6975284389758802, e:1.0
action: 3019
action: 3019, reward: 3.6975284389758802, e:1.0
action: 6480
action: 6480, reward: 3.5740888084238827, e:1.0
action: 3019
action: 3019, reward: 3.6975284389758802, e:1.0
action: 7456
action: 7456, reward: 3.7995452959129983, e:1.0
action: 6491
action: 6491, reward: 3.507288597362628, e:1.0
action: 4629
action: 4629, reward: 3.491648546692137, e:1.0
action: 3019
action: 3019, reward: 3.6975284389758802, e:1.0
action: 3019
action: 3019, reward: 3.6975284389758802, e:1.0
action: 3019
action: 3019, reward: 3.6975284389758802, e:1.0
action: 1266
action: 1266, reward: 2.766910051681542, e:1.0
action: 4338
action: 4338, 

action: 6853
action: 6853, reward: 3.2504917579831423, e:1.0
action: 6853
action: 6853, reward: 3.2504917579831423, e:1.0
action: 6853
action: 6853, reward: 3.2504917579831423, e:1.0
action: 6853
action: 6853, reward: 3.2504917579831423, e:1.0
action: 6853
action: 6853, reward: 3.2504917579831423, e:1.0
action: 6853
action: 6853, reward: 3.2504917579831423, e:1.0
action: 6955
action: 6955, reward: 3.251226947316345, e:1.0
action: 6853
action: 6853, reward: 3.2504917579831423, e:1.0
action: 6853
action: 6853, reward: 3.2504917579831423, e:1.0
action: 6853
action: 6853, reward: 3.2504917579831423, e:1.0
action: 6853
action: 6853, reward: 3.2504917579831423, e:1.0
action: 6853
action: 6853, reward: 3.2504917579831423, e:1.0
action: 6853
action: 6853, reward: 3.2504917579831423, e:1.0
action: 6466
action: 6466, reward: 3.571675660249143, e:1.0
action: 6853
action: 6853, reward: 3.2504917579831423, e:1.0
action: 6853
action: 6853, reward: 3.2504917579831423, e:1.0
action: 5915
action: 5915,

action: 1219
action: 1219, reward: 3.4773800558754973, e:1.0
action: 1219
action: 1219, reward: 3.4773800558754973, e:1.0
action: 1219
action: 1219, reward: 3.4773800558754973, e:1.0
action: 8493
action: 8493, reward: 3.1348728781306203, e:1.0
action: 3924
action: 3924, reward: 3.5609936193618767, e:1.0
action: 8221
action: 8221, reward: 3.126641337166561, e:1.0
action: 2948
action: 2948, reward: 3.3063690258498597, e:1.0
action: 6636
action: 6636, reward: 3.4201785669706966, e:1.0
action: 2948
action: 2948, reward: 3.3063690258498597, e:1.0
action: 2948
action: 2948, reward: 3.3063690258498597, e:1.0
action: 2948
action: 2948, reward: 3.3063690258498597, e:1.0
action: 9030
action: 9030, reward: 3.5395167214743752, e:1.0
action: 5106
action: 5106, reward: 3.5791416110664436, e:1.0
action: 5204
action: 5204, reward: 3.140879068821211, e:1.0
action: 8408
action: 8408, reward: 3.623301745467128, e:1.0
action: 8408
action: 8408, reward: 3.623301745467128, e:1.0
action: 8408
action: 8408, r

action: 8315
action: 8315, reward: 3.1598481626974984, e:1.0
action: 8315
action: 8315, reward: 3.1598481626974984, e:1.0
action: 8315
action: 8315, reward: 3.1598481626974984, e:1.0
action: 8315
action: 8315, reward: 3.1598481626974984, e:1.0
action: 1730
action: 1730, reward: 4.197651716603404, e:1.0
action: 8315
action: 8315, reward: 3.1598481626974984, e:1.0
action: 8315
action: 8315, reward: 3.1598481626974984, e:1.0
action: 8315
action: 8315, reward: 3.1598481626974984, e:1.0
action: 8315
action: 8315, reward: 3.1598481626974984, e:1.0
action: 8315
action: 8315, reward: 3.1598481626974984, e:1.0
action: 2677
action: 2677, reward: 3.025200612107438, e:1.0
action: 8279
action: 8279, reward: 3.5326106372809147, e:1.0
action: 8315
action: 8315, reward: 3.1598481626974984, e:1.0
action: 7560
action: 7560, reward: 3.494908142203947, e:1.0
action: 201
action: 201, reward: 3.5929762172842317, e:1.0
action: 8315
action: 8315, reward: 3.1598481626974984, e:1.0
action: 8315
action: 8315, re

action: 9021
action: 9021, reward: 3.5794254892943864, e:1.0
action: 8806
action: 8806, reward: 3.2469539351403847, e:1.0
action: 3411
action: 3411, reward: 3.647080776735062, e:1.0
action: 7205
action: 7205, reward: 3.531260417511314, e:1.0
action: 8806
action: 8806, reward: 3.2469539351403847, e:1.0
action: 8806
action: 8806, reward: 3.2469539351403847, e:1.0
action: 8806
action: 8806, reward: 3.2469539351403847, e:1.0
action: 9297
action: 9297, reward: 3.6717474499352063, e:1.0
action: 7205
action: 7205, reward: 3.531260417511314, e:1.0
action: 7205
action: 7205, reward: 3.531260417511314, e:1.0
action: 7205
action: 7205, reward: 3.531260417511314, e:1.0
action: 7205
action: 7205, reward: 3.531260417511314, e:1.0
action: 7205
action: 7205, reward: 3.531260417511314, e:1.0
action: 1251
action: 1251, reward: 3.483128232575035, e:1.0
action: 322
action: 322, reward: 3.983572199441154, e:1.0
action: 8806
action: 8806, reward: 3.2469539351403847, e:1.0
action: 5729
action: 5729, reward: 

action: 5957
action: 5957, reward: 3.5016164220547403, e:1.0
action: 4365
action: 4365, reward: 2.6426289881275378, e:1.0
action: 5957
action: 5957, reward: 3.5016164220547403, e:1.0
action: 5957
action: 5957, reward: 3.5016164220547403, e:1.0
action: 5957
action: 5957, reward: 3.5016164220547403, e:1.0
action: 3867
action: 3867, reward: 3.4382954474260403, e:1.0
action: 5957
action: 5957, reward: 3.5016164220547403, e:1.0
action: 5957
action: 5957, reward: 3.5016164220547403, e:1.0
action: 5957
action: 5957, reward: 3.5016164220547403, e:1.0
action: 5957
action: 5957, reward: 3.5016164220547403, e:1.0
action: 5957
action: 5957, reward: 3.5016164220547403, e:1.0
action: 5957
action: 5957, reward: 3.5016164220547403, e:1.0
action: 5957
action: 5957, reward: 3.5016164220547403, e:1.0
action: 5957
action: 5957, reward: 3.5016164220547403, e:1.0
action: 5957
action: 5957, reward: 3.5016164220547403, e:1.0
action: 5957
action: 5957, reward: 3.5016164220547403, e:1.0
action: 3564
action: 356

action: 2119
action: 2119, reward: 3.054887487621982, e:1.0
action: 8315
action: 8315, reward: 3.1598481626974984, e:1.0
action: 8085
action: 8085, reward: 3.5319152427198217, e:1.0
action: 8315
action: 8315, reward: 3.1598481626974984, e:1.0
action: 8315
action: 8315, reward: 3.1598481626974984, e:1.0
action: 8315
action: 8315, reward: 3.1598481626974984, e:1.0
action: 3203
action: 3203, reward: 3.8356063763202113, e:1.0
action: 2119
action: 2119, reward: 3.054887487621982, e:1.0
action: 2119
action: 2119, reward: 3.054887487621982, e:1.0
action: 2119
action: 2119, reward: 3.054887487621982, e:1.0
action: 3085
action: 3085, reward: 3.567601606690698, e:1.0
action: 2119
action: 2119, reward: 3.054887487621982, e:1.0
action: 2119
action: 2119, reward: 3.054887487621982, e:1.0
action: 6331
action: 6331, reward: 4.014760557779872, e:1.0
action: 2119
action: 2119, reward: 3.054887487621982, e:1.0
action: 2119
action: 2119, reward: 3.054887487621982, e:1.0
action: 5413
action: 5413, reward:

action: 9225
action: 9225, reward: 3.5008761961457227, e:1.0
action: 5863
action: 5863, reward: 3.6940457516558434, e:1.0
action: 838
action: 838, reward: 2.928473161079368, e:1.0
action: 838
action: 838, reward: 2.928473161079368, e:1.0
action: 2153
action: 2153, reward: 3.7678569255618832, e:1.0
action: 8408
action: 8408, reward: 3.623301745467128, e:1.0
action: 3814
action: 3814, reward: 3.5474134989963972, e:1.0
action: 8408
action: 8408, reward: 3.623301745467128, e:1.0
action: 8408
action: 8408, reward: 3.623301745467128, e:1.0
action: 7127
action: 7127, reward: 3.4762493918222535, e:1.0
action: 8408
action: 8408, reward: 3.623301745467128, e:1.0
action: 8408
action: 8408, reward: 3.623301745467128, e:1.0
action: 8408
action: 8408, reward: 3.623301745467128, e:1.0
action: 7073
action: 7073, reward: 3.6332441450005284, e:1.0
action: 8408
action: 8408, reward: 3.623301745467128, e:1.0
action: 8408
action: 8408, reward: 3.623301745467128, e:1.0
action: 8408
action: 8408, reward: 3.6

action: 8537
action: 8537, reward: 3.913037408933456, e:1.0
action: 8537
action: 8537, reward: 3.913037408933456, e:1.0
action: 8537
action: 8537, reward: 3.913037408933456, e:1.0
action: 9550
action: 9550, reward: 3.339561571968875, e:1.0
action: 5382
action: 5382, reward: 3.5112543049084954, e:1.0
action: 1071
action: 1071, reward: 3.187334123702416, e:1.0
action: 8537
action: 8537, reward: 3.913037408933456, e:1.0
action: 8537
action: 8537, reward: 3.913037408933456, e:1.0
action: 627
action: 627, reward: 3.5443987532706402, e:1.0
action: 8537
action: 8537, reward: 3.913037408933456, e:1.0
action: 8537
action: 8537, reward: 3.913037408933456, e:1.0
action: 8537
action: 8537, reward: 3.913037408933456, e:1.0
action: 8537
action: 8537, reward: 3.913037408933456, e:1.0
action: 8537
action: 8537, reward: 3.913037408933456, e:1.0
action: 249
action: 249, reward: 3.283633222316243, e:1.0
action: 8537
action: 8537, reward: 3.913037408933456, e:1.0
action: 2570
action: 2570, reward: 3.16513

action: 613
action: 613, reward: 4.107552917190478, e:1.0
action: 642
action: 642, reward: 3.258745562147546, e:1.0
action: 642
action: 642, reward: 3.258745562147546, e:1.0
action: 642
action: 642, reward: 3.258745562147546, e:1.0
action: 642
action: 642, reward: 3.258745562147546, e:1.0
action: 3416
action: 3416, reward: 3.6524392689518375, e:1.0
action: 7243
action: 7243, reward: 3.6504583650524025, e:1.0
action: 4929
action: 4929, reward: 3.434076188318906, e:1.0
action: 1966
action: 1966, reward: 3.4925633841167394, e:1.0
action: 642
action: 642, reward: 3.258745562147546, e:1.0
action: 642
action: 642, reward: 3.258745562147546, e:1.0
action: 703
action: 703, reward: 3.5721125306235004, e:1.0
action: 642
action: 642, reward: 3.258745562147546, e:1.0
action: 642
action: 642, reward: 3.258745562147546, e:1.0
action: 642
action: 642, reward: 3.258745562147546, e:1.0
action: 642
action: 642, reward: 3.258745562147546, e:1.0
action: 9604
action: 9604, reward: 3.721604568586236, e:1.0


action: 5410
action: 5410, reward: 3.5737617212429713, e:1.0
action: 5272
action: 5272, reward: 3.620998478366147, e:1.0
action: 6791
action: 6791, reward: 3.1487362734306203, e:1.0
action: 8320
action: 8320, reward: 3.4941204485407145, e:1.0
action: 4568
action: 4568, reward: 4.147962987746433, e:1.0
action: 6609
action: 6609, reward: 3.8977795411109133, e:1.0
action: 8320
action: 8320, reward: 3.4941204485407145, e:1.0
action: 8320
action: 8320, reward: 3.4941204485407145, e:1.0
action: 8320
action: 8320, reward: 3.4941204485407145, e:1.0
action: 848
action: 848, reward: 3.489340204473663, e:1.0
action: 8691
action: 8691, reward: 3.770810242371262, e:1.0
action: 6791
action: 6791, reward: 3.1487362734306203, e:1.0
action: 7599
action: 7599, reward: 3.242822187840128, e:1.0
action: 8691
action: 8691, reward: 3.770810242371262, e:1.0
action: 4356
action: 4356, reward: 3.417632893560506, e:1.0
action: 3370
action: 3370, reward: 3.525109759728153, e:1.0
action: 8691
action: 8691, reward:

action: 763
action: 763, reward: 2.8065902392254656, e:1.0
action: 7510
action: 7510, reward: 3.843670213355653, e:1.0
action: 1567
action: 1567, reward: 3.366631786339535, e:1.0
action: 8938
action: 8938, reward: 3.6347657898091406, e:1.0
action: 70
action: 70, reward: 3.398139278891229, e:1.0
action: 6895
action: 6895, reward: 3.491673945190237, e:1.0
action: 1567
action: 1567, reward: 3.366631786339535, e:1.0
action: 1567
action: 1567, reward: 3.366631786339535, e:1.0
action: 1567
action: 1567, reward: 3.366631786339535, e:1.0
action: 1567
action: 1567, reward: 3.366631786339535, e:1.0
action: 9452
action: 9452, reward: 3.8430403299255884, e:1.0
action: 7510
action: 7510, reward: 3.843670213355653, e:1.0
action: 7510
action: 7510, reward: 3.843670213355653, e:1.0
action: 5789
action: 5789, reward: 3.5288457314121184, e:1.0
action: 2010
action: 2010, reward: 3.423814743085631, e:1.0
action: 4929
action: 4929, reward: 3.434076188318906, e:1.0
action: 7510
action: 7510, reward: 3.84367

action: 1570
action: 1570, reward: 3.5646244814636385, e:1.0
action: 3411
action: 3411, reward: 3.647080776735062, e:1.0
action: 3411
action: 3411, reward: 3.647080776735062, e:1.0
action: 7602
action: 7602, reward: 3.9488645128241653, e:1.0
action: 8978
action: 8978, reward: 3.4923918922571064, e:1.0
action: 1062
action: 1062, reward: 3.232406672618899, e:1.0
action: 3156
action: 3156, reward: 3.5684952401771897, e:1.0
action: 5896
action: 5896, reward: 3.5247323702250397, e:1.0
action: 7205
action: 7205, reward: 3.531260417511314, e:1.0
action: 7205
action: 7205, reward: 3.531260417511314, e:1.0
action: 7205
action: 7205, reward: 3.531260417511314, e:1.0
action: 4943
action: 4943, reward: 3.214256945617694, e:1.0
action: 1632
action: 1632, reward: 3.576266872482169, e:1.0
action: 4987
action: 4987, reward: 3.6392738688811597, e:1.0
action: 1632
action: 1632, reward: 3.576266872482169, e:1.0
action: 1632
action: 1632, reward: 3.576266872482169, e:1.0
action: 3475
action: 3475, reward:

action: 1648
action: 1648, reward: 3.5176053121456867, e:1.0
action: 4547
action: 4547, reward: 3.586540485952745, e:1.0
action: 4547
action: 4547, reward: 3.586540485952745, e:1.0
action: 4547
action: 4547, reward: 3.586540485952745, e:1.0
action: 3438
action: 3438, reward: 2.9241120251619632, e:1.0
action: 4547
action: 4547, reward: 3.586540485952745, e:1.0
action: 4547
action: 4547, reward: 3.586540485952745, e:1.0
action: 4547
action: 4547, reward: 3.586540485952745, e:1.0
action: 4931
action: 4931, reward: 3.8324600180820125, e:1.0
action: 4547
action: 4547, reward: 3.586540485952745, e:1.0
action: 4494
action: 4494, reward: 3.5334359836753886, e:1.0
action: 4547
action: 4547, reward: 3.586540485952745, e:1.0
action: 4547
action: 4547, reward: 3.586540485952745, e:1.0
action: 4547
action: 4547, reward: 3.586540485952745, e:1.0
action: 4547
action: 4547, reward: 3.586540485952745, e:1.0
action: 4547
action: 4547, reward: 3.586540485952745, e:1.0
action: 4547
action: 4547, reward: 3

action: 9723
action: 9723, reward: 3.4153322895146596, e:1.0
action: 9346
action: 9346, reward: 3.298913452478452, e:1.0
action: 9346
action: 9346, reward: 3.298913452478452, e:1.0
action: 9267
action: 9267, reward: 3.622279928624332, e:1.0
action: 9346
action: 9346, reward: 3.298913452478452, e:1.0
action: 9346
action: 9346, reward: 3.298913452478452, e:1.0
action: 9346
action: 9346, reward: 3.298913452478452, e:1.0
action: 9346
action: 9346, reward: 3.298913452478452, e:1.0
action: 5220
action: 5220, reward: 3.1363042127515515, e:1.0
action: 9346
action: 9346, reward: 3.298913452478452, e:1.0
action: 9346
action: 9346, reward: 3.298913452478452, e:1.0
action: 9346
action: 9346, reward: 3.298913452478452, e:1.0
action: 6366
action: 6366, reward: 3.6253258064645815, e:1.0
action: 9346
action: 9346, reward: 3.298913452478452, e:1.0
action: 9346
action: 9346, reward: 3.298913452478452, e:1.0
action: 7570
action: 7570, reward: 3.232589098057077, e:1.0
action: 9346
action: 9346, reward: 3.

action: 7531
action: 7531, reward: 3.607388201449843, e:1.0
action: 7531
action: 7531, reward: 3.607388201449843, e:1.0
action: 3434
action: 3434, reward: 3.620577250290906, e:1.0
action: 5755
action: 5755, reward: 3.5573960743604047, e:1.0
action: 7531
action: 7531, reward: 3.607388201449843, e:1.0
action: 7531
action: 7531, reward: 3.607388201449843, e:1.0
action: 7531
action: 7531, reward: 3.607388201449843, e:1.0
action: 7531
action: 7531, reward: 3.607388201449843, e:1.0
action: 4340
action: 4340, reward: 3.644909814179445, e:1.0
action: 7531
action: 7531, reward: 3.607388201449843, e:1.0
action: 7531
action: 7531, reward: 3.607388201449843, e:1.0
action: 7531
action: 7531, reward: 3.607388201449843, e:1.0
action: 7531
action: 7531, reward: 3.607388201449843, e:1.0
action: 7531
action: 7531, reward: 3.607388201449843, e:1.0
action: 9502
action: 9502, reward: 3.5515194631557385, e:1.0
action: 803
action: 803, reward: 3.610524355481839, e:1.0
action: 470
action: 470, reward: 3.37536

action: 5584
action: 5584, reward: 3.4722821393938994, e:1.0
action: 313
action: 313, reward: 2.681471923730163, e:1.0
action: 8261
action: 8261, reward: 3.3153754676112843, e:1.0
action: 6565
action: 6565, reward: 3.549015715500522, e:1.0
action: 7944
action: 7944, reward: 3.5370227301040043, e:1.0
action: 313
action: 313, reward: 2.681471923730163, e:1.0
action: 7065
action: 7065, reward: 3.6903939486184623, e:1.0
action: 467
action: 467, reward: 3.6092072381610767, e:1.0
action: 470
action: 470, reward: 3.3753611907519376, e:1.0
action: 1570
action: 1570, reward: 3.5646244814636385, e:1.0
action: 3663
action: 3663, reward: 3.2984733681469924, e:1.0
action: 1570
action: 1570, reward: 3.5646244814636385, e:1.0
action: 1570
action: 1570, reward: 3.5646244814636385, e:1.0
action: 1570
action: 1570, reward: 3.5646244814636385, e:1.0
action: 7064
action: 7064, reward: 2.7253409179125896, e:1.0
action: 4800
action: 4800, reward: 4.157264671639833, e:1.0
action: 5439
action: 5439, reward: 3

action: 4929
action: 4929, reward: 3.434076188318906, e:1.0
action: 4929
action: 4929, reward: 3.434076188318906, e:1.0
action: 4929
action: 4929, reward: 3.434076188318906, e:1.0
action: 4929
action: 4929, reward: 3.434076188318906, e:1.0
action: 333
action: 333, reward: 3.279357712278742, e:1.0
action: 9393
action: 9393, reward: 3.622117137305694, e:1.0
action: 4929
action: 4929, reward: 3.434076188318906, e:1.0
action: 4929
action: 4929, reward: 3.434076188318906, e:1.0
action: 853
action: 853, reward: 3.7173548898129374, e:1.0
action: 8006
action: 8006, reward: 3.5769547024323165, e:1.0
action: 1171
action: 1171, reward: 3.2403661138508233, e:1.0
action: 4740
action: 4740, reward: 3.600914518493012, e:1.0
action: 1171
action: 1171, reward: 3.2403661138508233, e:1.0
action: 6284
action: 6284, reward: 3.4204278465203273, e:1.0
action: 7510
action: 7510, reward: 3.843670213355653, e:1.0
action: 9704
action: 9704, reward: 3.4786916406298207, e:1.0
action: 1567
action: 1567, reward: 3.3

action: 935
action: 935, reward: 4.1940908343766985, e:1.0
action: 935
action: 935, reward: 4.1940908343766985, e:1.0
action: 7401
action: 7401, reward: 3.573605017526956, e:1.0
action: 1674
action: 1674, reward: 2.8001206709773623, e:1.0
action: 8171
action: 8171, reward: 3.217585246874666, e:1.0
action: 6886
action: 6886, reward: 3.541739666452512, e:1.0
action: 8171
action: 8171, reward: 3.217585246874666, e:1.0
action: 1170
action: 1170, reward: 3.296600461998856, e:1.0
action: 8171
action: 8171, reward: 3.217585246874666, e:1.0
action: 445
action: 445, reward: 3.4281941535758866, e:1.0
action: 935
action: 935, reward: 4.1940908343766985, e:1.0
action: 935
action: 935, reward: 4.1940908343766985, e:1.0
action: 935
action: 935, reward: 4.1940908343766985, e:1.0
action: 935
action: 935, reward: 4.1940908343766985, e:1.0
action: 935
action: 935, reward: 4.1940908343766985, e:1.0
action: 4892
action: 4892, reward: 3.5149023375940205, e:1.0
action: 9303
action: 9303, reward: 3.538753989

action: 8691
action: 8691, reward: 3.770810242371262, e:1.0
action: 9369
action: 9369, reward: 3.7198815937898253, e:1.0
action: 7308
action: 7308, reward: 3.5961160115712207, e:1.0
action: 8691
action: 8691, reward: 3.770810242371262, e:1.0
action: 8691
action: 8691, reward: 3.770810242371262, e:1.0
action: 8691
action: 8691, reward: 3.770810242371262, e:1.0
action: 8691
action: 8691, reward: 3.770810242371262, e:1.0
action: 8691
action: 8691, reward: 3.770810242371262, e:1.0
action: 7861
action: 7861, reward: 3.1415875919499086, e:1.0
action: 8691
action: 8691, reward: 3.770810242371262, e:1.0
action: 8691
action: 8691, reward: 3.770810242371262, e:1.0
action: 8691
action: 8691, reward: 3.770810242371262, e:1.0
action: 8691
action: 8691, reward: 3.770810242371262, e:1.0
action: 1177
action: 1177, reward: 3.611506370204633, e:1.0
action: 1581
action: 1581, reward: 3.0694977731902533, e:1.0
action: 1581
action: 1581, reward: 3.0694977731902533, e:1.0
action: 1581
action: 1581, reward: 

action: 3019
action: 3019, reward: 3.6975284389758802, e:1.0
action: 4495
action: 4495, reward: 3.4675570109906872, e:1.0
action: 4495
action: 4495, reward: 3.4675570109906872, e:1.0
action: 4495
action: 4495, reward: 3.4675570109906872, e:1.0
action: 4495
action: 4495, reward: 3.4675570109906872, e:1.0
action: 7323
action: 7323, reward: 4.042886155795016, e:1.0
action: 7171
action: 7171, reward: 3.8263850989176245, e:1.0
action: 3019
action: 3019, reward: 3.6975284389758802, e:1.0
action: 3019
action: 3019, reward: 3.6975284389758802, e:1.0
action: 3019
action: 3019, reward: 3.6975284389758802, e:1.0
action: 6095
action: 6095, reward: 3.1575669831255375, e:1.0
action: 3019
action: 3019, reward: 3.6975284389758802, e:1.0
action: 3019
action: 3019, reward: 3.6975284389758802, e:1.0
action: 3019
action: 3019, reward: 3.6975284389758802, e:1.0
action: 9039
action: 9039, reward: 3.4892684213458898, e:1.0
action: 3019
action: 3019, reward: 3.6975284389758802, e:1.0
action: 3019
action: 3019

action: 9346
action: 9346, reward: 3.298913452478452, e:1.0
action: 5090
action: 5090, reward: 3.5098446987062037, e:1.0
action: 9056
action: 9056, reward: 3.1832956166886444, e:1.0
action: 4743
action: 4743, reward: 3.3498147849073865, e:1.0
action: 9056
action: 9056, reward: 3.1832956166886444, e:1.0
action: 9056
action: 9056, reward: 3.1832956166886444, e:1.0
action: 7242
action: 7242, reward: 3.5803388076251603, e:1.0
action: 9056
action: 9056, reward: 3.1832956166886444, e:1.0
action: 9056
action: 9056, reward: 3.1832956166886444, e:1.0
action: 9056
action: 9056, reward: 3.1832956166886444, e:1.0
action: 9056
action: 9056, reward: 3.1832956166886444, e:1.0
action: 9056
action: 9056, reward: 3.1832956166886444, e:1.0
action: 9056
action: 9056, reward: 3.1832956166886444, e:1.0
action: 3138
action: 3138, reward: 3.6627249734744343, e:1.0
action: 3241
action: 3241, reward: 3.4538138622718004, e:1.0
action: 9056
action: 9056, reward: 3.1832956166886444, e:1.0
action: 275
action: 275, 

action: 9346
action: 9346, reward: 3.298913452478452, e:1.0
action: 9346
action: 9346, reward: 3.298913452478452, e:1.0
action: 9346
action: 9346, reward: 3.298913452478452, e:1.0
action: 9346
action: 9346, reward: 3.298913452478452, e:1.0
action: 2902
action: 2902, reward: 2.8866473150345806, e:1.0
action: 9346
action: 9346, reward: 3.298913452478452, e:1.0
action: 9346
action: 9346, reward: 3.298913452478452, e:1.0
action: 9346
action: 9346, reward: 3.298913452478452, e:1.0
action: 9346
action: 9346, reward: 3.298913452478452, e:1.0
action: 2320
action: 2320, reward: 3.815574801420101, e:1.0
action: 9546
action: 9546, reward: 3.6666223447113997, e:1.0
action: 9346
action: 9346, reward: 3.298913452478452, e:1.0
action: 9346
action: 9346, reward: 3.298913452478452, e:1.0
action: 5366
action: 5366, reward: 3.9642203712829174, e:1.0
action: 6563
action: 6563, reward: 3.500646376049088, e:1.0
action: 9346
action: 9346, reward: 3.298913452478452, e:1.0
action: 2522
action: 2522, reward: 3.

action: 9346
action: 9346, reward: 3.298913452478452, e:1.0
action: 9346
action: 9346, reward: 3.298913452478452, e:1.0
action: 9346
action: 9346, reward: 3.298913452478452, e:1.0
action: 9346
action: 9346, reward: 3.298913452478452, e:1.0
action: 9346
action: 9346, reward: 3.298913452478452, e:1.0
action: 9346
action: 9346, reward: 3.298913452478452, e:1.0
action: 9551
action: 9551, reward: 3.658142408780197, e:1.0
action: 9346
action: 9346, reward: 3.298913452478452, e:1.0
action: 9346
action: 9346, reward: 3.298913452478452, e:1.0
action: 2348
action: 2348, reward: 3.2795238541793053, e:1.0
action: 9346
action: 9346, reward: 3.298913452478452, e:1.0
action: 9346
action: 9346, reward: 3.298913452478452, e:1.0
action: 9346
action: 9346, reward: 3.298913452478452, e:1.0
action: 9346
action: 9346, reward: 3.298913452478452, e:1.0
action: 9544
action: 9544, reward: 3.614181315301875, e:1.0
action: 9346
action: 9346, reward: 3.298913452478452, e:1.0
action: 9346
action: 9346, reward: 3.29

action: 1806
action: 1806, reward: 3.5155684349141323, e:1.0
action: 1806
action: 1806, reward: 3.5155684349141323, e:1.0
action: 1806
action: 1806, reward: 3.5155684349141323, e:1.0
action: 1806
action: 1806, reward: 3.5155684349141323, e:1.0
action: 1806
action: 1806, reward: 3.5155684349141323, e:1.0
action: 2198
action: 2198, reward: 3.653810577279868, e:1.0
action: 1806
action: 1806, reward: 3.5155684349141323, e:1.0
action: 1806
action: 1806, reward: 3.5155684349141323, e:1.0
action: 1806
action: 1806, reward: 3.5155684349141323, e:1.0
action: 4856
action: 4856, reward: 3.085604394454872, e:1.0
action: 4543
action: 4543, reward: 3.5139980256102246, e:1.0
action: 7450
action: 7450, reward: 3.5925828095316814, e:1.0
action: 9273
action: 9273, reward: 3.5501156128416205, e:1.0
action: 4929
action: 4929, reward: 3.434076188318906, e:1.0
action: 3711
action: 3711, reward: 3.663535256719855, e:1.0
action: 4929
action: 4929, reward: 3.434076188318906, e:1.0
action: 5639
action: 5639, re

action: 3807
action: 3807, reward: 3.7272701412705826, e:1.0
action: 3464
action: 3464, reward: 2.9757294495302093, e:1.0
action: 9238
action: 9238, reward: 3.4178552900389367, e:1.0
action: 1309
action: 1309, reward: 3.923501153218459, e:1.0
action: 2285
action: 2285, reward: 3.6843439943434215, e:1.0
action: 4780
action: 4780, reward: 3.2528892396569473, e:1.0
action: 2285
action: 2285, reward: 3.6843439943434215, e:1.0
action: 2285
action: 2285, reward: 3.6843439943434215, e:1.0
action: 2285
action: 2285, reward: 3.6843439943434215, e:1.0
action: 2285
action: 2285, reward: 3.6843439943434215, e:1.0
action: 4013
action: 4013, reward: 3.4314457452324274, e:1.0
action: 4740
action: 4740, reward: 3.600914518493012, e:1.0
action: 8269
action: 8269, reward: 3.5990715580293777, e:1.0
action: 6952
action: 6952, reward: 3.4149461225943885, e:1.0
action: 855
action: 855, reward: 3.492272222098003, e:1.0
action: 855
action: 855, reward: 3.492272222098003, e:1.0
action: 855
action: 855, reward:

action: 9346
action: 9346, reward: 3.298913452478452, e:1.0
action: 4885
action: 4885, reward: 2.8471966052506414, e:1.0
action: 9105
action: 9105, reward: 3.404596730875291, e:1.0
action: 9346
action: 9346, reward: 3.298913452478452, e:1.0
action: 8876
action: 8876, reward: 3.50628368458353, e:1.0
action: 9346
action: 9346, reward: 3.298913452478452, e:1.0
action: 1230
action: 1230, reward: 3.8365288651382246, e:1.0
action: 4309
action: 4309, reward: 3.6602368064950883, e:1.0
action: 2851
action: 2851, reward: 3.7444637964726115, e:1.0
action: 2341
action: 2341, reward: 3.408498803496833, e:1.0
action: 2341
action: 2341, reward: 3.408498803496833, e:1.0
action: 5414
action: 5414, reward: 3.4897434006524803, e:1.0
action: 2341
action: 2341, reward: 3.408498803496833, e:1.0
action: 2341
action: 2341, reward: 3.408498803496833, e:1.0
action: 2341
action: 2341, reward: 3.408498803496833, e:1.0
action: 317
action: 317, reward: 3.430698182172032, e:1.0
action: 2341
action: 2341, reward: 3.4

action: 3019
action: 3019, reward: 3.6975284389758802, e:1.0
action: 3019
action: 3019, reward: 3.6975284389758802, e:1.0
action: 3019
action: 3019, reward: 3.6975284389758802, e:1.0
action: 6471
action: 6471, reward: 3.479847753981536, e:1.0
action: 3019
action: 3019, reward: 3.6975284389758802, e:1.0
action: 3019
action: 3019, reward: 3.6975284389758802, e:1.0
action: 3019
action: 3019, reward: 3.6975284389758802, e:1.0
action: 3019
action: 3019, reward: 3.6975284389758802, e:1.0
action: 3019
action: 3019, reward: 3.6975284389758802, e:1.0
action: 3019
action: 3019, reward: 3.6975284389758802, e:1.0
action: 3019
action: 3019, reward: 3.6975284389758802, e:1.0
action: 3019
action: 3019, reward: 3.6975284389758802, e:1.0
action: 3019
action: 3019, reward: 3.6975284389758802, e:1.0
action: 3019
action: 3019, reward: 3.6975284389758802, e:1.0
action: 3019
action: 3019, reward: 3.6975284389758802, e:1.0
action: 3019
action: 3019, reward: 3.6975284389758802, e:1.0
action: 3019
action: 3019

action: 1081
action: 1081, reward: 3.0691938514819714, e:1.0
action: 8691
action: 8691, reward: 3.770810242371262, e:1.0
action: 8691
action: 8691, reward: 3.770810242371262, e:1.0
action: 8691
action: 8691, reward: 3.770810242371262, e:1.0
action: 495
action: 495, reward: 3.506294524756275, e:1.0
action: 8691
action: 8691, reward: 3.770810242371262, e:1.0
action: 8691
action: 8691, reward: 3.770810242371262, e:1.0
action: 8691
action: 8691, reward: 3.770810242371262, e:1.0
action: 2223
action: 2223, reward: 3.650824412542432, e:1.0
action: 8645
action: 8645, reward: 3.5933050870969785, e:1.0
action: 8691
action: 8691, reward: 3.770810242371262, e:1.0
action: 8691
action: 8691, reward: 3.770810242371262, e:1.0
action: 2735
action: 2735, reward: 3.663758314659221, e:1.0
action: 8691
action: 8691, reward: 3.770810242371262, e:1.0
action: 9395
action: 9395, reward: 3.6307585809287617, e:1.0
action: 8691
action: 8691, reward: 3.770810242371262, e:1.0
action: 8691
action: 8691, reward: 3.77

action: 8691
action: 8691, reward: 3.770810242371262, e:1.0
action: 3131
action: 3131, reward: 3.127542016282789, e:1.0
action: 4929
action: 4929, reward: 3.434076188318906, e:1.0
action: 4929
action: 4929, reward: 3.434076188318906, e:1.0
action: 4929
action: 4929, reward: 3.434076188318906, e:1.0
action: 4929
action: 4929, reward: 3.434076188318906, e:1.0
action: 4929
action: 4929, reward: 3.434076188318906, e:1.0
action: 4556
action: 4556, reward: 3.5757527183206195, e:1.0
action: 4929
action: 4929, reward: 3.434076188318906, e:1.0
action: 4929
action: 4929, reward: 3.434076188318906, e:1.0
action: 3038
action: 3038, reward: 3.2475242288744592, e:1.0
action: 4929
action: 4929, reward: 3.434076188318906, e:1.0
action: 4929
action: 4929, reward: 3.434076188318906, e:1.0
action: 7626
action: 7626, reward: 3.2190783193789674, e:1.0
action: 4929
action: 4929, reward: 3.434076188318906, e:1.0
action: 4929
action: 4929, reward: 3.434076188318906, e:1.0
action: 6170
action: 6170, reward: 3.

action: 4478
action: 4478, reward: 3.3249784324055693, e:1.0
action: 2171
action: 2171, reward: 3.250036243704727, e:1.0
action: 3665
action: 3665, reward: 3.4522592933299427, e:1.0
action: 7933
action: 7933, reward: 3.624630813427618, e:1.0
action: 4478
action: 4478, reward: 3.3249784324055693, e:1.0
action: 4478
action: 4478, reward: 3.3249784324055693, e:1.0
action: 4697
action: 4697, reward: 3.454679751954371, e:1.0
action: 4478
action: 4478, reward: 3.3249784324055693, e:1.0
action: 4478
action: 4478, reward: 3.3249784324055693, e:1.0
action: 4478
action: 4478, reward: 3.3249784324055693, e:1.0
action: 4478
action: 4478, reward: 3.3249784324055693, e:1.0
action: 4478
action: 4478, reward: 3.3249784324055693, e:1.0
action: 4478
action: 4478, reward: 3.3249784324055693, e:1.0
action: 3529
action: 3529, reward: 3.2800206737249713, e:1.0
action: 9529
action: 9529, reward: 3.5613542393943454, e:1.0
action: 4435
action: 4435, reward: 3.611662651518381, e:1.0
action: 7622
action: 7622, r

action: 3351
action: 3351, reward: 3.4776351724622816, e:1.0
action: 2715
action: 2715, reward: 3.6496720304151387, e:1.0
action: 8691
action: 8691, reward: 3.770810242371262, e:1.0
action: 8691
action: 8691, reward: 3.770810242371262, e:1.0
action: 3139
action: 3139, reward: 3.343412173407483, e:1.0
action: 6041
action: 6041, reward: 3.588532439949405, e:1.0
action: 935
action: 935, reward: 4.1940908343766985, e:1.0
action: 6791
action: 6791, reward: 3.1487362734306203, e:1.0
action: 6791
action: 6791, reward: 3.1487362734306203, e:1.0
action: 6791
action: 6791, reward: 3.1487362734306203, e:1.0
action: 6791
action: 6791, reward: 3.1487362734306203, e:1.0
action: 6791
action: 6791, reward: 3.1487362734306203, e:1.0
action: 8955
action: 8955, reward: 3.3909212139662728, e:1.0
action: 935
action: 935, reward: 4.1940908343766985, e:1.0
action: 935
action: 935, reward: 4.1940908343766985, e:1.0
action: 935
action: 935, reward: 4.1940908343766985, e:1.0
action: 554
action: 554, reward: 3.8

action: 7286
action: 7286, reward: 3.52306615316063, e:1.0
action: 3807
action: 3807, reward: 3.7272701412705826, e:1.0
action: 5595
action: 5595, reward: 3.445061465885291, e:1.0
action: 3807
action: 3807, reward: 3.7272701412705826, e:1.0
action: 3807
action: 3807, reward: 3.7272701412705826, e:1.0
action: 3807
action: 3807, reward: 3.7272701412705826, e:1.0
action: 3807
action: 3807, reward: 3.7272701412705826, e:1.0
action: 3807
action: 3807, reward: 3.7272701412705826, e:1.0
action: 654
action: 654, reward: 2.8989355388766462, e:1.0
action: 7631
action: 7631, reward: 2.9251108820429925, e:1.0
action: 2362
action: 2362, reward: 3.074578735732718, e:1.0
action: 1381
action: 1381, reward: 3.5693944209626807, e:1.0
action: 3807
action: 3807, reward: 3.7272701412705826, e:1.0
action: 6734
action: 6734, reward: 3.7073026136441833, e:1.0
action: 3807
action: 3807, reward: 3.7272701412705826, e:1.0
action: 5928
action: 5928, reward: 3.6898095589084274, e:1.0
action: 3807
action: 3807, rew

action: 3818
action: 3818, reward: 3.5609027777885864, e:1.0
action: 4478
action: 4478, reward: 3.3249784324055693, e:1.0
action: 4478
action: 4478, reward: 3.3249784324055693, e:1.0
action: 4478
action: 4478, reward: 3.3249784324055693, e:1.0
action: 4478
action: 4478, reward: 3.3249784324055693, e:1.0
action: 4478
action: 4478, reward: 3.3249784324055693, e:1.0
action: 9691
action: 9691, reward: 3.446491389019918, e:1.0
action: 4478
action: 4478, reward: 3.3249784324055693, e:1.0
action: 4478
action: 4478, reward: 3.3249784324055693, e:1.0
action: 4478
action: 4478, reward: 3.3249784324055693, e:1.0
action: 4478
action: 4478, reward: 3.3249784324055693, e:1.0
action: 7803
action: 7803, reward: 3.2228634883904217, e:1.0
action: 4478
action: 4478, reward: 3.3249784324055693, e:1.0
action: 3009
action: 3009, reward: 3.137589223568648, e:1.0
action: 4478
action: 4478, reward: 3.3249784324055693, e:1.0
action: 4478
action: 4478, reward: 3.3249784324055693, e:1.0
action: 2176
action: 2176,

action: 3655
action: 3655, reward: 3.1448694473097056, e:1.0
action: 1064
action: 1064, reward: 2.4956767761319476, e:1.0
action: 498
action: 498, reward: 3.093467550872561, e:1.0
action: 7432
action: 7432, reward: 3.395090902590309, e:1.0
action: 9135
action: 9135, reward: 3.5542808171338045, e:1.0
action: 3655
action: 3655, reward: 3.1448694473097056, e:1.0
action: 3655
action: 3655, reward: 3.1448694473097056, e:1.0
action: 3655
action: 3655, reward: 3.1448694473097056, e:1.0
action: 3655
action: 3655, reward: 3.1448694473097056, e:1.0
action: 7687
action: 7687, reward: 3.699862047109459, e:1.0
action: 6932
action: 6932, reward: 3.5061105961500876, e:1.0
action: 5726
action: 5726, reward: 3.8081008944656287, e:1.0
action: 3655
action: 3655, reward: 3.1448694473097056, e:1.0
action: 8416
action: 8416, reward: 3.2980732060222957, e:1.0
action: 3655
action: 3655, reward: 3.1448694473097056, e:1.0
action: 3655
action: 3655, reward: 3.1448694473097056, e:1.0
action: 3655
action: 3655, re

action: 9576
action: 9576, reward: 3.489890453096653, e:1.0
action: 9705
action: 9705, reward: 3.368335831030194, e:1.0
action: 9705
action: 9705, reward: 3.368335831030194, e:1.0
action: 9705
action: 9705, reward: 3.368335831030194, e:1.0
action: 5933
action: 5933, reward: 3.4651784623617514, e:1.0
action: 2815
action: 2815, reward: 3.4281185020868765, e:1.0
action: 405
action: 405, reward: 3.6333781772233187, e:1.0
action: 6902
action: 6902, reward: 3.3818585595332347, e:1.0
action: 9705
action: 9705, reward: 3.368335831030194, e:1.0
action: 2112
action: 2112, reward: 3.1178924568755524, e:1.0
action: 9705
action: 9705, reward: 3.368335831030194, e:1.0
action: 5446
action: 5446, reward: 3.49498002914847, e:1.0
action: 9705
action: 9705, reward: 3.368335831030194, e:1.0
action: 9705
action: 9705, reward: 3.368335831030194, e:1.0
action: 9705
action: 9705, reward: 3.368335831030194, e:1.0
action: 9705
action: 9705, reward: 3.368335831030194, e:1.0
action: 9705
action: 9705, reward: 3.3

action: 1570
action: 1570, reward: 3.5646244814636385, e:1.0
action: 4539
action: 4539, reward: 3.60186404615419, e:1.0
action: 4800
action: 4800, reward: 4.157264671639833, e:1.0
action: 1570
action: 1570, reward: 3.5646244814636385, e:1.0
action: 1570
action: 1570, reward: 3.5646244814636385, e:1.0
action: 1570
action: 1570, reward: 3.5646244814636385, e:1.0
action: 2930
action: 2930, reward: 3.4105953245109455, e:1.0
action: 8691
action: 8691, reward: 3.770810242371262, e:1.0
action: 8691
action: 8691, reward: 3.770810242371262, e:1.0
action: 9079
action: 9079, reward: 3.5830039991894562, e:1.0
action: 8691
action: 8691, reward: 3.770810242371262, e:1.0
action: 8691
action: 8691, reward: 3.770810242371262, e:1.0
action: 8691
action: 8691, reward: 3.770810242371262, e:1.0
action: 8691
action: 8691, reward: 3.770810242371262, e:1.0
action: 8691
action: 8691, reward: 3.770810242371262, e:1.0
action: 4763
action: 4763, reward: 3.6320605967107045, e:1.0
action: 8691
action: 8691, reward:

action: 7953
action: 7953, reward: 3.296220664662528, e:1.0
action: 6230
action: 6230, reward: 3.166675180550934, e:1.0
action: 8315
action: 8315, reward: 3.1598481626974984, e:1.0
action: 8315
action: 8315, reward: 3.1598481626974984, e:1.0
action: 8315
action: 8315, reward: 3.1598481626974984, e:1.0
action: 8315
action: 8315, reward: 3.1598481626974984, e:1.0
action: 8315
action: 8315, reward: 3.1598481626974984, e:1.0
action: 8315
action: 8315, reward: 3.1598481626974984, e:1.0
action: 5771
action: 5771, reward: 3.5486908162722433, e:1.0
action: 8315
action: 8315, reward: 3.1598481626974984, e:1.0
action: 7559
action: 7559, reward: 3.2717428627432708, e:1.0
action: 8315
action: 8315, reward: 3.1598481626974984, e:1.0
action: 8315
action: 8315, reward: 3.1598481626974984, e:1.0
action: 1733
action: 1733, reward: 3.0683608887388587, e:1.0
action: 8315
action: 8315, reward: 3.1598481626974984, e:1.0
action: 7058
action: 7058, reward: 3.1498032258620747, e:1.0
action: 324
action: 324, r

action: 470
action: 470, reward: 3.3753611907519376, e:1.0
action: 470
action: 470, reward: 3.3753611907519376, e:1.0
action: 6304
action: 6304, reward: 3.652627799545314, e:1.0
action: 8583
action: 8583, reward: 3.912816365980268, e:1.0
action: 470
action: 470, reward: 3.3753611907519376, e:1.0
action: 9661
action: 9661, reward: 3.5963607687954706, e:1.0
action: 470
action: 470, reward: 3.3753611907519376, e:1.0
action: 2545
action: 2545, reward: 3.335288784569103, e:1.0
action: 481
action: 481, reward: 3.6898982356166963, e:1.0
action: 7310
action: 7310, reward: 3.45821604614059, e:1.0
action: 470
action: 470, reward: 3.3753611907519376, e:1.0
action: 470
action: 470, reward: 3.3753611907519376, e:1.0
action: 470
action: 470, reward: 3.3753611907519376, e:1.0
action: 470
action: 470, reward: 3.3753611907519376, e:1.0
action: 7252
action: 7252, reward: 3.800447885898454, e:1.0
action: 470
action: 470, reward: 3.3753611907519376, e:1.0
action: 6747
action: 6747, reward: 3.8387067959219

action: 3807
action: 3807, reward: 3.7272701412705826, e:1.0
action: 7531
action: 7531, reward: 3.607388201449843, e:1.0
action: 7531
action: 7531, reward: 3.607388201449843, e:1.0
action: 1069
action: 1069, reward: 2.600533411044888, e:1.0
action: 4854
action: 4854, reward: 3.055771216367354, e:1.0
action: 7531
action: 7531, reward: 3.607388201449843, e:1.0
action: 2209
action: 2209, reward: 3.913181774509676, e:1.0
action: 9708
action: 9708, reward: 3.331570333322829, e:1.0
action: 7531
action: 7531, reward: 3.607388201449843, e:1.0
action: 7531
action: 7531, reward: 3.607388201449843, e:1.0
action: 7531
action: 7531, reward: 3.607388201449843, e:1.0
action: 7531
action: 7531, reward: 3.607388201449843, e:1.0
action: 7531
action: 7531, reward: 3.607388201449843, e:1.0
action: 5779
action: 5779, reward: 3.029550937638107, e:1.0
action: 4929
action: 4929, reward: 3.434076188318906, e:1.0
action: 4929
action: 4929, reward: 3.434076188318906, e:1.0
action: 7680
action: 7680, reward: 3.49

action: 1581
action: 1581, reward: 3.0694977731902533, e:1.0
action: 2986
action: 2986, reward: 2.752953796331767, e:1.0
action: 8691
action: 8691, reward: 3.770810242371262, e:1.0
action: 8691
action: 8691, reward: 3.770810242371262, e:1.0
action: 2481
action: 2481, reward: 3.744257089434656, e:1.0
action: 266
action: 266, reward: 4.10647150188875, e:1.0
action: 7531
action: 7531, reward: 3.607388201449843, e:1.0
action: 7192
action: 7192, reward: 3.5925470870628047, e:1.0
action: 7531
action: 7531, reward: 3.607388201449843, e:1.0
action: 7531
action: 7531, reward: 3.607388201449843, e:1.0
action: 7531
action: 7531, reward: 3.607388201449843, e:1.0
action: 7057
action: 7057, reward: 3.1583057199091362, e:1.0
action: 8805
action: 8805, reward: 3.665009548459408, e:1.0
action: 7531
action: 7531, reward: 3.607388201449843, e:1.0
action: 7531
action: 7531, reward: 3.607388201449843, e:1.0
action: 7531
action: 7531, reward: 3.607388201449843, e:1.0
action: 1894
action: 1894, reward: 3.339

action: 1806
action: 1806, reward: 3.5155684349141323, e:1.0
action: 1806
action: 1806, reward: 3.5155684349141323, e:1.0
action: 1806
action: 1806, reward: 3.5155684349141323, e:1.0
action: 7392
action: 7392, reward: 3.4608460472929763, e:1.0
action: 1806
action: 1806, reward: 3.5155684349141323, e:1.0
action: 1806
action: 1806, reward: 3.5155684349141323, e:1.0
action: 1806
action: 1806, reward: 3.5155684349141323, e:1.0
action: 1806
action: 1806, reward: 3.5155684349141323, e:1.0
action: 1806
action: 1806, reward: 3.5155684349141323, e:1.0
action: 1806
action: 1806, reward: 3.5155684349141323, e:1.0
action: 1806
action: 1806, reward: 3.5155684349141323, e:1.0
action: 1806
action: 1806, reward: 3.5155684349141323, e:1.0
action: 1806
action: 1806, reward: 3.5155684349141323, e:1.0
action: 1806
action: 1806, reward: 3.5155684349141323, e:1.0
action: 1806
action: 1806, reward: 3.5155684349141323, e:1.0
action: 1806
action: 1806, reward: 3.5155684349141323, e:1.0
action: 1806
action: 180

action: 9071
action: 9071, reward: 4.2179770229258775, e:1.0
action: 1535
action: 1535, reward: 3.442172112832225, e:1.0
action: 2670
action: 2670, reward: 3.015812551131464, e:1.0
action: 2670
action: 2670, reward: 3.015812551131464, e:1.0
action: 2670
action: 2670, reward: 3.015812551131464, e:1.0
action: 2938
action: 2938, reward: 3.685400394704761, e:1.0
action: 282
action: 282, reward: 3.5654277680036395, e:1.0
action: 2670
action: 2670, reward: 3.015812551131464, e:1.0
action: 2670
action: 2670, reward: 3.015812551131464, e:1.0
action: 2670
action: 2670, reward: 3.015812551131464, e:1.0
action: 2670
action: 2670, reward: 3.015812551131464, e:1.0
action: 2670
action: 2670, reward: 3.015812551131464, e:1.0
action: 2670
action: 2670, reward: 3.015812551131464, e:1.0
action: 2670
action: 2670, reward: 3.015812551131464, e:1.0
action: 3619
action: 3619, reward: 3.593952340784309, e:1.0
action: 2670
action: 2670, reward: 3.015812551131464, e:1.0
action: 5773
action: 5773, reward: 4.045

action: 8319
action: 8319, reward: 3.389203507437722, e:1.0
action: 9346
action: 9346, reward: 3.298913452478452, e:1.0
action: 9346
action: 9346, reward: 3.298913452478452, e:1.0
action: 9346
action: 9346, reward: 3.298913452478452, e:1.0
action: 9346
action: 9346, reward: 3.298913452478452, e:1.0
action: 9346
action: 9346, reward: 3.298913452478452, e:1.0
action: 9346
action: 9346, reward: 3.298913452478452, e:1.0
action: 8389
action: 8389, reward: 3.263140755499399, e:1.0
action: 9346
action: 9346, reward: 3.298913452478452, e:1.0
action: 9346
action: 9346, reward: 3.298913452478452, e:1.0
action: 7554
action: 7554, reward: 3.9056821804572204, e:1.0
action: 9346
action: 9346, reward: 3.298913452478452, e:1.0
action: 9346
action: 9346, reward: 3.298913452478452, e:1.0
action: 9346
action: 9346, reward: 3.298913452478452, e:1.0
action: 9346
action: 9346, reward: 3.298913452478452, e:1.0
action: 9346
action: 9346, reward: 3.298913452478452, e:1.0
action: 9346
action: 9346, reward: 3.29

action: 7038
action: 7038, reward: 3.821638927089582, e:1.0
action: 1581
action: 1581, reward: 3.0694977731902533, e:1.0
action: 2393
action: 2393, reward: 3.6675582374746187, e:1.0
action: 1581
action: 1581, reward: 3.0694977731902533, e:1.0
action: 1581
action: 1581, reward: 3.0694977731902533, e:1.0
action: 1581
action: 1581, reward: 3.0694977731902533, e:1.0
action: 2440
action: 2440, reward: 3.6205812700869524, e:1.0
action: 1581
action: 1581, reward: 3.0694977731902533, e:1.0
action: 1581
action: 1581, reward: 3.0694977731902533, e:1.0
action: 1581
action: 1581, reward: 3.0694977731902533, e:1.0
action: 1581
action: 1581, reward: 3.0694977731902533, e:1.0
action: 7654
action: 7654, reward: 3.0182052691987877, e:1.0
action: 1581
action: 1581, reward: 3.0694977731902533, e:1.0
action: 4656
action: 4656, reward: 3.6087369607371733, e:1.0
action: 1581
action: 1581, reward: 3.0694977731902533, e:1.0
action: 1581
action: 1581, reward: 3.0694977731902533, e:1.0
action: 1581
action: 1581

action: 3807
action: 3807, reward: 3.7272701412705826, e:1.0
action: 3453
action: 3453, reward: 3.547309629592798, e:1.0
action: 3807
action: 3807, reward: 3.7272701412705826, e:1.0
action: 3807
action: 3807, reward: 3.7272701412705826, e:1.0
action: 3807
action: 3807, reward: 3.7272701412705826, e:1.0
action: 3807
action: 3807, reward: 3.7272701412705826, e:1.0
action: 3807
action: 3807, reward: 3.7272701412705826, e:1.0
action: 3807
action: 3807, reward: 3.7272701412705826, e:1.0
action: 3807
action: 3807, reward: 3.7272701412705826, e:1.0
action: 3807
action: 3807, reward: 3.7272701412705826, e:1.0
action: 3807
action: 3807, reward: 3.7272701412705826, e:1.0
action: 4910
action: 4910, reward: 3.4818337945647473, e:1.0
action: 3807
action: 3807, reward: 3.7272701412705826, e:1.0
action: 1295
action: 1295, reward: 3.783295157012493, e:1.0
action: 3807
action: 3807, reward: 3.7272701412705826, e:1.0
action: 3807
action: 3807, reward: 3.7272701412705826, e:1.0
action: 4488
action: 4488,

action: 3411
action: 3411, reward: 3.647080776735062, e:1.0
action: 3411
action: 3411, reward: 3.647080776735062, e:1.0
action: 1172
action: 1172, reward: 3.0257669270175667, e:1.0
action: 5629
action: 5629, reward: 3.8025141699103293, e:1.0
action: 5223
action: 5223, reward: 3.3989712768730875, e:1.0
action: 28
action: 28, reward: 3.9959532805514333, e:1.0
action: 7094
action: 7094, reward: 3.5295087241948164, e:1.0
action: 5204
action: 5204, reward: 3.140879068821211, e:1.0
action: 6325
action: 6325, reward: 3.6762585799177576, e:1.0
action: 9399
action: 9399, reward: 3.5008226688653274, e:1.0
action: 6518
action: 6518, reward: 3.542682720097039, e:1.0
action: 3411
action: 3411, reward: 3.647080776735062, e:1.0
action: 1897
action: 1897, reward: 3.5360551242631497, e:1.0
action: 3411
action: 3411, reward: 3.647080776735062, e:1.0
action: 3411
action: 3411, reward: 3.647080776735062, e:1.0
action: 4253
action: 4253, reward: 3.1677090526668334, e:1.0
action: 3411
action: 3411, reward: 

action: 6325
action: 6325, reward: 3.6762585799177576, e:1.0
action: 6255
action: 6255, reward: 3.153734522984659, e:1.0
action: 1581
action: 1581, reward: 3.0694977731902533, e:1.0
action: 1581
action: 1581, reward: 3.0694977731902533, e:1.0
action: 1581
action: 1581, reward: 3.0694977731902533, e:1.0
action: 4725
action: 4725, reward: 3.4967935486093995, e:1.0
action: 1581
action: 1581, reward: 3.0694977731902533, e:1.0
action: 1581
action: 1581, reward: 3.0694977731902533, e:1.0
action: 1581
action: 1581, reward: 3.0694977731902533, e:1.0
action: 1581
action: 1581, reward: 3.0694977731902533, e:1.0
action: 1581
action: 1581, reward: 3.0694977731902533, e:1.0
action: 1581
action: 1581, reward: 3.0694977731902533, e:1.0
action: 174
action: 174, reward: 3.4083748863364436, e:1.0
action: 6610
action: 6610, reward: 3.4170870740155475, e:1.0
action: 8315
action: 8315, reward: 3.1598481626974984, e:1.0
action: 9357
action: 9357, reward: 3.776297615379953, e:1.0
action: 3019
action: 3019, r

action: 8315
action: 8315, reward: 3.1598481626974984, e:1.0
action: 8315
action: 8315, reward: 3.1598481626974984, e:1.0
action: 8315
action: 8315, reward: 3.1598481626974984, e:1.0
action: 4159
action: 4159, reward: 3.9272708656432673, e:1.0
action: 7715
action: 7715, reward: 3.650637325290834, e:1.0
action: 8315
action: 8315, reward: 3.1598481626974984, e:1.0
action: 8315
action: 8315, reward: 3.1598481626974984, e:1.0
action: 3213
action: 3213, reward: 3.687810229955025, e:1.0
action: 8315
action: 8315, reward: 3.1598481626974984, e:1.0
action: 5713
action: 5713, reward: 3.3810383991405932, e:1.0
action: 8315
action: 8315, reward: 3.1598481626974984, e:1.0
action: 8315
action: 8315, reward: 3.1598481626974984, e:1.0
action: 8315
action: 8315, reward: 3.1598481626974984, e:1.0
action: 8315
action: 8315, reward: 3.1598481626974984, e:1.0
action: 1924
action: 1924, reward: 3.153774480243343, e:1.0
action: 8315
action: 8315, reward: 3.1598481626974984, e:1.0
action: 8315
action: 8315, 

action: 6824
action: 6824, reward: 3.6605171661011378, e:1.0
action: 8315
action: 8315, reward: 3.1598481626974984, e:1.0
action: 6824
action: 6824, reward: 3.6605171661011378, e:1.0
action: 6824
action: 6824, reward: 3.6605171661011378, e:1.0
action: 6824
action: 6824, reward: 3.6605171661011378, e:1.0
action: 6824
action: 6824, reward: 3.6605171661011378, e:1.0
action: 6824
action: 6824, reward: 3.6605171661011378, e:1.0
action: 6458
action: 6458, reward: 3.796335671960318, e:1.0
action: 8691
action: 8691, reward: 3.770810242371262, e:1.0
action: 7922
action: 7922, reward: 3.567354862108562, e:1.0
action: 8315
action: 8315, reward: 3.1598481626974984, e:1.0
action: 8315
action: 8315, reward: 3.1598481626974984, e:1.0
action: 8315
action: 8315, reward: 3.1598481626974984, e:1.0
action: 8315
action: 8315, reward: 3.1598481626974984, e:1.0
action: 8315
action: 8315, reward: 3.1598481626974984, e:1.0
action: 8315
action: 8315, reward: 3.1598481626974984, e:1.0
action: 880
action: 880, re

action: 8983
action: 8983, reward: 3.552970562894166, e:1.0
action: 8983
action: 8983, reward: 3.552970562894166, e:1.0
action: 6331
action: 6331, reward: 4.014760557779872, e:1.0
action: 8983
action: 8983, reward: 3.552970562894166, e:1.0
action: 7003
action: 7003, reward: 3.484354092632085, e:1.0
action: 8983
action: 8983, reward: 3.552970562894166, e:1.0
action: 8414
action: 8414, reward: 2.9150578706387553, e:1.0
action: 9438
action: 9438, reward: 3.7993560793542946, e:1.0
action: 6697
action: 6697, reward: 3.9127269311352926, e:1.0
action: 9204
action: 9204, reward: 3.4003628714474754, e:1.0
action: 3406
action: 3406, reward: 3.6915847890257787, e:1.0
action: 6525
action: 6525, reward: 3.4761243394595476, e:1.0
action: 1320
action: 1320, reward: 3.0573275090276257, e:1.0
action: 2005
action: 2005, reward: 3.597210866842187, e:1.0
action: 6525
action: 6525, reward: 3.4761243394595476, e:1.0
action: 6525
action: 6525, reward: 3.4761243394595476, e:1.0
action: 6525
action: 6525, rewa

action: 8691
action: 8691, reward: 3.770810242371262, e:1.0
action: 6549
action: 6549, reward: 3.4904187973327985, e:1.0
action: 1903
action: 1903, reward: 3.573057406269199, e:1.0
action: 1581
action: 1581, reward: 3.0694977731902533, e:1.0
action: 1555
action: 1555, reward: 3.045211226660764, e:1.0
action: 1581
action: 1581, reward: 3.0694977731902533, e:1.0
action: 1581
action: 1581, reward: 3.0694977731902533, e:1.0
action: 1581
action: 1581, reward: 3.0694977731902533, e:1.0
action: 1845
action: 1845, reward: 3.4848652364871215, e:1.0
action: 4722
action: 4722, reward: 3.62735271545462, e:1.0
action: 1581
action: 1581, reward: 3.0694977731902533, e:1.0
action: 1581
action: 1581, reward: 3.0694977731902533, e:1.0
action: 1581
action: 1581, reward: 3.0694977731902533, e:1.0
action: 1581
action: 1581, reward: 3.0694977731902533, e:1.0
action: 8022
action: 8022, reward: 3.7780605395990943, e:1.0
action: 6836
action: 6836, reward: 3.622029890321337, e:1.0
action: 6836
action: 6836, rew

action: 7414
action: 7414, reward: 3.5570430872158565, e:1.0
action: 3305
action: 3305, reward: 3.5196505865452945, e:1.0
action: 3305
action: 3305, reward: 3.5196505865452945, e:1.0
action: 3305
action: 3305, reward: 3.5196505865452945, e:1.0
action: 3305
action: 3305, reward: 3.5196505865452945, e:1.0
action: 3305
action: 3305, reward: 3.5196505865452945, e:1.0
action: 3305
action: 3305, reward: 3.5196505865452945, e:1.0
action: 3305
action: 3305, reward: 3.5196505865452945, e:1.0
action: 3305
action: 3305, reward: 3.5196505865452945, e:1.0
action: 3305
action: 3305, reward: 3.5196505865452945, e:1.0
action: 7698
action: 7698, reward: 3.455270892499329, e:1.0
action: 3111
action: 3111, reward: 3.4295793267708485, e:1.0
action: 3305
action: 3305, reward: 3.5196505865452945, e:1.0
action: 3305
action: 3305, reward: 3.5196505865452945, e:1.0
action: 3305
action: 3305, reward: 3.5196505865452945, e:1.0
action: 5859
action: 5859, reward: 3.4618813020711574, e:1.0
action: 3305
action: 3305

action: 1806
action: 1806, reward: 3.5155684349141323, e:1.0
action: 1806
action: 1806, reward: 3.5155684349141323, e:1.0
action: 1806
action: 1806, reward: 3.5155684349141323, e:1.0
action: 1806
action: 1806, reward: 3.5155684349141323, e:1.0
action: 1806
action: 1806, reward: 3.5155684349141323, e:1.0
action: 1806
action: 1806, reward: 3.5155684349141323, e:1.0
action: 194
action: 194, reward: 3.0545418210932627, e:1.0
action: 1300
action: 1300, reward: 3.834018005715028, e:1.0
action: 698
action: 698, reward: 4.078869119517593, e:1.0
action: 4929
action: 4929, reward: 3.434076188318906, e:1.0
action: 1654
action: 1654, reward: 3.4367879386669937, e:1.0
action: 4072
action: 4072, reward: 3.0154446017351817, e:1.0
action: 4929
action: 4929, reward: 3.434076188318906, e:1.0
action: 5104
action: 5104, reward: 3.4727512924007913, e:1.0
action: 4929
action: 4929, reward: 3.434076188318906, e:1.0
action: 4929
action: 4929, reward: 3.434076188318906, e:1.0
action: 4929
action: 4929, reward:

action: 8408
action: 8408, reward: 3.623301745467128, e:1.0
action: 5139
action: 5139, reward: 3.6366270338315427, e:1.0
action: 839
action: 839, reward: 3.456027934383869, e:1.0
action: 6685
action: 6685, reward: 3.486545570197349, e:1.0
action: 3779
action: 3779, reward: 3.1618932558696162, e:1.0
action: 6909
action: 6909, reward: 3.6855453273071603, e:1.0
action: 8408
action: 8408, reward: 3.623301745467128, e:1.0
action: 8174
action: 8174, reward: 3.3444356897545755, e:1.0
action: 2365
action: 2365, reward: 3.5936771999853874, e:1.0
action: 8408
action: 8408, reward: 3.623301745467128, e:1.0
action: 8408
action: 8408, reward: 3.623301745467128, e:1.0
action: 8408
action: 8408, reward: 3.623301745467128, e:1.0
action: 6132
action: 6132, reward: 3.502158601840355, e:1.0
action: 8631
action: 8631, reward: 3.6308461223577226, e:1.0
action: 6199
action: 6199, reward: 3.696258816729257, e:1.0
action: 8408
action: 8408, reward: 3.623301745467128, e:1.0
action: 5840
action: 5840, reward: 3

action: 8224
action: 8224, reward: 3.4123830237511914, e:1.0
action: 8315
action: 8315, reward: 3.1598481626974984, e:1.0
action: 208
action: 208, reward: 3.63858587038245, e:1.0
action: 8315
action: 8315, reward: 3.1598481626974984, e:1.0
action: 8315
action: 8315, reward: 3.1598481626974984, e:1.0
action: 8315
action: 8315, reward: 3.1598481626974984, e:1.0
action: 8315
action: 8315, reward: 3.1598481626974984, e:1.0
action: 287
action: 287, reward: 3.3941680440650885, e:1.0
action: 8315
action: 8315, reward: 3.1598481626974984, e:1.0
action: 8315
action: 8315, reward: 3.1598481626974984, e:1.0
action: 8315
action: 8315, reward: 3.1598481626974984, e:1.0
action: 8315
action: 8315, reward: 3.1598481626974984, e:1.0
action: 8315
action: 8315, reward: 3.1598481626974984, e:1.0
action: 8315
action: 8315, reward: 3.1598481626974984, e:1.0
action: 8315
action: 8315, reward: 3.1598481626974984, e:1.0
action: 7374
action: 7374, reward: 3.452790347343423, e:1.0
action: 5004
action: 5004, rewa

action: 935
action: 935, reward: 4.1940908343766985, e:1.0
action: 935
action: 935, reward: 4.1940908343766985, e:1.0
action: 935
action: 935, reward: 4.1940908343766985, e:1.0
action: 935
action: 935, reward: 4.1940908343766985, e:1.0
action: 935
action: 935, reward: 4.1940908343766985, e:1.0
action: 7713
action: 7713, reward: 3.581017374699683, e:1.0
action: 935
action: 935, reward: 4.1940908343766985, e:1.0
action: 935
action: 935, reward: 4.1940908343766985, e:1.0
action: 935
action: 935, reward: 4.1940908343766985, e:1.0
action: 9022
action: 9022, reward: 3.6716640766418767, e:1.0
action: 935
action: 935, reward: 4.1940908343766985, e:1.0
action: 935
action: 935, reward: 4.1940908343766985, e:1.0
action: 935
action: 935, reward: 4.1940908343766985, e:1.0
action: 5939
action: 5939, reward: 3.462175535992718, e:1.0
action: 5957
action: 5957, reward: 3.5016164220547403, e:1.0
action: 5957
action: 5957, reward: 3.5016164220547403, e:1.0
action: 5957
action: 5957, reward: 3.50161642205

action: 1570
action: 1570, reward: 3.5646244814636385, e:1.0
action: 1570
action: 1570, reward: 3.5646244814636385, e:1.0
action: 1570
action: 1570, reward: 3.5646244814636385, e:1.0
action: 1570
action: 1570, reward: 3.5646244814636385, e:1.0
action: 1570
action: 1570, reward: 3.5646244814636385, e:1.0
action: 1570
action: 1570, reward: 3.5646244814636385, e:1.0
action: 1570
action: 1570, reward: 3.5646244814636385, e:1.0
action: 6546
action: 6546, reward: 3.389138260537816, e:1.0
action: 4133
action: 4133, reward: 3.4500417919051825, e:1.0
action: 1570
action: 1570, reward: 3.5646244814636385, e:1.0
action: 8713
action: 8713, reward: 3.376517895362999, e:1.0
action: 1570
action: 1570, reward: 3.5646244814636385, e:1.0
action: 1570
action: 1570, reward: 3.5646244814636385, e:1.0
action: 1570
action: 1570, reward: 3.5646244814636385, e:1.0
action: 1570
action: 1570, reward: 3.5646244814636385, e:1.0
action: 1570
action: 1570, reward: 3.5646244814636385, e:1.0
action: 1570
action: 1570,

action: 4547
action: 4547, reward: 3.586540485952745, e:1.0
action: 7531
action: 7531, reward: 3.607388201449843, e:1.0
action: 7531
action: 7531, reward: 3.607388201449843, e:1.0
action: 7531
action: 7531, reward: 3.607388201449843, e:1.0
action: 1189
action: 1189, reward: 3.0694781341668556, e:1.0
action: 7531
action: 7531, reward: 3.607388201449843, e:1.0
action: 6181
action: 6181, reward: 3.252640150344966, e:1.0
action: 7531
action: 7531, reward: 3.607388201449843, e:1.0
action: 7531
action: 7531, reward: 3.607388201449843, e:1.0
action: 7531
action: 7531, reward: 3.607388201449843, e:1.0
action: 7531
action: 7531, reward: 3.607388201449843, e:1.0
action: 7531
action: 7531, reward: 3.607388201449843, e:1.0
action: 7531
action: 7531, reward: 3.607388201449843, e:1.0
action: 7531
action: 7531, reward: 3.607388201449843, e:1.0
action: 7531
action: 7531, reward: 3.607388201449843, e:1.0
action: 7531
action: 7531, reward: 3.607388201449843, e:1.0
action: 7531
action: 7531, reward: 3.60

action: 5475
action: 5475, reward: 3.606176583505359, e:1.0
action: 6332
action: 6332, reward: 3.657164829832808, e:1.0
action: 6332
action: 6332, reward: 3.657164829832808, e:1.0
action: 6332
action: 6332, reward: 3.657164829832808, e:1.0
action: 6332
action: 6332, reward: 3.657164829832808, e:1.0
action: 4900
action: 4900, reward: 3.4732384404924637, e:1.0
action: 1581
action: 1581, reward: 3.0694977731902533, e:1.0
action: 8369
action: 8369, reward: 3.5883814684308977, e:1.0
action: 497
action: 497, reward: 2.8911303299885476, e:1.0
action: 1581
action: 1581, reward: 3.0694977731902533, e:1.0
action: 1581
action: 1581, reward: 3.0694977731902533, e:1.0
action: 576
action: 576, reward: 3.0448370457669305, e:1.0
action: 1581
action: 1581, reward: 3.0694977731902533, e:1.0
action: 1581
action: 1581, reward: 3.0694977731902533, e:1.0
action: 1581
action: 1581, reward: 3.0694977731902533, e:1.0
action: 8375
action: 8375, reward: 3.195224781412777, e:1.0
action: 4478
action: 4478, reward:

action: 7531
action: 7531, reward: 3.607388201449843, e:1.0
action: 7531
action: 7531, reward: 3.607388201449843, e:1.0
action: 7531
action: 7531, reward: 3.607388201449843, e:1.0
action: 3770
action: 3770, reward: 3.127030755869593, e:1.0
action: 2093
action: 2093, reward: 3.1505252686235004, e:1.0
action: 3019
action: 3019, reward: 3.6975284389758802, e:1.0
action: 3019
action: 3019, reward: 3.6975284389758802, e:1.0
action: 184
action: 184, reward: 3.4330257267143662, e:1.0
action: 3019
action: 3019, reward: 3.6975284389758802, e:1.0
action: 3019
action: 3019, reward: 3.6975284389758802, e:1.0
action: 3019
action: 3019, reward: 3.6975284389758802, e:1.0
action: 3019
action: 3019, reward: 3.6975284389758802, e:1.0
action: 3019
action: 3019, reward: 3.6975284389758802, e:1.0
action: 3019
action: 3019, reward: 3.6975284389758802, e:1.0
action: 3019
action: 3019, reward: 3.6975284389758802, e:1.0
action: 3019
action: 3019, reward: 3.6975284389758802, e:1.0
action: 6461
action: 6461, rew

action: 8384
action: 8384, reward: 3.5416133393163816, e:1.0
action: 9346
action: 9346, reward: 3.298913452478452, e:1.0
action: 9346
action: 9346, reward: 3.298913452478452, e:1.0
action: 8827
action: 8827, reward: 3.354749821549082, e:1.0
action: 9346
action: 9346, reward: 3.298913452478452, e:1.0
action: 4195
action: 4195, reward: 3.189063252852089, e:1.0
action: 7169
action: 7169, reward: 3.5708565241311865, e:1.0
action: 6527
action: 6527, reward: 4.0284672696311254, e:1.0
action: 6192
action: 6192, reward: 3.7505694300078987, e:1.0
action: 3718
action: 3718, reward: 3.3699719120784652, e:1.0
action: 8165
action: 8165, reward: 3.53300354030513, e:1.0
action: 989
action: 989, reward: 3.329716455189647, e:1.0
action: 989
action: 989, reward: 3.329716455189647, e:1.0
action: 989
action: 989, reward: 3.329716455189647, e:1.0
action: 989
action: 989, reward: 3.329716455189647, e:1.0
action: 989
action: 989, reward: 3.329716455189647, e:1.0
action: 989
action: 989, reward: 3.32971645518

action: 9346
action: 9346, reward: 3.298913452478452, e:1.0
action: 9346
action: 9346, reward: 3.298913452478452, e:1.0
action: 3091
action: 3091, reward: 3.200482536524367, e:1.0
action: 9346
action: 9346, reward: 3.298913452478452, e:1.0
action: 9346
action: 9346, reward: 3.298913452478452, e:1.0
action: 6825
action: 6825, reward: 3.589404893310373, e:1.0
action: 9346
action: 9346, reward: 3.298913452478452, e:1.0
action: 7549
action: 7549, reward: 3.5846865417343383, e:1.0
action: 9346
action: 9346, reward: 3.298913452478452, e:1.0
action: 9346
action: 9346, reward: 3.298913452478452, e:1.0
action: 9346
action: 9346, reward: 3.298913452478452, e:1.0
action: 4534
action: 4534, reward: 3.5464251254574464, e:1.0
action: 9346
action: 9346, reward: 3.298913452478452, e:1.0
action: 7388
action: 7388, reward: 3.4124805988762734, e:1.0
action: 9346
action: 9346, reward: 3.298913452478452, e:1.0
action: 9346
action: 9346, reward: 3.298913452478452, e:1.0
action: 2306
action: 2306, reward: 3.

action: 2205
action: 2205, reward: 3.533644403803965, e:1.0
action: 3019
action: 3019, reward: 3.6975284389758802, e:1.0
action: 3019
action: 3019, reward: 3.6975284389758802, e:1.0
action: 3019
action: 3019, reward: 3.6975284389758802, e:1.0
action: 3019
action: 3019, reward: 3.6975284389758802, e:1.0
action: 3019
action: 3019, reward: 3.6975284389758802, e:1.0
action: 3019
action: 3019, reward: 3.6975284389758802, e:1.0
action: 3019
action: 3019, reward: 3.6975284389758802, e:1.0
action: 3019
action: 3019, reward: 3.6975284389758802, e:1.0
action: 3019
action: 3019, reward: 3.6975284389758802, e:1.0
action: 6994
action: 6994, reward: 3.34570556577972, e:1.0
action: 3807
action: 3807, reward: 3.7272701412705826, e:1.0
action: 3807
action: 3807, reward: 3.7272701412705826, e:1.0
action: 3190
action: 3190, reward: 3.414096799894012, e:1.0
action: 1806
action: 1806, reward: 3.5155684349141323, e:1.0
action: 1806
action: 1806, reward: 3.5155684349141323, e:1.0
action: 655
action: 655, rew

action: 9346
action: 9346, reward: 3.298913452478452, e:1.0
action: 8869
action: 8869, reward: 3.623865854304146, e:1.0
action: 2020
action: 2020, reward: 4.057852906005392, e:1.0
action: 9346
action: 9346, reward: 3.298913452478452, e:1.0
action: 9346
action: 9346, reward: 3.298913452478452, e:1.0
action: 1298
action: 1298, reward: 3.8795668540481403, e:1.0
action: 5410
action: 5410, reward: 3.5737617212429713, e:1.0
action: 9346
action: 9346, reward: 3.298913452478452, e:1.0
action: 9346
action: 9346, reward: 3.298913452478452, e:1.0
action: 9346
action: 9346, reward: 3.298913452478452, e:1.0
action: 5107
action: 5107, reward: 3.4989001477804624, e:1.0
action: 573
action: 573, reward: 3.283636320781426, e:1.0
action: 9346
action: 9346, reward: 3.298913452478452, e:1.0
action: 9346
action: 9346, reward: 3.298913452478452, e:1.0
action: 9346
action: 9346, reward: 3.298913452478452, e:1.0
action: 9033
action: 9033, reward: 3.386873787619515, e:1.0
action: 3092
action: 3092, reward: 3.70

action: 6824
action: 6824, reward: 3.6605171661011378, e:1.0
action: 8315
action: 8315, reward: 3.1598481626974984, e:1.0
action: 6824
action: 6824, reward: 3.6605171661011378, e:1.0
action: 482
action: 482, reward: 3.508235547659782, e:1.0
action: 6824
action: 6824, reward: 3.6605171661011378, e:1.0
action: 6413
action: 6413, reward: 2.7653128395200053, e:1.0
action: 7264
action: 7264, reward: 3.551765927072044, e:1.0
action: 8315
action: 8315, reward: 3.1598481626974984, e:1.0
action: 8315
action: 8315, reward: 3.1598481626974984, e:1.0
action: 8315
action: 8315, reward: 3.1598481626974984, e:1.0
action: 5971
action: 5971, reward: 3.552726730656214, e:1.0
action: 8315
action: 8315, reward: 3.1598481626974984, e:1.0
action: 8315
action: 8315, reward: 3.1598481626974984, e:1.0
action: 8315
action: 8315, reward: 3.1598481626974984, e:1.0
action: 8315
action: 8315, reward: 3.1598481626974984, e:1.0
action: 8315
action: 8315, reward: 3.1598481626974984, e:1.0
action: 8315
action: 8315, re

action: 8408
action: 8408, reward: 3.623301745467128, e:1.0
action: 8408
action: 8408, reward: 3.623301745467128, e:1.0
action: 8851
action: 8851, reward: 2.8872037888190833, e:1.0
action: 8094
action: 8094, reward: 3.5459102879124065, e:1.0
action: 8408
action: 8408, reward: 3.623301745467128, e:1.0
action: 8408
action: 8408, reward: 3.623301745467128, e:1.0
action: 8408
action: 8408, reward: 3.623301745467128, e:1.0
action: 8408
action: 8408, reward: 3.623301745467128, e:1.0
action: 8408
action: 8408, reward: 3.623301745467128, e:1.0
action: 8408
action: 8408, reward: 3.623301745467128, e:1.0
action: 2692
action: 2692, reward: 3.5345982886074983, e:1.0
action: 5672
action: 5672, reward: 3.666697818257321, e:1.0
action: 5672
action: 5672, reward: 3.666697818257321, e:1.0
action: 5672
action: 5672, reward: 3.666697818257321, e:1.0
action: 5672
action: 5672, reward: 3.666697818257321, e:1.0
action: 5672
action: 5672, reward: 3.666697818257321, e:1.0
action: 5672
action: 5672, reward: 3.

action: 4801
action: 4801, reward: 3.1319844717788636, e:1.0
action: 8408
action: 8408, reward: 3.623301745467128, e:1.0
action: 951
action: 951, reward: 4.156313566399177, e:1.0
action: 937
action: 937, reward: 4.304065391555807, e:1.0
action: 4307
action: 4307, reward: 3.3356837053578965, e:1.0
action: 8408
action: 8408, reward: 3.623301745467128, e:1.0
action: 5180
action: 5180, reward: 2.5282677764112704, e:1.0
action: 8408
action: 8408, reward: 3.623301745467128, e:1.0
action: 8408
action: 8408, reward: 3.623301745467128, e:1.0
action: 2482
action: 2482, reward: 3.8842117198478983, e:1.0
action: 8408
action: 8408, reward: 3.623301745467128, e:1.0
action: 8174
action: 8174, reward: 3.3444356897545755, e:1.0
action: 4307
action: 4307, reward: 3.3356837053578965, e:1.0
action: 4307
action: 4307, reward: 3.3356837053578965, e:1.0
action: 4307
action: 4307, reward: 3.3356837053578965, e:1.0
action: 4307
action: 4307, reward: 3.3356837053578965, e:1.0
action: 4307
action: 4307, reward: 

action: 7531
action: 7531, reward: 3.607388201449843, e:1.0
action: 7531
action: 7531, reward: 3.607388201449843, e:1.0
action: 7531
action: 7531, reward: 3.607388201449843, e:1.0
action: 7531
action: 7531, reward: 3.607388201449843, e:1.0
action: 7531
action: 7531, reward: 3.607388201449843, e:1.0
action: 7531
action: 7531, reward: 3.607388201449843, e:1.0
action: 7347
action: 7347, reward: 3.601399750627774, e:1.0
action: 9187
action: 9187, reward: 3.502447651361185, e:1.0
action: 7531
action: 7531, reward: 3.607388201449843, e:1.0
action: 1366
action: 1366, reward: 3.2807356707382462, e:1.0
action: 7531
action: 7531, reward: 3.607388201449843, e:1.0
action: 9026
action: 9026, reward: 3.4697601597082905, e:1.0
action: 7531
action: 7531, reward: 3.607388201449843, e:1.0
action: 7531
action: 7531, reward: 3.607388201449843, e:1.0
action: 7531
action: 7531, reward: 3.607388201449843, e:1.0
action: 7531
action: 7531, reward: 3.607388201449843, e:1.0
action: 7531
action: 7531, reward: 3.6

action: 9346
action: 9346, reward: 3.298913452478452, e:1.0
action: 5210
action: 5210, reward: 3.147670284734422, e:1.0
action: 9346
action: 9346, reward: 3.298913452478452, e:1.0
action: 6098
action: 6098, reward: 3.4826395410400144, e:1.0
action: 9346
action: 9346, reward: 3.298913452478452, e:1.0
action: 2514
action: 2514, reward: 3.9457606937297642, e:1.0
action: 9346
action: 9346, reward: 3.298913452478452, e:1.0
action: 9346
action: 9346, reward: 3.298913452478452, e:1.0
action: 3627
action: 3627, reward: 3.7968916617119617, e:1.0
action: 9346
action: 9346, reward: 3.298913452478452, e:1.0
action: 6611
action: 6611, reward: 3.437066726219429, e:1.0
action: 9346
action: 9346, reward: 3.298913452478452, e:1.0
action: 9346
action: 9346, reward: 3.298913452478452, e:1.0
action: 4944
action: 4944, reward: 3.9215865056853976, e:1.0
action: 2213
action: 2213, reward: 3.820243689184215, e:1.0
action: 9346
action: 9346, reward: 3.298913452478452, e:1.0
action: 9346
action: 9346, reward: 3

action: 8315
action: 8315, reward: 3.1598481626974984, e:1.0
action: 8315
action: 8315, reward: 3.1598481626974984, e:1.0
action: 8315
action: 8315, reward: 3.1598481626974984, e:1.0
action: 6107
action: 6107, reward: 3.1503601658123324, e:1.0
action: 1806
action: 1806, reward: 3.5155684349141323, e:1.0
action: 8657
action: 8657, reward: 3.5247389085232887, e:1.0
action: 2362
action: 2362, reward: 3.074578735732718, e:1.0
action: 1806
action: 1806, reward: 3.5155684349141323, e:1.0
action: 1806
action: 1806, reward: 3.5155684349141323, e:1.0
action: 1806
action: 1806, reward: 3.5155684349141323, e:1.0
action: 1806
action: 1806, reward: 3.5155684349141323, e:1.0
action: 1806
action: 1806, reward: 3.5155684349141323, e:1.0
action: 1806
action: 1806, reward: 3.5155684349141323, e:1.0
action: 8310
action: 8310, reward: 3.5881583064239106, e:1.0
action: 1806
action: 1806, reward: 3.5155684349141323, e:1.0
action: 1806
action: 1806, reward: 3.5155684349141323, e:1.0
action: 6142
action: 6142

action: 8315
action: 8315, reward: 3.1598481626974984, e:1.0
action: 8315
action: 8315, reward: 3.1598481626974984, e:1.0
action: 8315
action: 8315, reward: 3.1598481626974984, e:1.0
action: 8315
action: 8315, reward: 3.1598481626974984, e:1.0
action: 8315
action: 8315, reward: 3.1598481626974984, e:1.0
action: 8315
action: 8315, reward: 3.1598481626974984, e:1.0
action: 8315
action: 8315, reward: 3.1598481626974984, e:1.0
action: 8315
action: 8315, reward: 3.1598481626974984, e:1.0
action: 8315
action: 8315, reward: 3.1598481626974984, e:1.0
action: 8315
action: 8315, reward: 3.1598481626974984, e:1.0
action: 8176
action: 8176, reward: 3.493933727256228, e:1.0
action: 8315
action: 8315, reward: 3.1598481626974984, e:1.0
action: 8315
action: 8315, reward: 3.1598481626974984, e:1.0
action: 8315
action: 8315, reward: 3.1598481626974984, e:1.0
action: 8315
action: 8315, reward: 3.1598481626974984, e:1.0
action: 8315
action: 8315, reward: 3.1598481626974984, e:1.0
action: 4386
action: 4386

action: 4495
action: 4495, reward: 3.4675570109906872, e:1.0
action: 4495
action: 4495, reward: 3.4675570109906872, e:1.0
action: 4495
action: 4495, reward: 3.4675570109906872, e:1.0
action: 2574
action: 2574, reward: 3.2055857573780595, e:1.0
action: 4495
action: 4495, reward: 3.4675570109906872, e:1.0
action: 4495
action: 4495, reward: 3.4675570109906872, e:1.0
action: 935
action: 935, reward: 4.1940908343766985, e:1.0
action: 8691
action: 8691, reward: 3.770810242371262, e:1.0
action: 8691
action: 8691, reward: 3.770810242371262, e:1.0
action: 8691
action: 8691, reward: 3.770810242371262, e:1.0
action: 8691
action: 8691, reward: 3.770810242371262, e:1.0
action: 7200
action: 7200, reward: 3.399092108620373, e:1.0
action: 935
action: 935, reward: 4.1940908343766985, e:1.0
action: 935
action: 935, reward: 4.1940908343766985, e:1.0
action: 935
action: 935, reward: 4.1940908343766985, e:1.0
action: 935
action: 935, reward: 4.1940908343766985, e:1.0
action: 935
action: 935, reward: 4.1940

action: 3019
action: 3019, reward: 3.6975284389758802, e:1.0
action: 3019
action: 3019, reward: 3.6975284389758802, e:1.0
action: 5747
action: 5747, reward: 3.106613483494426, e:1.0
action: 3019
action: 3019, reward: 3.6975284389758802, e:1.0
action: 3019
action: 3019, reward: 3.6975284389758802, e:1.0
action: 3019
action: 3019, reward: 3.6975284389758802, e:1.0
action: 3019
action: 3019, reward: 3.6975284389758802, e:1.0
action: 3019
action: 3019, reward: 3.6975284389758802, e:1.0
action: 3019
action: 3019, reward: 3.6975284389758802, e:1.0
action: 3019
action: 3019, reward: 3.6975284389758802, e:1.0
action: 4030
action: 4030, reward: 3.2029881501987987, e:1.0
action: 3019
action: 3019, reward: 3.6975284389758802, e:1.0
action: 3019
action: 3019, reward: 3.6975284389758802, e:1.0
action: 3019
action: 3019, reward: 3.6975284389758802, e:1.0
action: 3019
action: 3019, reward: 3.6975284389758802, e:1.0
action: 3019
action: 3019, reward: 3.6975284389758802, e:1.0
action: 3019
action: 3019

action: 8408
action: 8408, reward: 3.623301745467128, e:1.0
action: 8408
action: 8408, reward: 3.623301745467128, e:1.0
action: 6520
action: 6520, reward: 2.9023845795047607, e:1.0
action: 6520
action: 6520, reward: 2.9023845795047607, e:1.0
action: 6520
action: 6520, reward: 2.9023845795047607, e:1.0
action: 6520
action: 6520, reward: 2.9023845795047607, e:1.0
action: 8155
action: 8155, reward: 3.437294550300913, e:1.0
action: 6277
action: 6277, reward: 3.8251465866111194, e:1.0
action: 2119
action: 2119, reward: 3.054887487621982, e:1.0
action: 2119
action: 2119, reward: 3.054887487621982, e:1.0
action: 2119
action: 2119, reward: 3.054887487621982, e:1.0
action: 7377
action: 7377, reward: 3.34509760085783, e:1.0
action: 7991
action: 7991, reward: 3.3414342988415155, e:1.0
action: 3858
action: 3858, reward: 3.598464070744818, e:1.0
action: 2119
action: 2119, reward: 3.054887487621982, e:1.0
action: 2119
action: 2119, reward: 3.054887487621982, e:1.0
action: 4721
action: 4721, reward: 

action: 4929
action: 4929, reward: 3.434076188318906, e:1.0
action: 4929
action: 4929, reward: 3.434076188318906, e:1.0
action: 4929
action: 4929, reward: 3.434076188318906, e:1.0
action: 4929
action: 4929, reward: 3.434076188318906, e:1.0
action: 4929
action: 4929, reward: 3.434076188318906, e:1.0
action: 4929
action: 4929, reward: 3.434076188318906, e:1.0
action: 4929
action: 4929, reward: 3.434076188318906, e:1.0
action: 4929
action: 4929, reward: 3.434076188318906, e:1.0
action: 1046
action: 1046, reward: 3.8369447466979785, e:1.0
action: 4929
action: 4929, reward: 3.434076188318906, e:1.0
action: 4929
action: 4929, reward: 3.434076188318906, e:1.0
action: 4929
action: 4929, reward: 3.434076188318906, e:1.0
action: 3059
action: 3059, reward: 3.9215302759131387, e:1.0
action: 4929
action: 4929, reward: 3.434076188318906, e:1.0
action: 4929
action: 4929, reward: 3.434076188318906, e:1.0
action: 904
action: 904, reward: 3.7027989179470024, e:1.0
action: 8320
action: 8320, reward: 3.49

action: 8512
action: 8512, reward: 3.491100235718908, e:1.0
action: 8512
action: 8512, reward: 3.491100235718908, e:1.0
action: 8512
action: 8512, reward: 3.491100235718908, e:1.0
action: 8512
action: 8512, reward: 3.491100235718908, e:1.0
action: 8512
action: 8512, reward: 3.491100235718908, e:1.0
action: 8512
action: 8512, reward: 3.491100235718908, e:1.0
action: 8512
action: 8512, reward: 3.491100235718908, e:1.0
action: 936
action: 936, reward: 3.874577128257414, e:1.0
action: 8512
action: 8512, reward: 3.491100235718908, e:1.0
action: 8512
action: 8512, reward: 3.491100235718908, e:1.0
action: 8512
action: 8512, reward: 3.491100235718908, e:1.0
action: 8512
action: 8512, reward: 3.491100235718908, e:1.0
action: 8512
action: 8512, reward: 3.491100235718908, e:1.0
action: 8436
action: 8436, reward: 3.6268945990540224, e:1.0
action: 4740
action: 4740, reward: 3.600914518493012, e:1.0
action: 8512
action: 8512, reward: 3.491100235718908, e:1.0
action: 8512
action: 8512, reward: 3.4911

action: 3837
action: 3837, reward: 3.6096587372870568, e:1.0
action: 5653
action: 5653, reward: 3.5265762057546026, e:1.0
action: 3837
action: 3837, reward: 3.6096587372870568, e:1.0
action: 2222
action: 2222, reward: 3.9530222740457406, e:1.0
action: 5472
action: 5472, reward: 3.568882907237171, e:1.0
action: 8737
action: 8737, reward: 3.548488496177496, e:1.0
action: 3837
action: 3837, reward: 3.6096587372870568, e:1.0
action: 3837
action: 3837, reward: 3.6096587372870568, e:1.0
action: 3837
action: 3837, reward: 3.6096587372870568, e:1.0
action: 6537
action: 6537, reward: 3.936987339336073, e:1.0
action: 3837
action: 3837, reward: 3.6096587372870568, e:1.0
action: 3936
action: 3936, reward: 3.700431718757864, e:1.0
action: 3837
action: 3837, reward: 3.6096587372870568, e:1.0
action: 3837
action: 3837, reward: 3.6096587372870568, e:1.0
action: 3837
action: 3837, reward: 3.6096587372870568, e:1.0
action: 3837
action: 3837, reward: 3.6096587372870568, e:1.0
action: 3837
action: 3837, r

action: 8315
action: 8315, reward: 3.1598481626974984, e:1.0
action: 8315
action: 8315, reward: 3.1598481626974984, e:1.0
action: 8315
action: 8315, reward: 3.1598481626974984, e:1.0
action: 5445
action: 5445, reward: 3.619018512769876, e:1.0
action: 5445
action: 5445, reward: 3.619018512769876, e:1.0
action: 5445
action: 5445, reward: 3.619018512769876, e:1.0
action: 5445
action: 5445, reward: 3.619018512769876, e:1.0
action: 5445
action: 5445, reward: 3.619018512769876, e:1.0
action: 5185
action: 5185, reward: 3.583550082040188, e:1.0
action: 5445
action: 5445, reward: 3.619018512769876, e:1.0
action: 5445
action: 5445, reward: 3.619018512769876, e:1.0
action: 3369
action: 3369, reward: 2.918673557206564, e:1.0
action: 3197
action: 3197, reward: 3.0251329062439978, e:1.0
action: 5445
action: 5445, reward: 3.619018512769876, e:1.0
action: 5445
action: 5445, reward: 3.619018512769876, e:1.0
action: 5445
action: 5445, reward: 3.619018512769876, e:1.0
action: 5445
action: 5445, reward: 3

action: 6158
action: 6158, reward: 3.5105850537883385, e:1.0
action: 3411
action: 3411, reward: 3.647080776735062, e:1.0
action: 3411
action: 3411, reward: 3.647080776735062, e:1.0
action: 9510
action: 9510, reward: 3.542154049132835, e:1.0
action: 5290
action: 5290, reward: 3.5143880432328434, e:1.0
action: 181
action: 181, reward: 3.609305217259632, e:1.0
action: 3791
action: 3791, reward: 3.516196754913639, e:1.0
action: 4495
action: 4495, reward: 3.4675570109906872, e:1.0
action: 5854
action: 5854, reward: 3.4929507398019344, e:1.0
action: 3019
action: 3019, reward: 3.6975284389758802, e:1.0
action: 8691
action: 8691, reward: 3.770810242371262, e:1.0
action: 4495
action: 4495, reward: 3.4675570109906872, e:1.0
action: 4495
action: 4495, reward: 3.4675570109906872, e:1.0
action: 6260
action: 6260, reward: 3.4314609822081654, e:1.0
action: 5375
action: 5375, reward: 3.4256799395333575, e:1.0
action: 1806
action: 1806, reward: 3.5155684349141323, e:1.0
action: 1806
action: 1806, rewar

action: 9649
action: 9649, reward: 3.7010964338017507, e:1.0
action: 9649
action: 9649, reward: 3.7010964338017507, e:1.0
action: 9649
action: 9649, reward: 3.7010964338017507, e:1.0
action: 9649
action: 9649, reward: 3.7010964338017507, e:1.0
action: 9649
action: 9649, reward: 3.7010964338017507, e:1.0
action: 9649
action: 9649, reward: 3.7010964338017507, e:1.0
action: 9649
action: 9649, reward: 3.7010964338017507, e:1.0
action: 9649
action: 9649, reward: 3.7010964338017507, e:1.0
action: 9649
action: 9649, reward: 3.7010964338017507, e:1.0
action: 1208
action: 1208, reward: 3.0940382911645132, e:1.0
action: 8458
action: 8458, reward: 3.123063878889947, e:1.0
action: 8458
action: 8458, reward: 3.123063878889947, e:1.0
action: 8458
action: 8458, reward: 3.123063878889947, e:1.0
action: 3783
action: 3783, reward: 3.9360143115065975, e:1.0
action: 8458
action: 8458, reward: 3.123063878889947, e:1.0
action: 3210
action: 3210, reward: 3.9636330565587468, e:1.0
action: 8350
action: 8350, r

action: 935
action: 935, reward: 4.1940908343766985, e:1.0
action: 6904
action: 6904, reward: 3.6183144625069077, e:1.0
action: 1581
action: 1581, reward: 3.0694977731902533, e:1.0
action: 1581
action: 1581, reward: 3.0694977731902533, e:1.0
action: 8969
action: 8969, reward: 3.2271641174471855, e:1.0
action: 8142
action: 8142, reward: 3.3859045820439166, e:1.0
action: 6837
action: 6837, reward: 2.6410290075335974, e:1.0
action: 5957
action: 5957, reward: 3.5016164220547403, e:1.0
action: 6837
action: 6837, reward: 2.6410290075335974, e:1.0
action: 4409
action: 4409, reward: 2.7089559561409007, e:1.0
action: 5248
action: 5248, reward: 3.705545868687631, e:1.0
action: 935
action: 935, reward: 4.1940908343766985, e:1.0
action: 935
action: 935, reward: 4.1940908343766985, e:1.0
action: 8765
action: 8765, reward: 3.6655713178384635, e:1.0
action: 5957
action: 5957, reward: 3.5016164220547403, e:1.0
action: 5957
action: 5957, reward: 3.5016164220547403, e:1.0
action: 5957
action: 5957, rewa

action: 6481
action: 6481, reward: 3.752198996673205, e:1.0
action: 6481
action: 6481, reward: 3.752198996673205, e:1.0
action: 6481
action: 6481, reward: 3.752198996673205, e:1.0
action: 5310
action: 5310, reward: 3.097623549312307, e:1.0
action: 6481
action: 6481, reward: 3.752198996673205, e:1.0
action: 6481
action: 6481, reward: 3.752198996673205, e:1.0
action: 9240
action: 9240, reward: 3.4610742520806173, e:1.0
action: 8315
action: 8315, reward: 3.1598481626974984, e:1.0
action: 8315
action: 8315, reward: 3.1598481626974984, e:1.0
action: 6012
action: 6012, reward: 3.3959895627936727, e:1.0
action: 8315
action: 8315, reward: 3.1598481626974984, e:1.0
action: 8315
action: 8315, reward: 3.1598481626974984, e:1.0
action: 8315
action: 8315, reward: 3.1598481626974984, e:1.0
action: 7207
action: 7207, reward: 3.769623806372576, e:1.0
action: 8315
action: 8315, reward: 3.1598481626974984, e:1.0
action: 3698
action: 3698, reward: 3.2181333100639287, e:1.0
action: 3920
action: 3920, rewa

action: 2424
action: 2424, reward: 3.987442368065099, e:1.0
action: 3347
action: 3347, reward: 3.3453385992746205, e:1.0
action: 2424
action: 2424, reward: 3.987442368065099, e:1.0
action: 2424
action: 2424, reward: 3.987442368065099, e:1.0
action: 2424
action: 2424, reward: 3.987442368065099, e:1.0
action: 2424
action: 2424, reward: 3.987442368065099, e:1.0
action: 9716
action: 9716, reward: 3.6194216440679767, e:1.0
action: 2424
action: 2424, reward: 3.987442368065099, e:1.0
action: 2424
action: 2424, reward: 3.987442368065099, e:1.0
action: 7651
action: 7651, reward: 3.507260022125717, e:1.0
action: 1343
action: 1343, reward: 3.293776244851143, e:1.0
action: 2424
action: 2424, reward: 3.987442368065099, e:1.0
action: 2424
action: 2424, reward: 3.987442368065099, e:1.0
action: 2424
action: 2424, reward: 3.987442368065099, e:1.0
action: 2424
action: 2424, reward: 3.987442368065099, e:1.0
action: 2424
action: 2424, reward: 3.987442368065099, e:1.0
action: 2424
action: 2424, reward: 3.9

action: 8315
action: 8315, reward: 3.1598481626974984, e:1.0
action: 5987
action: 5987, reward: 3.884864580072357, e:1.0
action: 8306
action: 8306, reward: 3.435265409216254, e:1.0
action: 8315
action: 8315, reward: 3.1598481626974984, e:1.0
action: 2324
action: 2324, reward: 3.636177433459622, e:1.0
action: 3807
action: 3807, reward: 3.7272701412705826, e:1.0
action: 8315
action: 8315, reward: 3.1598481626974984, e:1.0
action: 8315
action: 8315, reward: 3.1598481626974984, e:1.0
action: 8315
action: 8315, reward: 3.1598481626974984, e:1.0
action: 8315
action: 8315, reward: 3.1598481626974984, e:1.0
action: 4875
action: 4875, reward: 3.3102715325160608, e:1.0
action: 3807
action: 3807, reward: 3.7272701412705826, e:1.0
action: 1131
action: 1131, reward: 3.51160687939526, e:1.0
action: 3807
action: 3807, reward: 3.7272701412705826, e:1.0
action: 3807
action: 3807, reward: 3.7272701412705826, e:1.0
action: 3807
action: 3807, reward: 3.7272701412705826, e:1.0
action: 3807
action: 3807, re

action: 8315
action: 8315, reward: 3.1598481626974984, e:1.0
action: 8315
action: 8315, reward: 3.1598481626974984, e:1.0
action: 8315
action: 8315, reward: 3.1598481626974984, e:1.0
action: 8315
action: 8315, reward: 3.1598481626974984, e:1.0
action: 8315
action: 8315, reward: 3.1598481626974984, e:1.0
action: 8315
action: 8315, reward: 3.1598481626974984, e:1.0
action: 8315
action: 8315, reward: 3.1598481626974984, e:1.0
action: 8315
action: 8315, reward: 3.1598481626974984, e:1.0
action: 8315
action: 8315, reward: 3.1598481626974984, e:1.0
action: 8315
action: 8315, reward: 3.1598481626974984, e:1.0
action: 8315
action: 8315, reward: 3.1598481626974984, e:1.0
action: 8315
action: 8315, reward: 3.1598481626974984, e:1.0
action: 8315
action: 8315, reward: 3.1598481626974984, e:1.0
action: 8315
action: 8315, reward: 3.1598481626974984, e:1.0
action: 8315
action: 8315, reward: 3.1598481626974984, e:1.0
action: 8315
action: 8315, reward: 3.1598481626974984, e:1.0
action: 1268
action: 126

action: 2670
action: 2670, reward: 3.015812551131464, e:1.0
action: 2670
action: 2670, reward: 3.015812551131464, e:1.0
action: 2297
action: 2297, reward: 3.8014716887973887, e:1.0
action: 9228
action: 9228, reward: 3.5408011544112896, e:1.0
action: 2670
action: 2670, reward: 3.015812551131464, e:1.0
action: 2670
action: 2670, reward: 3.015812551131464, e:1.0
action: 2670
action: 2670, reward: 3.015812551131464, e:1.0
action: 2670
action: 2670, reward: 3.015812551131464, e:1.0
action: 2670
action: 2670, reward: 3.015812551131464, e:1.0
action: 2670
action: 2670, reward: 3.015812551131464, e:1.0
action: 2670
action: 2670, reward: 3.015812551131464, e:1.0
action: 8095
action: 8095, reward: 3.369693292071601, e:1.0
action: 2670
action: 2670, reward: 3.015812551131464, e:1.0
action: 2670
action: 2670, reward: 3.015812551131464, e:1.0
action: 2670
action: 2670, reward: 3.015812551131464, e:1.0
action: 2670
action: 2670, reward: 3.015812551131464, e:1.0
action: 7199
action: 7199, reward: 3.7

action: 3019
action: 3019, reward: 3.6975284389758802, e:1.0
action: 6860
action: 6860, reward: 3.376227496066595, e:1.0
action: 3019
action: 3019, reward: 3.6975284389758802, e:1.0
action: 3019
action: 3019, reward: 3.6975284389758802, e:1.0
action: 3019
action: 3019, reward: 3.6975284389758802, e:1.0
action: 3019
action: 3019, reward: 3.6975284389758802, e:1.0
action: 3019
action: 3019, reward: 3.6975284389758802, e:1.0
action: 8999
action: 8999, reward: 3.50086696080912, e:1.0
action: 3019
action: 3019, reward: 3.6975284389758802, e:1.0
action: 3019
action: 3019, reward: 3.6975284389758802, e:1.0
action: 3019
action: 3019, reward: 3.6975284389758802, e:1.0
action: 3019
action: 3019, reward: 3.6975284389758802, e:1.0
action: 7154
action: 7154, reward: 3.849062614251364, e:1.0
action: 7632
action: 7632, reward: 3.452995535945189, e:1.0
action: 2790
action: 2790, reward: 3.535564748772382, e:1.0
action: 3019
action: 3019, reward: 3.6975284389758802, e:1.0
action: 3019
action: 3019, rew

action: 5042
action: 5042, reward: 3.517778670812554, e:1.0
action: 2285
action: 2285, reward: 3.6843439943434215, e:1.0
action: 2285
action: 2285, reward: 3.6843439943434215, e:1.0
action: 2285
action: 2285, reward: 3.6843439943434215, e:1.0
action: 2285
action: 2285, reward: 3.6843439943434215, e:1.0
action: 2285
action: 2285, reward: 3.6843439943434215, e:1.0
action: 5528
action: 5528, reward: 3.4741549065738666, e:1.0
action: 3411
action: 3411, reward: 3.647080776735062, e:1.0
action: 3676
action: 3676, reward: 3.8902682611827344, e:1.0
action: 3411
action: 3411, reward: 3.647080776735062, e:1.0
action: 3411
action: 3411, reward: 3.647080776735062, e:1.0
action: 3411
action: 3411, reward: 3.647080776735062, e:1.0
action: 3411
action: 3411, reward: 3.647080776735062, e:1.0
action: 7437
action: 7437, reward: 3.4970897626504844, e:1.0
action: 1581
action: 1581, reward: 3.0694977731902533, e:1.0
action: 2285
action: 2285, reward: 3.6843439943434215, e:1.0
action: 2285
action: 2285, rew

action: 4918
action: 4918, reward: 3.3907138967700226, e:1.0
action: 9449
action: 9449, reward: 3.649620896007754, e:1.0
action: 6853
action: 6853, reward: 3.2504917579831423, e:1.0
action: 6688
action: 6688, reward: 3.2516978060661543, e:1.0
action: 6853
action: 6853, reward: 3.2504917579831423, e:1.0
action: 6853
action: 6853, reward: 3.2504917579831423, e:1.0
action: 6853
action: 6853, reward: 3.2504917579831423, e:1.0
action: 2347
action: 2347, reward: 3.83011540128676, e:1.0
action: 6853
action: 6853, reward: 3.2504917579831423, e:1.0
action: 6853
action: 6853, reward: 3.2504917579831423, e:1.0
action: 6853
action: 6853, reward: 3.2504917579831423, e:1.0
action: 6853
action: 6853, reward: 3.2504917579831423, e:1.0
action: 6853
action: 6853, reward: 3.2504917579831423, e:1.0
action: 5038
action: 5038, reward: 3.633753236236085, e:1.0
action: 6388
action: 6388, reward: 3.5222516403149817, e:1.0
action: 6853
action: 6853, reward: 3.2504917579831423, e:1.0
action: 6853
action: 6853, r

action: 8691
action: 8691, reward: 3.770810242371262, e:1.0
action: 8691
action: 8691, reward: 3.770810242371262, e:1.0
action: 8691
action: 8691, reward: 3.770810242371262, e:1.0
action: 3952
action: 3952, reward: 3.539757477335545, e:1.0
action: 8691
action: 8691, reward: 3.770810242371262, e:1.0
action: 2897
action: 2897, reward: 2.926490832629342, e:1.0
action: 8691
action: 8691, reward: 3.770810242371262, e:1.0
action: 8691
action: 8691, reward: 3.770810242371262, e:1.0
action: 6323
action: 6323, reward: 3.6439990496705903, e:1.0
action: 8691
action: 8691, reward: 3.770810242371262, e:1.0
action: 8691
action: 8691, reward: 3.770810242371262, e:1.0
action: 8691
action: 8691, reward: 3.770810242371262, e:1.0
action: 8691
action: 8691, reward: 3.770810242371262, e:1.0
action: 8691
action: 8691, reward: 3.770810242371262, e:1.0
action: 8691
action: 8691, reward: 3.770810242371262, e:1.0
action: 8722
action: 8722, reward: 3.73750073788451, e:1.0
action: 8691
action: 8691, reward: 3.770

action: 3807
action: 3807, reward: 3.7272701412705826, e:1.0
action: 3807
action: 3807, reward: 3.7272701412705826, e:1.0
action: 3807
action: 3807, reward: 3.7272701412705826, e:1.0
action: 3807
action: 3807, reward: 3.7272701412705826, e:1.0
action: 3435
action: 3435, reward: 3.21401684776589, e:1.0
action: 1518
action: 1518, reward: 3.32803852604889, e:1.0
action: 1144
action: 1144, reward: 3.4207097858072393, e:1.0
action: 8691
action: 8691, reward: 3.770810242371262, e:1.0
action: 2022
action: 2022, reward: 3.2233129098929507, e:1.0
action: 8691
action: 8691, reward: 3.770810242371262, e:1.0
action: 1736
action: 1736, reward: 3.390254721986627, e:1.0
action: 8691
action: 8691, reward: 3.770810242371262, e:1.0
action: 504
action: 504, reward: 3.1746828688489375, e:1.0
action: 8691
action: 8691, reward: 3.770810242371262, e:1.0
action: 3061
action: 3061, reward: 3.7767809892559194, e:1.0
action: 8691
action: 8691, reward: 3.770810242371262, e:1.0
action: 8691
action: 8691, reward: 3

action: 5445
action: 5445, reward: 3.619018512769876, e:1.0
action: 5445
action: 5445, reward: 3.619018512769876, e:1.0
action: 5445
action: 5445, reward: 3.619018512769876, e:1.0
action: 5445
action: 5445, reward: 3.619018512769876, e:1.0
action: 5445
action: 5445, reward: 3.619018512769876, e:1.0
action: 5445
action: 5445, reward: 3.619018512769876, e:1.0
action: 5445
action: 5445, reward: 3.619018512769876, e:1.0
action: 9089
action: 9089, reward: 3.6835349271396574, e:1.0
action: 4240
action: 4240, reward: 3.6038006550858466, e:1.0
action: 9425
action: 9425, reward: 3.4851605275990054, e:1.0
action: 4240
action: 4240, reward: 3.6038006550858466, e:1.0
action: 4240
action: 4240, reward: 3.6038006550858466, e:1.0
action: 4240
action: 4240, reward: 3.6038006550858466, e:1.0
action: 4240
action: 4240, reward: 3.6038006550858466, e:1.0
action: 4240
action: 4240, reward: 3.6038006550858466, e:1.0
action: 5280
action: 5280, reward: 3.4137505783769626, e:1.0
action: 4240
action: 4240, rewa

action: 3799
action: 3799, reward: 2.6840338050135317, e:1.0
action: 8583
action: 8583, reward: 3.912816365980268, e:1.0
action: 935
action: 935, reward: 4.1940908343766985, e:1.0
action: 4474
action: 4474, reward: 3.735534555954713, e:1.0
action: 4474
action: 4474, reward: 3.735534555954713, e:1.0
action: 4474
action: 4474, reward: 3.735534555954713, e:1.0
action: 4474
action: 4474, reward: 3.735534555954713, e:1.0
action: 4474
action: 4474, reward: 3.735534555954713, e:1.0
action: 8662
action: 8662, reward: 3.5004476755073037, e:1.0
action: 4474
action: 4474, reward: 3.735534555954713, e:1.0
action: 4474
action: 4474, reward: 3.735534555954713, e:1.0
action: 4474
action: 4474, reward: 3.735534555954713, e:1.0
action: 4474
action: 4474, reward: 3.735534555954713, e:1.0
action: 4474
action: 4474, reward: 3.735534555954713, e:1.0
action: 6788
action: 6788, reward: 3.6504040652515672, e:1.0
action: 935
action: 935, reward: 4.1940908343766985, e:1.0
action: 935
action: 935, reward: 4.1940

action: 22
action: 22, reward: 3.2018366842310013, e:1.0
action: 22
action: 22, reward: 3.2018366842310013, e:1.0
action: 22
action: 22, reward: 3.2018366842310013, e:1.0
action: 8293
action: 8293, reward: 3.7010162666117505, e:1.0
action: 22
action: 22, reward: 3.2018366842310013, e:1.0
action: 22
action: 22, reward: 3.2018366842310013, e:1.0
action: 9501
action: 9501, reward: 3.455582575286787, e:1.0
action: 8789
action: 8789, reward: 3.2921035904741287, e:1.0
action: 1606
action: 1606, reward: 3.306589334300607, e:1.0
action: 22
action: 22, reward: 3.2018366842310013, e:1.0
action: 7090
action: 7090, reward: 3.818522053301579, e:1.0
action: 22
action: 22, reward: 3.2018366842310013, e:1.0
action: 22
action: 22, reward: 3.2018366842310013, e:1.0
action: 22
action: 22, reward: 3.2018366842310013, e:1.0
action: 7347
action: 7347, reward: 3.601399750627774, e:1.0
action: 8758
action: 8758, reward: 3.53419751371202, e:1.0
action: 22
action: 22, reward: 3.2018366842310013, e:1.0
action: 2

action: 1180
action: 1180, reward: 3.4194205865293696, e:1.0
action: 7566
action: 7566, reward: 3.7004369156425176, e:1.0
action: 7566
action: 7566, reward: 3.7004369156425176, e:1.0
action: 7566
action: 7566, reward: 3.7004369156425176, e:1.0
action: 7566
action: 7566, reward: 3.7004369156425176, e:1.0
action: 8122
action: 8122, reward: 3.390521769429313, e:1.0
action: 7566
action: 7566, reward: 3.7004369156425176, e:1.0
action: 7566
action: 7566, reward: 3.7004369156425176, e:1.0
action: 7566
action: 7566, reward: 3.7004369156425176, e:1.0
action: 7566
action: 7566, reward: 3.7004369156425176, e:1.0
action: 7566
action: 7566, reward: 3.7004369156425176, e:1.0
action: 2087
action: 2087, reward: 3.4602274638283204, e:1.0
action: 4370
action: 4370, reward: 3.5438843309159918, e:1.0
action: 2695
action: 2695, reward: 3.5135176002276567, e:1.0
action: 7566
action: 7566, reward: 3.7004369156425176, e:1.0
action: 7566
action: 7566, reward: 3.7004369156425176, e:1.0
action: 7566
action: 7566

action: 5635
action: 5635, reward: 3.5417867076654725, e:1.0
action: 8691
action: 8691, reward: 3.770810242371262, e:1.0
action: 8691
action: 8691, reward: 3.770810242371262, e:1.0
action: 6761
action: 6761, reward: 3.458943658426081, e:1.0
action: 8691
action: 8691, reward: 3.770810242371262, e:1.0
action: 8691
action: 8691, reward: 3.770810242371262, e:1.0
action: 1061
action: 1061, reward: 3.383292345478459, e:1.0
action: 3999
action: 3999, reward: 3.4955781644704196, e:1.0
action: 3999
action: 3999, reward: 3.4955781644704196, e:1.0
action: 3999
action: 3999, reward: 3.4955781644704196, e:1.0
action: 3999
action: 3999, reward: 3.4955781644704196, e:1.0
action: 8027
action: 8027, reward: 3.4716639665951963, e:1.0
action: 3999
action: 3999, reward: 3.4955781644704196, e:1.0
action: 3999
action: 3999, reward: 3.4955781644704196, e:1.0
action: 3999
action: 3999, reward: 3.4955781644704196, e:1.0
action: 3999
action: 3999, reward: 3.4955781644704196, e:1.0
action: 3999
action: 3999, rew

action: 4297
action: 4297, reward: 3.2592121260075855, e:1.0
action: 2670
action: 2670, reward: 3.015812551131464, e:1.0
action: 2670
action: 2670, reward: 3.015812551131464, e:1.0
action: 2670
action: 2670, reward: 3.015812551131464, e:1.0
action: 2670
action: 2670, reward: 3.015812551131464, e:1.0
action: 9580
action: 9580, reward: 3.653357813588779, e:1.0
action: 2670
action: 2670, reward: 3.015812551131464, e:1.0
action: 2670
action: 2670, reward: 3.015812551131464, e:1.0
action: 2670
action: 2670, reward: 3.015812551131464, e:1.0
action: 2670
action: 2670, reward: 3.015812551131464, e:1.0
action: 2670
action: 2670, reward: 3.015812551131464, e:1.0
action: 2670
action: 2670, reward: 3.015812551131464, e:1.0
action: 2670
action: 2670, reward: 3.015812551131464, e:1.0
action: 2670
action: 2670, reward: 3.015812551131464, e:1.0
action: 2670
action: 2670, reward: 3.015812551131464, e:1.0
action: 599
action: 599, reward: 4.0337820637077675, e:1.0
action: 2670
action: 2670, reward: 3.015

action: 1010
action: 1010, reward: 3.645357919001897, e:1.0
action: 1081
action: 1081, reward: 3.0691938514819714, e:1.0
action: 8691
action: 8691, reward: 3.770810242371262, e:1.0
action: 8691
action: 8691, reward: 3.770810242371262, e:1.0
action: 8691
action: 8691, reward: 3.770810242371262, e:1.0
action: 8691
action: 8691, reward: 3.770810242371262, e:1.0
action: 2832
action: 2832, reward: 3.5418911337485106, e:1.0
action: 3919
action: 3919, reward: 3.4479400177687314, e:1.0
action: 8691
action: 8691, reward: 3.770810242371262, e:1.0
action: 8691
action: 8691, reward: 3.770810242371262, e:1.0
action: 8691
action: 8691, reward: 3.770810242371262, e:1.0
action: 8691
action: 8691, reward: 3.770810242371262, e:1.0
action: 8691
action: 8691, reward: 3.770810242371262, e:1.0
action: 8691
action: 8691, reward: 3.770810242371262, e:1.0
action: 8691
action: 8691, reward: 3.770810242371262, e:1.0
action: 7707
action: 7707, reward: 3.652953634442809, e:1.0
action: 8691
action: 8691, reward: 3.

action: 1577
action: 1577, reward: 3.2679090376905227, e:1.0
action: 7926
action: 7926, reward: 3.579009973318889, e:1.0
action: 4918
action: 4918, reward: 3.3907138967700226, e:1.0
action: 4918
action: 4918, reward: 3.3907138967700226, e:1.0
action: 4918
action: 4918, reward: 3.3907138967700226, e:1.0
action: 3065
action: 3065, reward: 3.525150785694292, e:1.0
action: 8225
action: 8225, reward: 3.6911470275923413, e:1.0
action: 8225
action: 8225, reward: 3.6911470275923413, e:1.0
action: 8225
action: 8225, reward: 3.6911470275923413, e:1.0
action: 8225
action: 8225, reward: 3.6911470275923413, e:1.0
action: 8225
action: 8225, reward: 3.6911470275923413, e:1.0
action: 8225
action: 8225, reward: 3.6911470275923413, e:1.0
action: 8225
action: 8225, reward: 3.6911470275923413, e:1.0
action: 8225
action: 8225, reward: 3.6911470275923413, e:1.0
action: 8225
action: 8225, reward: 3.6911470275923413, e:1.0
action: 8225
action: 8225, reward: 3.6911470275923413, e:1.0
action: 8225
action: 8225,

action: 7174
action: 7174, reward: 3.7049911002165112, e:1.0
action: 8691
action: 8691, reward: 3.770810242371262, e:1.0
action: 1570
action: 1570, reward: 3.5646244814636385, e:1.0
action: 8691
action: 8691, reward: 3.770810242371262, e:1.0
action: 8691
action: 8691, reward: 3.770810242371262, e:1.0
action: 8691
action: 8691, reward: 3.770810242371262, e:1.0
action: 8691
action: 8691, reward: 3.770810242371262, e:1.0
action: 8691
action: 8691, reward: 3.770810242371262, e:1.0
action: 163
action: 163, reward: 2.542368921956787, e:1.0
action: 7101
action: 7101, reward: 3.4101947267611226, e:1.0
action: 8691
action: 8691, reward: 3.770810242371262, e:1.0
action: 8691
action: 8691, reward: 3.770810242371262, e:1.0
action: 2859
action: 2859, reward: 2.975057428143523, e:1.0
action: 8691
action: 8691, reward: 3.770810242371262, e:1.0
action: 4043
action: 4043, reward: 3.5791605462032434, e:1.0
action: 6437
action: 6437, reward: 3.6348278903620437, e:1.0
action: 8691
action: 8691, reward: 3.

action: 3485
action: 3485, reward: 3.5325611423372867, e:1.0
action: 2198
action: 2198, reward: 3.653810577279868, e:1.0
action: 486
action: 486, reward: 3.7843083716550936, e:1.0
action: 2803
action: 2803, reward: 3.4837122455360277, e:1.0
action: 4676
action: 4676, reward: 3.599290755081097, e:1.0
action: 486
action: 486, reward: 3.7843083716550936, e:1.0
action: 486
action: 486, reward: 3.7843083716550936, e:1.0
action: 6421
action: 6421, reward: 3.482087493738455, e:1.0
action: 8673
action: 8673, reward: 3.5110949038020594, e:1.0
action: 486
action: 486, reward: 3.7843083716550936, e:1.0
action: 8149
action: 8149, reward: 3.4688711637866363, e:1.0
action: 8516
action: 8516, reward: 3.7147184011306065, e:1.0
action: 486
action: 486, reward: 3.7843083716550936, e:1.0
action: 486
action: 486, reward: 3.7843083716550936, e:1.0
action: 486
action: 486, reward: 3.7843083716550936, e:1.0
action: 486
action: 486, reward: 3.7843083716550936, e:1.0
action: 5834
action: 5834, reward: 3.419378

action: 8050
action: 8050, reward: 3.479721745451122, e:1.0
action: 2672
action: 2672, reward: 3.4995032103481045, e:1.0
action: 2430
action: 2430, reward: 3.6871672182041864, e:1.0
action: 2430
action: 2430, reward: 3.6871672182041864, e:1.0
action: 9682
action: 9682, reward: 3.5099530403014336, e:1.0
action: 2430
action: 2430, reward: 3.6871672182041864, e:1.0
action: 2430
action: 2430, reward: 3.6871672182041864, e:1.0
action: 2329
action: 2329, reward: 3.7272964505927972, e:1.0
action: 9097
action: 9097, reward: 3.2341898080344196, e:1.0
action: 6837
action: 6837, reward: 2.6410290075335974, e:1.0
action: 2430
action: 2430, reward: 3.6871672182041864, e:1.0
action: 2430
action: 2430, reward: 3.6871672182041864, e:1.0
action: 2430
action: 2430, reward: 3.6871672182041864, e:1.0
action: 2430
action: 2430, reward: 3.6871672182041864, e:1.0
action: 2430
action: 2430, reward: 3.6871672182041864, e:1.0
action: 2430
action: 2430, reward: 3.6871672182041864, e:1.0
action: 2430
action: 2430

action: 999
action: 999, reward: 3.7901824648257736, e:1.0
action: 1219
action: 1219, reward: 3.4773800558754973, e:1.0
action: 3577
action: 3577, reward: 3.3905686775814394, e:1.0
action: 1219
action: 1219, reward: 3.4773800558754973, e:1.0
action: 1219
action: 1219, reward: 3.4773800558754973, e:1.0
action: 5331
action: 5331, reward: 3.3298848144303226, e:1.0
action: 1219
action: 1219, reward: 3.4773800558754973, e:1.0
action: 1219
action: 1219, reward: 3.4773800558754973, e:1.0
action: 1219
action: 1219, reward: 3.4773800558754973, e:1.0
action: 1219
action: 1219, reward: 3.4773800558754973, e:1.0
action: 4387
action: 4387, reward: 3.464043004327347, e:1.0
action: 1219
action: 1219, reward: 3.4773800558754973, e:1.0
action: 1219
action: 1219, reward: 3.4773800558754973, e:1.0
action: 1219
action: 1219, reward: 3.4773800558754973, e:1.0
action: 2876
action: 2876, reward: 3.5610726007727327, e:1.0
action: 1930
action: 1930, reward: 3.565898469871533, e:1.0
action: 2948
action: 2948, r

action: 3513
action: 3513, reward: 3.6126825136608276, e:1.0
action: 6199
action: 6199, reward: 3.696258816729257, e:1.0
action: 3513
action: 3513, reward: 3.6126825136608276, e:1.0
action: 3513
action: 3513, reward: 3.6126825136608276, e:1.0
action: 5029
action: 5029, reward: 3.5151897854043748, e:1.0
action: 3513
action: 3513, reward: 3.6126825136608276, e:1.0
action: 3513
action: 3513, reward: 3.6126825136608276, e:1.0
action: 4492
action: 4492, reward: 3.5608877050111327, e:1.0
action: 5775
action: 5775, reward: 3.8112160272311773, e:1.0
action: 4210
action: 4210, reward: 3.7712002078761664, e:1.0
action: 3513
action: 3513, reward: 3.6126825136608276, e:1.0
action: 7554
action: 7554, reward: 3.9056821804572204, e:1.0
action: 2239
action: 2239, reward: 3.383729101400716, e:1.0
action: 3513
action: 3513, reward: 3.6126825136608276, e:1.0
action: 3513
action: 3513, reward: 3.6126825136608276, e:1.0
action: 3513
action: 3513, reward: 3.6126825136608276, e:1.0
action: 3513
action: 3513,

In [449]:
b = agent.model.predict(env.selectedUser)
print(b[0])
b[0].argsort()[-10:][::-1]

[ 0.03008332  0.00090214  0.0321472  ... -0.01717252 -0.00748099
 -0.02209356]


array([4780, 6481, 7510, 4677, 1359, 8315, 8537, 8165, 9346, 2670])

In [450]:
env.random_select_user()


array([[0., 0., 0., ..., 0., 0., 0.]])

In [383]:
env.selectedUser

array([[4., 0., 0., ..., 0., 0., 0.]])

In [451]:
a = agent.model.predict(env.selectedUser)
a[0].argsort()[-10:][::-1]

array([7531,  935, 2617, 5747, 6837, 5584, 6481,  944, 9391,  469])

In [385]:
np.argmax(a[0])

9638

In [419]:
env.UserMovie.tail()

movieId,1,2,3,4,5,6,7,8,9,10,...,193565,193567,193571,193573,193579,193581,193583,193585,193587,193609
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
606,2.5,0.0,0.0,0.0,0.0,0.0,2.5,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
607,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
608,2.5,2.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
609,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
610,5.0,0.0,0.0,0.0,0.0,5.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [417]:
env.UserMovie.shape

(610, 9724)

In [452]:
joblib.dump(agent.model, "DQNmodel.pkl")

['DQNmodel.pkl']