In [1]:
from recogym import Configuration

import numpy as np
from numpy.random import choice
from recogym.agents import Agent

class CFAgent(Agent):

    def __init__(self, config):
        
        super(CFAgent, self).__init__(config) # Set number of products as an attribute of the Agent.

        self.organic_views = np.zeros(self.config.num_products) # Track number of times each item viewed in Organic session.

        self.act_counter = 0
        self.train_counter = 0

    def train(self, observation, action, reward, done):

        # Train method learns from a tuple of data. This method can be called for offline or online learning
        # Adding organic session to organic view counts.

        if observation:
            
            for session in observation.sessions(): # -- LOOP

                print("\n-------------- TRAIN START --------------")
                print(f"train () {self.train_counter} :::: reward {reward}")

                self.organic_views[session['v']] += 1

                print(f"train () {self.train_counter} :::: self.organic_views {self.organic_views}")

                self.train_counter +=1

                print("-------------- TRAIN END --------------\n")

    def act(self, observation, reward, done):

        print("\n-------------- ACT START --------------")

        # An act method takes in an observation, which could either be `None` or an Organic_Session
        # and returns a integer between 0 and num_products indicating which product the agent recommends.

        print(f"act () {self.act_counter} :::: get reward {reward}")
        print(f"act () {self.act_counter} :::: get observation sessions {observation.sessions()}")
        print(f"act () {self.act_counter} :::: have organic_views {self.organic_views}")
        print(f"act () {self.act_counter} :::: have sum(self.organic_views) {sum(self.organic_views)}")

        prob = self.organic_views / sum(self.organic_views)

        print(f"act () {self.act_counter} :::: calc prob {prob}")

        print(f"act () {self.act_counter} :::: have num_products {num_products}")

        # Choosing action randomly in proportion with number of views.

        action = choice(self.config.num_products, p = prob) # GENERATE ACTION ONLINE

        print(f"act () {self.act_counter} :::: return action {action}")
        print(f"act () {self.act_counter} :::: return prob[action] {prob[action]}")

        self.act_counter += 1

        print("-------------- ACT END --------------\n")

        return {
            **super().act(observation, reward, done),
            **{
                'a': action,
                'ps': prob[action]
            }
        }

In [2]:
import gym
from recogym import env_1_args

env_1_args['random_seed'] = 42

env_1_args['num_steps'] = 10

env = gym.make('reco-gym-v1')

env.init_gym(env_1_args)

# Instantiate instance of CFAgent class.

num_products = 10

num_offline_users = 5
num_online_users = 5

cf_agent = CFAgent(Configuration({
    **env_1_args,
    'num_products': num_products,
}))

# Resets random seed back to 42, or whatever we set it to in env_0_args.

env.reset_random_seed()

num_clicks, num_events = 0, 0

# ****** TRAIN OFFLINE ******

unique_offline_user_id = 0

for _ in range(num_offline_users):

    # Reset env and set done to False.

    unique_offline_user_id += 1

    env.reset()
    done = False
    observation, reward, done = None, 0, False

    while not done: # ----- LOOP

        old_observation = observation

        action, observation, reward, done, info = env.step_offline(observation, reward, done)

        cf_agent.train(old_observation, action, reward, done) # TRAIN OFFLINE

# ****** TRAIN ONLINE ******

unique_online_user_id = 0

for _ in range(num_online_users):

    # Reset env and set done to False.

    unique_online_user_id += 1

    env.reset(unique_online_user_id)

    observation, _, done, _ = env.step(None)
    reward = None

    while not done: # ----- LOOP
        
        print("\n========================== ONLINE LOOP START ===========================")

        print(f"ONLINE %%%% RUN act() obs sessions {observation.sessions()}")

        action = cf_agent.act(observation, reward, done) # create recommendation product id

        print(f"ONLINE %%%% action {action}, obs sessions {observation.sessions()} --> RUN step()")

        observation, reward, done, info = env.step(action['a'])

        print(f"ONLINE %%%% calc obs sessions {observation.sessions()}, reward {reward}")

        # Used for calculating click through rate.

        num_clicks += 1 if reward == 1 and reward is not None else 0
        num_events += 1
        
        print(f"ONLINE %%%% num_clicks: {num_clicks}")
        print(f"ONLINE %%%% num_events: {num_events}")
        print(f"ONLINE %%%% CTR {num_clicks / num_events}")
        
        print("============================== ONLINE LOOP END ============================\n")

ctr = num_clicks / num_events

# ------------------------------------

print(f"TOTAL num_clicks: {num_clicks}")
print(f"TOTAL num_events: {num_events}")
print(f"TOTAL Click Through Rate: {ctr:.4f}")



step_offline ---- START done False
step () ---- START action_id None
step () ---- FIRST STEP => RUN generate_organic_sessions()
generate_organic_sessions () INIT - state 0
update_product_view () ---- START, 
omega [[ 0.49671415]
 [-0.1382643 ]
 [ 0.64768854]
 [ 1.52302986]
 [-0.23415337]], 
gamma [[ 0.49671415 -0.1382643   0.64768854  1.52302986 -0.23415337]
 [-0.23413696  1.57921282  0.76743473 -0.46947439  0.54256004]
 [-0.46341769 -0.46572975  0.24196227 -1.91328024 -1.72491783]
 [-0.56228753 -1.01283112  0.31424733 -0.90802408 -1.4123037 ]
 [ 1.46564877 -0.2257763   0.0675282  -1.42474819 -0.54438272]
 [ 0.11092259 -1.15099358  0.37569802 -0.60063869 -0.29169375]
 [-0.60170661  1.85227818 -0.01349722 -1.05771093  0.82254491]
 [-1.22084365  0.2088636  -1.95967012 -1.32818605  0.19686124]
 [ 0.73846658  0.17136828 -0.11564828 -0.3011037  -1.47852199]
 [-0.71984421 -0.46063877  1.05712223  0.34361829 -1.76304016]], 
beta [[ 0.49671415 -0.1382643   0.64768854  1.52302986 -0.23415337]
 

 [-0.71984421 -0.46063877  1.05712223  0.34361829 -1.76304016]]
draw_click () ---- cached_state_seed [-2.96699295  4.5981963   0.46235256  1.13453518  4.98163826  1.89850636
  5.00703687  1.80687302  0.17419961  0.65963157]
draw_click () ---- ctr [0.00652879 0.01805574 0.01008924 0.01116989 0.01883431 0.01254282
 0.01888522 0.01237086 0.00966572 0.01039314]
draw_click () ---- ctr[recommendation] 0.01805574201938246
draw_click () ---- return click 0, recommendation 1
step () ---- reward 0 from action_id 1
update_state () ---- old state 1, 
omega [[-0.47022256]
 [ 2.06323369]
 [ 0.89332042]
 [-2.47311541]
 [ 0.99239178]], 
gamma [[ 0.49671415 -0.1382643   0.64768854  1.52302986 -0.23415337]
 [-0.23413696  1.57921282  0.76743473 -0.46947439  0.54256004]
 [-0.46341769 -0.46572975  0.24196227 -1.91328024 -1.72491783]
 [-0.56228753 -1.01283112  0.31424733 -0.90802408 -1.4123037 ]
 [ 1.46564877 -0.2257763   0.0675282  -1.42474819 -0.54438272]
 [ 0.11092259 -1.15099358  0.37569802 -0.60063869 

 [-0.71984421 -0.46063877  1.05712223  0.34361829 -1.76304016]]
step () ---- state 1
step () ---- state 1
step_offline ---- return action {'t': 19, 'u': 0, 'a': 4, 'ps': 0.1, 'ps-a': ()}, obs sessions [], done False
step_offline ---- START done False
step_offline ---- get obs sessions [], reward 0
step () ---- START action_id 4
draw_click () ---- recommendation 4, 
omega [[ 0.25607383]
 [-0.68140955]
 [ 0.13218048]
 [ 0.86343217]
 [ 1.22163181]], 
gamma [[ 0.49671415 -0.1382643   0.64768854  1.52302986 -0.23415337]
 [-0.23413696  1.57921282  0.76743473 -0.46947439  0.54256004]
 [-0.46341769 -0.46572975  0.24196227 -1.91328024 -1.72491783]
 [-0.56228753 -1.01283112  0.31424733 -0.90802408 -1.4123037 ]
 [ 1.46564877 -0.2257763   0.0675282  -1.42474819 -0.54438272]
 [ 0.11092259 -1.15099358  0.37569802 -0.60063869 -0.29169375]
 [-0.60170661  1.85227818 -0.01349722 -1.05771093  0.82254491]
 [-1.22084365  0.2088636  -1.95967012 -1.32818605  0.19686124]
 [ 0.73846658  0.17136828 -0.11564828 

draw_click () ---- ctr [0.01171893 0.00702676 0.00534176 0.01019022 0.0071617  0.01359673
 0.00636883 0.01012796 0.00636927 0.01295294]
draw_click () ---- ctr[recommendation] 0.0071617049800360676
draw_click () ---- return click 0, recommendation 4
step () ---- reward 0 from action_id 4
update_state () ---- old state 1, 
omega [[-1.91393178]
 [-1.00778035]
 [-0.30452598]
 [ 1.21559189]
 [ 1.56036866]], 
gamma [[ 0.49671415 -0.1382643   0.64768854  1.52302986 -0.23415337]
 [-0.23413696  1.57921282  0.76743473 -0.46947439  0.54256004]
 [-0.46341769 -0.46572975  0.24196227 -1.91328024 -1.72491783]
 [-0.56228753 -1.01283112  0.31424733 -0.90802408 -1.4123037 ]
 [ 1.46564877 -0.2257763   0.0675282  -1.42474819 -0.54438272]
 [ 0.11092259 -1.15099358  0.37569802 -0.60063869 -0.29169375]
 [-0.60170661  1.85227818 -0.01349722 -1.05771093  0.82254491]
 [-1.22084365  0.2088636  -1.95967012 -1.32818605  0.19686124]
 [ 0.73846658  0.17136828 -0.11564828 -0.3011037  -1.47852199]
 [-0.71984421 -0.460

 [-0.71984421 -0.46063877  1.05712223  0.34361829 -1.76304016]]
update_state () ---- new state 1, 
omega [[-1.93764924]
 [-1.36542142]
 [ 0.0023146 ]
 [ 1.22640857]
 [ 1.62155103]], 
gamma [[ 0.49671415 -0.1382643   0.64768854  1.52302986 -0.23415337]
 [-0.23413696  1.57921282  0.76743473 -0.46947439  0.54256004]
 [-0.46341769 -0.46572975  0.24196227 -1.91328024 -1.72491783]
 [-0.56228753 -1.01283112  0.31424733 -0.90802408 -1.4123037 ]
 [ 1.46564877 -0.2257763   0.0675282  -1.42474819 -0.54438272]
 [ 0.11092259 -1.15099358  0.37569802 -0.60063869 -0.29169375]
 [-0.60170661  1.85227818 -0.01349722 -1.05771093  0.82254491]
 [-1.22084365  0.2088636  -1.95967012 -1.32818605  0.19686124]
 [ 0.73846658  0.17136828 -0.11564828 -0.3011037  -1.47852199]
 [-0.71984421 -0.46063877  1.05712223  0.34361829 -1.76304016]], 
beta [[ 0.49671415 -0.1382643   0.64768854  1.52302986 -0.23415337]
 [-0.23413696  1.57921282  0.76743473 -0.46947439  0.54256004]
 [-0.46341769 -0.46572975  0.24196227 -1.913280

 [-0.71984421 -0.46063877  1.05712223  0.34361829 -1.76304016]]
step () ---- state 1
step_offline ---- return action {'t': 13, 'u': 0, 'a': 3, 'ps': 0.1, 'ps-a': ()}, obs sessions [{'t': 14, 'u': 0, 'z': 'pageview', 'v': 4}], done False
step_offline ---- START done False
step_offline ---- get obs sessions [{'t': 14, 'u': 0, 'z': 'pageview', 'v': 4}], reward 1
step () ---- START action_id 0
draw_click () ---- recommendation 0, 
omega [[ 1.50502511]
 [-0.50840595]
 [-1.24184052]
 [ 0.18351182]
 [ 0.35315762]], 
gamma [[ 0.49671415 -0.1382643   0.64768854  1.52302986 -0.23415337]
 [-0.23413696  1.57921282  0.76743473 -0.46947439  0.54256004]
 [-0.46341769 -0.46572975  0.24196227 -1.91328024 -1.72491783]
 [-0.56228753 -1.01283112  0.31424733 -0.90802408 -1.4123037 ]
 [ 1.46564877 -0.2257763   0.0675282  -1.42474819 -0.54438272]
 [ 0.11092259 -1.15099358  0.37569802 -0.60063869 -0.29169375]
 [-0.60170661  1.85227818 -0.01349722 -1.05771093  0.82254491]
 [-1.22084365  0.2088636  -1.95967012 

 [-0.71984421 -0.46063877  1.05712223  0.34361829 -1.76304016]]
update_state () ---- new state 1, 
omega [[-1.10652591]
 [ 2.5733598 ]
 [ 0.05921843]
 [ 0.01392929]
 [-0.02412509]], 
gamma [[ 0.49671415 -0.1382643   0.64768854  1.52302986 -0.23415337]
 [-0.23413696  1.57921282  0.76743473 -0.46947439  0.54256004]
 [-0.46341769 -0.46572975  0.24196227 -1.91328024 -1.72491783]
 [-0.56228753 -1.01283112  0.31424733 -0.90802408 -1.4123037 ]
 [ 1.46564877 -0.2257763   0.0675282  -1.42474819 -0.54438272]
 [ 0.11092259 -1.15099358  0.37569802 -0.60063869 -0.29169375]
 [-0.60170661  1.85227818 -0.01349722 -1.05771093  0.82254491]
 [-1.22084365  0.2088636  -1.95967012 -1.32818605  0.19686124]
 [ 0.73846658  0.17136828 -0.11564828 -0.3011037  -1.47852199]
 [-0.71984421 -0.46063877  1.05712223  0.34361829 -1.76304016]], 
beta [[ 0.49671415 -0.1382643   0.64768854  1.52302986 -0.23415337]
 [-0.23413696  1.57921282  0.76743473 -0.46947439  0.54256004]
 [-0.46341769 -0.46572975  0.24196227 -1.913280

 [-0.71984421 -0.46063877  1.05712223  0.34361829 -1.76304016]]
step () ---- state 1
step () ---- state 1
ONLINE %%%% calc obs sessions [], reward 0
ONLINE %%%% num_clicks: 0
ONLINE %%%% num_events: 33
ONLINE %%%% CTR 0.0


ONLINE %%%% RUN act() obs sessions []

-------------- ACT START --------------
act () 33 :::: get reward 0
act () 33 :::: get observation sessions []
act () 33 :::: have organic_views [ 3.  3.  0.  0. 18.  9.  3.  2.  0.  5.]
act () 33 :::: have sum(self.organic_views) 43.0
act () 33 :::: calc prob [0.06976744 0.06976744 0.         0.         0.41860465 0.20930233
 0.06976744 0.04651163 0.         0.11627907]
act () 33 :::: have num_products 10
act () 33 :::: return action 6
act () 33 :::: return prob[action] 0.06976744186046512
-------------- ACT END --------------

ONLINE %%%% action {'t': 41, 'u': 1, 'a': 6, 'ps': 0.06976744186046512}, obs sessions [] --> RUN step()
step () ---- START action_id 6
draw_click () ---- recommendation 6, 
omega [[-1.03225780e+00]
 [ 2

 [-0.71984421 -0.46063877  1.05712223  0.34361829 -1.76304016]]
update_state () ---- new state 1, 
omega [[-8.77125244e-01]
 [ 2.47683804e+00]
 [-8.76257130e-05]
 [ 9.32770283e-02]
 [-3.09763657e-01]], 
gamma [[ 0.49671415 -0.1382643   0.64768854  1.52302986 -0.23415337]
 [-0.23413696  1.57921282  0.76743473 -0.46947439  0.54256004]
 [-0.46341769 -0.46572975  0.24196227 -1.91328024 -1.72491783]
 [-0.56228753 -1.01283112  0.31424733 -0.90802408 -1.4123037 ]
 [ 1.46564877 -0.2257763   0.0675282  -1.42474819 -0.54438272]
 [ 0.11092259 -1.15099358  0.37569802 -0.60063869 -0.29169375]
 [-0.60170661  1.85227818 -0.01349722 -1.05771093  0.82254491]
 [-1.22084365  0.2088636  -1.95967012 -1.32818605  0.19686124]
 [ 0.73846658  0.17136828 -0.11564828 -0.3011037  -1.47852199]
 [-0.71984421 -0.46063877  1.05712223  0.34361829 -1.76304016]], 
beta [[ 0.49671415 -0.1382643   0.64768854  1.52302986 -0.23415337]
 [-0.23413696  1.57921282  0.76743473 -0.46947439  0.54256004]
 [-0.46341769 -0.46572975  

 [-0.71984421 -0.46063877  1.05712223  0.34361829 -1.76304016]]
draw_click () ---- cached_state_seed [ 1.50075924  3.06155508 -4.72511089 -1.23026587  0.29983461 -0.35647111
  1.97077961 -1.27965235 -0.08363442  2.32310026]
draw_click () ---- ctr [0.01181026 0.01485518 0.00566938 0.00794717 0.00984738 0.00894961
 0.01267971 0.0078968  0.00930745 0.01336187]
draw_click () ---- ctr[recommendation] 0.01485518210409681
draw_click () ---- return click 0, recommendation 1
step () ---- reward 0 from action_id 1
update_state () ---- old state 1, 
omega [[-0.74778103]
 [ 2.36234859]
 [ 0.49135191]
 [ 0.66633204]
 [ 0.45490428]], 
gamma [[ 0.49671415 -0.1382643   0.64768854  1.52302986 -0.23415337]
 [-0.23413696  1.57921282  0.76743473 -0.46947439  0.54256004]
 [-0.46341769 -0.46572975  0.24196227 -1.91328024 -1.72491783]
 [-0.56228753 -1.01283112  0.31424733 -0.90802408 -1.4123037 ]
 [ 1.46564877 -0.2257763   0.0675282  -1.42474819 -0.54438272]
 [ 0.11092259 -1.15099358  0.37569802 -0.60063869 

 [-0.71984421 -0.46063877  1.05712223  0.34361829 -1.76304016]]
draw_click () ---- cached_state_seed [ 1.57364385  3.3412176  -4.96956287 -1.42831996  0.26235956 -0.33345476
  2.14650528 -1.64569834 -0.35546266  2.19768221]
draw_click () ---- ctr [0.01194173 0.01543569 0.0055826  0.00774906 0.00979271 0.00897896
 0.013017   0.00754329 0.00895089 0.01311635]
draw_click () ---- ctr[recommendation] 0.00979271054031608
draw_click () ---- return click 0, recommendation 4
step () ---- reward 0 from action_id 4
update_state () ---- old state 1, 
omega [[-0.73002513]
 [ 2.37433344]
 [ 0.69798988]
 [ 0.64949364]
 [ 0.63627933]], 
gamma [[ 0.49671415 -0.1382643   0.64768854  1.52302986 -0.23415337]
 [-0.23413696  1.57921282  0.76743473 -0.46947439  0.54256004]
 [-0.46341769 -0.46572975  0.24196227 -1.91328024 -1.72491783]
 [-0.56228753 -1.01283112  0.31424733 -0.90802408 -1.4123037 ]
 [ 1.46564877 -0.2257763   0.0675282  -1.42474819 -0.54438272]
 [ 0.11092259 -1.15099358  0.37569802 -0.60063869 

 [-0.71984421 -0.46063877  1.05712223  0.34361829 -1.76304016]]
draw_click () ---- cached_state_seed [ 2.97022424 -2.24664535 -1.85363988  2.53947248  3.30641607  2.89130431
 -4.84858184 -3.45401353  2.72933524  5.13764275]
draw_click () ---- ctr [0.01466694 0.00703435 0.00735749 0.01379169 0.01536317 0.01450494
 0.00562473 0.0062431  0.01417462 0.01914553]
draw_click () ---- ctr[recommendation] 0.01536317258490023
draw_click () ---- return click 0, recommendation 4
step () ---- reward 0 from action_id 4
update_state () ---- old state 1, 
omega [[ 0.52514641]
 [-0.10640975]
 [ 0.26466347]
 [ 0.84663034]
 [-1.11699964]], 
gamma [[ 0.49671415 -0.1382643   0.64768854  1.52302986 -0.23415337]
 [-0.23413696  1.57921282  0.76743473 -0.46947439  0.54256004]
 [-0.46341769 -0.46572975  0.24196227 -1.91328024 -1.72491783]
 [-0.56228753 -1.01283112  0.31424733 -0.90802408 -1.4123037 ]
 [ 1.46564877 -0.2257763   0.0675282  -1.42474819 -0.54438272]
 [ 0.11092259 -1.15099358  0.37569802 -0.60063869 

 [-0.71984421 -0.46063877  1.05712223  0.34361829 -1.76304016]]
draw_click () ---- cached_state_seed [ 3.16863328 -1.96831108 -2.13415695  2.22845171  3.52454469  2.79912631
 -4.72148346 -3.98457778  2.7749567   4.9702994 ]
draw_click () ---- ctr [0.01507679 0.0072595  0.00712322 0.01317633 0.0158186  0.01431657
 0.00567072 0.0059762  0.01426734 0.01881155]
draw_click () ---- ctr[recommendation] 0.015076793979922464
draw_click () ---- return click 0, recommendation 0
step () ---- reward 0 from action_id 0
update_state () ---- old state 1, 
omega [[ 0.73317724]
 [ 0.01193542]
 [ 0.41063406]
 [ 0.86861987]
 [-1.04613117]], 
gamma [[ 0.49671415 -0.1382643   0.64768854  1.52302986 -0.23415337]
 [-0.23413696  1.57921282  0.76743473 -0.46947439  0.54256004]
 [-0.46341769 -0.46572975  0.24196227 -1.91328024 -1.72491783]
 [-0.56228753 -1.01283112  0.31424733 -0.90802408 -1.4123037 ]
 [ 1.46564877 -0.2257763   0.0675282  -1.42474819 -0.54438272]
 [ 0.11092259 -1.15099358  0.37569802 -0.60063869

draw_click () ---- ctr [0.01345186 0.00792061 0.00742769 0.01331917 0.01514716 0.01428499
 0.00616817 0.00646324 0.01295697 0.0182546 ]
draw_click () ---- ctr[recommendation] 0.006463237606800754
draw_click () ---- return click 0, recommendation 7
step () ---- reward 0 from action_id 7
update_state () ---- old state 1, 
omega [[ 0.28263022]
 [ 0.13174628]
 [ 0.51737936]
 [ 0.50213556]
 [-0.74494062]], 
gamma [[ 0.49671415 -0.1382643   0.64768854  1.52302986 -0.23415337]
 [-0.23413696  1.57921282  0.76743473 -0.46947439  0.54256004]
 [-0.46341769 -0.46572975  0.24196227 -1.91328024 -1.72491783]
 [-0.56228753 -1.01283112  0.31424733 -0.90802408 -1.4123037 ]
 [ 1.46564877 -0.2257763   0.0675282  -1.42474819 -0.54438272]
 [ 0.11092259 -1.15099358  0.37569802 -0.60063869 -0.29169375]
 [-0.60170661  1.85227818 -0.01349722 -1.05771093  0.82254491]
 [-1.22084365  0.2088636  -1.95967012 -1.32818605  0.19686124]
 [ 0.73846658  0.17136828 -0.11564828 -0.3011037  -1.47852199]
 [-0.71984421 -0.4606

 [-0.71984421 -0.46063877  1.05712223  0.34361829 -1.76304016]]
draw_click () ---- cached_state_seed [ 2.09251571 -1.47490212 -1.69098223  2.39092101  3.22045951  2.92346909
 -3.63555495 -2.60395243  1.94100702  4.45237485]
draw_click () ---- ctr [0.01291272 0.00770395 0.00750192 0.01349578 0.01518437 0.01457089
 0.00614689 0.00677033 0.01262318 0.01775549]
draw_click () ---- ctr[recommendation] 0.01518436738660466
draw_click () ---- return click 1, recommendation 4
step () ---- reward 1 from action_id 4
update_state () ---- old state 1, 
omega [[ 0.21873069]
 [-0.01383195]
 [ 0.38039326]
 [ 0.40208176]
 [-0.64464507]], 
gamma [[ 0.49671415 -0.1382643   0.64768854  1.52302986 -0.23415337]
 [-0.23413696  1.57921282  0.76743473 -0.46947439  0.54256004]
 [-0.46341769 -0.46572975  0.24196227 -1.91328024 -1.72491783]
 [-0.56228753 -1.01283112  0.31424733 -0.90802408 -1.4123037 ]
 [ 1.46564877 -0.2257763   0.0675282  -1.42474819 -0.54438272]
 [ 0.11092259 -1.15099358  0.37569802 -0.60063869 

draw_click () ---- ctr [0.01034335 0.0071371  0.00899566 0.01430291 0.02047851 0.01597951
 0.00619585 0.00813988 0.01447731 0.01445682]
draw_click () ---- ctr[recommendation] 0.006195846276301565
draw_click () ---- return click 0, recommendation 6
step () ---- reward 0 from action_id 6
update_state () ---- old state 1, 
omega [[ 1.06434573]
 [-0.24835102]
 [-0.31256399]
 [-0.5602109 ]
 [-0.63291037]], 
gamma [[ 0.49671415 -0.1382643   0.64768854  1.52302986 -0.23415337]
 [-0.23413696  1.57921282  0.76743473 -0.46947439  0.54256004]
 [-0.46341769 -0.46572975  0.24196227 -1.91328024 -1.72491783]
 [-0.56228753 -1.01283112  0.31424733 -0.90802408 -1.4123037 ]
 [ 1.46564877 -0.2257763   0.0675282  -1.42474819 -0.54438272]
 [ 0.11092259 -1.15099358  0.37569802 -0.60063869 -0.29169375]
 [-0.60170661  1.85227818 -0.01349722 -1.05771093  0.82254491]
 [-1.22084365  0.2088636  -1.95967012 -1.32818605  0.19686124]
 [ 0.73846658  0.17136828 -0.11564828 -0.3011037  -1.47852199]
 [-0.71984421 -0.4606

 [-0.71984421 -0.46063877  1.05712223  0.34361829 -1.76304016]]
step () ---- state 1
step () ---- state 1
ONLINE %%%% calc obs sessions [], reward 0
ONLINE %%%% num_clicks: 4
ONLINE %%%% num_events: 251
ONLINE %%%% CTR 0.01593625498007968


ONLINE %%%% RUN act() obs sessions []

-------------- ACT START --------------
act () 251 :::: get reward 0
act () 251 :::: get observation sessions []
act () 251 :::: have organic_views [ 3.  3.  0.  0. 18.  9.  3.  2.  0.  5.]
act () 251 :::: have sum(self.organic_views) 43.0
act () 251 :::: calc prob [0.06976744 0.06976744 0.         0.         0.41860465 0.20930233
 0.06976744 0.04651163 0.         0.11627907]
act () 251 :::: have num_products 10
act () 251 :::: return action 4
act () 251 :::: return prob[action] 0.4186046511627907
-------------- ACT END --------------

ONLINE %%%% action {'t': 35, 'u': 3, 'a': 4, 'ps': 0.4186046511627907}, obs sessions [] --> RUN step()
step () ---- START action_id 4
draw_click () ---- recommendation 4, 
omega 

act () 276 :::: get reward 0
act () 276 :::: get observation sessions []
act () 276 :::: have organic_views [ 3.  3.  0.  0. 18.  9.  3.  2.  0.  5.]
act () 276 :::: have sum(self.organic_views) 43.0
act () 276 :::: calc prob [0.06976744 0.06976744 0.         0.         0.41860465 0.20930233
 0.06976744 0.04651163 0.         0.11627907]
act () 276 :::: have num_products 10
act () 276 :::: return action 5
act () 276 :::: return prob[action] 0.20930232558139536
-------------- ACT END --------------

ONLINE %%%% action {'t': 13, 'u': 4, 'a': 5, 'ps': 0.20930232558139536}, obs sessions [] --> RUN step()
step () ---- START action_id 5
draw_click () ---- recommendation 5, 
omega [[-0.42590227]
 [-0.31115202]
 [-0.49468666]
 [ 0.83351954]
 [ 1.40937116]], 
gamma [[ 0.49671415 -0.1382643   0.64768854  1.52302986 -0.23415337]
 [-0.23413696  1.57921282  0.76743473 -0.46947439  0.54256004]
 [-0.46341769 -0.46572975  0.24196227 -1.91328024 -1.72491783]
 [-0.56228753 -1.01283112  0.31424733 -0.9080

 [-0.71984421 -0.46063877  1.05712223  0.34361829 -1.76304016]]
draw_click () ---- cached_state_seed [ 1.37449599 -1.77356306 -5.45208086 -0.31808774  1.11132204  2.0830524
 -2.76278982 -0.28253233 -1.122717    0.69165212]
draw_click () ---- ctr [0.0115856  0.00742779 0.00542952 0.00899864 0.01113048 0.0128945
 0.00666148 0.00904443 0.00805911 0.01044352]
draw_click () ---- ctr[recommendation] 0.01113048180632216
draw_click () ---- return click 0, recommendation 4
step () ---- reward 0 from action_id 4
update_state () ---- old state 1, 
omega [[-0.19492404]
 [-0.30182719]
 [-0.62929682]
 [ 0.75166424]
 [ 1.19530655]], 
gamma [[ 0.49671415 -0.1382643   0.64768854  1.52302986 -0.23415337]
 [-0.23413696  1.57921282  0.76743473 -0.46947439  0.54256004]
 [-0.46341769 -0.46572975  0.24196227 -1.91328024 -1.72491783]
 [-0.56228753 -1.01283112  0.31424733 -0.90802408 -1.4123037 ]
 [ 1.46564877 -0.2257763   0.0675282  -1.42474819 -0.54438272]
 [ 0.11092259 -1.15099358  0.37569802 -0.60063869 -0

 [-0.71984421 -0.46063877  1.05712223  0.34361829 -1.76304016]]
draw_click () ---- cached_state_seed [ 1.37449599 -1.77356306 -5.45208086 -0.31808774  1.11132204  2.0830524
 -2.76278982 -0.28253233 -1.122717    0.69165212]
draw_click () ---- ctr [0.0115856  0.00742779 0.00542952 0.00899864 0.01113048 0.0128945
 0.00666148 0.00904443 0.00805911 0.01044352]
draw_click () ---- ctr[recommendation] 0.007427791497151066
draw_click () ---- return click 0, recommendation 1
step () ---- reward 0 from action_id 1
update_state () ---- old state 1, 
omega [[-0.19492404]
 [-0.30182719]
 [-0.62929682]
 [ 0.75166424]
 [ 1.19530655]], 
gamma [[ 0.49671415 -0.1382643   0.64768854  1.52302986 -0.23415337]
 [-0.23413696  1.57921282  0.76743473 -0.46947439  0.54256004]
 [-0.46341769 -0.46572975  0.24196227 -1.91328024 -1.72491783]
 [-0.56228753 -1.01283112  0.31424733 -0.90802408 -1.4123037 ]
 [ 1.46564877 -0.2257763   0.0675282  -1.42474819 -0.54438272]
 [ 0.11092259 -1.15099358  0.37569802 -0.60063869 -

 [-0.71984421 -0.46063877  1.05712223  0.34361829 -1.76304016]]
update_state () ---- new state 1, 
omega [[-0.07618543]
 [-0.04287083]
 [-0.5713335 ]
 [ 0.78424387]
 [ 1.21474498]], 
gamma [[ 0.49671415 -0.1382643   0.64768854  1.52302986 -0.23415337]
 [-0.23413696  1.57921282  0.76743473 -0.46947439  0.54256004]
 [-0.46341769 -0.46572975  0.24196227 -1.91328024 -1.72491783]
 [-0.56228753 -1.01283112  0.31424733 -0.90802408 -1.4123037 ]
 [ 1.46564877 -0.2257763   0.0675282  -1.42474819 -0.54438272]
 [ 0.11092259 -1.15099358  0.37569802 -0.60063869 -0.29169375]
 [-0.60170661  1.85227818 -0.01349722 -1.05771093  0.82254491]
 [-1.22084365  0.2088636  -1.95967012 -1.32818605  0.19686124]
 [ 0.73846658  0.17136828 -0.11564828 -0.3011037  -1.47852199]
 [-0.71984421 -0.46063877  1.05712223  0.34361829 -1.76304016]], 
beta [[ 0.49671415 -0.1382643   0.64768854  1.52302986 -0.23415337]
 [-0.23413696  1.57921282  0.76743473 -0.46947439  0.54256004]
 [-0.46341769 -0.46572975  0.24196227 -1.913280

 [-0.71984421 -0.46063877  1.05712223  0.34361829 -1.76304016]]
update_state () ---- new state 1, 
omega [[-0.07618543]
 [-0.04287083]
 [-0.5713335 ]
 [ 0.78424387]
 [ 1.21474498]], 
gamma [[ 0.49671415 -0.1382643   0.64768854  1.52302986 -0.23415337]
 [-0.23413696  1.57921282  0.76743473 -0.46947439  0.54256004]
 [-0.46341769 -0.46572975  0.24196227 -1.91328024 -1.72491783]
 [-0.56228753 -1.01283112  0.31424733 -0.90802408 -1.4123037 ]
 [ 1.46564877 -0.2257763   0.0675282  -1.42474819 -0.54438272]
 [ 0.11092259 -1.15099358  0.37569802 -0.60063869 -0.29169375]
 [-0.60170661  1.85227818 -0.01349722 -1.05771093  0.82254491]
 [-1.22084365  0.2088636  -1.95967012 -1.32818605  0.19686124]
 [ 0.73846658  0.17136828 -0.11564828 -0.3011037  -1.47852199]
 [-0.71984421 -0.46063877  1.05712223  0.34361829 -1.76304016]], 
beta [[ 0.49671415 -0.1382643   0.64768854  1.52302986 -0.23415337]
 [-0.23413696  1.57921282  0.76743473 -0.46947439  0.54256004]
 [-0.46341769 -0.46572975  0.24196227 -1.913280

 [-0.71984421 -0.46063877  1.05712223  0.34361829 -1.76304016]]
update_state () ---- new state 1, 
omega [[-0.0164713 ]
 [ 0.16138854]
 [-0.56272845]
 [ 0.83830688]
 [ 1.17820156]], 
gamma [[ 0.49671415 -0.1382643   0.64768854  1.52302986 -0.23415337]
 [-0.23413696  1.57921282  0.76743473 -0.46947439  0.54256004]
 [-0.46341769 -0.46572975  0.24196227 -1.91328024 -1.72491783]
 [-0.56228753 -1.01283112  0.31424733 -0.90802408 -1.4123037 ]
 [ 1.46564877 -0.2257763   0.0675282  -1.42474819 -0.54438272]
 [ 0.11092259 -1.15099358  0.37569802 -0.60063869 -0.29169375]
 [-0.60170661  1.85227818 -0.01349722 -1.05771093  0.82254491]
 [-1.22084365  0.2088636  -1.95967012 -1.32818605  0.19686124]
 [ 0.73846658  0.17136828 -0.11564828 -0.3011037  -1.47852199]
 [-0.71984421 -0.46063877  1.05712223  0.34361829 -1.76304016]], 
beta [[ 0.49671415 -0.1382643   0.64768854  1.52302986 -0.23415337]
 [-0.23413696  1.57921282  0.76743473 -0.46947439  0.54256004]
 [-0.46341769 -0.46572975  0.24196227 -1.913280

 [-0.71984421 -0.46063877  1.05712223  0.34361829 -1.76304016]]
update_state () ---- new state 1, 
omega [[ 0.07955993]
 [ 0.0392797 ]
 [-0.50406919]
 [ 0.82381626]
 [ 1.0296305 ]], 
gamma [[ 0.49671415 -0.1382643   0.64768854  1.52302986 -0.23415337]
 [-0.23413696  1.57921282  0.76743473 -0.46947439  0.54256004]
 [-0.46341769 -0.46572975  0.24196227 -1.91328024 -1.72491783]
 [-0.56228753 -1.01283112  0.31424733 -0.90802408 -1.4123037 ]
 [ 1.46564877 -0.2257763   0.0675282  -1.42474819 -0.54438272]
 [ 0.11092259 -1.15099358  0.37569802 -0.60063869 -0.29169375]
 [-0.60170661  1.85227818 -0.01349722 -1.05771093  0.82254491]
 [-1.22084365  0.2088636  -1.95967012 -1.32818605  0.19686124]
 [ 0.73846658  0.17136828 -0.11564828 -0.3011037  -1.47852199]
 [-0.71984421 -0.46063877  1.05712223  0.34361829 -1.76304016]], 
beta [[ 0.49671415 -0.1382643   0.64768854  1.52302986 -0.23415337]
 [-0.23413696  1.57921282  0.76743473 -0.46947439  0.54256004]
 [-0.46341769 -0.46572975  0.24196227 -1.913280

 [-0.71984421 -0.46063877  1.05712223  0.34361829 -1.76304016]]
draw_click () ---- cached_state_seed [ 1.85574443 -1.39805028 -5.22245072 -0.29176464  1.49906088  1.86449103
 -2.73842885 -1.19308884 -0.36632433  1.2895686 ]
draw_click () ---- ctr [0.01246234 0.00777867 0.0054995  0.00903251 0.01180722 0.01247877
 0.00667784 0.00798551 0.0089371  0.01143676]
draw_click () ---- ctr[recommendation] 0.011807215400592303
draw_click () ---- return click 0, recommendation 4
step () ---- reward 0 from action_id 4
update_state () ---- old state 1, 
omega [[ 0.07165888]
 [ 0.02808836]
 [-0.40503928]
 [ 0.85698767]
 [ 0.81611617]], 
gamma [[ 0.49671415 -0.1382643   0.64768854  1.52302986 -0.23415337]
 [-0.23413696  1.57921282  0.76743473 -0.46947439  0.54256004]
 [-0.46341769 -0.46572975  0.24196227 -1.91328024 -1.72491783]
 [-0.56228753 -1.01283112  0.31424733 -0.90802408 -1.4123037 ]
 [ 1.46564877 -0.2257763   0.0675282  -1.42474819 -0.54438272]
 [ 0.11092259 -1.15099358  0.37569802 -0.60063869

 [-0.71984421 -0.46063877  1.05712223  0.34361829 -1.76304016]]
update_state () ---- new state 1, 
omega [[-0.05129608]
 [ 0.07775828]
 [-0.45157032]
 [ 0.84639284]
 [ 1.08055051]], 
gamma [[ 0.49671415 -0.1382643   0.64768854  1.52302986 -0.23415337]
 [-0.23413696  1.57921282  0.76743473 -0.46947439  0.54256004]
 [-0.46341769 -0.46572975  0.24196227 -1.91328024 -1.72491783]
 [-0.56228753 -1.01283112  0.31424733 -0.90802408 -1.4123037 ]
 [ 1.46564877 -0.2257763   0.0675282  -1.42474819 -0.54438272]
 [ 0.11092259 -1.15099358  0.37569802 -0.60063869 -0.29169375]
 [-0.60170661  1.85227818 -0.01349722 -1.05771093  0.82254491]
 [-1.22084365  0.2088636  -1.95967012 -1.32818605  0.19686124]
 [ 0.73846658  0.17136828 -0.11564828 -0.3011037  -1.47852199]
 [-0.71984421 -0.46063877  1.05712223  0.34361829 -1.76304016]], 
beta [[ 0.49671415 -0.1382643   0.64768854  1.52302986 -0.23415337]
 [-0.23413696  1.57921282  0.76743473 -0.46947439  0.54256004]
 [-0.46341769 -0.46572975  0.24196227 -1.913280

 [-0.71984421 -0.46063877  1.05712223  0.34361829 -1.76304016]]
draw_click () ---- cached_state_seed [ 1.21086687  0.93156259 -3.63448359  0.65997965 -0.22913626  1.32284582
 -0.24227471 -0.62196655 -0.87850543  3.34822746]
draw_click () ---- ctr [0.0113005  0.01083025 0.00614744 0.01039369 0.0091139  0.01149486
 0.00909673 0.00862221 0.00832499 0.0154503 ]
draw_click () ---- ctr[recommendation] 0.015450303234347382
draw_click () ---- return click 0, recommendation 9
step () ---- reward 0 from action_id 9
update_state () ---- old state 1, 
omega [[-1.4115188 ]
 [ 0.87282087]
 [ 0.48133456]
 [ 0.56978525]
 [ 0.5088055 ]], 
gamma [[ 0.49671415 -0.1382643   0.64768854  1.52302986 -0.23415337]
 [-0.23413696  1.57921282  0.76743473 -0.46947439  0.54256004]
 [-0.46341769 -0.46572975  0.24196227 -1.91328024 -1.72491783]
 [-0.56228753 -1.01283112  0.31424733 -0.90802408 -1.4123037 ]
 [ 1.46564877 -0.2257763   0.0675282  -1.42474819 -0.54438272]
 [ 0.11092259 -1.15099358  0.37569802 -0.60063869

 [-0.71984421 -0.46063877  1.05712223  0.34361829 -1.76304016]]
draw_click () ---- cached_state_seed [ 1.30749569  0.69413902 -3.4666796   0.86608063 -0.21498044  1.38544392
 -0.53386047 -0.64614884 -0.68023722  3.60685317]
draw_click () ---- ctr [0.01146803 0.01044745 0.00623622 0.01072309 0.00913246 0.01160493
 0.0087286  0.00859339 0.00855306 0.01599088]
draw_click () ---- ctr[recommendation] 0.009132460534983214
draw_click () ---- return click 0, recommendation 4
step () ---- reward 0 from action_id 4
update_state () ---- old state 1, 
omega [[-1.41093476]
 [ 0.81236398]
 [ 0.4348819 ]
 [ 0.62450244]
 [ 0.36048123]], 
gamma [[ 0.49671415 -0.1382643   0.64768854  1.52302986 -0.23415337]
 [-0.23413696  1.57921282  0.76743473 -0.46947439  0.54256004]
 [-0.46341769 -0.46572975  0.24196227 -1.91328024 -1.72491783]
 [-0.56228753 -1.01283112  0.31424733 -0.90802408 -1.4123037 ]
 [ 1.46564877 -0.2257763   0.0675282  -1.42474819 -0.54438272]
 [ 0.11092259 -1.15099358  0.37569802 -0.60063869

 [-0.71984421 -0.46063877  1.05712223  0.34361829 -1.76304016]]
update_state () ---- old state 0, 
omega [[-1.12708957]
 [ 0.58196695]
 [ 0.13108353]
 [ 0.24545385]
 [ 0.69839894]], 
gamma [[ 0.49671415 -0.1382643   0.64768854  1.52302986 -0.23415337]
 [-0.23413696  1.57921282  0.76743473 -0.46947439  0.54256004]
 [-0.46341769 -0.46572975  0.24196227 -1.91328024 -1.72491783]
 [-0.56228753 -1.01283112  0.31424733 -0.90802408 -1.4123037 ]
 [ 1.46564877 -0.2257763   0.0675282  -1.42474819 -0.54438272]
 [ 0.11092259 -1.15099358  0.37569802 -0.60063869 -0.29169375]
 [-0.60170661  1.85227818 -0.01349722 -1.05771093  0.82254491]
 [-1.22084365  0.2088636  -1.95967012 -1.32818605  0.19686124]
 [ 0.73846658  0.17136828 -0.11564828 -0.3011037  -1.47852199]
 [-0.71984421 -0.46063877  1.05712223  0.34361829 -1.76304016]], 
beta [[ 0.49671415 -0.1382643   0.64768854  1.52302986 -0.23415337]
 [-0.23413696  1.57921282  0.76743473 -0.46947439  0.54256004]
 [-0.46341769 -0.46572975  0.24196227 -1.913280

 [-0.71984421 -0.46063877  1.05712223  0.34361829 -1.76304016]]
update_state () ---- new state 1, 
omega [[-1.22755667]
 [ 0.61813716]
 [ 0.48359897]
 [ 0.2787908 ]
 [ 0.85712029]], 
gamma [[ 0.49671415 -0.1382643   0.64768854  1.52302986 -0.23415337]
 [-0.23413696  1.57921282  0.76743473 -0.46947439  0.54256004]
 [-0.46341769 -0.46572975  0.24196227 -1.91328024 -1.72491783]
 [-0.56228753 -1.01283112  0.31424733 -0.90802408 -1.4123037 ]
 [ 1.46564877 -0.2257763   0.0675282  -1.42474819 -0.54438272]
 [ 0.11092259 -1.15099358  0.37569802 -0.60063869 -0.29169375]
 [-0.60170661  1.85227818 -0.01349722 -1.05771093  0.82254491]
 [-1.22084365  0.2088636  -1.95967012 -1.32818605  0.19686124]
 [ 0.73846658  0.17136828 -0.11564828 -0.3011037  -1.47852199]
 [-0.71984421 -0.46063877  1.05712223  0.34361829 -1.76304016]], 
beta [[ 0.49671415 -0.1382643   0.64768854  1.52302986 -0.23415337]
 [-0.23413696  1.57921282  0.76743473 -0.46947439  0.54256004]
 [-0.46341769 -0.46572975  0.24196227 -1.913280

 [-0.71984421 -0.46063877  1.05712223  0.34361829 -1.76304016]]
update_state () ---- new state 1, 
omega [[-1.51142952]
 [ 0.4773246 ]
 [ 0.66990892]
 [ 0.26542914]
 [ 1.30320838]], 
gamma [[ 0.49671415 -0.1382643   0.64768854  1.52302986 -0.23415337]
 [-0.23413696  1.57921282  0.76743473 -0.46947439  0.54256004]
 [-0.46341769 -0.46572975  0.24196227 -1.91328024 -1.72491783]
 [-0.56228753 -1.01283112  0.31424733 -0.90802408 -1.4123037 ]
 [ 1.46564877 -0.2257763   0.0675282  -1.42474819 -0.54438272]
 [ 0.11092259 -1.15099358  0.37569802 -0.60063869 -0.29169375]
 [-0.60170661  1.85227818 -0.01349722 -1.05771093  0.82254491]
 [-1.22084365  0.2088636  -1.95967012 -1.32818605  0.19686124]
 [ 0.73846658  0.17136828 -0.11564828 -0.3011037  -1.47852199]
 [-0.71984421 -0.46063877  1.05712223  0.34361829 -1.76304016]], 
beta [[ 0.49671415 -0.1382643   0.64768854  1.52302986 -0.23415337]
 [-0.23413696  1.57921282  0.76743473 -0.46947439  0.54256004]
 [-0.46341769 -0.46572975  0.24196227 -1.913280

 [-0.71984421 -0.46063877  1.05712223  0.34361829 -1.76304016]]
draw_click () ---- cached_state_seed [ 0.66931304  0.73195382 -3.49756527  0.9241329  -0.07721876  2.06798745
 -0.33408085 -0.37664543 -1.7846618   2.84691308]
draw_click () ---- ctr [0.01040834 0.01050735 0.00621954 0.01081803 0.00931613 0.01286553
 0.00897816 0.00892402 0.00741796 0.0144141 ]
draw_click () ---- ctr[recommendation] 0.014414104632757479
draw_click () ---- return click 0, recommendation 9
step () ---- reward 0 from action_id 9
update_state () ---- old state 1, 
omega [[-1.55436416]
 [ 0.33256186]
 [ 0.68768804]
 [ 0.20651666]
 [ 1.04556023]], 
gamma [[ 0.49671415 -0.1382643   0.64768854  1.52302986 -0.23415337]
 [-0.23413696  1.57921282  0.76743473 -0.46947439  0.54256004]
 [-0.46341769 -0.46572975  0.24196227 -1.91328024 -1.72491783]
 [-0.56228753 -1.01283112  0.31424733 -0.90802408 -1.4123037 ]
 [ 1.46564877 -0.2257763   0.0675282  -1.42474819 -0.54438272]
 [ 0.11092259 -1.15099358  0.37569802 -0.60063869

 [-0.71984421 -0.46063877  1.05712223  0.34361829 -1.76304016]]
update_state () ---- new state 0, 
omega [[-1.69836566]
 [ 0.18135967]
 [ 0.75521386]
 [ 0.42993803]
 [ 0.79578439]], 
gamma [[ 0.49671415 -0.1382643   0.64768854  1.52302986 -0.23415337]
 [-0.23413696  1.57921282  0.76743473 -0.46947439  0.54256004]
 [-0.46341769 -0.46572975  0.24196227 -1.91328024 -1.72491783]
 [-0.56228753 -1.01283112  0.31424733 -0.90802408 -1.4123037 ]
 [ 1.46564877 -0.2257763   0.0675282  -1.42474819 -0.54438272]
 [ 0.11092259 -1.15099358  0.37569802 -0.60063869 -0.29169375]
 [-0.60170661  1.85227818 -0.01349722 -1.05771093  0.82254491]
 [-1.22084365  0.2088636  -1.95967012 -1.32818605  0.19686124]
 [ 0.73846658  0.17136828 -0.11564828 -0.3011037  -1.47852199]
 [-0.71984421 -0.46063877  1.05712223  0.34361829 -1.76304016]], 
beta [[ 0.49671415 -0.1382643   0.64768854  1.52302986 -0.23415337]
 [-0.23413696  1.57921282  0.76743473 -0.46947439  0.54256004]
 [-0.46341769 -0.46572975  0.24196227 -1.913280

act () 652 :::: get observation sessions []
act () 652 :::: have organic_views [ 3.  3.  0.  0. 18.  9.  3.  2.  0.  5.]
act () 652 :::: have sum(self.organic_views) 43.0
act () 652 :::: calc prob [0.06976744 0.06976744 0.         0.         0.41860465 0.20930233
 0.06976744 0.04651163 0.         0.11627907]
act () 652 :::: have num_products 10
act () 652 :::: return action 5
act () 652 :::: return prob[action] 0.20930232558139536
-------------- ACT END --------------

ONLINE %%%% action {'t': 219, 'u': 5, 'a': 5, 'ps': 0.20930232558139536}, obs sessions [] --> RUN step()
step () ---- START action_id 5
draw_click () ---- recommendation 5, 
omega [[-2.07301978]
 [ 0.59681524]
 [ 0.90574248]
 [ 0.76229815]
 [ 1.04676968]], 
gamma [[ 0.49671415 -0.1382643   0.64768854  1.52302986 -0.23415337]
 [-0.23413696  1.57921282  0.76743473 -0.46947439  0.54256004]
 [-0.46341769 -0.46572975  0.24196227 -1.91328024 -1.72491783]
 [-0.56228753 -1.01283112  0.31424733 -0.90802408 -1.4123037 ]
 [ 1.46564