In [1]:
from copy import deepcopy
import recogym
from recogym import Configuration

import numpy as np
from numpy.random import choice
from recogym.agents import Agent

import pandas as pd
from surprise import Dataset
from surprise import Reader

# Module-level interaction log in "long" format: three parallel lists that
# receive one (product, user, rating) entry each time a session is recorded.
ratings_dict = dict(product=[], user=[], rating=[])

class StaticAgent(Agent):
    """Agent that counts organic product views and recommends a fixed product.

    `train` accumulates per-product view counts (and appends every viewed
    session to the module-level `ratings_dict`); `act` turns those counts into
    a probability vector and currently always returns product 1 as a
    placeholder until a collaborative-filtering recommendation is plugged in.
    """

    def __init__(self, config):
        """Store the configuration and initialise the counters.

        Args:
            config: configuration object; must expose `num_products`.
        """
        super(StaticAgent, self).__init__(config)

        # Track number of times each item was viewed in an organic session.
        self.organic_views = np.zeros(self.config.num_products)

        # Number of calls to act() / number of sessions consumed by train();
        # only used to label the debug output.
        self.act_counter = 0
        self.train_counter = 0

    def train(self, observation, action, reward, done):
        """Record the organic sessions contained in `observation`.

        For every session the viewed product is counted in
        `self.organic_views` and a (product, user, rating=1) triple is appended
        to the module-level `ratings_dict`. A falsy `observation` is ignored.

        Args:
            observation: object exposing `sessions()`, or None.
            action: unused here.
            reward: only echoed in the debug output.
            done: unused here.
        """
        if observation:

            for session in observation.sessions():

                print(f"train () :::: session {session}")

                ratings_dict['product'].append(session['v']) # viewed product
                ratings_dict['user'].append(session['u'])
                ratings_dict['rating'].append(1) # scale 1 - organic view, 0 - no view 

                print("\n-------------- TRAIN START --------------")

                print(f"train () {self.train_counter} :::: reward {reward}")

                self.organic_views[session['v']] += 1

                print(f"train () {self.train_counter} :::: self.organic_views {self.organic_views}")

                self.train_counter += 1

                print("-------------- TRAIN END --------------\n")

        print(f"train () TRAIN FINISHED ratings_dict {ratings_dict}")

    def act(self, observation, reward, done):
        """Return the recommendation for the current observation.

        Args:
            observation: current observation; must expose `sessions()` and
                whatever the parent `act` requires.
            reward: reward of the previous step (only echoed in debug output).
            done: episode-finished flag, forwarded to the parent `act`.

        Returns:
            dict: the parent's `act` result extended with
                'a'  - the recommended product id (currently always 1), and
                'ps' - the view-share probability of that product.
        """
        #-------------------------------------------------------------------
        # INSTEAD OF TRAIN THE AGENT - USE COLLABORATIVE FILTERING FRAMEWORK
        #-------------------------------------------------------------------

        print("\n-------------- ACT START --------------")

        # Read the product count from the agent's own configuration rather than
        # from a notebook-level global, so the class works on a fresh kernel.
        num_products = self.config.num_products
        total_views = self.organic_views.sum()

        print(f"act () {self.act_counter} :::: get reward {reward}")
        print(f"act () {self.act_counter} :::: get observation sessions {observation.sessions()}")
        print(f"act () {self.act_counter} :::: have organic_views {self.organic_views}")
        print(f"act () {self.act_counter} :::: have sum(self.organic_views) {total_views}")

        if total_views > 0:
            prob = self.organic_views / total_views
        else:
            # No organic view recorded yet: avoid 0/0 (NaN propensities) and
            # fall back to a uniform distribution over the products.
            prob = np.full(num_products, 1.0 / num_products)

        print(f"act () {self.act_counter} :::: calc prob {prob}")

        print(f"act () {self.act_counter} :::: have num_products {num_products}")

        #--------------------------------------------------

        # action = choice(self.config.num_products, p = prob) # Choosing action RANDOMLY in proportion with number of views.

        # Placeholder: always recommend product 1.
        action = 1

        # TODO: create action based on CF Surprise lib recommendation:
        # userId, Propensity Scores => MOST RELEVANT Product Id

        #--------------------------------------------------

        print(f"act () {self.act_counter} :::: return action {action}")
        print(f"act () {self.act_counter} :::: return prob[action] {prob[action]}")

        self.act_counter += 1

        print("-------------- ACT END --------------\n")

        return {
            **super().act(observation, reward, done),
            **{
                'a': action,
                'ps': prob[action]
            }
        }

In [2]:
import gym
from recogym import env_1_args

# Configure and create the environment.
# NOTE: these assignments modify the `env_1_args` dict imported from recogym.
env_1_args['random_seed'] = 42

env_1_args['num_steps'] = 10

env = gym.make('reco-gym-v1')

env.init_gym(env_1_args)

# Instantiate instance of StaticAgent class.
# NOTE(review): `num_products` presumably has to match the number of products
# the environment was initialised with - confirm against env_1_args.

num_products = 10

num_offline_users = 5
num_online_users = 5

cf_agent = StaticAgent(Configuration({
    **env_1_args,
    'num_products': num_products,
}))

# Resets random seed back to 42, or whatever we set it to in env_1_args.

env.reset_random_seed()

num_clicks, num_events = 0, 0

# ****** TRAIN OFFLINE ******

# User ids run from 1 to num_offline_users.
for unique_offline_user_id in range(1, num_offline_users + 1):

    # Reset env and start a new episode for this user.
    env.reset(unique_offline_user_id)

    observation, reward, done = None, 0, False

    while not done: # ----- LOOP

        old_observation = observation

        action, observation, reward, done, info = env.step_offline(observation, reward, done)

        # The agent is trained on the observation that preceded the logged action.
        cf_agent.train(old_observation, action, reward, done) # TRAIN OFFLINE

# ****** TRAIN ONLINE ******

# User ids run from 1 to num_online_users.
for unique_online_user_id in range(1, num_online_users + 1):

    # Reset env and take an initial step without a recommendation.
    env.reset(unique_online_user_id)

    observation, _, done, _ = env.step(None)
    reward = None

    while not done: # ----- LOOP

        print("\n========================== ONLINE LOOP START ===========================")

        print(f"ONLINE %%%% ratings_dict {ratings_dict}")

        print(f"ONLINE %%%% RUN act() obs sessions {observation.sessions()}")

        action = cf_agent.act(observation, reward, done) # agent creates recommendation product id

        print(f"ONLINE %%%% action {action}, obs sessions {observation.sessions()} --> RUN step()")

        observation, reward, done, info = env.step(action['a'])

        print(f"ONLINE %%%% calc obs sessions {observation.sessions()}, reward {reward}")

        # Used for calculating click through rate.
        # `reward == 1` is already False for None, so no extra None check is needed.
        if reward == 1:
            num_clicks += 1
        num_events += 1

        print(f"ONLINE %%%% num_clicks: {num_clicks}")
        print(f"ONLINE %%%% num_events: {num_events}")
        print(f"ONLINE %%%% CTR {num_clicks / num_events}")

        print("============================== ONLINE LOOP END ============================\n")

# Guard against a run in which no online event happened at all.
ctr = num_clicks / num_events if num_events else 0.0

# ------------------------------------

print(f"TOTAL num_clicks: {num_clicks}")
print(f"TOTAL num_events: {num_events}")
print(f"TOTAL Click Through Rate: {ctr:.4f}")




step_offline ---- START done False
step () ---- START action_id None
step () ---- FIRST STEP => RUN generate_organic_sessions()
generate_organic_sessions () INIT - state 0
update_product_view () ---- uprob [8.01255766e-01 2.26876654e-03 1.50199390e-04 3.31549331e-02
 9.07069761e-02 1.57442822e-01 1.07446357e-04 1.05715232e-04
 4.48690411e-02 1.00000000e+00]
step_offline ---- return action None, obs sessions [{'t': 0, 'u': 1, 'z': 'pageview', 'v': 0}], done False
train () TRAIN FINISHED ratings_dict {'product': [], 'user': [], 'rating': []}
step_offline ---- START done False
step_offline ---- get obs sessions [{'t': 0, 'u': 1, 'z': 'pageview', 'v': 0}], reward None
step () ---- START action_id 3
draw_click () ---- self.config.change_omega_for_bandits or self.context_switch
draw_click () ---- cached_state_seed [ 4.03204206 -1.83490183 -4.54992974  0.84705338  1.85349613  2.40492422
 -4.8849017  -4.90114443  1.14960991  4.25361714]
draw_click () ---- ctr [0.01688128 0.0073738  0.00573575 

 2.98013854e-03 7.62179410e-01]
step () ---- state 1
step_offline ---- return action {'t': 32, 'u': 4, 'a': 2, 'ps': 0.1, 'ps-a': ()}, obs sessions [{'t': 33, 'u': 4, 'z': 'pageview', 'v': 9}], done False
train () TRAIN FINISHED ratings_dict {'product': [0, 1, 4, 4, 4, 4, 4, 6, 4, 4, 4, 6, 4, 1, 4, 4, 4, 4, 1, 6, 4, 4, 4, 5, 5, 5, 0, 5, 5, 7, 9, 5, 5, 9, 0], 'user': [1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4], 'rating': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]}
step_offline ---- START done False
step_offline ---- get obs sessions [{'t': 33, 'u': 4, 'z': 'pageview', 'v': 9}], reward 0
step () ---- START action_id 2
draw_click () ---- self.config.change_omega_for_bandits or self.context_switch
draw_click () ---- cached_state_seed [ 1.60380771 -2.21380973 -5.78514501  0.57302925 -2.0736307   2.5431652
 -3.34610198  0.10328386 -3.27262029  2.27159189]
draw_click ()

 0.01448046 0.01065072 0.01036596 0.01400935]
draw_click () ---- ctr[recommendation] 0.01512845170044505
draw_click () ---- return click 0, recommendation 1
step () ---- reward 0 from action_id 1
step () ---- state 1
step () ---- state 1
ONLINE %%%% calc obs sessions [], reward 0
ONLINE %%%% num_clicks: 0
ONLINE %%%% num_events: 8
ONLINE %%%% CTR 0.0


ONLINE %%%% ratings_dict {'product': [0, 1, 4, 4, 4, 4, 4, 6, 4, 4, 4, 6, 4, 1, 4, 4, 4, 4, 1, 6, 4, 4, 4, 5, 5, 5, 0, 5, 5, 7, 9, 5, 5, 9, 0, 9, 9, 9, 5, 5, 7, 4, 4], 'user': [1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5], 'rating': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]}
ONLINE %%%% RUN act() obs sessions []

-------------- ACT START --------------
act () 8 :::: get reward 0
act () 8 :::: get observation sessions []
act () 8 :::: have organic_views [ 3.  3.  0.  0.

 0.56798687 0.04462503 0.10467812 0.75311581]
update_product_view () ---- uprob [0.05990901 1.         0.00414768 0.05134101 0.15851796 0.04074918
 0.59800191 0.06458416 0.11966944 0.865816  ]
update_product_view () ---- uprob [0.07079035 0.84870008 0.0042183  0.05404035 0.17570188 0.0413687
 0.45498    0.05093112 0.14681702 1.        ]
update_product_view () ---- uprob [0.0725406  1.         0.00255056 0.03598528 0.15428901 0.03757753
 0.53811351 0.04425524 0.10273377 0.65915633]
update_product_view () ---- uprob [0.08720119 1.         0.00182928 0.03139324 0.12270669 0.03837583
 0.55302952 0.05194537 0.08468573 0.59302614]
update_product_view () ---- uprob [0.10667389 0.99784749 0.00274262 0.0477047  0.1358256  0.04819922
 0.50037273 0.05154185 0.11137395 1.        ]
update_product_view () ---- uprob [0.11058671 1.         0.00243943 0.04612838 0.11625751 0.04899306
 0.49565777 0.05084071 0.0953277  0.99465971]
update_product_view () ---- uprob [0.14151096 1.         0.00214688 0.045

 0.00562473 0.0062431  0.01417462 0.01914553]
draw_click () ---- ctr[recommendation] 0.0070343506776050545
draw_click () ---- return click 0, recommendation 1
step () ---- reward 0 from action_id 1
step () ---- state 1
step () ---- state 1
ONLINE %%%% calc obs sessions [], reward 0
ONLINE %%%% num_clicks: 0
ONLINE %%%% num_events: 115
ONLINE %%%% CTR 0.0


ONLINE %%%% ratings_dict {'product': [0, 1, 4, 4, 4, 4, 4, 6, 4, 4, 4, 6, 4, 1, 4, 4, 4, 4, 1, 6, 4, 4, 4, 5, 5, 5, 0, 5, 5, 7, 9, 5, 5, 9, 0, 9, 9, 9, 5, 5, 7, 4, 4], 'user': [1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5], 'rating': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]}
ONLINE %%%% RUN act() obs sessions []

-------------- ACT START --------------
act () 115 :::: get reward 0
act () 115 :::: get observation sessions []
act () 115 :::: have organic_views [ 3.  

draw_click () ---- ctr [0.01345186 0.00792061 0.00742769 0.01331917 0.01514716 0.01428499
 0.00616817 0.00646324 0.01295697 0.0182546 ]
draw_click () ---- ctr[recommendation] 0.007920609912296817
draw_click () ---- return click 0, recommendation 1
step () ---- reward 0 from action_id 1
step () ---- state 1
step () ---- state 1
ONLINE %%%% calc obs sessions [], reward 0
ONLINE %%%% num_clicks: 0
ONLINE %%%% num_events: 165
ONLINE %%%% CTR 0.0


ONLINE %%%% ratings_dict {'product': [0, 1, 4, 4, 4, 4, 4, 6, 4, 4, 4, 6, 4, 1, 4, 4, 4, 4, 1, 6, 4, 4, 4, 5, 5, 5, 0, 5, 5, 7, 9, 5, 5, 9, 0, 9, 9, 9, 5, 5, 7, 4, 4], 'user': [1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5], 'rating': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]}
ONLINE %%%% RUN act() obs sessions []

-------------- ACT START --------------
act () 165 :::: get rewar

draw_click () ---- ctr [0.01171155 0.01101994 0.00799682 0.01274192 0.01740303 0.01488119
 0.0078803  0.00605711 0.01155631 0.01676432]
draw_click () ---- ctr[recommendation] 0.011019935527258584
draw_click () ---- return click 0, recommendation 1
step () ---- reward 0 from action_id 1
step () ---- state 1
step () ---- state 1
ONLINE %%%% calc obs sessions [], reward 0
ONLINE %%%% num_clicks: 3
ONLINE %%%% num_events: 222
ONLINE %%%% CTR 0.013513513513513514


ONLINE %%%% ratings_dict {'product': [0, 1, 4, 4, 4, 4, 4, 6, 4, 4, 4, 6, 4, 1, 4, 4, 4, 4, 1, 6, 4, 4, 4, 5, 5, 5, 0, 5, 5, 7, 9, 5, 5, 9, 0, 9, 9, 9, 5, 5, 7, 4, 4], 'user': [1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5], 'rating': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]}
ONLINE %%%% RUN act() obs sessions []

-------------- ACT START --------------
act () 2

ONLINE %%%% calc obs sessions [], reward 0
ONLINE %%%% num_clicks: 3
ONLINE %%%% num_events: 270
ONLINE %%%% CTR 0.011111111111111112


ONLINE %%%% ratings_dict {'product': [0, 1, 4, 4, 4, 4, 4, 6, 4, 4, 4, 6, 4, 1, 4, 4, 4, 4, 1, 6, 4, 4, 4, 5, 5, 5, 0, 5, 5, 7, 9, 5, 5, 9, 0, 9, 9, 9, 5, 5, 7, 4, 4], 'user': [1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5], 'rating': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]}
ONLINE %%%% RUN act() obs sessions []

-------------- ACT START --------------
act () 270 :::: get reward 0
act () 270 :::: get observation sessions []
act () 270 :::: have organic_views [ 3.  3.  0.  0. 18.  9.  3.  2.  0.  5.]
act () 270 :::: have sum(self.organic_views) 43.0
act () 270 :::: calc prob [0.06976744 0.06976744 0.         0.         0.41860465 0.20930233
 0.06976744 0.04651163 0.         0.11627907