- `action` is a number between `0` and `num_products - 1` that references the index of the recommended product.
- `observation` will either be `None` or a session of Organic data, showing the index of products the user views.
- `reward` is 0 if the user does not click on the recommended product and 1 if they do. Notice that when a user clicks on a product (i.e. wherever the reward is 1), they start a new Organic session.
- `done` is a True/False flag indicating if the episode (aka user's timeline) is over.
- `info` is currently not used, so it is always an empty dictionary.

Also, notice that the first `action` is `None`.  In our implementation, the agent observes Organic behaviour before
recommending anything.

The agent merely records how many times a user sees each product organically;
then, when required to make a recommendation, the agent chooses a product at random, in proportion to
the number of times the user has viewed it.

In [5]:
import gym, recogym

# env_1_args is a dictionary of default parameters (e.g. number of products)
from recogym import env_1_args, Configuration

# Override any of the default environment arguments before the gym is built.
env_1_args.update(random_seed=42)

# Create the environment with .make(), then configure it with .init_gym().
env = gym.make('reco-gym-v1')
env.init_gym(env_1_args)

import numpy as np
from numpy.random import choice
from recogym.agents import Agent

# Define an Agent class.
class PopularityAgent(Agent):
    """Recommend products in proportion to how often they were viewed organically.

    The agent keeps one counter per product. ``train`` increments the counters
    from the organic sessions of an observation; ``act`` samples a product
    with probability proportional to its counter.
    """

    def __init__(self, config):
        """Initialise the agent with all view counters at zero.

        Args:
            config: Configuration object; it must expose ``num_products``
                (read below through ``self.config``, which the base class
                initialiser is expected to set).
        """
        super().__init__(config)

        # Number of times each product was viewed in an Organic session.
        self.organic_views = np.zeros(self.config.num_products)

    def train(self, observation, action, reward, done):
        """Add the organic views of ``observation`` to the view counters.

        Can be called for offline or online learning. ``action``, ``reward``
        and ``done`` are accepted for interface compatibility but are not used.

        Args:
            observation: ``None``/falsy, or an object whose ``sessions()``
                yields mappings with the viewed product index under ``'v'``.
            action: Unused.
            reward: Unused.
            done: Unused.
        """
        if observation:
            for session in observation.sessions():
                self.organic_views[session['v']] += 1

    def act(self, observation, reward, done):
        """Sample a product to recommend, proportionally to its view count.

        Args:
            observation: Forwarded to the base class ``act``.
            reward: Forwarded to the base class ``act``.
            done: Forwarded to the base class ``act``.

        Returns:
            dict: The entries returned by the base class ``act`` plus
            ``'a'`` (index of the chosen product) and ``'ps'`` (probability
            with which that product was chosen).
        """
        num_products = self.config.num_products
        total_views = self.organic_views.sum()

        if total_views > 0:
            prob = self.organic_views / total_views
        else:
            # No organic views recorded yet: 0 / 0 would produce NaN
            # probabilities, which numpy.random.choice rejects with a
            # ValueError. Fall back to a uniform choice instead.
            prob = np.full(num_products, 1.0 / num_products)

        action = choice(num_products, p=prob)

        return {
            **super().act(observation, reward, done),
            'a': action,
            'ps': prob[action],
        }

In [7]:
# Build a PopularityAgent from the environment defaults, with the number of
# products set explicitly.
num_products = 10
agent_config = dict(env_1_args, num_products=num_products)
agent = PopularityAgent(Configuration(agent_config))

# Reset the environment's random seed (presumably back to the value stored
# under 'random_seed' in env_1_args -- confirm against the environment).
env.reset_random_seed()

# Offline phase: the environment produces the logged actions itself, and the
# agent only learns from them.
num_offline_users = 1000

for _ in range(num_offline_users):
    # Every user starts from a fresh environment with no observation yet.
    env.reset()
    observation, reward, done = None, 0, False

    while True:
        # train() receives the observation that preceded this step,
        # not the one the step returns.
        previous_observation = observation
        action, observation, reward, done, info = env.step_offline(previous_observation, reward, done)
        agent.train(previous_observation, action, reward, done)
        if done:
            break

# Online phase: run 100 users with the agent choosing the recommendations
# (agent.train is not called here) and measure the click-through rate.
num_online_users = 100
num_clicks = 0
num_events = 0

for _ in range(num_online_users):
    # Start a new user; the first step takes no action and yields the
    # initial observation. Reward and done start out as None.
    env.reset()
    observation, _, _, _ = env.step(None)
    reward, done = None, None

    while not done:
        action = agent.act(observation, reward, done)
        observation, reward, done, info = env.step(action['a'])

        # One event per recommendation; a click is a reward equal to 1.
        if reward == 1:
            num_clicks += 1
        num_events += 1

ctr = num_clicks / num_events

print(f"Click Through Rate: {ctr:.4f}")

Click Through Rate: 0.0126


## Testing our Popularity Agent

In [9]:
import gym, recogym
from recogym import env_1_args

from copy import deepcopy

# Fix the seed used by the environment arguments.
env_1_args.update(random_seed=42)

# Build and configure a fresh environment.
env = gym.make('reco-gym-v1')
env.init_gym(env_1_args)

# Number of products as configured in the environment arguments.
num_products = env_1_args['num_products']

# The agent is configured with exactly the same arguments as the environment.
agent_configuration = Configuration(env_1_args)
popularity_agent = PopularityAgent(agent_configuration)

In [11]:
# Evaluate the agent with recogym.test_agent, passing deep copies so that the
# call works on its own copies of `env` and `popularity_agent`.
# The logged output shows a training phase and an evaluating phase over 1000
# users each; the two 1000 arguments are presumably those user counts --
# TODO confirm their order against recogym.test_agent.
# Result: credible interval of the CTR -- presumably (median, 0.025 quantile,
# 0.975 quantile); confirm the tuple order against recogym.test_agent.

recogym.test_agent(deepcopy(env), deepcopy(popularity_agent), 1000, 1000)


Organic Users: 0it [00:00, ?it/s]
Users:   0%|          | 5/1000 [00:00<00:20, 47.43it/s]

START: Agent Training #0
START: Agent Training @ Epoch #0


Users: 100%|██████████| 1000/1000 [00:18<00:00, 53.83it/s]
Organic Users: 0it [00:00, ?it/s]
Users:   0%|          | 2/1000 [00:00<00:52, 18.92it/s]

END: Agent Training @ Epoch #0 (18.588746070861816s)
START: Agent Evaluating @ Epoch #0


Users: 100%|██████████| 1000/1000 [00:19<00:00, 52.37it/s]


END: Agent Evaluating @ Epoch #0 (19.2580349445343s)


(0.014049314459026383, 0.01324351763294106, 0.014886678833290312)