In [1]:
import gym, recogym

# env_1_args is a dictionary of default environment parameters (e.g. the number of products).
from recogym import env_1_args, Configuration

# Optional overrides of the default environment arguments go here.
env_1_args['random_seed'] = 42

# First-time setup: build the gym with .make(), then configure it with .init_gym().
env = gym.make('reco-gym-v1')
env.init_gym(env_1_args)

# Each episode (one episode per user) starts from a freshly reset environment.
env.reset()

# Step counter, plus the inputs handed to the very first offline step.
i = 0
observation, reward, done = None, 0, False

# `done` starts out False, so the body always runs at least once; the exit
# condition is checked after each step instead of before it.
while True:
    action, observation, reward, done, info = env.step_offline(observation, reward, done)
    print(f"STEP: {i} - ACTION: {action} - OBSERVATION: {observation.sessions()} - REWARD: {reward}")
    i += 1
    if done:
        break

STEP: 0 - ACTION: None - OBSERVATION: [{'t': 0, 'u': 0, 'z': 'pageview', 'v': 85}] - REWARD: None
STEP: 1 - ACTION: {'t': 1, 'u': 0, 'a': 99, 'ps': 0.001, 'ps-a': ()} - OBSERVATION: [] - REWARD: 0
STEP: 2 - ACTION: {'t': 2, 'u': 0, 'a': 308, 'ps': 0.001, 'ps-a': ()} - OBSERVATION: [] - REWARD: 0
STEP: 3 - ACTION: {'t': 3, 'u': 0, 'a': 805, 'ps': 0.001, 'ps-a': ()} - OBSERVATION: [] - REWARD: 0


In [2]:
# Hard-coded action sequence; the step taken right after a reset uses action None.
actions = [None, 1, 2, 3, 4, 5, 6, 7, 8]

# Start a new episode.
env.reset()
done = False

# Number of steps taken so far.
i = 0

# Play the scripted actions in order, stopping early once the episode is done.
for action in actions:
    observation, reward, done, info = env.step(action)
    print(f"Step: {i} - Action: {action} - Observation: {observation.sessions()} - Reward: {reward}")
    i += 1
    if done:
        break

Step: 0 - Action: None - Observation: [{'t': 0, 'u': 0, 'z': 'pageview', 'v': 796}] - Reward: None
Step: 1 - Action: 1 - Observation: [] - Reward: 0
Step: 2 - Action: 2 - Observation: [] - Reward: 0
Step: 3 - Action: 3 - Observation: [] - Reward: 0
Step: 4 - Action: 4 - Observation: [] - Reward: 0
Step: 5 - Action: 5 - Observation: [{'t': 6, 'u': 0, 'z': 'pageview', 'v': 796}, {'t': 7, 'u': 0, 'z': 'pageview', 'v': 796}, {'t': 8, 'u': 0, 'z': 'pageview', 'v': 796}, {'t': 9, 'u': 0, 'z': 'pageview', 'v': 796}, {'t': 10, 'u': 0, 'z': 'pageview', 'v': 796}, {'t': 11, 'u': 0, 'z': 'pageview', 'v': 796}, {'t': 12, 'u': 0, 'z': 'pageview', 'v': 796}, {'t': 13, 'u': 0, 'z': 'pageview', 'v': 796}, {'t': 14, 'u': 0, 'z': 'pageview', 'v': 796}, {'t': 15, 'u': 0, 'z': 'pageview', 'v': 796}, {'t': 16, 'u': 0, 'z': 'pageview', 'v': 796}, {'t': 17, 'u': 0, 'z': 'pageview', 'v': 24}, {'t': 18, 'u': 0, 'z': 'pageview', 'v': 796}, {'t': 19, 'u': 0, 'z': 'pageview', 'v': 796}, {'t': 20, 'u': 0, 'z': 'pa

In [3]:
from recogym.agents import NnIpsAgent, nn_ips_args

In [4]:
# Instantiate the NnIpsAgent, configured with its default arguments merged with
# the environment arguments (environment values win on key collisions).
# NOTE(review): num_products is not used anywhere in this cell; it is kept only
# in case other cells read it.
num_products = 10

agent = NnIpsAgent(Configuration({
    **nn_ips_args,
    **env_1_args,
}))

# Reset the environment's random seed (env_1_args['random_seed'] was set to 42 above).
env.reset_random_seed()

# Train on 1000 users offline.
num_offline_users = 1000

for _ in range(num_offline_users):

    # Start a new episode (one episode per user).
    env.reset()

    observation, reward, done = None, 0, False
    while not done:
        old_observation = observation
        action, observation, reward, done, info = env.step_offline(observation, reward, done)

        # On the first step of an episode there is no previous observation
        # (old_observation is None), so there is nothing to train on yet.
        # NOTE(review): passing None here is presumably what raised the
        # AssertionError this cell used to end with -- confirm against
        # the agent's train() implementation.
        if old_observation is not None:
            agent.train(old_observation, action, reward, done)  # TRAIN OFFLINE

# Evaluate the trained agent online on 100 users and track the click-through rate.
# (This loop only calls agent.act(); no training happens here.)
num_online_users = 100
num_clicks, num_events = 0, 0

for _ in range(num_online_users):

    # Start a new episode; the first step is taken without an action and only
    # its observation is used.
    env.reset()
    observation, _, _, _ = env.step(None)
    reward, done = None, None

    while not done:

        # Ask the agent for a recommendation and play it in the environment.
        action = agent.act(observation, reward, done)
        observation, reward, done, info = env.step(action['a'])

        # Used for calculating click-through rate: a reward of 1 counts as a click.
        if reward == 1:
            num_clicks += 1
        num_events += 1

# Guard against a division by zero when no online events were recorded
# (e.g. num_online_users set to 0).
ctr = num_clicks / num_events if num_events else 0.0

print(f"Click Through Rate: {ctr:.4f}")

AssertionError: 