In [1]:
from recogym import Configuration

In [2]:
import gym
from recogym import env_1_args
from recogym.agents.static_agent import SingleActionAgent

# Simulation settings: a fixed seed for reproducible runs and a small
# 'num_steps' value.
env_1_args.update(random_seed=42, num_steps=10)

# Build the RecoGym environment and apply the settings above.
env = gym.make('reco-gym-v1')
env.init_gym(env_1_args)

num_products = 10

# Number of simulated users for the offline (training) and online phases.
num_offline_users = 5
num_online_users = 5

# Instantiate the SingleActionAgent. Its configuration is the environment
# configuration with 'num_products' set explicitly.
static_agent = SingleActionAgent(
    Configuration(dict(env_1_args, num_products=num_products))
)

# Resets the random seed back to the value configured in env_1_args (42 here).
env.reset_random_seed()

# Running totals used to compute the click-through rate.
num_clicks = 0
num_events = 0

# ****** TRAIN OFFLINE ******
# For each offline user, let the environment generate the events itself via
# step_offline() and feed every (previous observation, action, reward, done)
# tuple to the agent's train() method.

unique_offline_user_id = 0

for unique_offline_user_id in range(1, num_offline_users + 1):

    # NOTE(review): this counter is never passed to env.reset(), unlike the
    # online loop which passes its own counter -- confirm this is intended.
    env.reset()

    # Initial values handed to the first step_offline() call of the session.
    observation = None
    reward = 0
    done = False

    while not done:
        # train() receives the observation from *before* this step.
        old_observation = observation

        action, observation, reward, done, info = env.step_offline(
            observation, reward, done
        )

        static_agent.train(old_observation, action, reward, done)

# ****** RUN ONLINE ******
# For each online user the agent is queried through act() and its chosen
# action is passed to env.step(); train() is not called in this phase.
# Clicks and events are accumulated to compute the click-through rate.

unique_online_user_id = 0

for unique_online_user_id in range(1, num_online_users + 1):

    # Start a new session for this user id.
    env.reset(unique_online_user_id)

    # The first step is taken with no action; only the observation and the
    # done flag are kept from its result.
    observation, _, done, _ = env.step(None)
    reward = None

    while not done:

        print("\n========================== ONLINE LOOP START ===========================")
        print(f"ONLINE %%%% RUN act() obs sessions {observation.sessions()}")

        # Ask the agent for its action; the 'a' entry is what gets passed
        # to env.step() below.
        action = static_agent.act(observation, reward, done)
        print(f"ONLINE %%%% action {action}, obs sessions {observation.sessions()} --> RUN step()")

        observation, reward, done, info = env.step(action['a'])
        print(f"ONLINE %%%% calc obs sessions {observation.sessions()}, reward {reward}")

        # A reward of 1 counts as a click; every step counts as an event.
        if reward == 1:
            num_clicks += 1
        num_events += 1

        print(f"ONLINE %%%% num_clicks: {num_clicks}")
        print(f"ONLINE %%%% num_events: {num_events}")
        print(f"ONLINE %%%% CTR {num_clicks / num_events}")

        print("============================== ONLINE LOOP END ============================\n")

# Overall click-through rate across all online events. When no event was
# recorded (e.g. num_online_users == 0) report 0.0 instead of raising
# ZeroDivisionError.
ctr = num_clicks / num_events if num_events else 0.0

# ------------------------------------

# Final summary of the online phase.
print(f"TOTAL num_clicks: {num_clicks}")
print(f"TOTAL num_events: {num_events}")
print(f"TOTAL Click Through Rate: {ctr:.4f}")




ONLINE %%%% RUN act() obs sessions [{'t': 0, 'u': 1, 'z': 'pageview', 'v': 1}]
ONLINE %%%% action {'t': 1, 'u': 1, 'a': 4, 'ps': 0.4186046511627907}, obs sessions [{'t': 0, 'u': 1, 'z': 'pageview', 'v': 1}] --> RUN step()
ONLINE %%%% calc obs sessions [], reward 0
ONLINE %%%% num_clicks: 0
ONLINE %%%% num_events: 1
ONLINE %%%% CTR 0.0


ONLINE %%%% RUN act() obs sessions []
ONLINE %%%% action {'t': 2, 'u': 1, 'a': 4, 'ps': 0.4186046511627907}, obs sessions [] --> RUN step()
ONLINE %%%% calc obs sessions [], reward 0
ONLINE %%%% num_clicks: 0
ONLINE %%%% num_events: 2
ONLINE %%%% CTR 0.0


ONLINE %%%% RUN act() obs sessions []
ONLINE %%%% action {'t': 3, 'u': 1, 'a': 4, 'ps': 0.4186046511627907}, obs sessions [] --> RUN step()
ONLINE %%%% calc obs sessions [], reward 0
ONLINE %%%% num_clicks: 0
ONLINE %%%% num_events: 3
ONLINE %%%% CTR 0.0


ONLINE %%%% RUN act() obs sessions []
ONLINE %%%% action {'t': 4, 'u': 1, 'a': 4, 'ps': 0.4186046511627907}, obs sessions [] --> RUN step()
ONLINE

ONLINE %%%% calc obs sessions [], reward 0
ONLINE %%%% num_clicks: 6
ONLINE %%%% num_events: 252
ONLINE %%%% CTR 0.023809523809523808


ONLINE %%%% RUN act() obs sessions []
ONLINE %%%% action {'t': 38, 'u': 3, 'a': 4, 'ps': 0.4186046511627907}, obs sessions [] --> RUN step()
ONLINE %%%% calc obs sessions [], reward 0
ONLINE %%%% num_clicks: 6
ONLINE %%%% num_events: 253
ONLINE %%%% CTR 0.023715415019762844


ONLINE %%%% RUN act() obs sessions []
ONLINE %%%% action {'t': 39, 'u': 3, 'a': 4, 'ps': 0.4186046511627907}, obs sessions [] --> RUN step()
ONLINE %%%% calc obs sessions [], reward 0
ONLINE %%%% num_clicks: 6
ONLINE %%%% num_events: 254
ONLINE %%%% CTR 0.023622047244094488


ONLINE %%%% RUN act() obs sessions []
ONLINE %%%% action {'t': 40, 'u': 3, 'a': 9, 'ps': 0.11627906976744186}, obs sessions [] --> RUN step()
ONLINE %%%% calc obs sessions [], reward 0
ONLINE %%%% num_clicks: 6
ONLINE %%%% num_events: 255
ONLINE %%%% CTR 0.023529411764705882


ONLINE %%%% RUN act() obs sessio

ONLINE %%%% calc obs sessions [], reward 0
ONLINE %%%% num_clicks: 10
ONLINE %%%% num_events: 481
ONLINE %%%% CTR 0.02079002079002079


ONLINE %%%% RUN act() obs sessions []
ONLINE %%%% action {'t': 241, 'u': 4, 'a': 5, 'ps': 0.20930232558139536}, obs sessions [] --> RUN step()
ONLINE %%%% calc obs sessions [], reward 0
ONLINE %%%% num_clicks: 10
ONLINE %%%% num_events: 482
ONLINE %%%% CTR 0.02074688796680498


ONLINE %%%% RUN act() obs sessions []
ONLINE %%%% action {'t': 242, 'u': 4, 'a': 4, 'ps': 0.4186046511627907}, obs sessions [] --> RUN step()
ONLINE %%%% calc obs sessions [], reward 0
ONLINE %%%% num_clicks: 10
ONLINE %%%% num_events: 483
ONLINE %%%% CTR 0.020703933747412008


ONLINE %%%% RUN act() obs sessions []
ONLINE %%%% action {'t': 243, 'u': 4, 'a': 4, 'ps': 0.4186046511627907}, obs sessions [] --> RUN step()
ONLINE %%%% calc obs sessions [], reward 0
ONLINE %%%% num_clicks: 10
ONLINE %%%% num_events: 484
ONLINE %%%% CTR 0.02066115702479339


ONLINE %%%% RUN act() obs se