In [1]:
from copy import deepcopy
import recogym
from recogym import Configuration

import numpy as np
from numpy.random import choice
from recogym.agents import Agent

import pandas as pd
from surprise import Dataset
from surprise import Reader

ratings_dict = {

    "product": [],
    "user": [],
    "rating": [],
}

class SingleActionAgent(Agent):

    def __init__(self, config):
        
        super(SingleActionAgent, self).__init__(config) # Set number of products as an attribute of the Agent.

        self.organic_views = np.zeros(self.config.num_products) # Track number of times each item viewed in Organic session.

        self.act_counter = 0
        self.train_counter = 0

    def train(self, observation, action, reward, done):

        # Train method learns from a tuple of data. This method can be called for offline or online learning
        # Adding organic session to organic view counts.

        if observation:

            for session in observation.sessions(): # -- LOOP

                print(f"train () :::: session {session}")
                
                ratings_dict['product'].append(session['v']) # viewed product
                ratings_dict['user'].append(session['u'])
                ratings_dict['rating'].append(1) # scale 1 - organic view, 0 - no view 

                print("\n-------------- TRAIN START --------------")
                
                print(f"train () {self.train_counter} :::: reward {reward}")

                self.organic_views[session['v']] += 1

                print(f"train () {self.train_counter} :::: self.organic_views {self.organic_views}")

                self.train_counter +=1

                print("-------------- TRAIN END --------------\n")

        print(f"train () TRAIN FINISHED ratings_dict {ratings_dict}")

    def act(self, observation, reward, done):

        #-------------------------------------------------------------------
        # INSTEAD OF TRAIN THE AGENT - USE COLLABORATIVE FILTERING FRAMEWORK
        #-------------------------------------------------------------------

        print("\n-------------- ACT START --------------")

        # An act method takes in an observation, which could either be `None` or an Organic_Session
        # and returns a integer between 0 and num_products indicating which product the agent recommends.

        print(f"act () {self.act_counter} :::: get reward {reward}")
        print(f"act () {self.act_counter} :::: get observation sessions {observation.sessions()}")
        print(f"act () {self.act_counter} :::: have organic_views {self.organic_views}")
        print(f"act () {self.act_counter} :::: have sum(self.organic_views) {sum(self.organic_views)}")

        prob = self.organic_views / sum(self.organic_views)

        print(f"act () {self.act_counter} :::: calc prob {prob}")

        print(f"act () {self.act_counter} :::: have num_products {num_products}")

        #--------------------------------------------------
 
        # action = choice(self.config.num_products, p = prob) # Choosing action RANDOMLY in proportion with number of views.

        action = 1

        # TODO: create action based on CF Surprise lib recommendation:
        # userId, Propensity Scores => MOST RELEVANT Product Id
        
        #--------------------------------------------------

        print(f"act () {self.act_counter} :::: return action {action}")
        print(f"act () {self.act_counter} :::: return prob[action] {prob[action]}")

        self.act_counter += 1

        print("-------------- ACT END --------------\n")

        return {
            **super().act(observation, reward, done),
            **{
                'a': action,
                'ps': prob[action]
            }
        }

In [2]:
import gym
from recogym import env_1_args

env_1_args['random_seed'] = 42

env_1_args['num_steps'] = 10

env = gym.make('reco-gym-v1')

env.init_gym(env_1_args)

# Instantiate instance of SingleActionAgent class.

num_products = 10

num_offline_users = 5
num_online_users = 5

cf_agent = SingleActionAgent(Configuration({
    **env_1_args,
    'num_products': num_products,
}))

# Resets random seed back to 42, or whatever we set it to in env_0_args.

env.reset_random_seed()

num_clicks, num_events = 0, 0

# ****** TRAIN OFFLINE ******

unique_offline_user_id = 0

for _ in range(num_offline_users):

    # Reset env and set done to False.
    
    unique_offline_user_id += 1
    
    env.reset(unique_offline_user_id)
    
    done = False
    observation, reward, done = None, 0, False

    while not done: # ----- LOOP

        old_observation = observation

        action, observation, reward, done, info = env.step_offline(observation, reward, done)

        cf_agent.train(old_observation, action, reward, done) # TRAIN OFFLINE

# ****** TRAIN ONLINE ******

unique_online_user_id = 0

for _ in range(num_online_users):

    # Reset env and set done to False.
    
    unique_online_user_id += 1
    
    env.reset(unique_online_user_id)
    
    observation, _, done, _ = env.step(None)
    reward = None

    while not done: # ----- LOOP

        print("\n========================== ONLINE LOOP START ===========================")

        print(f"ONLINE %%%% ratings_dict {ratings_dict}")

        print(f"ONLINE %%%% RUN act() obs sessions {observation.sessions()}")

        action = cf_agent.act(observation, reward, done) # agent creates recommendation product id

        print(f"ONLINE %%%% action {action}, obs sessions {observation.sessions()} --> RUN step()")

        observation, reward, done, info = env.step(action['a'])

        print(f"ONLINE %%%% calc obs sessions {observation.sessions()}, reward {reward}")

        # Used for calculating click through rate.

        num_clicks += 1 if reward == 1 and reward is not None else 0
        num_events += 1

        print(f"ONLINE %%%% num_clicks: {num_clicks}")
        print(f"ONLINE %%%% num_events: {num_events}")
        print(f"ONLINE %%%% CTR {num_clicks / num_events}")

        print("============================== ONLINE LOOP END ============================\n")

ctr = num_clicks / num_events

# ------------------------------------

print(f"TOTAL num_clicks: {num_clicks}")
print(f"TOTAL num_events: {num_events}")
print(f"TOTAL Click Through Rate: {ctr:.4f}")




train () TRAIN FINISHED ratings_dict {'product': [], 'user': [], 'rating': []}
train () :::: session {'t': 0, 'u': 1, 'z': 'pageview', 'v': 85}

-------------- TRAIN START --------------
train () 0 :::: reward 0


IndexError: index 85 is out of bounds for axis 0 with size 10