The **interest exploration** environment represents a clustered bandit problem: the world consists of a very large number of documents, which cluster into topics (this is a hard clustering -- one topic per document). We further posit that users also cluster into types.

In [8]:
from gym import spaces
import numpy as np
import matplotlib.pyplot as plt

from recsim import choice_model
from recsim.simulator import environment
from recsim.environments import interest_evolution
from recsim.environments import interest_exploration
from recsim.environments import long_term_satisfaction
from recsim.agents import random_agent

In [5]:
#############################
# Exploration Environment
#############################
# Build the interest-exploration environment from a small config and reset it
# to obtain the first observation.
env_config = dict(
    slate_size=2,
    seed=0,
    num_candidates=15,
    resample_documents=True,
)
ie_environment = interest_exploration.create_environment(env_config)
initial_observation = ie_environment.reset()

# Dump the user and response parts of the observation, then one line per
# candidate document.
for heading, obs_key in (('User Observable Features', 'user'),
                         ('User Response', 'response')):
  print(heading)
  print(initial_observation[obs_key])
print('Document Observable Features')
for doc_entry in initial_observation['doc'].items():
  print('ID:', doc_entry[0], 'features:', doc_entry[1])


# Dump the observation spaces that describe each part of the observation.
ie_observation_space = ie_environment.observation_space
print('Document observation space')
for field_name, field_space in ie_observation_space['doc'].spaces.items():
  print(field_name, ':', field_space)
for heading, obs_key in (('Response observation space', 'response'),
                         ('User observation space', 'user')):
  print(heading)
  print(ie_observation_space[obs_key])

User Observable Features
[]
User Response
None
Document Observable Features
ID: 15 features: {'quality': array(1.22720163), 'cluster_id': 1}
ID: 16 features: {'quality': array(1.29258489), 'cluster_id': 1}
ID: 17 features: {'quality': array(1.23977078), 'cluster_id': 1}
ID: 18 features: {'quality': array(1.46045555), 'cluster_id': 1}
ID: 19 features: {'quality': array(2.10233425), 'cluster_id': 0}
ID: 20 features: {'quality': array(1.09572905), 'cluster_id': 1}
ID: 21 features: {'quality': array(2.37256963), 'cluster_id': 0}
ID: 22 features: {'quality': array(1.34928002), 'cluster_id': 1}
ID: 23 features: {'quality': array(1.00670188), 'cluster_id': 1}
ID: 24 features: {'quality': array(1.20448562), 'cluster_id': 1}
ID: 25 features: {'quality': array(2.18351159), 'cluster_id': 0}
ID: 26 features: {'quality': array(1.19411585), 'cluster_id': 1}
ID: 27 features: {'quality': array(1.03514646), 'cluster_id': 1}
ID: 28 features: {'quality': array(2.29592623), 'cluster_id': 0}
ID: 29 feature

## Observations

A RecSim observation is a dictionary with 3 keys: 
* 'user', which represents the 'User Observable Features' in the structure diagram above,
* 'doc', containing the current corpus of recommendable documents and their observable features ('Document Observable Features'),
* and 'response', indicating the user's response to the last slate of recommendations ('User Response').

In [6]:
# Recommend a fixed slate; its entries are used below as positions in the
# list of candidate documents from the initial observation.
slate = [0, 1]
candidate_docs = list(initial_observation['doc'].items())
for doc_position in slate:
  print(candidate_docs[doc_position])

# Advance the environment by one step with that slate.
observation, reward, done, _ = ie_environment.step(slate)

env_act_space = ie_environment.action_space
print('Environment action space ', env_act_space)
# Create a random agent whose action space is built from the same config
# values that were used to create the environment.
slate_size = env_config['slate_size']
num_candidates = env_config['num_candidates']
action_space = spaces.MultiDiscrete(num_candidates * np.ones((slate_size,)))
agent = random_agent.RandomAgent(action_space, random_seed=0)

# Stepping the agent is not the same as stepping the environment: the
# environment is not advanced here, and the agent is queried three times with
# the same reward and observation (a bandit-style step).
for agent_call in range(3):
  slate = agent.step(reward, observation)
  print('Explor: Recommended slate ', slate)

('15', {'quality': array(1.22720163), 'cluster_id': 1})
('16', {'quality': array(1.29258489), 'cluster_id': 1})
Environment action space  MultiDiscrete([15 15])
Explor: Recommended slate  [1, 6]
Explor: Recommended slate  [2, 4]
Explor: Recommended slate  [4, 12]


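The **interest evolution** environment is assembled by hand below from a user model and a document sampler, rather than through `create_environment`.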

In [11]:
#############################
# Evolution Environment
#############################
# Sizes used throughout this cell: a candidate set of 5 items and slates of 2.
slate_size = 2
num_candidates = 5

# Create a simple user model, then the document sampler, and wire both into
# a simulator environment.
user_model = interest_evolution.IEvUserModel(
    slate_size,
    choice_model_ctor=choice_model.MultinomialLogitChoiceModel,
    response_model_ctor=interest_evolution.IEvResponse)
document_sampler = interest_evolution.IEvVideoSampler()
ievsim = environment.Environment(
    user_model, document_sampler, num_candidates, slate_size)

# Create agent
action_space = spaces.MultiDiscrete(num_candidates * np.ones((slate_size,)))
agent = random_agent.RandomAgent(action_space, random_seed=0)

# This agent doesn't use the previous user response, so a constant reward of 1
# is passed and the same reset observation is reused for both calls.
observation, documents = ievsim.reset()
for agent_call in range(2):
  slate = agent.step(1, dict(user=observation, doc=documents))
  print('Evolution: Recommended slate ', slate)

Evolution: Recommended slate  [2, 0]
Evolution: Recommended slate  [0, 2]


The **Long term satisfaction** (Choc/Kale) environment.

In [15]:
# Configuration for the long-term-satisfaction environment: slates of 3 drawn
# from 5 candidates.
env_config = dict(
    slate_size=3,
    seed=0,
    num_candidates=5,
    resample_documents=True,
)

lts_gym_env = long_term_satisfaction.create_environment(env_config)

In [16]:
# Reset the environment and show the initial corpus and user observation.
observation_0 = lts_gym_env.reset()
print('Observation 0')
print('Available documents')
doc_strings = [''.join(['doc_id ', doc_key, ' kaleness ', str(kaleness)])
               for doc_key, kaleness in observation_0['doc'].items()]
print('\n'.join(doc_strings))
print('Noisy user state observation')
print(observation_0['user'])

# Agent recommends the first three documents.
recommendation_slate_0 = [0, 1, 2]
observation_1, reward, done, _ = lts_gym_env.step(recommendation_slate_0)

# Show the corpus, the per-document responses and the user observation that
# came back from the step.
print('Observation 1')
print('Available documents')
doc_strings = [''.join(['doc_id ', doc_key, ' kaleness ', str(kaleness)])
               for doc_key, kaleness in observation_1['doc'].items()]
print('\n'.join(doc_strings))
rsp_strings = list(map(str, observation_1['response']))
print('User responses to documents in the slate')
print('\n'.join(rsp_strings))
print('Noisy user state observation')
print(observation_1['user'])

Observation 0
Available documents
doc_id 5 kaleness [0.64589411]
doc_id 6 kaleness [0.43758721]
doc_id 7 kaleness [0.891773]
doc_id 8 kaleness [0.96366276]
doc_id 9 kaleness [0.38344152]
Noisy user state observation
[]
Observation 1
Available documents
doc_id 10 kaleness [0.79172504]
doc_id 11 kaleness [0.52889492]
doc_id 12 kaleness [0.56804456]
doc_id 13 kaleness [0.92559664]
doc_id 14 kaleness [0.07103606]
User responses to documents in the slate
{'click': 1, 'engagement': 6.470111094743702}
{'click': 0, 'engagement': 0.0}
{'click': 0, 'engagement': 0.0}
Noisy user state observation
[]
