In [1]:
#@title Generic imports
import numpy as np
from gym import spaces
import matplotlib.pyplot as plt
from scipy import stats

KeyboardInterrupt: 

In [None]:
#@title RecSim imports
from recsim import document
from recsim import user
from recsim.choice_model import MultinomialLogitChoiceModel
from recsim.simulator import environment
from recsim.simulator import recsim_gym

In [None]:
# Number of documents in the corpus. It is also the side length of the square
# matrix W and the doc_num passed to the document sampler and user model.
DOC_NUM = 5

In [None]:
# Seed NumPy's global RNG so the np.random draws in the following cells are repeatable.
np.random.seed(1)

In [None]:
# Draw a DOC_NUM x DOC_NUM matrix of uniform scores.
# NOTE(review): W is reassigned in later cells, so this value appears to be unused.
W = np.random.uniform(0, 1, size=(DOC_NUM, DOC_NUM))

In [None]:
def generate_W(doc_num=None):
    """Sample a random score matrix with a zero diagonal and rows summing to 1.

    Entries are drawn from NumPy's global RNG, so call ``np.random.seed``
    beforehand for repeatable results.

    Args:
        doc_num: Side length of the square matrix. When omitted, the
            notebook-level ``DOC_NUM`` is used (the original behavior).

    Returns:
        A ``(doc_num, doc_num)`` float array whose diagonal is zero and whose
        rows each sum to 1.

    Raises:
        ValueError: If ``doc_num`` is smaller than 2. A 1x1 matrix has only a
            diagonal entry, so its row sum is 0 and normalisation would
            produce NaN.
    """
    if doc_num is None:
        doc_num = DOC_NUM
    if doc_num < 2:
        raise ValueError("doc_num must be at least 2, got {}".format(doc_num))

    weights = np.random.uniform(0, 1, size=(doc_num, doc_num))
    # Remove self-transitions by zeroing the diagonal.
    weights = weights * (1.0 - np.eye(doc_num))
    # Normalise each row so it sums to 1.
    return weights / weights.sum(axis=1, keepdims=True)

In [None]:
# Replace W with the output of generate_W().
# NOTE(review): the next cell overwrites W with a hand-written matrix, so this
# value appears to be unused.
W = generate_W()

In [None]:
# Hand-written score matrix: W[i, j] is the score LTSUserState.score_document
# returns for document j while the user's current document is i.
# Every row sums to 1.
W = np.array([[0., .8, .01, .09, .1],
              [.1, .6, 0., 0., .3],
              [0., .7, 0.1, 0.1, 0.1],
              [0., .5, .4, 0., .1],  # was `4.`, which made this the only row not summing to 1
              [0., 1., 0., 0., 0.]])

In [None]:
W

### Модель документа

Модель состоит только из номера

In [None]:
class LTSDocument(document.AbstractDocument):
    """Document that carries nothing but its integer ID.

    Attributes:
        doc_num: Class-level corpus size used to size the observation space.
            Defaults to the notebook-level ``DOC_NUM``; ``LTSDocumentSampler``
            overwrites it with its own ``doc_num`` when constructed.
    """

    # Class-level default so observation_space() works even when no sampler
    # has assigned LTSDocument.doc_num yet (previously an AttributeError).
    doc_num = DOC_NUM

    def __init__(self, doc_id):
        """Create a document.

        Args:
            doc_id: Integer representing the unique ID of this document.
        """
        super(LTSDocument, self).__init__(doc_id)

    def create_observation(self):
        """Return the document ID wrapped in a one-element NumPy array."""
        return np.array([self._doc_id])

    @staticmethod
    def observation_space():
        """Return a ``Discrete`` space with ``LTSDocument.doc_num`` values.

        NOTE(review): create_observation() returns a one-element array while
        this space is Discrete -- confirm the two are meant to agree.
        """
        return spaces.Discrete(LTSDocument.doc_num)

    def __str__(self):
        return "Document #{}".format(self._doc_id)

In [None]:
class LTSDocumentSampler(document.AbstractDocumentSampler):
    """Hands out documents whose IDs cycle through 0 .. doc_num - 1.

    For a positive ``doc_num`` the IDs repeat from 0 once ``doc_num - 1`` has
    been issued.
    """

    def __init__(self, doc_num=10, doc_ctor=LTSDocument, **kwargs):
        """Set up the sampler.

        Args:
            doc_num: Number of distinct document IDs to cycle through.
            doc_ctor: Document class to instantiate; its ``doc_num`` class
                attribute is set to ``doc_num``.
            **kwargs: Forwarded to ``AbstractDocumentSampler.__init__``.
        """
        # Publish the corpus size on the document class before the base
        # initialiser runs; LTSDocument.observation_space() reads it.
        doc_ctor.doc_num = doc_num
        super(LTSDocumentSampler, self).__init__(doc_ctor, **kwargs)
        self._doc_count = 0
        self.doc_num = doc_num

    def sample_document(self):
        """Return a document with the next ID and advance the counter."""
        issued_id = self._doc_count

        # Advance with wrap-around; fall back to 0 if the counter is ever
        # found outside the valid range.
        if issued_id < self.doc_num:
            self._doc_count = (issued_id + 1) % self.doc_num
        else:
            self._doc_count = 0

        return self._doc_ctor(doc_id=issued_id)

With this we can now simulate documents.

In [None]:
# Sample 2 * DOC_NUM documents and print each one's observation space and
# observation.
# Note: `sampler`, `i` and `d` are notebook globals; `sampler` is rebound in a
# later cell.
sampler = LTSDocumentSampler(DOC_NUM)
for i in range(DOC_NUM * 2): 
    d = sampler.sample_document()
    print("Documents have observation space:", d.observation_space(), "\n"
          "An example realization is: ", d.create_observation())

## User state and user sampler 
Similarly to documents, we begin by implementing a user state class, i.e. a class that acts as a container for all these parameters. Similarly to AbstractDocument, AbstractUserState requires us to implement an observation_space() and create_observation(). These are used to feed partial (or complete) information about the user's state to the agent at every iteration. 

We also maintain a time budget, which will cap the session length. In this scenario, the session length will be fixed to some constant, so it's not worth being explicit with our time budget modeling, but one can definitely consider this as part of the state and do more interesting things with it. 

Finally, we will implement a score_document method that maps a document to a non-negative real number. The significance of this will become clear shortly. 

In [None]:
class LTSUserState(user.AbstractUserState):
    """User state: the index of the user's current document and a time budget.

    The class attribute ``doc_num`` is not defined here; it is assigned from
    outside (by ``LTSStaticUserSampler`` and ``user_init``) and must be set
    before ``observation_space()`` is called.
    """

    def __init__(self, current, time_budget=1):
        """Store the state variables.

        Args:
            current: Index of the user's current document; used as the row
                index into ``W`` when scoring.
            time_budget: Remaining budget for the session.
        """
        self.time_budget = time_budget
        self.current = current

    def create_observation(self):
        """Return the current document index as a one-element array."""
        observed = [self.current]
        return np.array(observed)

    @staticmethod
    def observation_space():
        """Return a ``Discrete`` space with ``LTSUserState.doc_num`` values."""
        n_docs = LTSUserState.doc_num
        return spaces.Discrete(n_docs)

    def score_document(self, doc_obs):
        """Score a document for use in the choice model.

        Args:
            doc_obs: Document observation; its first element is used as the
                column index into the module-level matrix ``W``.

        Returns:
            ``W[self.current, doc_obs[0]]``.
        """
        row, col = self.current, doc_obs[0]
        return W[row, col]

Also similarly to our document model, we need a starting state sampler that sets the starting user state for every session. For this tutorial, we will just sample the starting $\text{nke}_0$ and keep all the static parameters the same, meaning that we essentially deal with the same user at different levels of satisfaction. One can, of course, easily extend this to also generate users with different parameters by randomizing the values. 

Observe that if $\eta = 0$, $\text{nke}$ would be bounded in the interval $\left[-\frac{1}{1-\beta}, \ldots, \frac{1}{1-\beta} \right]$ at all times, so as starting distribution we just sample uniformly from that range. Sampling code has to be implemented in sample_user(), as required by the base class.

In [None]:
class LTSStaticUserSampler(user.AbstractUserSampler):
    """Samples users whose starting document is drawn uniformly at random."""

    # Keyword arguments passed to the user constructor; filled in by __init__.
    _state_parameters = None

    def __init__(self, user_ctor=LTSUserState, doc_num=10, current=0, **kwargs):
        """Set up the sampler.

        Args:
            user_ctor: User state class to instantiate; its ``doc_num`` class
                attribute is set to ``doc_num``.
            doc_num: Number of documents; starting documents are drawn from
                ``0 .. doc_num - 1``. The previous default of -10 made
                ``sample_user`` raise, so the default is now 10, matching
                ``LTSDocumentSampler``.
            current: Initial value stored for the ``current`` state parameter.
                ``sample_user`` replaces it on every call.
            **kwargs: Forwarded to ``AbstractUserSampler.__init__`` (for
                example ``seed``).
        """
        user_ctor.doc_num = doc_num
        self.doc_num = doc_num
        self._state_parameters = {'current': current}
        super(LTSStaticUserSampler, self).__init__(user_ctor, **kwargs)

    def sample_user(self):
        """Return a new user state with a uniformly sampled current document."""
        # Use the sampler's own seeded RNG (created by the base class) rather
        # than NumPy's global RNG, so the `seed` argument takes effect.
        current = self._rng.randint(self.doc_num)
        self._state_parameters['current'] = current
        return self._user_ctor(**self._state_parameters)


Let's try this out!

In [None]:
# Smoke test: draw 1000 users from a sampler configured for 10 documents.
# Constructing the sampler also sets LTSUserState.doc_num to 10.
sampler = LTSStaticUserSampler(doc_num=10)
# NOTE(review): starting_nke is never filled or read in this cell -- it looks
# like a leftover and can probably be removed.
starting_nke = []
for i in range(1000):
    sampled_user = sampler.sample_user()

## Response model

The next thing we want to check off our list is the user response class. RecSim will generate one response for every recommended item in the slate. The contents of the response are what the agent will see as document-specific feedback from the recommendation (the non-document specific feedback being generated in LTSUserState.create_observation).  

In [None]:
class LTSResponse(user.AbstractResponse):
    """Per-document feedback that records only whether the item was clicked."""

    def __init__(self, clicked=False):
        """Create a response.

        Args:
            clicked: Whether the user clicked the document.
        """
        self.clicked = clicked

    def create_observation(self):
        """Return ``{'click': 0 or 1}`` for this response."""
        click_flag = int(self.clicked)
        return {'click': click_flag}

    @classmethod
    def response_space(cls):
        """Return the gym space of one response: a Dict with a binary 'click'."""
        click_space = spaces.Discrete(2)
        return spaces.Dict({'click': click_space})

## User model

Now that we have a way to generate users for our sessions, we need to specify the actual user behavior. A RecSim user model (deriving from recsim.user.AbstractUserModel) is responsible for 
* maintaining user state, 
* evolving user state as a result of recommendations,
* generating a response to a slate of recommendations.

To this end, our user model is required by the base class to implement update_state() and simulate_response(), as well as is_terminal, which indicates when the end of the session occurs. This is facilitated by decreasing *self.time_budget* on every step. To make presentation clearer, we will define each function separately, and then assemble them into a class.



Our init is simple---we just pass the response_model constructor, user sampler and slate size down to the AbstractUserModel base class. Exploring other environments, the reader might notice that user model `__init__` functions do offer a lot of flexibility for configuring the simulation. For now, however, we stick to the basics and hardcode things.

In [None]:
def user_init(self,
              slate_size,
              doc_num, choice_features,
              seed=0):
    """Initialiser for ``LTSUserModel`` (attached to the class as ``__init__``).

    Args:
        slate_size: Number of documents in a recommendation slate; passed to
            the base class.
        doc_num: Number of documents. Stored on ``LTSUserState`` and used by
            the user sampler.
        choice_features: Mapping passed to ``UserChoiceModel``.
        seed: Seed forwarded to the user sampler.
    """
    LTSUserState.doc_num = doc_num

    # Build the sampler with the doc_num argument. The previous code passed
    # the global DOC_NUM, which silently overrode the caller's value.
    user_sampler = LTSStaticUserSampler(LTSUserState, seed=seed, doc_num=doc_num)

    # Explicit two-argument super(): this function is defined outside the
    # class body, so the zero-argument form is not available.
    super(LTSUserModel, self).__init__(LTSResponse, user_sampler, slate_size)
    self.choice_model = UserChoiceModel(choice_features)

The simulate_response() method takes in a slate (list) of recommended (i.e., produced by the agent) LTSDocuments and must output a slate of user responses. The *k*-th response in the slate of responses corresponds to the *k*-th document in the recommendation slate. In this case, we pick one document to click on based on our choice model, and produce an engagement value. We will let the responses to the unclicked documents be vacuous, however, one might use them in more subtle ways (e.g., recording whether the user inspected that document, etc.).  

In [None]:
from recsim.choice_model import NormalizableChoiceModel, softmax

In [None]:
class UserChoiceModel(NormalizableChoiceModel):  # pytype: disable=ignored-metaclass
    """Choice model that softmaxes document scores plus a "no click" logit.

    The document scores come from the inherited ``_score_documents_helper``.
    """

    def __init__(self, choice_features):
        """Set up the model.

        Args:
            choice_features: Mapping; its optional ``'no_click_mass'`` entry is
                the logit of the no-click option and defaults to minus
                infinity.
        """
        super(UserChoiceModel, self).__init__()
        no_click_default = -float('Inf')
        self._no_click_mass = choice_features.get('no_click_mass', no_click_default)

    def score_documents(self, user_state, doc_obs):
        """Compute and store scores for a slate of document observations.

        Stores the per-document scores in ``self._scores`` and the score of
        the no-click option in ``self._score_no_click``. Returns nothing.
        """
        doc_logits = self._score_documents_helper(user_state, doc_obs)
        # The no-click logit goes last, so it is the final softmax entry.
        normalized = softmax(np.append(doc_logits, self._no_click_mass))

        self._scores = normalized[:-1]
        self._score_no_click = normalized[-1]

In [None]:
def simulate_response(self, slate_documents):
    """Simulate the user's response to a slate of documents.

    Args:
        slate_documents: Recommended documents, in slate order.

    Returns:
        A list with one response per slate document, in the same order. At
        most one response (the item picked by the choice model) is marked as
        clicked via ``self._generate_response``.
    """
    # Start with one empty response per document.
    responses = [self._response_model_ctor() for _ in slate_documents]

    # Let the choice model score the slate and pick an item.
    doc_observations = [doc.create_observation() for doc in slate_documents]
    self.choice_model.score_documents(self._user_state, doc_observations)
    selected_index = self.choice_model.choose_item()

    # Compare against None explicitly: index 0 is a valid click on the first
    # document, and a truthiness test would wrongly treat it as "no click".
    if selected_index is None:
        return responses

    # Populate the clicked item.
    self._generate_response(slate_documents[selected_index],
                            responses[selected_index])
    return responses

def generate_response(self, doc, response):
    """Mark ``response`` as clicked.

    Args:
        doc: The clicked document. Accepted but not used.
        response: Response object to update in place.
    """
    response.clicked = True


In [None]:
def update_state(self, slate_documents, responses):
    """Reduce the user's time budget after a slate has been shown.

    Responses are walked in slate order. Each unclicked response subtracts a
    Bernoulli(0.2) draw from the time budget; the first clicked response
    subtracts a Bernoulli(0.1) draw and ends the update.

    NOTE(review): the user's ``current`` document is not changed here --
    confirm that this is intended.

    Args:
        slate_documents: Documents in the slate.
        responses: Responses aligned with ``slate_documents``.
    """
    state = self._user_state
    for _, response in zip(slate_documents, responses):
        was_clicked = response.clicked
        drop_probability = 0.1 if was_clicked else 0.2
        state.time_budget -= np.random.binomial(1, drop_probability)
        if was_clicked:
            # Responses after the click are ignored.
            break

In [None]:
def is_terminal(self):
    """Return True once the user's time budget is zero or negative."""
    remaining_budget = self._user_state.time_budget
    return remaining_budget <= 0


In [None]:
# Assemble the user model class from the functions defined in the cells above.
# type(name, bases, namespace) builds a subclass of user.AbstractUserModel whose
# methods are those functions; note that generate_response is attached under
# the private name `_generate_response`, which simulate_response calls.
LTSUserModel = type("LTSUserModel", (user.AbstractUserModel,),
                    {"__init__": user_init,
                     "is_terminal": is_terminal,
                     "update_state": update_state,
                     "simulate_response": simulate_response,
                     "_generate_response": generate_response})

Finally, we assemble all components into an Environment.

In [None]:
 # Build the simulation environment from the user model and document sampler:
 # slates of 3 documents chosen from 4 candidates.
 # `choice_features` sets the no-click logit of UserChoiceModel to -1.
 # NOTE(review): presumably resample_documents=True redraws the candidate set
 # on every step -- confirm against the RecSim Environment documentation.
 slate_size = 3
 num_candidates = 4
 ltsenv = environment.Environment(
            LTSUserModel(slate_size, choice_features={'no_click_mass': -1}, doc_num=DOC_NUM),
            LTSDocumentSampler(doc_num=DOC_NUM),
            num_candidates,
            slate_size,
            resample_documents=True)


In [None]:
def clicked_engagement_reward(responses):
    """Return the number of clicked responses in a slate, as a float.

    Args:
        responses: Iterable of response objects with a ``clicked`` attribute.

    Returns:
        The click count as a float (0.0 when nothing was clicked).
    """
    click_count = sum(1 for response in responses if response.clicked)
    return 0.0 + click_count

Now, we simply use the OpenAI gym wrapper, which essentially provides a familiar step-based API.

In [None]:
# Wrap the RecSim environment in the gym interface, using
# clicked_engagement_reward to turn a slate of responses into a scalar reward.
lts_gym_env = recsim_gym.RecSimGymEnv(ltsenv, clicked_engagement_reward)

In [None]:
# Reseed NumPy's global RNG before the rollout below; update_state draws from
# np.random directly.
np.random.seed(100004)

In [None]:
# Start a new episode and show the initial observation.
observation_0 = lts_gym_env.reset()
print('Observation 0')
print('Available documents')
# Each value is the document's observation. The old "kaleness" label was left
# over from another tutorial and did not describe these documents.
doc_strings = ['doc_id ' + key + " observation " + str(value) for key, value
               in observation_0['doc'].items()]
print('\n'.join(doc_strings))
print('User state observation')
print(observation_0['user'])

for i in range(10):
    # Agent recommends the first three documents.
    recommendation_slate_0 = [0, 1, 2]
    observation_1, reward, done, _ = lts_gym_env.step(recommendation_slate_0)
    # Number the printout by step; it used to say "Observation 1" every time.
    print('Observation {}'.format(i + 1))
    print('Available documents')
    doc_strings = ['doc_id ' + key + " observation " + str(value) for key, value
                   in observation_1['doc'].items()]
    print('\n'.join(doc_strings))
    rsp_strings = [str(response) for response in observation_1['response']]
    print('User responses to documents in the slate')
    print('\n'.join(rsp_strings))
    print('User state observation')
    print(observation_1['user'])
    if done:
        # The session is over; stepping further would act on a finished
        # episode, so stop instead of running all 10 iterations.
        print('Episode finished after {} steps'.format(i + 1))
        break

In [None]:
observation_0['doc'].items()

In [None]:
from recsim.agents.full_slate_q_agent import FullSlateQAgent
from recsim.agents.random_agent import RandomAgent


from recsim.simulator import runner_lib

def create_agent(sess, environment, eval_mode, summary_writer=None):
    """Agent factory in the form expected by the RecSim runners.

    Args:
        sess: TensorFlow session the agent runs in.
        environment: Environment providing ``observation_space`` and
            ``action_space``.
        eval_mode: Whether the agent is created for evaluation rather than
            training.
        summary_writer: Optional summary writer for logging.

    Returns:
        A ``FullSlateQAgent`` configured for ``environment``.
    """
    # Forward eval_mode and summary_writer. They used to be dropped, so the
    # evaluation runner received an agent in training mode with no summaries.
    return FullSlateQAgent(
        sess,
        environment.observation_space,
        environment.action_space,
        eval_mode=eval_mode,
        summary_writer=summary_writer)
    # Alternative baseline: return RandomAgent(environment.action_space)

In [None]:
# Rebuild the environment for agent training: slates of 2 documents chosen
# from 5 candidates, with resample_documents turned off.
# This rebinds the notebook globals slate_size, num_candidates and ltsenv.
slate_size = 2
num_candidates = 5
ltsenv = environment.Environment(
        LTSUserModel(slate_size, choice_features={'no_click_mass': -1}, doc_num=DOC_NUM),
        LTSDocumentSampler(doc_num=DOC_NUM),
        num_candidates,
        slate_size,
        resample_documents=False)

# Gym wrapper passed to the train and eval runners.
env = recsim_gym.RecSimGymEnv(ltsenv, clicked_engagement_reward)

# Output directory and episode log file name used by the runners.
tmp_base_dir = 'tmp'
episode_log_file_train = 'episodes_train'

In [None]:
import numpy as np
from recsim.agents import full_slate_q_agent
from recsim.simulator import runner_lib

# Seed NumPy's global RNG and collect environment settings.
# NOTE(review): slate_size is rebound to 1 here, after `env` was already built
# with slate_size = 2 in the previous cell, and env_config is not read anywhere
# in the visible notebook -- confirm whether this cell is still needed.
seed = 0
slate_size = 1
np.random.seed(seed)
env_config = {
  'num_candidates': 5,
  'slate_size': slate_size,
  'resample_documents': True,
  'seed': seed,
}

In [None]:
! pwd

In [None]:
! rm -rf tmp

# Train the agent produced by create_agent on `env`, writing output under
# tmp_base_dir and logging episodes to episode_log_file_train.
# NOTE(review): presumably max_training_steps caps the steps per iteration and
# num_iterations sets the number of iterations -- confirm against runner_lib.
runner = runner_lib.TrainRunner(
  base_dir=tmp_base_dir,
  create_agent_fn=create_agent,
  env=env,
  episode_log_file=episode_log_file_train,
  max_training_steps=100,
  num_iterations=1000)
runner.run_experiment()

# Evaluate using the same base directory and environment. `runner` is rebound
# to the evaluation runner here.
runner = runner_lib.EvalRunner(
  base_dir=tmp_base_dir,
  create_agent_fn=create_agent,
  env=env,
  max_eval_episodes=5,
  test_mode=True)

runner.run_experiment()

In [None]:
%load_ext tensorboard

In [None]:
%tensorboard --logdir tmp

In [2]:
0 is None

False