In [5]:
from typing import List, Optional
import numpy as np
from scipy.special import expit


class RankingSimulator:
    """Simulate ranked-list impressions with position-biased, first-click rewards.

    Every impression holds ``num_actions`` items whose feature vectors are drawn
    from a standard normal distribution. An item's base reward probability is
    ``expit(features . weights + sparsity_factor)``; that probability is then
    multiplied by a position bias ``exp(-position_bias_factor * position)``.
    """

    def __init__(
        self,
        num_actions: int,
        feature_dim: Optional[int] = None,
        weights: Optional[np.ndarray] = None,
        sparsity_factor: float = -2,
        position_bias_factor: float = 0.1,
        seed: int = 0,
    ):
        """Configure the simulator.

        Args:
            num_actions: Number of items (positions) per impression; must be >= 1.
            feature_dim: Length of each item's feature vector. May be omitted
                when ``weights`` is given, in which case it is inferred.
            weights: 1-D weight vector used to score items. When omitted, it is
                drawn from a standard normal using the seeded generator.
            sparsity_factor: Constant added to every logit; more negative values
                make rewards rarer.
            position_bias_factor: Exponential decay rate of the position bias.
            seed: Seed for the internal ``numpy`` random generator.

        Raises:
            ValueError: If ``num_actions`` is smaller than 1, if neither
                ``feature_dim`` nor ``weights`` is supplied, if ``weights`` is
                not one-dimensional, or if ``feature_dim`` disagrees with the
                length of ``weights``.
        """
        # 1 / num_actions is used for the propensities, so zero must be rejected.
        if num_actions < 1:
            raise ValueError(f"num_actions must be at least 1, got {num_actions}")

        self.rng = np.random.default_rng(seed)

        if weights is None:
            if feature_dim is None:
                raise ValueError(
                    "Either feature_dim or weights must be provided for initialization"
                )
            # Drawn before any sampling so a given seed yields the same weights.
            weights = self.rng.normal(size=feature_dim)
        else:
            # Accept any array-like (e.g. a plain list) but store an ndarray.
            weights = np.asarray(weights)
            if weights.ndim != 1:
                raise ValueError(
                    f"weights must be one-dimensional, got shape {weights.shape}"
                )
            if feature_dim is None:
                feature_dim = len(weights)
            elif feature_dim != len(weights):
                # Fail here instead of with a shape error inside sample().
                raise ValueError(
                    f"feature_dim ({feature_dim}) does not match "
                    f"len(weights) ({len(weights)})"
                )

        self.num_actions = num_actions
        self.weights = weights
        self.feature_dim = feature_dim
        self.sparsity_factor = sparsity_factor
        # Position k is scaled by exp(-position_bias_factor * k); position 0 by 1.
        self.position_bias = np.exp(-position_bias_factor * np.arange(num_actions))

    def sample(self, num_impressions: int) -> List[dict]:
        """Draw ``num_impressions`` simulated impressions.

        Args:
            num_impressions: Number of impressions to generate.

        Returns:
            A list with one dict per impression containing the keys
            ``impression_id``, ``item_features``, ``num_actions``, ``rewards``,
            ``base_reward_probs``, ``position_biased_probs`` and
            ``propensity_scores``. ``rewards`` is a 0/1 integer vector with at
            most one non-zero entry (the first sampled click).
        """
        # get action features and reward probs
        item_features = self.rng.normal(
            size=(num_impressions, self.num_actions, self.feature_dim)
        )
        logits = np.dot(item_features, self.weights) + self.sparsity_factor
        base_reward_probs = expit(logits)
        position_biased_probs = base_reward_probs * self.position_bias

        # Sample an independent click per position, then keep only the first
        # one: the running click count equals 1 exactly from the first click up
        # to (but excluding) the second, and the `&` drops non-click positions.
        clicks = (
            self.rng.random((num_impressions, self.num_actions)) < position_biased_probs
        )
        rewards = (clicks & (np.cumsum(clicks, axis=1) == 1)).astype(int)

        # Assign equal propensity scores
        propensity_scores = np.full(
            (num_impressions, self.num_actions), 1 / self.num_actions
        )

        return [
            {
                "impression_id": i,
                "item_features": item_features[i],
                "num_actions": self.num_actions,
                "rewards": rewards[i],
                "base_reward_probs": base_reward_probs[i],
                "position_biased_probs": position_biased_probs[i],
                "propensity_scores": propensity_scores[i],
            }
            for i in range(num_impressions)
        ]


# Build a seeded simulator (3 items per impression, 4 features per item) and
# draw 10,000 impressions from it.
simulator = RankingSimulator(
    num_actions=3,
    feature_dim=4,
    sparsity_factor=-2,
    seed=0,
)
simulated_data = simulator.sample(num_impressions=10_000)

In [6]:
# Display the record for the impression at index 7 as a spot check.
simulated_data[7]

{'impression_id': 7,
 'item_features': array([[-0.75460579,  1.68910745, -0.28738771,  1.57440828],
        [-0.43278585, -0.73548329,  0.24978537,  1.03145308],
        [ 0.16100958, -0.58552882, -1.34121971, -1.40152021]]),
 'num_actions': 3,
 'rewards': array([0, 0, 1]),
 'base_reward_probs': array([0.08811187, 0.15589613, 0.05174125]),
 'position_biased_probs': array([0.08811187, 0.14106065, 0.04236215]),
 'propensity_scores': array([0.33333333, 0.33333333, 0.33333333])}