Merge pull request #146 from fullflu/feature/b-ope
[Review] Feature: Balanced-OPE estimators
usaito committed Jan 12, 2022
2 parents 621720b + 5ef06b3 commit 76a11b7
Showing 17 changed files with 6,059 additions and 186 deletions.
1,256 changes: 1,256 additions & 0 deletions examples/quickstart/balanced-ope-deterministic-evaluation-policy.ipynb

Large diffs are not rendered by default.

1,258 changes: 1,258 additions & 0 deletions examples/quickstart/balanced-ope-stochastic-evaluation-policy.ipynb

Large diffs are not rendered by default.

7 changes: 7 additions & 0 deletions obp/ope/__init__.py
@@ -7,6 +7,7 @@
from obp.ope.estimators import SelfNormalizedDoublyRobust
from obp.ope.estimators import SelfNormalizedInverseProbabilityWeighting
from obp.ope.estimators import SwitchDoublyRobust
+ from obp.ope.estimators import BalancedInverseProbabilityWeighting
from obp.ope.estimators_continuous import (
KernelizedSelfNormalizedInverseProbabilityWeighting,
)
@@ -31,6 +32,8 @@
from obp.ope.meta_continuous import ContinuousOffPolicyEvaluation
from obp.ope.meta_slate import SlateOffPolicyEvaluation
from obp.ope.regression_model import RegressionModel
+ from obp.ope.classification_model import ImportanceWeightEstimator
+ from obp.ope.classification_model import PropensityScoreEstimator


__all__ = [
@@ -57,6 +60,9 @@
"SelfNormalizedSlateRewardInteractionIPS",
"SelfNormalizedSlateIndependentIPS",
"SelfNormalizedSlateStandardIPS",
"BalancedInverseProbabilityWeighting",
"ImportanceWeightEstimator",
"PropensityScoreEstimator",
"BaseContinuousOffPolicyEstimator",
"KernelizedInverseProbabilityWeighting",
"KernelizedSelfNormalizedInverseProbabilityWeighting",
@@ -76,6 +82,7 @@
"DoublyRobustWithShrinkage",
"SwitchDoublyRobust",
"SelfNormalizedDoublyRobust",
"BalancedInverseProbabilityWeighting",
]


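Reviewer note: the three new public names exported above are `BalancedInverseProbabilityWeighting` (the estimator) plus `ImportanceWeightEstimator` and `PropensityScoreEstimator` (the classification models). A minimal sketch of how they are expected to compose is below; the constructor and method arguments, and the `estimated_importance_weights` keyword on the meta class, are assumptions inferred from the rest of this diff, not confirmed signatures — the two quickstart notebooks added in this PR show the authoritative usage.

```python
# Hypothetical end-to-end sketch of the new Balanced-OPE pieces.
# Anything marked "assumed" is not confirmed by this diff.
import numpy as np
from sklearn.linear_model import LogisticRegression

from obp.dataset import SyntheticBanditDataset, logistic_reward_function
from obp.ope import (
    BalancedInverseProbabilityWeighting,
    ImportanceWeightEstimator,
    OffPolicyEvaluation,
)

# Synthetic logged bandit feedback from obp's existing dataset module.
dataset = SyntheticBanditDataset(
    n_actions=10,
    dim_context=5,
    reward_function=logistic_reward_function,
    random_state=12345,
)
bandit_feedback = dataset.obtain_batch_bandit_feedback(n_rounds=10_000)

# A uniform-random evaluation policy, just to have an action distribution to evaluate.
action_dist = np.full(
    (bandit_feedback["n_rounds"], dataset.n_actions, 1), 1.0 / dataset.n_actions
)

# 1) Estimate importance weights w(x, a) by classification (assumed argument/method names).
weight_estimator = ImportanceWeightEstimator(
    n_actions=dataset.n_actions,
    base_model=LogisticRegression(),
)
estimated_importance_weights = weight_estimator.fit_predict(
    context=bandit_feedback["context"],
    action=bandit_feedback["action"],
    action_dist=action_dist,
)

# 2) Feed the weights to the new Balanced IPW estimator through the OPE meta class
#    (the `estimated_importance_weights` keyword is assumed from the meta.py changes).
ope = OffPolicyEvaluation(
    bandit_feedback=bandit_feedback,
    ope_estimators=[BalancedInverseProbabilityWeighting()],
)
print(
    ope.estimate_policy_values(
        action_dist=action_dist,
        estimated_importance_weights=estimated_importance_weights,
    )
)
```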
640 changes: 640 additions & 0 deletions obp/ope/classification_model.py

Large diffs are not rendered by default.
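Reviewer note: the body of `classification_model.py` is not rendered here, but the core idea of classification-based importance weighting can be sketched independently of the library: train a probabilistic classifier to separate (context, action) pairs generated under the behavior policy from pairs generated under the evaluation policy, then convert its predicted probability into a density-ratio estimate `w_hat(x, a) ≈ p / (1 - p)`. The sketch below uses plain scikit-learn and is purely illustrative; it is not the implementation in this file.

```python
# Conceptual sketch of classification-based importance-weight estimation
# (illustrative only; not the code in obp/ope/classification_model.py).
import numpy as np
from sklearn.linear_model import LogisticRegression


def estimate_importance_weights(context, action_behavior, action_evaluation):
    """Return w_hat(x, a) for the logged (x, a) pairs via a probabilistic classifier."""
    # Features are (context, action) pairs; label 0 = behavior policy, 1 = evaluation policy.
    X_b = np.column_stack([context, action_behavior])
    X_e = np.column_stack([context, action_evaluation])
    X = np.vstack([X_b, X_e])
    y = np.r_[np.zeros(len(X_b)), np.ones(len(X_e))]

    clf = LogisticRegression().fit(X, y)

    # For the logged pairs, p = P(evaluation | x, a); the density ratio is p / (1 - p).
    p = clf.predict_proba(X_b)[:, 1]
    return p / np.clip(1.0 - p, 1e-6, None)
```

With equal numbers of behavior and evaluation samples, `p / (1 - p)` estimates the density ratio `pi_e(a|x) / pi_b(a|x)`; in obp this role is intended to be played by `ImportanceWeightEstimator` with a scikit-learn style `base_model`.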

447 changes: 382 additions & 65 deletions obp/ope/estimators.py

Large diffs are not rendered by default.

272 changes: 212 additions & 60 deletions obp/ope/estimators_tuning.py

Large diffs are not rendered by default.

193 changes: 178 additions & 15 deletions obp/ope/meta.py

Large diffs are not rendered by default.

10 changes: 5 additions & 5 deletions obp/policy/offline_continuous.py
@@ -317,7 +317,7 @@ def _create_train_data_for_opl(
context: array-like, shape (n_rounds, dim_context)
Context vectors in each round, i.e., :math:`x_t`.
- action: array-like or Tensor, shape (n_rounds,)
+ action: array-like, shape (n_rounds,)
Continuous action values sampled by a behavior policy in each round of the logged bandit feedback, i.e., :math:`a_t`.
reward: array-like, shape (n_rounds,)
@@ -401,7 +401,7 @@ def fit(
context: array-like, shape (n_rounds, dim_context)
Context vectors in each round, i.e., :math:`x_t`.
- action: array-like or Tensor, shape (n_rounds,)
+ action: array-like, shape (n_rounds,)
Continuous action values sampled by a behavior policy in each round of the logged bandit feedback, i.e., :math:`a_t`.
reward: array-like, shape (n_rounds,)
@@ -550,7 +550,7 @@ def _estimate_policy_value(
pscore: Tensor, shape (batch_size,)
Action choice probabilities of a behavior policy (generalized propensity scores), i.e., :math:`\\pi_b(a_t|x_t)`.
- action_by_current_policy: array-like or Tensor, shape (batch_size,)
+ action_by_current_policy: Tensor, shape (batch_size,)
Continuous action values given by the current policy.
Returns
@@ -863,7 +863,7 @@ def _create_train_data_for_q_func_estimation(
context: array-like, shape (n_rounds, dim_context)
Context vectors in each round, i.e., :math:`x_t`.
- action: array-like or Tensor, shape (n_rounds,)
+ action: array-like, shape (n_rounds,)
Continuous action values sampled by a behavior policy in each round of the logged bandit feedback, i.e., :math:`a_t`.
reward: array-like, shape (n_rounds,)
@@ -933,7 +933,7 @@ def fit(
context: array-like, shape (n_rounds, dim_context)
Context vectors in each round, i.e., :math:`x_t`.
- action: array-like or Tensor, shape (n_rounds,)
+ action: array-like, shape (n_rounds,)
Continuous action values sampled by a behavior policy in each round of the logged bandit feedback, i.e., :math:`a_t`.
reward: array-like, shape (n_rounds,)
11 changes: 11 additions & 0 deletions obp/utils.py
@@ -305,6 +305,7 @@ def check_ope_inputs(
reward: Optional[np.ndarray] = None,
pscore: Optional[np.ndarray] = None,
estimated_rewards_by_reg_model: Optional[np.ndarray] = None,
+ estimated_importance_weights: Optional[np.ndarray] = None,
) -> Optional[ValueError]:
"""Check inputs for ope.
@@ -329,6 +330,8 @@
estimated_rewards_by_reg_model: array-like, shape (n_rounds, n_actions, len_list), default=None
Expected rewards given context, action, and position estimated by regression model, i.e., :math:`\\hat{q}(x_t,a_t)`.
+ estimated_importance_weights: array-like, shape (n_rounds,), default=None
+     Importance weights estimated via supervised classification, i.e., :math:`\\hat{w}(x_t, a_t)`.
"""
# action_dist
check_array(array=action_dist, name="action_dist", expected_dim=3)
@@ -360,6 +363,14 @@
"Expected `estimated_rewards_by_reg_model.shape == action_dist.shape`, but found it False"
)

+ if estimated_importance_weights is not None:
+     if not (action.shape[0] == estimated_importance_weights.shape[0]):
+         raise ValueError(
+             "Expected `action.shape[0] == estimated_importance_weights.shape[0]`, but found it False"
+         )
+     if np.any(estimated_importance_weights < 0):
+         raise ValueError("estimated_importance_weights must be non-negative")

# action, reward
if action is not None or reward is not None:
check_array(array=action, name="action", expected_dim=1)
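Reviewer note: the new branch in `check_ope_inputs` requires `estimated_importance_weights` to be a non-negative array whose first dimension matches `action`. A hedged example call is below, assuming a single-slot setting (`len_list = 1`) so that `position` can be omitted; the keyword names are taken from the diff and the existing docstring.

```python
# Illustrative call to the extended input check (shapes follow the docstring above).
import numpy as np
from obp.utils import check_ope_inputs

n_rounds, n_actions, len_list = 1000, 10, 1
rng = np.random.default_rng(0)

action_dist = np.full((n_rounds, n_actions, len_list), 1.0 / n_actions)
action = rng.integers(n_actions, size=n_rounds)
reward = rng.binomial(1, 0.5, size=n_rounds)
estimated_importance_weights = rng.uniform(0.1, 10.0, size=n_rounds)  # must be non-negative

check_ope_inputs(
    action_dist=action_dist,
    action=action,
    reward=reward,
    pscore=np.full(n_rounds, 1.0 / n_actions),
    estimated_importance_weights=estimated_importance_weights,
)

# Passing a weight array containing negative values would now raise
# ValueError("estimated_importance_weights must be non-negative").
```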