st-tech · bigdataguineapig · Apr 28, 2023
diff --git a/obp/dataset/real.py b/obp/dataset/real.py
@@ -228,7 +228,7 @@ def obtain_batch_bandit_feedback(
             A dictionary containing batch logged bandit data collected by the behavior policy.
             The keys of the dictionary are as follows.
             - n_rounds: number of rounds, data size of the logged bandit data
-            - n_actions: number of actions (:math:`|\mathcal{A}|`)
+            - n_actions: number of actions (:math:`|\\mathcal{A}|`)
             - action: action variables sampled by the behavior policy
             - position: positions where actions are recommended, there are three positions in the ZOZOTOWN rec interface
             - reward: binary reward variables, click indicators
@@ -317,7 +317,7 @@ def sample_bootstrap_bandit_feedback(
             A dictionary containing logged bandit data collected by the behavior policy.
             The keys of the dictionary are as follows.
             - n_rounds: number of rounds, data size of the logged bandit data
-            - n_actions: number of actions (:math:`|\mathcal{A}|`)
+            - n_actions: number of actions (:math:`|\\mathcal{A}|`)
             - action: action variables sampled by the behavior policy
             - position: positions where actions are recommended, there are three positions in the ZOZOTOWN rec interface
             - reward: binary reward variables, click indicators