In [8]:
!pip install -q tf-agents==0.19.0


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m25.0[0m[39;49m -> [0m[32;49m25.0.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [19]:
import functools
import os

import tensorflow as tf
from tf_agents.bandits.agents import dropout_thompson_sampling_agent as dropout_ts_agent
from tf_agents.bandits.agents import lin_ucb_agent
from tf_agents.bandits.agents import linear_thompson_sampling_agent as lin_ts_agent
from tf_agents.bandits.agents import neural_epsilon_greedy_agent as eps_greedy_agent
from tf_agents.bandits.agents.examples.v2 import trainer
from tf_agents.bandits.environments import environment_utilities
from tf_agents.bandits.environments import movielens_per_arm_py_environment
from tf_agents.bandits.environments import movielens_py_environment
from tf_agents.bandits.metrics import tf_metrics as tf_bandit_metrics
from tf_agents.bandits.networks import global_and_arm_feature_network
from tf_agents.environments import tf_py_environment
from tf_agents.networks import q_network

In [20]:
# --- Experiment configuration -------------------------------------------------

# Environment and training-loop sizing.
BATCH_SIZE = 8           # third positional argument of the MovieLens environments
TRAINING_LOOPS = 20_000  # forwarded to trainer.train(training_loops=...)
STEPS_PER_LOOP = 2       # forwarded to trainer.train(steps_per_loop=...)

# MovieLens environment shape.
RANK_K = 20              # second positional argument of the MovieLens environments
NUM_ACTIONS = 20         # passed as num_actions (per-arm) / num_movies (otherwise)

# Agent hyperparameters.
AGENT_ALPHA = 10.0       # `alpha` of LinearUCBAgent
EPSILON = 0.05           # `epsilon` of NeuralEpsilonGreedyAgent
LAYERS = (50,) * 3       # hidden layer sizes, i.e. (50, 50, 50)
LR = 0.005               # learning rate of the Adam optimizers
# NOTE(review): DROPOUT_RATE is not referenced by run_bandit_training in this
# notebook (DropoutTS uses a step-dependent schedule instead) - confirm whether
# another cell still needs it.
DROPOUT_RATE = 0.2


In [22]:

def run_bandit_training(root_dir, data_path, agent_name="LinUCB", per_arm=False):
    """Train a bandit agent on a MovieLens environment with the TF-Agents trainer.

    The function builds the MovieLens environment, creates the requested agent,
    attaches regret and suboptimal-arms metrics and then calls ``trainer.train``
    for ``TRAINING_LOOPS`` loops of ``STEPS_PER_LOOP`` steps.

    Args:
        root_dir: Directory forwarded to ``trainer.train`` as ``root_dir``.
        data_path: Location of the MovieLens data file; it is read with a tab
            delimiter.
        agent_name: Which agent to train. One of ``'LinUCB'``, ``'LinTS'``,
            ``'epsGreedy'`` or ``'DropoutTS'``.
        per_arm: If true, the per-arm MovieLens environment is used. The flag
            is also forwarded to the linear agents as
            ``accepts_per_arm_features`` and selects the global-and-arm
            dot-product reward network for ``'epsGreedy'``.

    Raises:
        ValueError: If ``data_path`` is empty or ``agent_name`` is not one of
            the supported names.
    """
    # Validate the arguments first, so a bad call fails immediately instead of
    # after the data file has been loaded and the environment constructed.
    if not data_path:
        raise ValueError('Please specify the location of the MovieLens data file.')
    if agent_name not in ('LinUCB', 'LinTS', 'epsGreedy', 'DropoutTS'):
        raise ValueError(f"Unknown agent: {agent_name}")

    # Switch TensorFlow to v2 behavior before any environment/agent is created.
    tf.compat.v1.enable_v2_behavior()

    environment = _make_movielens_environment(data_path, per_arm)
    agent = _make_bandit_agent(agent_name, environment, per_arm)

    # The metrics compare the agent's choices against the optimal reward/action
    # computed from the same environment instance.
    optimal_reward_fn = functools.partial(
        environment_utilities.compute_optimal_reward_with_movielens_environment,
        environment=environment)
    optimal_action_fn = functools.partial(
        environment_utilities.compute_optimal_action_with_movielens_environment,
        environment=environment)
    regret_metric = tf_bandit_metrics.RegretMetric(optimal_reward_fn)
    suboptimal_arms_metric = tf_bandit_metrics.SuboptimalArmsMetric(optimal_action_fn)

    trainer.train(
        root_dir=root_dir,
        agent=agent,
        environment=environment,
        training_loops=TRAINING_LOOPS,
        steps_per_loop=STEPS_PER_LOOP,
        additional_metrics=[regret_metric, suboptimal_arms_metric],
    )


def _make_movielens_environment(data_path, per_arm):
    """Build the per-arm or plain MovieLens environment, wrapped as a TF environment."""
    if per_arm:
        py_env = movielens_per_arm_py_environment.MovieLensPerArmPyEnvironment(
            data_path, RANK_K, BATCH_SIZE, num_actions=NUM_ACTIONS, csv_delimiter='\t')
    else:
        py_env = movielens_py_environment.MovieLensPyEnvironment(
            data_path, RANK_K, BATCH_SIZE, num_movies=NUM_ACTIONS, csv_delimiter='\t')
    return tf_py_environment.TFPyEnvironment(py_env)


def _make_bandit_agent(agent_name, environment, per_arm):
    """Create the agent selected by ``agent_name`` for the given TF environment."""
    time_step_spec = environment.time_step_spec()
    action_spec = environment.action_spec()

    if agent_name == 'LinUCB':
        return lin_ucb_agent.LinearUCBAgent(
            time_step_spec=time_step_spec,
            action_spec=action_spec,
            tikhonov_weight=0.001,
            alpha=AGENT_ALPHA,
            dtype=tf.float32,
            accepts_per_arm_features=per_arm,
        )

    if agent_name == 'LinTS':
        return lin_ts_agent.LinearThompsonSamplingAgent(
            time_step_spec=time_step_spec,
            action_spec=action_spec,
            dtype=tf.float32,
            accepts_per_arm_features=per_arm,
        )

    if agent_name == 'epsGreedy':
        # The reward network depends on whether observations carry per-arm features.
        if per_arm:
            network = global_and_arm_feature_network.create_feed_forward_dot_product_network(
                time_step_spec.observation,
                global_layers=LAYERS,
                arm_layers=LAYERS,
            )
        else:
            network = q_network.QNetwork(
                input_tensor_spec=time_step_spec.observation,
                action_spec=action_spec,
                fc_layer_params=LAYERS,
            )
        return eps_greedy_agent.NeuralEpsilonGreedyAgent(
            time_step_spec=time_step_spec,
            action_spec=action_spec,
            reward_network=network,
            optimizer=tf.compat.v1.train.AdamOptimizer(learning_rate=LR),
            epsilon=EPSILON,
            emit_policy_info='predicted_rewards_mean',
            info_fields_to_inherit_from_greedy=['predicted_rewards_mean'],
        )

    if agent_name == 'DropoutTS':
        train_step_counter = tf.compat.v1.train.get_or_create_global_step()

        def dropout_fn():
            # Dropout rate decays as 1 / (1.01 + global_step), floored at 0.0003.
            return tf.math.maximum(
                tf.math.reciprocal_no_nan(1.01 + tf.cast(train_step_counter, tf.float32)),
                0.0003,
            )

        return dropout_ts_agent.DropoutThompsonSamplingAgent(
            time_step_spec=time_step_spec,
            action_spec=action_spec,
            dropout_rate=dropout_fn,
            network_layers=LAYERS,
            optimizer=tf.compat.v1.train.AdamOptimizer(learning_rate=LR),
        )

    raise ValueError(f"Unknown agent: {agent_name}")

In [None]:
    # Kick off training; agent_name options: LinUCB, LinTS, epsGreedy, DropoutTS.
    run_bandit_training(
        root_dir='/tmp/movielens_bandit_output',
        data_path='u.data',
        agent_name='LinUCB',
        per_arm=False,
    )




W0407 01:30:19.293223 139877721212096 polymorphic_function.py:156] 5 out of the last 5 calls to <function TFStepMetric._update_state at 0x7f37025d32e0> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings could be due to (1) creating @tf.function repeatedly in a loop, (2) passing tensors with different shapes, (3) passing Python objects instead of tensors. For (1), please define your @tf.function outside of the loop. For (2), @tf.function has reduce_retracing=True option that can avoid unnecessary retracing. For (3), please refer to https://www.tensorflow.org/guide/function#controlling_retracing and https://www.tensorflow.org/api_docs/python/tf/function for  more details.




W0407 01:30:19.566129 139877721212096 polymorphic_function.py:156] 6 out of the last 6 calls to <function TFStepMetric._update_state at 0x7f3702318790> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings could be due to (1) creating @tf.function repeatedly in a loop, (2) passing tensors with different shapes, (3) passing Python objects instead of tensors. For (1), please define your @tf.function outside of the loop. For (2), @tf.function has reduce_retracing=True option that can avoid unnecessary retracing. For (3), please refer to https://www.tensorflow.org/guide/function#controlling_retracing and https://www.tensorflow.org/api_docs/python/tf/function for  more details.
W0407 01:30:21.213129 139877721212096 function_type.py:509] `0/step_type` is not a valid tf.function parameter name. Sanitizing to `arg_0_step_type`.
W0407 01:30:21.214294 139877721212096 function_type.py:509] `0/reward` is not a valid tf.function parameter name. Sanitizing to `ar

INFO:tensorflow:Assets written to: /tmp/movielens_bandit_output/policy_16/assets


I0407 01:30:22.794469 139877721212096 builder_impl.py:801] Assets written to: /tmp/movielens_bandit_output/policy_16/assets


INFO:tensorflow:Assets written to: /tmp/movielens_bandit_output/policy_1616/assets


I0407 01:31:40.672263 139877721212096 builder_impl.py:801] Assets written to: /tmp/movielens_bandit_output/policy_1616/assets


INFO:tensorflow:Assets written to: /tmp/movielens_bandit_output/policy_3216/assets


I0407 01:32:57.976151 139877721212096 builder_impl.py:801] Assets written to: /tmp/movielens_bandit_output/policy_3216/assets


INFO:tensorflow:Assets written to: /tmp/movielens_bandit_output/policy_4816/assets


I0407 01:34:16.026900 139877721212096 builder_impl.py:801] Assets written to: /tmp/movielens_bandit_output/policy_4816/assets


INFO:tensorflow:Assets written to: /tmp/movielens_bandit_output/policy_6416/assets


I0407 01:35:33.576591 139877721212096 builder_impl.py:801] Assets written to: /tmp/movielens_bandit_output/policy_6416/assets


INFO:tensorflow:Assets written to: /tmp/movielens_bandit_output/policy_8016/assets


I0407 01:36:51.783413 139877721212096 builder_impl.py:801] Assets written to: /tmp/movielens_bandit_output/policy_8016/assets


INFO:tensorflow:Assets written to: /tmp/movielens_bandit_output/policy_9616/assets


I0407 01:38:09.644777 139877721212096 builder_impl.py:801] Assets written to: /tmp/movielens_bandit_output/policy_9616/assets


INFO:tensorflow:Assets written to: /tmp/movielens_bandit_output/policy_11216/assets


I0407 01:39:27.305259 139877721212096 builder_impl.py:801] Assets written to: /tmp/movielens_bandit_output/policy_11216/assets


INFO:tensorflow:Assets written to: /tmp/movielens_bandit_output/policy_12816/assets


I0407 01:40:46.428100 139877721212096 builder_impl.py:801] Assets written to: /tmp/movielens_bandit_output/policy_12816/assets


INFO:tensorflow:Assets written to: /tmp/movielens_bandit_output/policy_14416/assets


I0407 01:42:04.589018 139877721212096 builder_impl.py:801] Assets written to: /tmp/movielens_bandit_output/policy_14416/assets


INFO:tensorflow:Assets written to: /tmp/movielens_bandit_output/policy_16016/assets


I0407 01:43:23.308049 139877721212096 builder_impl.py:801] Assets written to: /tmp/movielens_bandit_output/policy_16016/assets


INFO:tensorflow:Assets written to: /tmp/movielens_bandit_output/policy_17616/assets


I0407 01:44:40.585840 139877721212096 builder_impl.py:801] Assets written to: /tmp/movielens_bandit_output/policy_17616/assets


INFO:tensorflow:Assets written to: /tmp/movielens_bandit_output/policy_19216/assets


I0407 01:45:58.942997 139877721212096 builder_impl.py:801] Assets written to: /tmp/movielens_bandit_output/policy_19216/assets


INFO:tensorflow:Assets written to: /tmp/movielens_bandit_output/policy_20816/assets


I0407 01:47:18.430661 139877721212096 builder_impl.py:801] Assets written to: /tmp/movielens_bandit_output/policy_20816/assets


INFO:tensorflow:Assets written to: /tmp/movielens_bandit_output/policy_22416/assets


I0407 01:48:36.711165 139877721212096 builder_impl.py:801] Assets written to: /tmp/movielens_bandit_output/policy_22416/assets


INFO:tensorflow:Assets written to: /tmp/movielens_bandit_output/policy_24016/assets


I0407 01:49:52.930113 139877721212096 builder_impl.py:801] Assets written to: /tmp/movielens_bandit_output/policy_24016/assets


INFO:tensorflow:Assets written to: /tmp/movielens_bandit_output/policy_25616/assets


I0407 01:51:08.512994 139877721212096 builder_impl.py:801] Assets written to: /tmp/movielens_bandit_output/policy_25616/assets


INFO:tensorflow:Assets written to: /tmp/movielens_bandit_output/policy_27216/assets


I0407 01:52:24.443710 139877721212096 builder_impl.py:801] Assets written to: /tmp/movielens_bandit_output/policy_27216/assets


INFO:tensorflow:Assets written to: /tmp/movielens_bandit_output/policy_28816/assets


I0407 01:53:40.398205 139877721212096 builder_impl.py:801] Assets written to: /tmp/movielens_bandit_output/policy_28816/assets


INFO:tensorflow:Assets written to: /tmp/movielens_bandit_output/policy_30416/assets


I0407 01:54:55.339814 139877721212096 builder_impl.py:801] Assets written to: /tmp/movielens_bandit_output/policy_30416/assets


INFO:tensorflow:Assets written to: /tmp/movielens_bandit_output/policy_32016/assets


I0407 01:56:10.577513 139877721212096 builder_impl.py:801] Assets written to: /tmp/movielens_bandit_output/policy_32016/assets


INFO:tensorflow:Assets written to: /tmp/movielens_bandit_output/policy_33616/assets


I0407 01:57:24.871146 139877721212096 builder_impl.py:801] Assets written to: /tmp/movielens_bandit_output/policy_33616/assets


INFO:tensorflow:Assets written to: /tmp/movielens_bandit_output/policy_35216/assets


I0407 01:58:39.781240 139877721212096 builder_impl.py:801] Assets written to: /tmp/movielens_bandit_output/policy_35216/assets


INFO:tensorflow:Assets written to: /tmp/movielens_bandit_output/policy_36816/assets


I0407 01:59:55.098109 139877721212096 builder_impl.py:801] Assets written to: /tmp/movielens_bandit_output/policy_36816/assets


INFO:tensorflow:Assets written to: /tmp/movielens_bandit_output/policy_38416/assets


I0407 02:01:11.470948 139877721212096 builder_impl.py:801] Assets written to: /tmp/movielens_bandit_output/policy_38416/assets


INFO:tensorflow:Assets written to: /tmp/movielens_bandit_output/policy_40016/assets


I0407 02:02:27.283502 139877721212096 builder_impl.py:801] Assets written to: /tmp/movielens_bandit_output/policy_40016/assets


INFO:tensorflow:Assets written to: /tmp/movielens_bandit_output/policy_41616/assets


I0407 02:03:43.026297 139877721212096 builder_impl.py:801] Assets written to: /tmp/movielens_bandit_output/policy_41616/assets


INFO:tensorflow:Assets written to: /tmp/movielens_bandit_output/policy_43216/assets


I0407 02:04:57.948599 139877721212096 builder_impl.py:801] Assets written to: /tmp/movielens_bandit_output/policy_43216/assets


INFO:tensorflow:Assets written to: /tmp/movielens_bandit_output/policy_44816/assets


I0407 02:06:13.887927 139877721212096 builder_impl.py:801] Assets written to: /tmp/movielens_bandit_output/policy_44816/assets


INFO:tensorflow:Assets written to: /tmp/movielens_bandit_output/policy_46416/assets


I0407 02:07:29.749133 139877721212096 builder_impl.py:801] Assets written to: /tmp/movielens_bandit_output/policy_46416/assets


INFO:tensorflow:Assets written to: /tmp/movielens_bandit_output/policy_48016/assets


I0407 02:08:46.557408 139877721212096 builder_impl.py:801] Assets written to: /tmp/movielens_bandit_output/policy_48016/assets


INFO:tensorflow:Assets written to: /tmp/movielens_bandit_output/policy_49616/assets


I0407 02:10:01.195787 139877721212096 builder_impl.py:801] Assets written to: /tmp/movielens_bandit_output/policy_49616/assets


INFO:tensorflow:Assets written to: /tmp/movielens_bandit_output/policy_51216/assets


I0407 02:11:15.796187 139877721212096 builder_impl.py:801] Assets written to: /tmp/movielens_bandit_output/policy_51216/assets


INFO:tensorflow:Assets written to: /tmp/movielens_bandit_output/policy_52816/assets


I0407 02:12:30.583363 139877721212096 builder_impl.py:801] Assets written to: /tmp/movielens_bandit_output/policy_52816/assets


INFO:tensorflow:Assets written to: /tmp/movielens_bandit_output/policy_54416/assets


I0407 02:13:45.175004 139877721212096 builder_impl.py:801] Assets written to: /tmp/movielens_bandit_output/policy_54416/assets


INFO:tensorflow:Assets written to: /tmp/movielens_bandit_output/policy_56016/assets


I0407 02:14:59.991133 139877721212096 builder_impl.py:801] Assets written to: /tmp/movielens_bandit_output/policy_56016/assets


INFO:tensorflow:Assets written to: /tmp/movielens_bandit_output/policy_57616/assets


I0407 02:16:15.441553 139877721212096 builder_impl.py:801] Assets written to: /tmp/movielens_bandit_output/policy_57616/assets


INFO:tensorflow:Assets written to: /tmp/movielens_bandit_output/policy_59216/assets


I0407 02:17:30.721890 139877721212096 builder_impl.py:801] Assets written to: /tmp/movielens_bandit_output/policy_59216/assets


INFO:tensorflow:Assets written to: /tmp/movielens_bandit_output/policy_60816/assets


I0407 02:18:45.112826 139877721212096 builder_impl.py:801] Assets written to: /tmp/movielens_bandit_output/policy_60816/assets


INFO:tensorflow:Assets written to: /tmp/movielens_bandit_output/policy_62416/assets


I0407 02:20:01.357117 139877721212096 builder_impl.py:801] Assets written to: /tmp/movielens_bandit_output/policy_62416/assets


INFO:tensorflow:Assets written to: /tmp/movielens_bandit_output/policy_64016/assets


I0407 02:21:17.390554 139877721212096 builder_impl.py:801] Assets written to: /tmp/movielens_bandit_output/policy_64016/assets


INFO:tensorflow:Assets written to: /tmp/movielens_bandit_output/policy_65616/assets


I0407 02:22:32.703915 139877721212096 builder_impl.py:801] Assets written to: /tmp/movielens_bandit_output/policy_65616/assets


INFO:tensorflow:Assets written to: /tmp/movielens_bandit_output/policy_67216/assets


I0407 02:23:47.374554 139877721212096 builder_impl.py:801] Assets written to: /tmp/movielens_bandit_output/policy_67216/assets


INFO:tensorflow:Assets written to: /tmp/movielens_bandit_output/policy_68816/assets


I0407 02:25:02.559174 139877721212096 builder_impl.py:801] Assets written to: /tmp/movielens_bandit_output/policy_68816/assets


INFO:tensorflow:Assets written to: /tmp/movielens_bandit_output/policy_70416/assets


I0407 02:26:18.162041 139877721212096 builder_impl.py:801] Assets written to: /tmp/movielens_bandit_output/policy_70416/assets


INFO:tensorflow:Assets written to: /tmp/movielens_bandit_output/policy_72016/assets


I0407 02:27:32.846047 139877721212096 builder_impl.py:801] Assets written to: /tmp/movielens_bandit_output/policy_72016/assets


INFO:tensorflow:Assets written to: /tmp/movielens_bandit_output/policy_73616/assets


I0407 02:28:48.212586 139877721212096 builder_impl.py:801] Assets written to: /tmp/movielens_bandit_output/policy_73616/assets


INFO:tensorflow:Assets written to: /tmp/movielens_bandit_output/policy_75216/assets


I0407 02:30:03.973675 139877721212096 builder_impl.py:801] Assets written to: /tmp/movielens_bandit_output/policy_75216/assets


INFO:tensorflow:Assets written to: /tmp/movielens_bandit_output/policy_76816/assets


I0407 02:31:19.744331 139877721212096 builder_impl.py:801] Assets written to: /tmp/movielens_bandit_output/policy_76816/assets


INFO:tensorflow:Assets written to: /tmp/movielens_bandit_output/policy_78416/assets


I0407 02:32:34.168520 139877721212096 builder_impl.py:801] Assets written to: /tmp/movielens_bandit_output/policy_78416/assets


INFO:tensorflow:Assets written to: /tmp/movielens_bandit_output/policy_80016/assets


I0407 02:33:50.496989 139877721212096 builder_impl.py:801] Assets written to: /tmp/movielens_bandit_output/policy_80016/assets


INFO:tensorflow:Assets written to: /tmp/movielens_bandit_output/policy_81616/assets


I0407 02:35:06.685473 139877721212096 builder_impl.py:801] Assets written to: /tmp/movielens_bandit_output/policy_81616/assets


INFO:tensorflow:Assets written to: /tmp/movielens_bandit_output/policy_83216/assets


I0407 02:36:23.202152 139877721212096 builder_impl.py:801] Assets written to: /tmp/movielens_bandit_output/policy_83216/assets


INFO:tensorflow:Assets written to: /tmp/movielens_bandit_output/policy_84816/assets


I0407 02:37:37.082172 139877721212096 builder_impl.py:801] Assets written to: /tmp/movielens_bandit_output/policy_84816/assets


INFO:tensorflow:Assets written to: /tmp/movielens_bandit_output/policy_86416/assets


I0407 02:38:52.233558 139877721212096 builder_impl.py:801] Assets written to: /tmp/movielens_bandit_output/policy_86416/assets


INFO:tensorflow:Assets written to: /tmp/movielens_bandit_output/policy_88016/assets


I0407 02:40:07.211996 139877721212096 builder_impl.py:801] Assets written to: /tmp/movielens_bandit_output/policy_88016/assets


INFO:tensorflow:Assets written to: /tmp/movielens_bandit_output/policy_89616/assets


I0407 02:41:22.977352 139877721212096 builder_impl.py:801] Assets written to: /tmp/movielens_bandit_output/policy_89616/assets


INFO:tensorflow:Assets written to: /tmp/movielens_bandit_output/policy_91216/assets


I0407 02:42:38.031419 139877721212096 builder_impl.py:801] Assets written to: /tmp/movielens_bandit_output/policy_91216/assets


INFO:tensorflow:Assets written to: /tmp/movielens_bandit_output/policy_92816/assets


I0407 02:43:53.146565 139877721212096 builder_impl.py:801] Assets written to: /tmp/movielens_bandit_output/policy_92816/assets


INFO:tensorflow:Assets written to: /tmp/movielens_bandit_output/policy_94416/assets


I0407 02:45:09.508979 139877721212096 builder_impl.py:801] Assets written to: /tmp/movielens_bandit_output/policy_94416/assets


INFO:tensorflow:Assets written to: /tmp/movielens_bandit_output/policy_96016/assets


I0407 02:46:25.686778 139877721212096 builder_impl.py:801] Assets written to: /tmp/movielens_bandit_output/policy_96016/assets


INFO:tensorflow:Assets written to: /tmp/movielens_bandit_output/policy_97616/assets


I0407 02:47:40.422857 139877721212096 builder_impl.py:801] Assets written to: /tmp/movielens_bandit_output/policy_97616/assets


INFO:tensorflow:Assets written to: /tmp/movielens_bandit_output/policy_99216/assets


I0407 02:48:56.022187 139877721212096 builder_impl.py:801] Assets written to: /tmp/movielens_bandit_output/policy_99216/assets


INFO:tensorflow:Assets written to: /tmp/movielens_bandit_output/policy_100816/assets


I0407 02:50:12.061190 139877721212096 builder_impl.py:801] Assets written to: /tmp/movielens_bandit_output/policy_100816/assets


INFO:tensorflow:Assets written to: /tmp/movielens_bandit_output/policy_102416/assets


I0407 02:51:27.884116 139877721212096 builder_impl.py:801] Assets written to: /tmp/movielens_bandit_output/policy_102416/assets


In [None]:
# Load the TensorBoard notebook extension and point it at the directory that
# was passed as root_dir to run_bandit_training in the training cell.
%load_ext tensorboard
%tensorboard --logdir /tmp/movielens_bandit_output
