### Deep Optimal Stopping - Dice problem - Implementation with tensorflow estimator 

In [1]:
import tensorflow as tf
import numpy as np 
import scipy
tf.__version__

'1.8.0'

#### Creating samples in the same format as before: an $M \times 3$ matrix in which each row is one outcome (sample path), i.e. the 3 consecutive tosses

In [2]:
# fix the NumPy seed so that "Restart Kernel -> Run All" draws the same sample
SEED = 42
np.random.seed(SEED)

# define sample size for training 
M = 30000

# create a sample of M x 3: one row per path, one column per toss, with values
# 1..6 (the upper bound high=7 is exclusive)
# In this example we only consider 3 tosses, hence only two steps with choices to stop at
dice = np.random.randint(low=1, high=7, size=(M, 3))

In [3]:
# peek at the first 10 simulated paths (one row per path, one column per toss)
print("The first 10 samples: ")
first_paths = dice[:10, :]
print(first_paths)

The first 10 samples: 
[[5 3 4]
 [5 4 5]
 [6 4 3]
 [6 3 3]
 [1 3 2]
 [3 3 1]
 [1 2 5]
 [6 4 2]
 [1 4 5]
 [5 3 6]]


#### Define the input functions for training and evaluation in tensorflow estimator

In [4]:
# define input function for training with estimator, and use the dice np dataset 
def numpy_train_input_fn(dice, batch_size=64, num_epochs=25, shuffle=True,
                         queue_capacity=1000):
    """Build the Estimator training input function from a sample of dice paths.

    Args:
        dice: array-like of shape (n_paths, 3) (extra columns are ignored);
            column k holds the k-th toss of every path.
        batch_size: number of paths per training batch.
        num_epochs: how many times the sample is iterated before the input
            function stops supplying data.
        shuffle: whether the paths are shuffled.
        queue_capacity: capacity of the feeding queue used by `numpy_input_fn`.

    Returns:
        The input function created by `tf.estimator.inputs.numpy_input_fn`.
        It serves the features "t0", "t1" and "t2", each a float column of
        shape (n_paths, 1).
    """
    tosses = np.asarray(dice)
    # slicing with k:k+1 keeps the column axis, so every feature is (n_paths, 1)
    features = {"t%d" % k: tosses[:, k:k + 1].astype(float) for k in range(3)}
    return tf.estimator.inputs.numpy_input_fn(
        x=features,
        batch_size=batch_size,
        num_epochs=num_epochs,
        shuffle=shuffle,
        queue_capacity=queue_capacity
    )

# define input function for evaluation
def numpy_eval_input_fn(dice):
    """Build the Estimator evaluation input function from a sample of dice paths.

    The sample is served once (a single epoch) and in its original order.
    Column k of `dice` becomes the float feature "t<k>" of shape (n_paths, 1).
    """
    n_paths = len(dice[:, 0])
    columns = {}
    for k, key in enumerate(("t0", "t1", "t2")):
        columns[key] = np.reshape(dice[:, k].astype(float), (n_paths, 1))
    return tf.estimator.inputs.numpy_input_fn(x=columns, num_epochs=1, shuffle=False)

#### Model definition

The training function below can also be defined with a conditional function (`tf.cond`), so that after a certain number of training steps the optimizer and the cost function change from those of one network to those of the other. This is an alternative way to train the two networks separately. To use this implementation, remove the comment markers $\#$ from the `tf.cond` lines of the training-mode definition in the code below, so that they replace the active `loss` and `train_op` arguments.

In [5]:
def _stopping_network(state, suffix, soft_name):
    """Build one stopping-decision network: two ReLU layers of 51 units and one logit.

    Must be called inside the variable scope that should own the weights. The
    layer names (`first_layer_<suffix>`, `second_layer_<suffix>`,
    `logits_<suffix>`) are fixed so that variable names stay stable.

    Args:
        state: [batch, 1] tensor the decision is based on (the current toss).
        suffix: "1st" or "2nd"; appended to the layer and op names.
        soft_name: op name given to the sigmoid output.

    Returns:
        (F_theta, f_theta): the sigmoid of the logit (soft stopping decision,
        differentiable, used in the training reward) and the hard decision as
        int32 (1 = stop when the logit is positive, 0 = continue).
    """
    hidden = tf.layers.dense(state, 51, activation=tf.nn.relu,
                             kernel_initializer=tf.glorot_normal_initializer(),
                             name="first_layer_" + suffix)
    hidden = tf.layers.dense(hidden, 51, activation=tf.nn.relu,
                             kernel_initializer=tf.glorot_normal_initializer(),
                             name="second_layer_" + suffix)
    logits = tf.layers.dense(hidden, 1, activation=None,
                             kernel_initializer=tf.glorot_normal_initializer(),
                             name="logits_" + suffix)
    soft_decision = tf.nn.sigmoid(logits, name=soft_name)
    hard_decision = tf.cast(tf.greater(logits, 0.0), dtype=tf.int32,
                            name="f_theta_" + suffix)
    return soft_decision, hard_decision


def _payoff_at(tosses, stop_index, name):
    """Pick, for every path (row of `tosses`), the toss at that path's stopping index.

    Args:
        tosses: [batch, 3] tensor of the three tosses of every path.
        stop_index: [batch, 1] int32 tensor of column indices (0, 1 or 2).
        name: op name of the result.

    Returns:
        A [batch] tensor with tosses[i, stop_index[i]] for every path i.
    """
    rows = tf.reshape(tf.range(tf.shape(stop_index)[0]), shape=tf.shape(stop_index))
    return tf.gather_nd(tosses, indices=tf.concat([rows, stop_index], 1), name=name)


def my_model_fn(features, mode, params):
    """Model function for the 3-toss dice stopping problem.

    Two small networks decide whether to stop: one looks at the 1st toss
    ("t0"), the other at the 2nd toss ("t1"); after the 3rd toss ("t2") the
    player has to stop. Each network has its own Adam optimizer and maximises
    the mean of its own soft reward; both optimizers run in every training step.

    Args:
        features: dict with the float tensors "t0", "t1", "t2", each of shape
            [batch, 1].
        mode: one of `tf.estimator.ModeKeys`.
        params: hyperparameters handed over by the Estimator; not used here.

    Returns:
        A `tf.estimator.EstimatorSpec`:
          * TRAIN   - loss is the sum of both costs (negative mean rewards).
          * EVAL    - loss is the batch mean payoff of the learned stopping rule
                      (the price estimate); the metric "price" is the same
                      quantity averaged over all evaluated paths.
          * PREDICT - per path, the realised "payoff" and the "stopping_time"
                      (index 0, 1 or 2 of the toss at which the rule stops).

    Raises:
        ValueError: if `mode` is none of TRAIN / EVAL / PREDICT.
    """
    tosses = tf.concat([features['t0'], features['t1'], features['t2']], 1, name="input_set")

    # Step 1 - decision at the 2nd toss: stop and take t1, or continue and take t2
    with tf.variable_scope("2nd", reuse=tf.AUTO_REUSE):
        F_theta_2nd, f_theta_2nd = _stopping_network(features['t1'], "2nd",
                                                     soft_name="F_theta_2nd")

        # soft reward; `1.0 - ...` follows the dtype of the features
        reward_2nd = tf.add(F_theta_2nd * features['t1'],
                            (1.0 - F_theta_2nd) * features['t2'],
                            name="reward_2nd")
        cost_2nd = tf.negative(tf.reduce_mean(reward_2nd))

        # only the weights of this network are updated by this optimizer
        second_train_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope="2nd")
        train_op2 = tf.train.AdamOptimizer(learning_rate=0.001).minimize(
            cost_2nd, var_list=second_train_vars)

    # stopping index when the 1st toss was not taken: 1 if the 2nd network stops, else 2
    tau_1 = 1 * f_theta_2nd + 2 * (1 - f_theta_2nd)
    g_1 = _payoff_at(tosses, tau_1, name="g_1")

    # Step 2 - decision at the 1st toss: stop and take t0, or continue with tau_1
    with tf.variable_scope("1st", reuse=tf.AUTO_REUSE):
        F_theta_1st, f_theta_1st = _stopping_network(features['t0'], "1st",
                                                     soft_name="output_layer_1st")

        # g_1 has shape [batch] while the network output has shape [batch, 1].
        # Without the extra axis the product broadcasts to [batch, batch] and
        # pairs every path's decision with every other path's continuation payoff.
        continuation_payoff = tf.expand_dims(g_1, 1)
        reward_1st = tf.add(F_theta_1st * features['t0'],
                            (1.0 - F_theta_1st) * continuation_payoff,
                            name="reward_1st")
        cost_1st = tf.negative(tf.reduce_mean(reward_1st))

        first_train_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope="1st")
        train_op1 = tf.train.AdamOptimizer(learning_rate=0.001).minimize(
            cost_1st, var_list=first_train_vars)

    # overall stopping index: 0 if the 1st network stops, otherwise 1 or 2 as
    # decided by the 2nd network
    tau_0 = (0 * f_theta_1st + 1 * f_theta_2nd * (1 - f_theta_1st)
             + 2 * (1 - f_theta_1st) * (1 - f_theta_2nd))
    g_0 = _payoff_at(tosses, tau_0, name="g_0")

    price = tf.reduce_mean(g_0, name="price")

    if mode == tf.estimator.ModeKeys.EVAL:
        return tf.estimator.EstimatorSpec(
            mode=mode,
            loss=price,
            # exact mean over all evaluated paths, independent of the batch split
            eval_metric_ops={"price": tf.metrics.mean(g_0)})

    # Provide an estimator spec for `ModeKeys.PREDICT`.
    if mode == tf.estimator.ModeKeys.PREDICT:
        # predictions are served path by path, so every entry needs a leading
        # batch axis; the scalar `price` cannot be served this way
        return tf.estimator.EstimatorSpec(
            mode=mode,
            predictions={"payoff": g_0,
                         "stopping_time": tf.squeeze(tau_0, axis=1)})

    if mode == tf.estimator.ModeKeys.TRAIN:
        global_step = tf.train.get_global_step()
        # the optimizers are not given the global step, so advance it explicitly
        update_global_step = tf.assign_add(global_step, 1, name='update_global_step')

        # Alternative schedule referred to in the notebook text (train only the
        # 2nd network for the first 5000 steps, afterwards only the 1st one):
        #   loss=tf.cond((global_step < 5000), lambda: cost_2nd, lambda: cost_1st),
        #   train_op=tf.cond((global_step < 5000),
        #                    lambda: tf.group(train_op2, update_global_step),
        #                    lambda: tf.group(train_op1, update_global_step)),
        # NOTE(review): tf.cond only gates ops created inside its branch
        # functions, while train_op1 / train_op2 are created above - verify that
        # this alternative really trains one network at a time before using it.
        return tf.estimator.EstimatorSpec(
            mode=mode,
            loss=cost_1st + cost_2nd,
            train_op=tf.group(train_op1, train_op2, update_global_step))

    raise ValueError("Unsupported mode: %s" % mode)


In [6]:
# define a folder, where the model will be saved; the path is relative to the
# notebook's working directory so that it does not depend on one user's machine
MODEL_DIR = 'Estimator'

In [7]:
# Wrap the custom model function in an Estimator. No model_dir is passed, so
# the checkpoints are written to a temporary directory (see '_model_dir' in
# the logged config).
nn = tf.estimator.Estimator(
    model_fn=my_model_fn,
    # model_dir=MODEL_DIR,
)

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': '/var/folders/16/6vfzqvh50sv0n670v2qmktsw0000gn/T/tmpl4esn6bg', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': None, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x10e57b400>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}


In [8]:
# Train both stopping networks on the training sample. `steps` is only an
# upper bound: training also ends once the input function has served all of
# its epochs.
train_input_fn = numpy_train_input_fn(dice)
nn.train(input_fn=train_input_fn, steps=20000)

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Saving checkpoints for 1 into /var/folders/16/6vfzqvh50sv0n670v2qmktsw0000gn/T/tmpl4esn6bg/model.ckpt.
INFO:tensorflow:loss = -6.53976675783962, step = 1
INFO:tensorflow:global_step/sec: 170.713
INFO:tensorflow:loss = -7.460640595203019, step = 101 (0.587 sec)
INFO:tensorflow:global_step/sec: 447.574
INFO:tensorflow:loss = -8.690484986710391, step = 201 (0.223 sec)
INFO:tensorflow:global_step/sec: 480.024
INFO:tensorflow:loss = -8.390074759993746, step = 301 (0.208 sec)
INFO:tensorflow:global_step/sec: 494.322
INFO:tensorflow:loss = -8.94561356016597, step = 401 (0.202 sec)
INFO:tensorflow:global_step/sec: 464.358
INFO:tensorflow:loss = -9.175860628462049, step = 501 (0.215 sec)
INFO:tensorflow:global_step/sec: 476.198
INFO:tensorflow:lo

INFO:tensorflow:loss = -9.37593895555818, step = 7301 (0.327 sec)
INFO:tensorflow:global_step/sec: 489.383
INFO:tensorflow:loss = -9.374970719121139, step = 7401 (0.203 sec)
INFO:tensorflow:global_step/sec: 371.949
INFO:tensorflow:loss = -8.779772959078846, step = 7501 (0.269 sec)
INFO:tensorflow:global_step/sec: 490.667
INFO:tensorflow:loss = -8.895493546296922, step = 7601 (0.204 sec)
INFO:tensorflow:global_step/sec: 460.337
INFO:tensorflow:loss = -8.311999434464347, step = 7701 (0.217 sec)
INFO:tensorflow:global_step/sec: 388.355
INFO:tensorflow:loss = -9.086880104976016, step = 7801 (0.257 sec)
INFO:tensorflow:global_step/sec: 396.325
INFO:tensorflow:loss = -8.930155733204494, step = 7901 (0.253 sec)
INFO:tensorflow:global_step/sec: 494.501
INFO:tensorflow:loss = -8.75438593015678, step = 8001 (0.202 sec)
INFO:tensorflow:global_step/sec: 420.78
INFO:tensorflow:loss = -8.955064631385218, step = 8101 (0.239 sec)
INFO:tensorflow:global_step/sec: 430.074
INFO:tensorflow:loss = -9.12229

<tensorflow.python.estimator.estimator.Estimator at 0x181d6686d8>

In [9]:
# Evaluate the learned stopping rule on the training sample; in EVAL mode the
# model function reports the price estimate as "loss".
train_eval_input_fn = numpy_eval_input_fn(dice)
ev = nn.evaluate(input_fn=train_eval_input_fn)
print("Price: %s" % (ev["loss"],))


INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Starting evaluation at 2019-02-17-23:23:52
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from /var/folders/16/6vfzqvh50sv0n670v2qmktsw0000gn/T/tmpl4esn6bg/model.ckpt-11720
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Finished evaluation at 2019-02-17-23:23:53
INFO:tensorflow:Saving dict for global step 11720: global_step = 11720, loss = 4.6521053
Price: 4.6521053


#### Result 
The solver should produce a result around $4.6\dots$, which is close to the analytical solution $28/6\approx 4.67$. Evaluating the trained model on a separate, similarly sized sample should give a result, $4.6\dots$, that is reasonably close to the one obtained on the training sample.

In [10]:
# Create a separate sample for evaluation
# fresh sample with the same layout as the training sample (M paths x 3 tosses)
eval_shape = (M, 3)
dice_eval = np.random.randint(1, 7, size=eval_shape)

In [11]:
# Use the evaluation sample to get the price
# Price estimate of the trained stopping rule on the separate evaluation sample
fresh_eval_input_fn = numpy_eval_input_fn(dice_eval)
ev_ck = nn.evaluate(input_fn=fresh_eval_input_fn)
print("Price: %s" % (ev_ck["loss"],))


INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Starting evaluation at 2019-02-17-23:24:01
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from /var/folders/16/6vfzqvh50sv0n670v2qmktsw0000gn/T/tmpl4esn6bg/model.ckpt-11720
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Finished evaluation at 2019-02-17-23:24:02
INFO:tensorflow:Saving dict for global step 11720: global_step = 11720, loss = 4.6519947
Price: 4.6519947
