### Deep Optimal Stopping - Implementation with tensorflow estimator

#### General setting - demonstrated through the dice example

In [1]:
import tensorflow as tf
import numpy as np 
import scipy
tf.__version__

'1.13.1'

#### Creating samples in the same format as before. In an $M \times 3$ matrix the rows represent the outcomes,  i.e. the 3 consecutive tosses

In [2]:
# define sample size for training 
M = 30000

# create a sample of M x 3
# In this example we only consider 3 tosses, hence only two steps with choices to stop at
dice = np.random.randint(low=1, high=7, size=(M, 3))

In [3]:
# try with 4 
dice4 = np.random.randint(low=1, high=7, size=(M, 4))

In [4]:
# see the first 10 paths from the samples generated above
print("The first 10 samples: ")
print(dice[:10, :])

The first 10 samples: 
[[2 6 5]
 [5 1 6]
 [6 1 3]
 [5 3 5]
 [6 6 1]
 [6 2 6]
 [6 2 2]
 [4 2 1]
 [5 2 6]
 [4 2 1]]


In [5]:
# see the first 10 paths from the samples generated above
print("The first 10 samples: ")
print(dice4[:10, :])

The first 10 samples: 
[[6 6 1 3]
 [1 1 4 6]
 [6 1 2 4]
 [5 1 3 6]
 [1 6 5 5]
 [3 5 2 3]
 [6 6 1 5]
 [2 6 5 2]
 [1 6 1 4]
 [6 5 5 2]]


#### Define the input functions for training and evaluation in tensorflow estimator

In [6]:
# define input function for training with estimator, and use the dice np dataset 
def numpy_train_input_fn(samples): 
    # get the number of time steps and number of samples
    n_timeSteps = np.shape(samples)[-1]
    n_samples = np.shape(samples)[0]
    return tf.estimator.inputs.numpy_input_fn(
        x = dict(zip(np.arange(n_timeSteps), np.reshape(samples.T.astype(float), 
                                                                             [n_timeSteps, n_samples, 1]))),
        
        
       # x={"t0": np.reshape(dice[:, 0].astype(float), (len(dice[:, 0]), 1)), 
       #    "t1": np.reshape(dice[:, 1].astype(float), (len(dice[:, 1]), 1)), 
       #    "t2": np.reshape(dice[:, 2].astype(float), (len(dice[:, 2]), 1))},
        batch_size = 64, 
        num_epochs = 25, 
        shuffle = True, 
        queue_capacity = 1000
    )

# define input function for evaluation
def numpy_eval_input_fn(samples):
    # get the number of time steps and number of samples
    n_timeSteps = np.shape(samples)[-1]
    n_samples = np.shape(samples)[0]
    return tf.estimator.inputs.numpy_input_fn(
       x = dict(zip(np.arange(n_timeSteps), np.reshape(samples.T.astype(float), 
                                                                             [n_timeSteps, n_samples, 1]))),
        
        
       # x={"t0": np.reshape(dice[:, 0].astype(float), (len(dice[:, 0]), 1)), 
       #    "t1": np.reshape(dice[:, 1].astype(float), (len(dice[:, 1]), 1)), 
       #    "t2": np.reshape(dice[:, 2].astype(float), (len(dice[:, 2]), 1))},
        num_epochs = 1, 
        shuffle = False
    )

#### Model definition

In [None]:
# ----------------------------------------------
# with the for loop towards the general timegrid 
# ----------------------------------------------

def my_model_fn(features, mode, params):  
    
    """Defining the custom architecture"""
    
    # input_set = tf.concat([features['t0'], features['t1'], features['t2']], 1, name="input_set")
    
    N = len(features)
    
    # creating the input set from the dictionary for practical considerations later
    input_set = tf.concat([features[0], features[1]], 1, name="input_set")    
    for i in range(N-2):
        input_set = tf.concat([input_set, features[2+i]], 1)
    
    # create a dictionary that stores the NNs; key - time point; value - network component 
    networks = {x: {'layers': {}, 
                    'costs': {}, 
                    'optimizers': {}
                   } for x in range(N)}
    
    # create a dictionary that stores the taus for each n \in N 
    taus = {}
    
    nextInputs = {}
    
    train_ops = {}
    
    # Step 1 - Configure the network 
    
    one = tf.constant(1, dtype=tf.float64)
    
    for t in range(N-2, -1, -1):
        with tf.variable_scope("t"+str(t), reuse=tf.AUTO_REUSE):
            networks[t]['layers']['first'] = tf.layers.dense(features[t], 51, activation=tf.nn.relu,
                                            kernel_initializer=tf.glorot_normal_initializer(),
                                            #kernel_initializer=tf.random_normal_initializer(mean=0.0, stddev=0.01), 
                                            #kernel_regularizer= tf.contrib.layers.l2_regularizer(scale=0.3),
                                            name="first_layer_"+str(t))
            networks[t]['layers']['second'] = tf.layers.dense(networks[t]['layers']['first'], 51, activation=tf.nn.relu, 
                                            kernel_initializer=tf.glorot_normal_initializer(),
                                            #kernel_initializer=tf.random_normal_initializer(mean=0.0, stddev=0.01),
                                            #kernel_regularizer = tf.contrib.layers.l2_regularizer(scale=0.3), 
                                            name="second_layer_"+str(t))
            networks[t]['layers']['logits'] = tf.layers.dense(networks[t]['layers']['second'], 1, activation=None, 
                                          kernel_initializer=tf.glorot_normal_initializer(),
                                          #kernel_initializer=tf.random_normal_initializer(mean=0.0, stddev=0.01),
                                          #kernel_regularizer = tf.contrib.layers.l2_regularizer(scale=0.3),
                                          name="logits_"+str(t)) 
            networks[t]['layers']['F_theta'] = tf.nn.sigmoid(networks[t]['layers']['logits'], name="F_theta_"+str(t))
            networks[t]['layers']['f_theta'] = tf.cast(tf.clip_by_value(tf.sign(networks[t]['layers']['logits']), 0, 2), dtype=tf.int32, name="f_theta_"+str(t))
        
        
        # Define reward and optimizer
        
        if t==N-2:
            nextInputs[t] = features[t+1]
        
        
            networks[t]['costs']['reward'] = tf.add(tf.multiply(networks[t]['layers']['F_theta'], features[1]), 
                        tf.multiply((one-networks[t]['layers']['F_theta']), nextInputs[t]), 
                       name = "reward_"+str(t))
    
            networks[t]['costs']['rAvg'] = tf.reduce_mean(networks[t]['costs']['reward']) 
            networks[t]['costs']['cost'] = tf.scalar_mul(-1,networks[t]['costs']['rAvg'])
            networks[t]['optimizers'] = tf.train.AdamOptimizer(learning_rate=0.001)
    
        
            
            train_ops[t] = networks[t]['optimizers'].minimize(networks[t]['costs']['cost'], 
                                        var_list=tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope="t"+str(t)))
    
            taus[t] = (N-2)*networks[t]['layers']['f_theta'] + (N-1)*(1-networks[t]['layers']['f_theta'])
        
    
    
        else:
    
            nextInputs[t] = tf.gather_nd(input_set, 
                       indices=tf.concat([tf.reshape(tf.range(tf.shape(taus[t+1])[0]), shape=tf.shape(taus[t+1])), taus[t+1]], 1),
                       name="g"+str(t))
            
            networks[t]['costs']['reward'] = tf.add(tf.multiply(networks[t]['layers']['F_theta'], features[0]), 
                            tf.multiply((one-networks[t]['layers']['F_theta']), nextInputs[t]), name = "reward_"+str(t))
            networks[t]['costs']['rAvg'] = tf.reduce_mean(networks[t]['costs']['reward'])
            networks[t]['costs']['cost'] = tf.scalar_mul(-1,networks[t]['costs']['rAvg'])
    
            networks[t]['optimizers'] = tf.train.AdamOptimizer(learning_rate=0.001)
        
             
            train_ops[t] = networks[t]['optimizers'].minimize(networks[t]['costs']['cost'], 
                                        var_list=tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope="t"+str(t)))

    
            taus[t] = tf.math.reduce_sum(t*networks[t]['layers']['f_theta']+
                    [i*networks[i]['layers']['f_theta']*tf.math.reduce_prod([1-networks[j]['layers']['f_theta'] for j in range(t, i)], axis=0) 
                     for i in range((t+1),(N-1))]+
                     (N-1)*tf.math.reduce_prod([1-networks[k]['layers']['f_theta'] for k in range(t, (N-1))], axis=0)           
                                , axis=0) # this works ? 
    
    

        
    g_0 = tf.gather_nd(input_set, 
                       indices=tf.concat([tf.reshape(tf.range(tf.shape(taus[0])[0]), shape=tf.shape(taus[0])), 
                                          taus[0]], 1), 
                       name="g_0")
    
    
    price = tf.reduce_mean(g_0, name="price")
    
    global_step = tf.train.get_global_step()
    update_global_step = tf.assign(global_step, global_step + 1, name = 'update_global_step')
    
    train_op =tf.group([train_ops[_] for _ in train_ops.keys()])
    cost = tf.math.reduce_sum([networks[kk]['costs']['cost'] for kk in train_ops.keys()]) #cost_1st+cost_2nd 
    
    op_ = tf.train.AdamOptimizer(learning_rate=0.005)
    train_op_ =op_.minimize(cost)
    
    if mode == tf.estimator.ModeKeys.EVAL:
        return tf.estimator.EstimatorSpec(
            mode=mode,
            loss=price,
            evaluation_hooks=None)
       
    
    # Provide an estimator spec for `ModeKeys.PREDICT`.
    
    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(
                mode=mode,
                predictions={"price": price})
    
    if mode == tf.estimator.ModeKeys.TRAIN:
        return tf.estimator.EstimatorSpec(
            mode=mode, 
            loss= cost,  #tf.cond((global_step < 5000), lambda: cost_2nd, lambda: cost_1st), 
            train_op= tf.group(train_op, update_global_step),
            #tf.cond((global_step < 5000), lambda: tf.group(train_op2, update_global_step), 
                     #       lambda: tf.group(train_op1, update_global_step)),#tf.group(train_op, update_global_step), 
            training_hooks=None)

In [None]:
#MODEL_DIR = '/Users/Cellini/Desktop/Quant/DL Udacity/DLexs/Estimator'


In [8]:
nn = tf.estimator.Estimator(model_fn=my_model_fn) #, model_dir=MODEL_DIR)

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': '/var/folders/16/6vfzqvh50sv0n670v2qmktsw0000gn/T/tmpmmcvyjoc', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x635e157b8>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}


In [9]:
nn.train(input_fn=numpy_train_input_fn(dice), steps=20000)

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
To construct input pipelines, use the `tf.data` module.
Instructions for updating:
To construct input pipelines, use the `tf.data` module.
INFO:tensorflow:Calling model_fn.
Instructions for updating:
Use keras.layers.dense instead.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
Instructions for updating:
To construct input pipelines, use the `tf.data` module.
INFO:tensorflow:Saving checkpoints for 0 into /var/folders/16/6vfzqvh50sv0n670v2qmktsw0000gn/T/tmpmmcvyjoc/model.ckpt.
INFO:tensorflow:loss = -7.387059620527715, step = 1
INFO:tensorflow:global_step/sec: 41.2992
INFO:tensorflow:loss = -7.556451782245364, step = 101 (2.492 sec)
INFO:tensorflow:global_step/sec: 166.859
INFO:tensorflow:loss = -8.801888365766278, step = 201 (0.529 sec

INFO:tensorflow:loss = -9.348068186258988, step = 5801 (1.023 sec)
INFO:tensorflow:global_step/sec: 118.235
INFO:tensorflow:loss = -8.242180741965441, step = 5901 (0.845 sec)
INFO:tensorflow:global_step/sec: 167.537
INFO:tensorflow:loss = -8.59369419149037, step = 6001 (0.597 sec)
INFO:tensorflow:global_step/sec: 101.948
INFO:tensorflow:loss = -8.572230530550966, step = 6101 (0.980 sec)
INFO:tensorflow:global_step/sec: 141.148
INFO:tensorflow:loss = -8.038308112031318, step = 6201 (0.709 sec)
INFO:tensorflow:global_step/sec: 99.479
INFO:tensorflow:loss = -9.331021115820477, step = 6301 (1.005 sec)
INFO:tensorflow:global_step/sec: 122.008
INFO:tensorflow:loss = -9.56489158751361, step = 6401 (0.820 sec)
INFO:tensorflow:global_step/sec: 161.255
INFO:tensorflow:loss = -9.292905686255327, step = 6501 (0.620 sec)
INFO:tensorflow:global_step/sec: 160.33
INFO:tensorflow:loss = -9.169879144641433, step = 6601 (0.624 sec)
INFO:tensorflow:global_step/sec: 206.153
INFO:tensorflow:loss = -8.171872

<tensorflow_estimator.python.estimator.estimator.Estimator at 0x635e15e80>

In [10]:
ev = nn.evaluate(input_fn=numpy_eval_input_fn(dice))
print("Price: %s" % ev["loss"])


INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
Instructions for updating:
Use tf.cast instead.
INFO:tensorflow:Starting evaluation at 2019-10-03T20:58:39Z
INFO:tensorflow:Graph was finalized.
Instructions for updating:
Use standard file APIs to check for files with this prefix.
INFO:tensorflow:Restoring parameters from /var/folders/16/6vfzqvh50sv0n670v2qmktsw0000gn/T/tmpmmcvyjoc/model.ckpt-11720
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Finished evaluation at 2019-10-03-20:58:41
INFO:tensorflow:Saving dict for global step 11720: global_step = 11720, loss = 4.6849513
INFO:tensorflow:Saving 'checkpoint_path' summary for global step 11720: /var/folders/16/6vfzqvh50sv0n670v2qmktsw0000gn/T/tmpmmcvyjoc/model.ckpt-11720
Price: 4.6849513


#### Result 
The solver should produce a result around $4.6\dots$, which is close to the analytical solution $28/6\approx 4.66$. Evaluating the trained model on a separate, similarly sized sample, the result should be reasonably close, $4.6\dots$, to the result obtained on the training sample.

In [11]:
# Create a separate sample for evaluation
dice_eval = np.random.randint(low=1, high=7, size=(M, 3))

In [12]:
# Use the evaluation sample to get the price
ev_ck = nn.evaluate(input_fn=numpy_eval_input_fn(dice_eval))
print("Price: %s" % ev_ck["loss"])


INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Starting evaluation at 2019-10-03T20:58:56Z
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from /var/folders/16/6vfzqvh50sv0n670v2qmktsw0000gn/T/tmpmmcvyjoc/model.ckpt-11720
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Finished evaluation at 2019-10-03-20:58:58
INFO:tensorflow:Saving dict for global step 11720: global_step = 11720, loss = 4.675166
INFO:tensorflow:Saving 'checkpoint_path' summary for global step 11720: /var/folders/16/6vfzqvh50sv0n670v2qmktsw0000gn/T/tmpmmcvyjoc/model.ckpt-11720
Price: 4.675166


In [13]:
nn4 = tf.estimator.Estimator(model_fn=my_model_fn)

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': '/var/folders/16/6vfzqvh50sv0n670v2qmktsw0000gn/T/tmpm4jcpv4_', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x6389f7668>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}


In [14]:
nn4.train(input_fn=numpy_train_input_fn(dice4), steps=20000)

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Saving checkpoints for 0 into /var/folders/16/6vfzqvh50sv0n670v2qmktsw0000gn/T/tmpm4jcpv4_/model.ckpt.
INFO:tensorflow:loss = -10.793564131745416, step = 1
INFO:tensorflow:global_step/sec: 32.5349
INFO:tensorflow:loss = -10.629389626936266, step = 101 (3.074 sec)
INFO:tensorflow:global_step/sec: 189.163
INFO:tensorflow:loss = -11.161982909233325, step = 201 (0.529 sec)
INFO:tensorflow:global_step/sec: 141.343
INFO:tensorflow:loss = -10.932331246046243, step = 301 (0.708 sec)
INFO:tensorflow:global_step/sec: 184.5
INFO:tensorflow:loss = -10.809535245428972, step = 401 (0.542 sec)
INFO:tensorflow:global_step/sec: 189.762
INFO:tensorflow:loss = -11.651199206413672, step = 501 (0.527 sec)
INFO:tensorflow:global_step/sec: 140.638
INFO:tensorf

INFO:tensorflow:global_step/sec: 177.972
INFO:tensorflow:loss = -11.483294864090563, step = 7301 (0.562 sec)
INFO:tensorflow:global_step/sec: 180.118
INFO:tensorflow:loss = -10.836353414186222, step = 7401 (0.555 sec)
INFO:tensorflow:global_step/sec: 146.906
INFO:tensorflow:loss = -11.501215225292722, step = 7501 (0.681 sec)
INFO:tensorflow:global_step/sec: 176.94
INFO:tensorflow:loss = -11.580967976453515, step = 7601 (0.565 sec)
INFO:tensorflow:global_step/sec: 186.154
INFO:tensorflow:loss = -11.338078328536517, step = 7701 (0.537 sec)
INFO:tensorflow:global_step/sec: 142.66
INFO:tensorflow:loss = -11.020099852496681, step = 7801 (0.701 sec)
INFO:tensorflow:global_step/sec: 175.566
INFO:tensorflow:loss = -11.605092702648932, step = 7901 (0.570 sec)
INFO:tensorflow:global_step/sec: 180.576
INFO:tensorflow:loss = -11.056433017053003, step = 8001 (0.554 sec)
INFO:tensorflow:global_step/sec: 120.545
INFO:tensorflow:loss = -11.0279403259476, step = 8101 (0.830 sec)
INFO:tensorflow:global_

<tensorflow_estimator.python.estimator.estimator.Estimator at 0x6385f1ef0>

In [15]:
ev4 = nn4.evaluate(input_fn=numpy_eval_input_fn(dice4))
print("Price: %s" % ev4["loss"])


INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Starting evaluation at 2019-10-03T21:00:31Z
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from /var/folders/16/6vfzqvh50sv0n670v2qmktsw0000gn/T/tmpm4jcpv4_/model.ckpt-11720
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Finished evaluation at 2019-10-03-21:00:34
INFO:tensorflow:Saving dict for global step 11720: global_step = 11720, loss = 4.616899
INFO:tensorflow:Saving 'checkpoint_path' summary for global step 11720: /var/folders/16/6vfzqvh50sv0n670v2qmktsw0000gn/T/tmpm4jcpv4_/model.ckpt-11720
Price: 4.616899
