In [1]:
import tensorflow as tf
import numpy as np
import sys
sys.path.append("../")
from configs import ParseParams
from DataGenerator import DataGenerator
from env import Env

In [6]:
def _summed_step_lengths(deltas):
    """Sum the Euclidean norms of per-step displacement vectors.

    Args:
        deltas: tensor indexed as [step, batch, coordinate].

    Returns:
        Tensor indexed by batch: for every batch element, the sum over the
        step axis of the L2 norm taken over the coordinate axis.
    """
    squared_norms = tf.reduce_sum(input_tensor=tf.pow(deltas, 2), axis=2)
    return tf.reduce_sum(input_tensor=tf.pow(squared_norms, .5), axis=0)


def reward_func(sample_solution, decode_len=0.0, n_nodes=0.0, depot=None):
    """Score a decoded VRP route.

    Without ``depot`` the result is the length of the closed tour: every
    decoded point is paired with the point decoded one step earlier, and the
    first point is paired with the last one. The value is returned as a
    positive length; negating it is left to the caller.

    With ``depot`` the result is the weighted sum

        70 * depot_visits / n_nodes + 30 * tour_length / depot_distance_sum

    where ``depot_visits`` counts runs of consecutive depot steps (only the
    first step of a run is counted) and ``depot_distance_sum`` is the sum over
    all steps of the distance between the decoded point and the depot.

    Args:
        sample_solution: list of ``decode_len`` tensors, each of shape
            [batch_size x input_dim], holding the coordinates picked at each
            decoding step.
        decode_len: number of depot copies stacked for the distance-to-depot
            term. Only used when ``depot`` is given. A falsy value (the
            default) means "use ``len(sample_solution)``".
        n_nodes: divisor for the depot-visit count. Only used when ``depot``
            is given, in which case it must be non-zero (``1.0 / n_nodes`` is
            evaluated in Python).
        depot: optional depot coordinates; presumably [batch_size x input_dim]
            like one entry of ``sample_solution`` -- confirm against caller.

    Returns:
        Tensor of size [batch_size].

    Example (closed-tour pairing for three decoding steps, batch_size = 2):
        sample_solution        = [[[1,1],[2,2]], [[3,3],[4,4]], [[5,5],[6,6]]]
        sample_solution_tilted = [[[5,5],[6,6]], [[1,1],[2,2]], [[3,3],[4,4]]]
    """
    # Identity test: `!=` on tensors/arrays is elementwise, so it must not be
    # used to detect a missing argument.
    use_depot = depot is not None

    # [decode_len x batch_size x input_dim]
    route = tf.stack(sample_solution, 0)

    if use_depot:
        # Length of the current run of consecutive depot steps; drops back to
        # zero as soon as a step is off-depot. Step 0 does not seed it.
        run_length = tf.zeros_like(depot)[:, 0]
        # A step counts as "at the depot" when its first coordinate equals the
        # depot's first coordinate (only column 0 of the comparison is kept).
        depot_visits = tf.cast(tf.equal(sample_solution[0], depot), tf.float32)[:, 0]
        for i in range(1, len(sample_solution)):
            at_depot = tf.cast(tf.equal(sample_solution[i], depot), tf.float32)[:, 0]
            run_length = tf.add(tf.multiply(run_length, at_depot), at_depot)
            # run_length < 1.5 holds only on the first step of a run.
            starts_run = tf.cast(tf.less(run_length, 1.5), tf.float32)
            depot_visits = tf.add(depot_visits, tf.multiply(at_depot, starts_run))

        # The default decode_len (0.0) cannot drive range(); fall back to the
        # actual number of decoded steps.
        n_copies = int(decode_len) if decode_len else len(sample_solution)
        depot_per_step = tf.stack([depot] * n_copies, 0)
        max_lens_decoded = _summed_step_lengths(depot_per_step - route)

    # Rotate the route by one step so element i holds the point decoded at
    # step i-1 (and element 0 holds the last point).
    sample_solution_tilted = tf.concat((tf.expand_dims(route[-1], 0), route[:-1]), 0)
    route_lens_decoded = _summed_step_lengths(sample_solution_tilted - route)

    if not use_depot:
        return route_lens_decoded

    visit_term = tf.scalar_mul(70.0, tf.scalar_mul(1.0 / n_nodes, depot_visits))
    length_term = tf.scalar_mul(30.0, tf.divide(route_lens_decoded, max_lens_decoded))
    return tf.add(visit_term, length_term)

In [7]:
# Build a tiny environment and score a hand-made route with reward_func.
args, prt = ParseParams()
batch_size = 2
nodes = 5
cust = nodes-1
n_steps = 20  # number of decoding steps to fake; the route cycles through the nodes
args['batch_size'] = batch_size
args['n_nodes'] = nodes
args['n_cust'] = cust
env = Env( args)

actions = []
# Column vector [0, 1, ..., batch_size-1]; paired with a node index it forms
# the (batch, node) coordinates consumed by tf.gather_nd.
BatchSequence = tf.expand_dims(tf.cast(tf.range(batch_size ), tf.int64), 1)

for i in range(n_steps):
    # Every batch element visits the same node at step i: node i modulo nodes.
    idx = [[i % nodes] for _ in range(batch_size)]
    batched_idx = tf.concat([BatchSequence,idx],1)
    # Gather straight from env.input_pnt (the former tf.tile with all-ones
    # multiples returned the same values on every iteration).
    action = tf.gather_nd( env.input_pnt, batched_idx )
    actions.append(action)

print( actions )
R = reward_func( actions )
print(R)

Created train iterator.
Loading dataset for vrp-size-1000-len-5-test.txt...
[<tf.Tensor: shape=(2, 2), dtype=float32, numpy=
array([[0.795758  , 0.58931255],
       [0.1841964 , 0.15534529]], dtype=float32)>, <tf.Tensor: shape=(2, 2), dtype=float32, numpy=
array([[0.25328094, 0.747913  ],
       [0.4112309 , 0.19615217]], dtype=float32)>, <tf.Tensor: shape=(2, 2), dtype=float32, numpy=
array([[0.8642763, 0.1316337],
       [0.6011834, 0.8706626]], dtype=float32)>, <tf.Tensor: shape=(2, 2), dtype=float32, numpy=
array([[0.5714688 , 0.83738124],
       [0.0696065 , 0.4771214 ]], dtype=float32)>, <tf.Tensor: shape=(2, 2), dtype=float32, numpy=
array([[0.38849595, 0.31845814],
       [0.03260846, 0.5447879 ]], dtype=float32)>, <tf.Tensor: shape=(2, 2), dtype=float32, numpy=
array([[0.795758  , 0.58931255],
       [0.1841964 , 0.15534529]], dtype=float32)>, <tf.Tensor: shape=(2, 2), dtype=float32, numpy=
array([[0.25328094, 0.747913  ],
       [0.4112309 , 0.19615217]], dtype=float32)>, <tf