In [18]:
import tensorflow as tf
import numpy as np
import sys
sys.path.append("../")
from configs import ParseParams
from VRP.vrp_utils import DataGenerator

In [19]:
# Small batch so the tensors inspected in this notebook stay easy to read.
batch_size = 4

# ParseParams() supplies the experiment configuration (`args`) together with a
# second value bound to `prt`.
# NOTE(review): the parameter dump shown under this cell reports
# batch_size: 128, i.e. the value before the override applied below.
args, prt = ParseParams()
args['batch_size'] = batch_size

# The generator is built after the override so that it receives the
# notebook-sized batch setting.
data_Gen = DataGenerator(args)

actor_net_lr: 0.0001
agent_type: attention
batch_size: 128
beam_width: 5
capacity: 20
critic_net_lr: 0.0001
data_dir: ./data
decode_len: 20
demand_max: 9
disable_tqdm: True
dropout: 0.1
embedding_dim: 30
embedding_graph: 2
entropy_coeff: 0.0
forget_bias: 1.0
gpu: 3
hidden_dim: 128
infer_type: batch
input_dim: 3
is_train: True
load_path: 
log_dir: logs/vrp10-2021-09-19_12-14-49
log_interval: 200
mask_glimpses: True
mask_pointer: True
max_grad_norm: 2.0
min_trucks: False
model_dir: logs/vrp10-2021-09-19_12-14-49\model
n_cust: 10
n_glimpses: 0
n_nodes: 11
n_process_blocks: 3
n_train: 260000
random_seed: 24601
rnn_layers: 1
save_interval: 1000
stdout_print: True
tanh_exploration: 10.0
task: vrp10
task_name: vrp
test_interval: 200
test_size: 1000
ups: False
use_tanh: False
Created train iterator.
Loading dataset for vrp-size-1000-len-11-test.txt...


In [20]:
class Env(object):
    def __init__(self, args=None, data_gen=None):
        '''
        This is the environment for VRP. It draws one training batch and
        exposes it split into node coordinates and demands.

        Inputs:
            args: optional parameter dictionary. When given, it should include:
                args['capacity']: vehicle capacity
                args['n_nodes']: number of nodes in VRP
                args['n_cust']: number of customers in VRP
                args['input_dim']: dimension of the problem which is 3
              When omitted, the values previously hard-coded in this class are
              used (capacity=20, n_nodes=11, n_cust=10, input_dim=3).
            data_gen: optional object exposing get_train_next(). When omitted,
              the notebook-level `data_Gen` is used.

        Attributes set:
            capacity, n_nodes, n_cust, input_dim: problem sizes (see above).
            input_data: the batch returned by get_train_next(), cast to float32.
                # presumably [batch_size x n_nodes x input_dim] -- TODO confirm
            input_pnt: input_data without its last feature (all but demand).
            demand: the last feature of input_data.
            batch_size: size of the first dimension of input_pnt, as a tensor.
        '''
        if args is None:
            # Backward-compatible defaults: identical to the old constants.
            args = {'capacity': 20, 'n_nodes': 11, 'n_cust': 10, 'input_dim': 3}
        if data_gen is None:
            # Fall back to the generator created in the configuration cell.
            data_gen = data_Gen

        self.capacity = args['capacity']
        self.n_nodes = args['n_nodes']
        self.n_cust = args['n_cust']
        self.input_dim = args['input_dim']

        # One batch of problem instances, converted to a float32 tensor.
        self.input_data = tf.cast(data_gen.get_train_next(), dtype=tf.float32)

        # The last feature is the demand; everything before it is the point data.
        self.input_pnt = self.input_data[:,:,:(self.input_dim -1)]      # all but demand
        self.demand = self.input_data[:,:,-1]
        self.batch_size = tf.shape(input=self.input_pnt)[0]

In [21]:
class AttentionVRPCritic(object):
    """Attention module used by the critic of the VRP model.

    It scores every reference position against a query, adding a learned
    projection of the demand read from the environment.
    """
    def __init__(self, dim, use_tanh=False, C=10,_name='Attention',_scope=''):
        """
        Create the variables and layers of the attention module.

        Args:
            dim: output size of every projection and length of the vector v.
            use_tanh: when True, logits are squashed as C * tanh(u).
            C: tanh exploration parameter (only used when use_tanh is True).
            _name: name of this module, appended to _scope.
            _scope: prefix prepended to _name for variable and layer scopes.
        """
        scope_prefix = _scope + _name

        self.use_tanh = use_tanh
        self._scope = _scope

        v_init = tf.compat.v1.keras.initializers.VarianceScaling(
            scale=1.0, mode="fan_avg", distribution="uniform")
        with tf.compat.v1.variable_scope(scope_prefix):
            # The variable is created as [1 x dim] and stored as [1 x dim x 1]
            # so it can be tiled over the batch and used in a matmul.
            v_row = tf.compat.v1.get_variable('v', [1, dim], initializer=v_init)
            self.v = tf.expand_dims(v_row, 2)

        # Kernel-size-1 convolutions for the demand and the reference, and a
        # dense layer for the query. Creation order is kept as in the original.
        conv1d = tf.compat.v1.layers.Conv1D
        dense = tf.compat.v1.layers.Dense
        self.emb_d = conv1d(dim, 1, _scope=scope_prefix + '/emb_d')
        self.project_d = conv1d(dim, 1, _scope=scope_prefix + '/proj_d')

        self.project_query = dense(dim, _scope=scope_prefix + '/proj_q')
        self.project_ref = conv1d(dim, 1, _scope=scope_prefix + '/proj_e')

        self.C = C  # tanh exploration parameter
        self.tanh = tf.nn.tanh

    def __call__(self, query, ref, env):
        """
        Compute the projected reference and the attention logits.

        Args:
            query: the hidden state of the decoder at the current time step.
                [batch_size x dim]
            ref: the set of hidden states from the encoder.
                [batch_size x max_time x feature_dim]
            env: environment object; only env.input_data is read here, and its
                last feature is taken as the demand.
                # presumably [batch_size x max_time x input_dim] -- TODO confirm

        Returns:
            e: convolved ref with shape [batch_size x max_time x dim]
            logits: [batch_size x max_time]
        """
        # The critic works from the demand stored in the environment's input.
        demand = env.input_data[:, :, -1]
        n_steps = tf.shape(input=demand)[1]

        # Embed the demand, then project it: [batch_size x max_time x dim].
        demand_embedded = self.emb_d(tf.expand_dims(demand, 2))
        demand_projected = self.project_d(demand_embedded)

        # Project the reference, then the query (same layer order as before).
        e = self.project_ref(ref)
        q = self.project_query(query)  # [batch_size x dim]
        q_tiled = tf.tile(tf.expand_dims(q, 1), [1, n_steps, 1])

        # Repeat v once per batch element: [batch_size x dim x 1].
        v_batched = tf.tile(self.v, [tf.shape(input=e)[0], 1, 1])

        # [batch_size x max_time x dim] @ [batch_size x dim x 1], with the
        # trailing axis squeezed away -> [batch_size x max_time].
        activated = self.tanh(q_tiled + e + demand_projected)
        u = tf.squeeze(tf.matmul(activated, v_batched), 2)

        if not self.use_tanh:
            return e, u
        return e, self.C * self.tanh(u)

In [None]:
# Seed NumPy and TensorFlow from the configured seed so that the random `ref`
# and the freshly initialised weights give the same printed values on every
# Restart & Run All.
# NOTE(review): whether data_Gen draws from these global generators or from
# its own one is not visible here -- confirm if the batch must be fixed too.
random_seed = args['random_seed']
if random_seed is not None and random_seed > 0:
    np.random.seed(random_seed)
    tf.random.set_seed(random_seed)

# Random stand-in for the encoder output: [batch_size x n_nodes x embedding_dim].
ref = tf.cast(
    np.random.randn(args['batch_size'], args['n_nodes'], args['embedding_dim']),
    tf.float32)
env = Env()

with tf.compat.v1.variable_scope("Critic"):
    with tf.compat.v1.variable_scope("Encoder"):
        # init states: [rnn_layers x 2 x batch_size x hidden_dim], all zeros.
        # The batch dimension is read from args['batch_size'], the same source
        # used for `ref`, rather than from the separate `batch_size` global,
        # so the two tensors cannot drift apart.
        initial_state = tf.zeros(
            [args['rnn_layers'], 2, args['batch_size'], args['hidden_dim']])
        l = tf.unstack(initial_state, axis=0)
        # One LSTMStateTuple per RNN layer, built from that layer's two slices.
        rnn_tuple_state = tuple(
            tf.compat.v1.nn.rnn_cell.LSTMStateTuple(l[idx][0], l[idx][1])
            for idx in range(args['rnn_layers']))

        # Start from the second element of the first layer's state tuple.
        hy = rnn_tuple_state[0][1]

    with tf.compat.v1.variable_scope("Process"):
        for i in range(args['n_process_blocks']):
            process = AttentionVRPCritic(args['hidden_dim'], _name="P" + str(i))
            e, logit = process(hy, ref, env)

            prob = tf.nn.softmax(logit)
            # hy : [batch_size x 1 x sourceL] * [batch_size x sourceL x hidden_dim] ->
            # [batch_size x h_dim]
            hy = tf.squeeze(tf.matmul(tf.expand_dims(prob, 1), e), 1)

    with tf.compat.v1.variable_scope("Linear"):
        # Two dense layers (hidden_dim with ReLU, then 1 unit); the trailing
        # unit axis is squeezed so `v` holds one value per batch element.
        hidden = tf.compat.v1.layers.dense(
            hy, args['hidden_dim'], tf.nn.relu, name='L1')
        v = tf.squeeze(tf.compat.v1.layers.dense(hidden, 1, name='L2'), 1)

print(v.numpy())

[ 0.08945787 -0.1517034  -0.1602616  -0.44746327]
