In [1]:
import argparse
import os
import numpy as np
from tqdm.auto import tqdm
import tensorflow as tf
import time
import sys
from configs import ParseParams
from DataGenerator import DataGenerator
from env import Env
#from Attention import AttentionVRPCritic, AttentionVRPActor
from embedder import FullGraphEmbedding
import collections

In [2]:
class AttentionVRPActor(object):
    """Additive attention for the VRP actor.

    The score of every node combines the projected decoder query, the
    projected encoder output, the node's current demand and the difference
    between the vehicle load and that demand.
    """
    def __init__(self, dim, use_tanh=False, C=10,_name='Attention',_scope=''):
        """
        Args:
            dim: number of filters/units of every projection and the length of v.
            use_tanh: when True the logits are squashed to C * tanh(u).
            C: scale applied to the tanh-squashed logits.
            _name: name appended to _scope to form the variable scope.
            _scope: prefix of the variable scope.
        """
        self.use_tanh = use_tanh
        self._scope = _scope

        scope_prefix = _scope + _name
        make_conv = tf.compat.v1.layers.Conv1D

        with tf.compat.v1.variable_scope(scope_prefix):
            # v is created as [1 x dim] and stored as [1 x dim x 1].
            # NOTE(review): expand_dims runs here, at construction time; under
            # eager execution self.v is therefore a value computed once from
            # the variable -- confirm this matches the intended execution mode.
            v_variable = tf.compat.v1.get_variable(
                'v',
                [1, dim],
                initializer=tf.compat.v1.keras.initializers.VarianceScaling(
                    scale=1.0, mode="fan_avg", distribution="uniform"))
            self.v = tf.expand_dims(v_variable, 2)

        # Every Conv1D below has filters = dim and kernel size = 1, i.e. it is a
        # per-node linear map. Creation order is kept as in the original.
        self.emb_d = make_conv(dim, 1, _scope=scope_prefix + '/emb_d')
        self.emb_ld = make_conv(dim, 1, _scope=scope_prefix + '/emb_ld')

        self.project_d = make_conv(dim, 1, _scope=scope_prefix + '/proj_d')
        self.project_ld = make_conv(dim, 1, _scope=scope_prefix + '/proj_ld')
        # Dense layer without an activation argument (linear projection).
        self.project_query = tf.compat.v1.layers.Dense(dim, _scope=scope_prefix + '/proj_q')
        self.project_ref = make_conv(dim, 1, _scope=scope_prefix + '/proj_ref')

        self.C = C  # scale of the tanh-squashed logits
        self.tanh = tf.nn.tanh

    def __call__(self, query, ref, env):
        """
        Build the attention logits for one decoding step.

        Args:
            query: hidden state of the decoder at the current time step.
                [batch_size x dim]
            ref: the set of hidden states from the encoder.
                [batch_size x max_time x dim]
            env: environment object; env.demand and env.load are read here.

        Returns:
            e: convolved ref with shape [batch_size x max_time x dim]
            logits: [batch_size x max_time]
        """
        demand = env.demand
        load = env.load
        n_steps = tf.shape(input=demand)[1]

        # Demand branch: embed, then project.
        # Both tensors: [batch_size x max_time x dim]
        demand_emb = self.emb_d(tf.expand_dims(demand, 2))
        demand_proj = self.project_d(demand_emb)

        # (load - demand) branch: load is repeated along the node axis first.
        load_per_node = tf.tile(tf.expand_dims(load, 1), [1, n_steps])
        slack_emb = self.emb_ld(tf.expand_dims(load_per_node - demand, 2))
        # Same layout as the demand branch: [batch_size x max_time x dim]
        slack_proj = self.project_ld(slack_emb)

        # Encoder reference and query, the latter repeated for every node.
        e = self.project_ref(ref)
        query_proj = self.project_query(query)  # [batch_size x dim]
        query_per_node = tf.tile(tf.expand_dims(query_proj, 1), [1, n_steps, 1])

        # One copy of v per batch row: [batch_size x dim x 1]
        v_batched = tf.tile(self.v, [tf.shape(input=e)[0], 1, 1])

        # [batch_size x max_time x dim] @ [batch_size x dim x 1], then drop the
        # trailing axis -> [batch_size x max_time]
        activated = self.tanh(query_per_node + e + demand_proj + slack_proj)
        u = tf.squeeze(tf.matmul(activated, v_batched), 2)

        logits = self.C * self.tanh(u) if self.use_tanh else u

        return e, logits
 

In [3]:
   
class AttentionVRPCritic(object):
    """Additive attention for the VRP critic.

    Unlike the actor variant it has no load branch and reads the demand from
    the last feature of env.input_data rather than from env.demand.
    """
    def __init__(self, dim, use_tanh=False, C=10,_name='Attention',_scope=''):
        """
        Args:
            dim: number of filters/units of every projection and the length of v.
            use_tanh: when True the logits are squashed to C * tanh(u).
            C: scale applied to the tanh-squashed logits.
            _name: name appended to _scope to form the variable scope.
            _scope: prefix of the variable scope.
        """
        self.use_tanh = use_tanh
        self._scope = _scope

        scope_prefix = _scope + _name
        make_conv = tf.compat.v1.layers.Conv1D

        with tf.compat.v1.variable_scope(scope_prefix):
            # v is created as [1 x dim] and stored as [1 x dim x 1].
            # NOTE(review): expand_dims runs here, at construction time; under
            # eager execution self.v is therefore a value computed once from
            # the variable -- confirm this matches the intended execution mode.
            v_variable = tf.compat.v1.get_variable(
                'v',
                [1, dim],
                initializer=tf.compat.v1.keras.initializers.VarianceScaling(
                    scale=1.0, mode="fan_avg", distribution="uniform"))
            self.v = tf.expand_dims(v_variable, 2)

        # Conv1D layers with filters = dim and kernel size = 1 (per-node linear
        # maps). Creation order is kept as in the original.
        self.emb_d = make_conv(dim, 1, _scope=scope_prefix + '/emb_d')
        self.project_d = make_conv(dim, 1, _scope=scope_prefix + '/proj_d')

        self.project_query = tf.compat.v1.layers.Dense(dim, _scope=scope_prefix + '/proj_q')
        self.project_ref = make_conv(dim, 1, _scope=scope_prefix + '/proj_e')

        self.C = C  # scale of the tanh-squashed logits
        self.tanh = tf.nn.tanh

    def __call__(self, query, ref, env):
        """
        Build the attention logits for the critic.

        Args:
            query: hidden state of the decoder at the current time step.
                [batch_size x dim]
            ref: the set of hidden states from the encoder.
                [batch_size x max_time x dim]
            env: environment object; only env.input_data[:, :, -1] is read
                here (the first demand value, per the original comment).

        Returns:
            e: convolved ref with shape [batch_size x max_time x dim]
            logits: [batch_size x max_time]
        """
        # The critic works from the last input feature, not from env.demand.
        demand = env.input_data[:, :, -1]
        n_steps = tf.shape(input=demand)[1]

        # Demand branch: embed, then project.
        # Both tensors: [batch_size x max_time x dim]
        demand_emb = self.emb_d(tf.expand_dims(demand, 2))
        demand_proj = self.project_d(demand_emb)

        # Encoder reference and query, the latter repeated for every node.
        e = self.project_ref(ref)
        query_proj = self.project_query(query)  # [batch_size x dim]
        query_per_node = tf.tile(tf.expand_dims(query_proj, 1), [1, n_steps, 1])

        # One copy of v per batch row: [batch_size x dim x 1]
        v_batched = tf.tile(self.v, [tf.shape(input=e)[0], 1, 1])

        # [batch_size x max_time x dim] @ [batch_size x dim x 1], then drop the
        # trailing axis -> [batch_size x max_time]
        activated = self.tanh(query_per_node + e + demand_proj)
        u = tf.squeeze(tf.matmul(activated, v_batched), 2)

        logits = self.C * self.tanh(u) if self.use_tanh else u

        return e, logits

In [4]:
class DecodeStep(object):
    '''
    One decoding step driven only by attention (no RNN state is kept).
    '''
    def __init__(self,
            ClAttention,
            args,
            hidden_dim,
            use_tanh=False,
            tanh_exploration=10.,
            n_glimpses=0,
            mask_glimpses=True,
            mask_pointer=True,
            _scope=''):
        '''
        Builds the glimpse attentions and the final pointer attention.
        Inputs:
            ClAttention:    the class which is instantiated for every attention
            args:           configuration mapping; not read by this base class
            hidden_dim:     dimension handed to every attention instance
            use_tanh:       whether the pointer uses tanh exploration or not
            tanh_exploration: parameter for tanh exploration (passed as C)
            n_glimpses:     number of glimpses
            mask_glimpses:  whether to mask the glimpse logits or not
            mask_pointer:   whether to mask the pointer logits or not
            _scope:         variable scope
        '''

        self.hidden_dim = hidden_dim
        self.use_tanh = use_tanh
        self.tanh_exploration = tanh_exploration
        self.n_glimpses = n_glimpses
        self.mask_glimpses = mask_glimpses
        self.mask_pointer = mask_pointer
        self._scope = _scope
        # Subtracted (times the mask) from logits so masked nodes lose the softmax.
        self.BIGNUMBER = 100000.

        # One attention instance per glimpse; glimpses never use tanh squashing.
        self.glimpses = [
            ClAttention(hidden_dim,
                use_tanh=False,
                _scope=self._scope,
                _name="Glimpse"+str(glimpse_no))
            for glimpse_no in range(self.n_glimpses)
        ]

        # The pointer produces the logits that are actually returned.
        self.pointer = ClAttention(hidden_dim,
            use_tanh=use_tanh,
            C=tanh_exploration,
            _scope=self._scope,
            _name="Decoder/Attention")

    def get_logit_op(self,
                     decoder_inp,
                     context,
                     Env,
                    *args,
                    **kwargs):
        """
        Runs the glimpses followed by the pointer and returns the logit op.
        Input:
            decoder_inp: the query handed to the first attention,
                        [batch_size x dim].
            context: the context vector from the encoder with
                      shape [batch_size x max_time x dim]
            Env: an instance of the environment. It should have:
                Env.mask: a matrix used for masking the logits and glimpses. It is with shape
                         [batch_size x max_time]. Zeros in this matrix means not-masked nodes. Any
                         positive number in this mask means that the node cannot be selected as
                         the next decision point.
        Returns:
            logit: the logits which will be used by the decoder for producing a solution,
                   [batch_size x max_time].
            None: placeholder for the decoder state, which this class does not have.
        """

        for glimpse_no in range(self.n_glimpses):
            attend = self.glimpses[glimpse_no]
            # ref: [batch_size x max_time x hidden_dim], logit: [batch_size x max_time]
            ref, logit = attend(decoder_inp, context, Env)
            if self.mask_glimpses:
                logit -= self.BIGNUMBER * Env.mask
            # weights: [batch_size x 1 x max_time]
            weights = tf.expand_dims(tf.nn.softmax(logit), 1)
            # Attention-weighted sum of ref becomes the next query:
            # [batch_size x 1 x max_time] @ [batch_size x max_time x hidden_dim]
            # -> [batch_size x hidden_dim]
            decoder_inp = tf.squeeze(tf.matmul(weights, ref), 1)

        # Final pointer attention; its first output is not needed.
        _, logit = self.pointer(decoder_inp, context, Env)
        if self.mask_pointer:
            logit -= self.BIGNUMBER * Env.mask

        return logit, None

    def step(self,
            decoder_inp,
            context,
            Env,
            decoder_state=None,
            *args,
            **kwargs):
        '''
        Get logits and probabilities at a given decoding step.
        Inputs:
            decoder_inp: input of the decoding step
            context: context vector to use in attention
            Env: an instance of the environment
            decoder_state: forwarded to get_logit_op. It can be None when the
                decoder has no LSTM cell.
        Returns:
            logit: logits with shape [batch_size x max_time]
            prob: probabilities for the next location visit, [batch_size x max_time]
            logprob: log of probabilities
            decoder_state: whatever get_logit_op returned as state
        '''

        logit, decoder_state = self.get_logit_op(decoder_inp, context, Env, decoder_state)

        # Probabilities are recovered from the log-softmax rather than computed
        # by a second softmax.
        logprob = tf.nn.log_softmax(logit)
        return logit, tf.exp(logprob), logprob, decoder_state



In [5]:
class RNNDecodeStep(DecodeStep):
    '''
    Decodes the sequence. It keeps the decoding history in a RNN.
    '''
    def __init__(self,
            ClAttention,
            args,
            hidden_dim,
            use_tanh=False,
            tanh_exploration=10.,
            n_glimpses=0,
            mask_glimpses=True,
            mask_pointer=True,
            forget_bias=1.0,
            rnn_layers=1,
            _scope=''):

        '''
        This class does one-step of decoding which uses RNN for storing the sequence info.
        Inputs:
            ClAttention:    the class which is used for attention
            args:           configuration mapping; args['dropout'] is read here
            hidden_dim:     hidden dimension of RNN
            use_tanh:       whether to use tanh exploration or not
            tanh_exploration: parameter for tanh exploration
            n_glimpses:     number of glimpses
            mask_glimpses:  whether to use masking for the glimpses or not
            mask_pointer:   whether to use masking for the pointer or not
            forget_bias:    forget bias of LSTM
            rnn_layers:     number of LSTM layers
            _scope:         variable scope

        '''

        super(RNNDecodeStep,self).__init__(ClAttention,
                                        args,
                                        hidden_dim,
                                        use_tanh=use_tanh,
                                        tanh_exploration=tanh_exploration,
                                        n_glimpses=n_glimpses,
                                        mask_glimpses=mask_glimpses,
                                        mask_pointer=mask_pointer,
                                        _scope=_scope)
        self.forget_bias = forget_bias
        self.rnn_layers = rnn_layers
        self.dropout = tf.constant( args['dropout'] )

        def _make_cell():
            # One LSTM cell with dropout applied to its inputs.
            lstm_cell = tf.compat.v1.nn.rnn_cell.BasicLSTMCell(hidden_dim,
                forget_bias=forget_bias)
            return tf.compat.v1.nn.rnn_cell.DropoutWrapper(
                cell=lstm_cell, input_keep_prob=(1.0 - self.dropout))

        # Build a separate cell object per layer. Repeating one object with
        # `[cell] * rnn_layers` would hand the very same cell to every layer,
        # so the layers would not get their own weights (depending on the
        # TensorFlow version this shares them or fails on variable-scope reuse).
        # For rnn_layers == 1 the result is the same as before.
        self.cell = tf.compat.v1.nn.rnn_cell.MultiRNNCell(
            [_make_cell() for _ in range(rnn_layers)])

    def get_logit_op(self,
                    decoder_inp,
                    context,
                    Env,
                    decoder_state,
                    *args,
                    **kwargs):
        """
        For a given input to decoder, returns the logit op and new decoder_state.
        Input:
            decoder_inp: the decoder input as a batch-major sequence,
                        [batch_size x time x dim], as consumed by dynamic_rnn.
                        The decoding cells of this notebook pass a single time
                        step, i.e. [batch_size x 1 x dim].
            context: the context vector from the encoder. It is usually the output of rnn with
                      shape [batch_size x max_time x dim]
            Env: an instance of the environment. It should have:
                Env.mask: a matrix used for masking the logits and glimpses. It is with shape
                         [batch_size x max_time]. Zeros in this matrix means not-masked nodes. Any
                         positive number in this mask means that the node cannot be selected as
                         the next decision point.
            decoder_state: The state as a sequence of size rnn_layers, and each element is an
                    LSTMStateTuple of 2 tensors with dimension of [batch_size x hidden_dim].
                    The first one corresponds to c and the second one is h.
        Returns:
            logit: the logits which will be used by the decoder for producing a solution. It has
                    shape [batch_size x max_time].
            decoder_state: the updated decoder state.
        """

        # Advance the LSTM stack; only the final state is needed, not the outputs.
        _ , decoder_state = tf.compat.v1.nn.dynamic_rnn(self.cell,
                                              decoder_inp,
                                              initial_state=decoder_state,
                                              scope=self._scope+'Decoder/LSTM/rnn')
        # Hidden state h of the top layer is the attention query.
        hy = decoder_state[-1].h

        # The glimpse and pointer stages are exactly those of the base class,
        # so reuse them with hy as the query instead of repeating the code.
        # The base class returns None as state, which is discarded here.
        logit, _ = super(RNNDecodeStep, self).get_logit_op(hy, context, Env)

        return logit , decoder_state


In [None]:
# Load the experiment configuration and override it with a tiny problem so
# that every intermediate tensor is small enough to print.
args, prt = ParseParams()

batch_size = 2
nodes = 5
cust = nodes-1   # presumably every node except one is a customer -- TODO confirm
args['batch_size'] = batch_size
args['n_nodes'] = nodes
args['n_cust'] = cust

# Attention class used by the decoder; the actor variant is selected here.
clAttentionActor = AttentionVRPActor

# Keyword options of the decoder, all taken from the configuration.
decoder_options = dict(
    use_tanh=args['use_tanh'],
    tanh_exploration=args['tanh_exploration'],
    n_glimpses=args['n_glimpses'],
    mask_glimpses=args['mask_glimpses'],
    mask_pointer=args['mask_pointer'],
    forget_bias=args['forget_bias'],
    rnn_layers=args['rnn_layers'],
    _scope='Actor/',
)
decodeStep = RNNDecodeStep(clAttentionActor, args, args['hidden_dim'], **decoder_options)

In [None]:
# Build the environment, embed its input and prepare the first decoder input,
# the attention context and an all-zero LSTM state.
beam_width = 1
embedder_model = FullGraphEmbedding(args['embedding_dim'],args)
env = Env( args )
env.reset()

input_data_norm = env.input_data_norm
encoder_emb_inp = embedder_model(input_data_norm)

# Step-by-step view of how the first decoder input is formed:
# t1 picks the embedding of the last node, t2 adds a length-1 axis at
# position 1, t3 tiles it with all-ones multiples (values unchanged).
t1 = encoder_emb_inp[:,env.n_nodes-1]
t2 = tf.expand_dims(t1, 1)
t3 = tf.tile( t2, [1,1,1])

# The decoder input and the context are both repeated beam_width times
# along the batch axis.
decoder_input = tf.tile(t2, [beam_width,1,1])
context = tf.tile(encoder_emb_inp,[beam_width,1,1])

# decoder_state: one LSTMStateTuple of zeros per RNN layer.
initial_state = tf.zeros([args['rnn_layers'], 2, args['batch_size']*beam_width, args['hidden_dim']])
l = tf.unstack(initial_state, axis=0)  # list with one [2 x batch x hidden_dim] tensor per layer
decoder_state = tuple(
    tf.compat.v1.nn.rnn_cell.LSTMStateTuple(layer_zeros[0], layer_zeros[1])
    for layer_zeros in l)


In [None]:
BatchSequence = tf.expand_dims(tf.cast(tf.range(batch_size*beam_width), tf.int64), 1)
# create tensors and lists
actions_tmp = []
logprobs = []
probs = []
idxs = []

for i in range(args['decode_len']):  # decode_len is 20

    logit, prob, logprob, decoder_state = decodeStep.step(decoder_input,
                        context,
                        env,
                        decoder_state)
    # idx: [batch_size*beam_width x 1]
    beam_parent = None
    def my_multinomial():
        prob_idx = tf.stop_gradient(prob)
        print( ' prob_idx: ', prob_idx.numpy())
        prob_idx_cum = tf.cumsum(prob_idx,1)
        print( ' prob_idx_cum：', prob_idx_cum.numpy())
        rand_uni = tf.tile(tf.random.uniform([batch_size,1]),[1,env.n_nodes])
        print( ' rand_uni: ', rand_uni.numpy())
        # sorted_ind : [[0,1,2,3..],[0,1,2,3..] , ]
        sorted_ind = tf.cast(tf.tile(tf.expand_dims(tf.range(env.n_nodes),0),[batch_size,1]),tf.int64)
        print( ' sorted_ind: ', sorted_ind.numpy())
        tmp = tf.multiply(tf.cast(tf.greater(prob_idx_cum,rand_uni),tf.int64), sorted_ind)+\
            10000*tf.cast(tf.greater_equal(rand_uni,prob_idx_cum),tf.int64)
        print( ' tmp: ', tmp.numpy())
        idx = tf.expand_dims(tf.argmin(input=tmp,axis=1),1)
        print( ' idx: ', idx.numpy())
        return tmp, idx

    tmp, idx = my_multinomial()
    # check validity of tmp -> True or False -- True mean take a new sample
    t1 = tf.reduce_sum(input_tensor=tmp,axis=1)
    print( ' t1: ', t1.numpy())
    t2 = tf.greater( t1, (10000*env.n_nodes)-1 )
    print( ' t2: ', t2.numpy())
    tmp_check = tf.cast(tf.reduce_sum(input_tensor=tf.cast(tf.greater(tf.reduce_sum(input_tensor=tmp,axis=1),(10000*env.n_nodes)-1),
                                              tf.int32)),tf.bool)
    tmp , idx = tf.cond(pred=tmp_check,true_fn=my_multinomial,false_fn=lambda:(tmp,idx))
    
    state = env.step(idx,beam_parent)
    print( ' load status: ', state.load.numpy() )
    print( ' demand status: ', state.demand.numpy() )
    print( ' satisfied demand: ', state.d_sat.numpy())
    print( ' mask: ', state.mask.numpy())
    batched_idx = tf.concat([BatchSequence,idx],1)


    decoder_input = tf.expand_dims(tf.gather_nd(
        tf.tile(encoder_emb_inp,[beam_width,1,1]), batched_idx),1)

    logprob = tf.math.log(tf.gather_nd(prob, batched_idx))
    probs.append(prob)
    idxs.append(idx)
    logprobs.append(logprob)

    action = tf.gather_nd(tf.tile( env.input_pnt, [beam_width,1,1]), batched_idx )
    actions_tmp.append(action)
    
    