In [None]:
import argparse
import os
import numpy as np
from tqdm.auto import tqdm
import tensorflow as tf
import time

import shared.misc_utils as utils

from configs import ParseParams

#from shared import embeddings
11
from evaluation.benchmark import benchmark
#from model.attention_agent import RLAgent

import pickle,time,os

from configs import ParseParams
from shared.graph_embedding.useful_files.gnn_film_model import GNN_FiLM_Model
from shared.graph_embedding.useful_files.number_vehicle_task import Nb_Vehicles_Task
from VRP.vrp_utils import DataGenerator

In [None]:
import tensorflow as tf
import numpy as np
import time, os

from shutil import copyfile
from sklearn.preprocessing import normalize
from shared.embeddings import LinearEmbedding,GraphEmbedding
from shared.graph_embedding.full_graph_learning import FullGraphEmbedding
from shared.decode_step import RNNDecodeStep

class RLAgent(object):

    def __init__(self,
                args,
                prt,
                env,
                dataGen,
                reward_func,
                clAttentionActor,
                clAttentionCritic,
                is_train=True,
                _scope=''):
        '''
        Build the agent's computation graph (embedder, decoder, train and
        validation models) and the checkpoint saver.

        Inputs:
            args: dict-like hyper-parameters (indexed by string keys).
            prt: print controller; its print_out() is used for logging.
            env: an instance of the environment.
            dataGen: data generator providing the test / training data.
            reward_func: function used to compute the reward of a decoded route.
            clAttentionActor: attention class handed to the actor's decode step.
            clAttentionCritic: attention class instantiated by the critic.
            is_train: if true, the stochastic training model and the train step
                        are built in addition to the validation models.
            _scope: prefix prepended to the 'Actor/' scope of the linear embedder.
        '''

        self.args, self.prt, self.env = args, prt, env
        self.dataGen, self.reward_func = dataGen, reward_func
        self.clAttentionCritic = clAttentionCritic

        graph_mode = args['embedding_graph']
        emb_dim = args['embedding_dim']

        # mode 2 -> learned full-graph embedding; every other mode -> linear embedding
        if graph_mode != 2:
            self.embedder_model = LinearEmbedding(emb_dim, _scope=_scope+'Actor/')
        else:
            self.embedder_model = FullGraphEmbedding(emb_dim, args)

        # mode 1 additionally builds a GraphEmbedding from the complete test set
        if graph_mode == 1:
            self.embedder_graph = GraphEmbedding(args, self.dataGen.get_test_all())

        # these decoder options are forwarded under the same name they have in args
        decoder_kwargs = {key: args[key] for key in ('use_tanh',
                                                     'tanh_exploration',
                                                     'n_glimpses',
                                                     'mask_glimpses',
                                                     'mask_pointer',
                                                     'forget_bias',
                                                     'rnn_layers')}
        self.decodeStep = RNNDecodeStep(clAttentionActor,
                                        args['hidden_dim'],
                                        _scope='Actor/',
                                        **decoder_kwargs)

        uniform_fan_avg = tf.compat.v1.keras.initializers.VarianceScaling(
            scale=1.0, mode="fan_avg", distribution="uniform")
        self.decoder_input = tf.compat.v1.get_variable(
            'decoder_input', [1, 1, emb_dim], initializer=uniform_fan_avg)

        # build the models and report how long graph construction took
        build_started = time.time()
        if is_train:
            self.train_summary = self.build_model(decode_type="stochastic")
            self.train_step = self.build_train_step()

        self.val_summary_greedy = self.build_model(decode_type="greedy")
        self.val_summary_beam = self.build_model(decode_type="beam_search")
        elapsed = time.time() - build_started
        self.prt.print_out("It took {}s to build the agent.".format(str(elapsed)))

        # only trainable variables are saved / restored
        trainable_vars = tf.compat.v1.get_collection(
            tf.compat.v1.GraphKeys.TRAINABLE_VARIABLES)
        self.saver = tf.compat.v1.train.Saver(var_list=trainable_vars)

        # log file for averaged inference results; it stays open on the instance
        self.out_avg_resul = open(args['log_dir']+"/avg_inference.txt", "w")

In [None]:
import tensorflow as tf
import numpy as np
import time


from shared.embeddings import Embedding
from shared.graph_embedding.useful_files.utils import get_activation
from shared.graph_embedding.useful_files.gnn_film import sparse_gnn_film_layer

class FullGraphEmbedding(Embedding):
    """
    Node embedding computed by a stack of GNN-FiLM layers.

    The graph is built from the first two input features of every node (used as
    coordinates): for each distance threshold in ``self._scale`` one edge type is
    created that links the node pairs whose distance falls into that band.
    """
    def __init__(self,embedding_dim,args):
        """
        :param embedding_dim: size of the embedding, forwarded to the base class
        :param args: dict-like configuration; reads 'embedding_dim', 'input_dim'
                     and 'n_nodes'
        """
        # The GNN output (hidden_size below) is reshaped to embedding_dim in
        # __call__, so only the matching size of 30 is accepted.
        assert args['embedding_dim'] == 30, args['embedding_dim']
        super(FullGraphEmbedding,self).__init__('full_graph',embedding_dim)

        self.nb_feat = args['input_dim']
        self.n_nodes = args['n_nodes']

        # Distance thresholds, one per edge type.
        self._scale = [5,12,25,50,100]
        self._scale = [i * np.sqrt(2)/100 for i in self._scale]     # rescale to the square

        # Fed at run time; used as a KEEP probability (see _propagate_graph_model).
        self.drop_out = tf.compat.v1.placeholder(tf.float32,name='embedder_graph_dropout')
        self.params = {
            'graph_num_layers': 8,
            'graph_num_timesteps_per_layer': 3,

            'graph_layer_input_dropout_keep_prob': 0.8,
            'graph_dense_between_every_num_gnn_layers': 1,
            'graph_model_activation_function': 'tanh',
            'graph_residual_connection_every_num_layers': 1,
            'graph_inter_layer_norm': False,
            "hidden_size": 30,
            "graph_activation_function": "ReLU",
            "message_aggregation_function": "sum",
            "normalize_messages_by_num_incoming": True
            }


    def _propagate_graph_model(self,initial_node_features, incoming_edge, list_pair_adjancy):
        """
        Build the propagation model via graph
        :param initial_node_features: node features as returned by _prepare_input_data
        :param incoming_edge: per edge type, the number of incoming edges of each node
        :param list_pair_adjancy: per edge type, the tensor of node-index pairs
        :return: the node representations after the last layer
        """
        # NOTE(review): every call instantiates fresh Keras layers, so separate
        # __call__ invocations presumably do not share these weights - confirm
        # that this is intended.
        h_dim= self.params['hidden_size']
        activation_fn = get_activation(self.params['graph_model_activation_function'])

        projected_node_features = tf.keras.layers.Dense(units=h_dim,
                                      use_bias=False,
                                      activation=activation_fn,
                                      )(initial_node_features)

        cur_node_representations = projected_node_features
        last_residual_representations = tf.zeros_like(cur_node_representations)
        for layer_idx in range(self.params['graph_num_layers']):
            # self.drop_out holds the keep probability, tf.nn.dropout wants the drop rate
            cur_node_representations = \
                tf.nn.dropout(cur_node_representations, rate= 1- self.drop_out)
            if layer_idx % self.params['graph_residual_connection_every_num_layers'] == 0:
                # average with the input of the previous residual block
                t = cur_node_representations
                if layer_idx > 0:
                    cur_node_representations += last_residual_representations
                    cur_node_representations /= 2
                last_residual_representations = t
            cur_node_representations = \
                self._apply_gnn_layer(cur_node_representations,list_pair_adjancy,incoming_edge,self.params['graph_num_timesteps_per_layer'])
            if self.params['graph_inter_layer_norm']:
                # tf.contrib no longer exists in TF 2.x; use the Keras layer instead
                cur_node_representations = tf.keras.layers.LayerNormalization()(cur_node_representations)
            if layer_idx % self.params['graph_dense_between_every_num_gnn_layers'] == 0:
                cur_node_representations = \
                    tf.keras.layers.Dense(units=h_dim,
                                          use_bias=False,
                                          activation=activation_fn,
                                          name="Dense",
                                          )(cur_node_representations)

        return cur_node_representations


    def _apply_gnn_layer(self,node_representations,adjacency_lists,type_to_num_incoming_edges,num_timesteps):
        """
        Apply the actual gnn layer (a sparse GNN-FiLM layer configured from self.params)
        """
        return sparse_gnn_film_layer(
            node_embeddings=node_representations,
            adjacency_lists=adjacency_lists,
            type_to_num_incoming_edges=type_to_num_incoming_edges,
            state_dim=self.params['hidden_size'],
            num_timesteps=num_timesteps,
            activation_function=self.params['graph_activation_function'],
            message_aggregation_function=self.params['message_aggregation_function'],
            normalize_by_num_incoming=self.params["normalize_messages_by_num_incoming"])


    def _prepare_input_data(self, input_tf):
        """
        Turn a batch of node features into the sparse-graph inputs of the GNN.

        :param input_tf: tensor indexed as [batch, node, feature]; the first two
                         features are used as coordinates
        :return: (batch_features, final_incoming_edge, list_pair_edge) where
                 batch_features is input_tf flattened to [batch*n_nodes, nb_feat],
                 final_incoming_edge stacks, per edge type, the number of incoming
                 edges of every flattened node, and list_pair_edge holds, per edge
                 type, the [num_edges, 2] tensor of flattened node-index pairs
        """
        # all nodes of all batches in a single [batch*n_nodes, nb_feat] matrix
        batch_features = tf.reshape(input_tf,[-1,self.nb_feat])

        # pairwise euclidean distances via |a|^2 - 2 a.b + |b|^2, clipped at 0
        # to absorb negative rounding errors before the square root
        input_dist = input_tf[:,:,:2]
        square_input = tf.reduce_sum(input_tensor=tf.square(input_dist), axis=2)
        row = tf.reshape(square_input, [-1,self.n_nodes,1])
        col= tf.reshape(square_input,[-1,1,self.n_nodes])
        # dist_matrix: [batch, n_nodes, n_nodes]
        dist_matrix = tf.sqrt(tf.maximum(row - 2 * tf.matmul(input_dist,input_dist,False,True) + col,0.0))
        # example for 2 batches of 4 equally spaced nodes:
        # [[[ 0.         4.2426405  8.485281  12.7279215]
        #   [ 4.2426405  0.         4.2426405  8.485281 ]
        #   [ 8.485281   4.2426405  0.         4.2426405]
        #   [12.7279215  8.485281   4.2426405  0.       ]]
        #  [[ ... same ... ]]], shape=(2, 4, 4)
        list_num_incoming_ege = []
        list_pair_edge = []

        # not_masked marks the node pairs that may still receive an edge type.
        # Start with every pair except the diagonal, so that no node gets an edge
        # to itself. tf.linalg.set_diag returns a NEW tensor and expects one
        # diagonal of shape [batch, n_nodes]; the previous code dropped the result
        # (and passed an [n_nodes, n_nodes] diagonal), so self-loops were silently
        # kept. Models trained with that behaviour may need retraining.
        not_masked = tf.linalg.set_diag(
            tf.ones_like(dist_matrix,dtype=tf.bool),
            tf.zeros_like(dist_matrix[:,:,0],dtype=tf.bool))

        for i in range(len(self._scale)):
            # pairs within the i-th threshold that no smaller threshold claimed
            true_for_edge = tf.less_equal(dist_matrix,self._scale[i])
            true_for_edge = tf.logical_and(not_masked,true_for_edge)
            # continuing the example with a threshold of 5.0:
            # [[[False  True False False]
            #   [ True False  True False]
            #   [False  True False  True]
            #   [False False  True False]]
            #  [[ ... same ... ]]], shape=(2, 4, 4), dtype=bool

            # one row (batch, from_node, to_node) per selected pair
            indices = tf.cast(tf.compat.v1.where(true_for_edge),dtype=tf.int32)
            # shift node ids by batch_index * n_nodes so that they address rows of
            # the flattened batch_features matrix
            offset = self.n_nodes * indices[:,0]    # get all batch value
            offset = tf.expand_dims(offset,axis=1)
            offset = tf.tile(offset,[1,2])
            # indices[:,1:3] is the (from_node, to_node) pair inside its batch
            true_indices_nodes = offset + indices[:,1:3]
            list_pair_edge.append(true_indices_nodes)

            # number of edges of this type ending in each node: [batch, n_nodes]
            num_incoming = tf.reduce_sum(input_tensor=tf.cast(true_for_edge,dtype=tf.int32), axis=1)
            # continuing the example: [[1 2 2 1]
            #                          [1 2 2 1]]

            # flatten to [batch*n_nodes], same node order as batch_features
            num_incoming = tf.squeeze(tf.reshape(num_incoming,[1,-1]),0)
            list_num_incoming_ege.append(tf.cast(num_incoming,dtype=tf.float32))
            # update the mask: a pair that just received an edge type is not
            # available to the larger thresholds anymore
            not_masked = tf.logical_and(not_masked,tf.logical_not(true_for_edge))
        # [number of edge types, batch*n_nodes]
        final_incoming_edge = tf.stack(list_num_incoming_ege)


        return batch_features, final_incoming_edge, list_pair_edge

    def __call__(self, input_tf):
        """
        return the node embedding
        :param input_tf: the tensor of node features to embed
        :return: a tensor reshaped to [-1, n_nodes, embedding_dim]
        """
        time_init = time.time()
        initial_node_features, incoming_edge, list_pair_adjancy = self._prepare_input_data(input_tf)

        final_node_representations = self._propagate_graph_model(initial_node_features,incoming_edge,list_pair_adjancy)
        final_node_representations = tf.reshape(final_node_representations,[-1,self.n_nodes,self.embedding_dim])

        # accumulate the time spent building the embedding
        self.total_time += time.time() - time_init

        return final_node_representations


In [None]:
class RLAgent( RLAgent ):
    # Re-opens the RLAgent class defined in an earlier cell and adds build_model.
    def build_model(self, decode_type = "greedy"):
        """
        Build the decoding graph for one decoding strategy.

        :param decode_type: 'greedy', 'stochastic' or 'beam_search'; any other
                            value fails the assertion below
        :return: tuple (R, v, logprobs, actions, idxs, env.input_pnt, probs) where
                 R is the output of reward_func, v is the critic output (built only
                 for 'stochastic', otherwise the constant 0) and the lists hold one
                 entry per decoding step
        """

        # builds the model
        args = self.args
        env = self.env
        batch_size = tf.shape(input=env.input_pnt)[0]

        # input_pnt: [batch_size x max_time x dim_task]
        input_pnt = env.input_pnt

        # encoder_emb_inp: [batch_size, max_time, embedding_dim]
        # mode 0: embed the raw points, mode 1: use the embedding fed through the
        # environment, otherwise: embed the normalised input data
        if self.args['embedding_graph'] == 0:
            encoder_emb_inp = self.embedder_model(input_pnt)
        elif self.args['embedding_graph'] == 1:
            encoder_emb_inp = self.env.embeded_data
        else:
            encoder_emb_inp = self.embedder_model(env.input_data_norm)

        # greedy and stochastic decoding follow a single hypothesis per instance
        if decode_type == 'greedy' or decode_type == 'stochastic':
            beam_width = 1
        elif decode_type == 'beam_search':
            beam_width = args['beam_width']
        else:
            assert False

        # reset the env. The environment is modified to handle beam_search decoding.
        env.reset(beam_width)

        # row index of every (instance, beam) pair: [batch_size*beam_width x 1]
        BatchSequence = tf.expand_dims(tf.cast(tf.range(batch_size*beam_width), tf.int64), 1)


        # create tensors and lists
        actions_tmp = []
        logprobs = []
        probs = []
        idxs = []

        # start from depot
        # NOTE(review): idx and action are both reassigned inside the decoding loop
        # before they are read, so these two initial values are never used.
        idx = (env.n_nodes-1)*tf.ones([batch_size*beam_width,1])
        action = tf.tile(input_pnt[:,env.n_nodes-1],[beam_width,1])

        # decoder_state: one zero-initialised LSTMStateTuple per rnn layer
        initial_state = tf.zeros([args['rnn_layers'], 2, batch_size*beam_width, args['hidden_dim']])
        l = tf.unstack(initial_state, axis=0)
        decoder_state = tuple([tf.compat.v1.nn.rnn_cell.LSTMStateTuple(l[idx][0],l[idx][1])
                  for idx in range(args['rnn_layers'])])

        # start from depot in VRP
        # decoder_input: [batch_size*beam_width x 1 x hidden_dim]
        decoder_input = tf.tile(tf.expand_dims(encoder_emb_inp[:,env.n_nodes-1], 1),
                                [beam_width,1,1])

        # decoding loop
        context = tf.tile(encoder_emb_inp,[beam_width,1,1])
        for i in range(args['decode_len']):

            logit, prob, logprob, decoder_state = self.decodeStep.step(decoder_input,
                                context,
                                env,
                                decoder_state)
            # idx: [batch_size*beam_width x 1]
            beam_parent = None
            if decode_type == 'greedy':
                idx = tf.expand_dims(tf.argmax(input=prob, axis=1),1)
            elif decode_type == 'stochastic':
                # select stochastic actions. idx has shape [batch_size x 1]
                # tf.multinomial sometimes gives numerical errors, so we use our multinomial :(
                def my_multinomial():
                    # Inverse-CDF sampling: keep the node index wherever the
                    # cumulative probability exceeds the uniform draw and put
                    # 10000 everywhere else, so that argmin returns the first
                    # index whose cumulative probability exceeds the draw.
                    prob_idx = tf.stop_gradient(prob)
                    prob_idx_cum = tf.cumsum(prob_idx,1)
                    rand_uni = tf.tile(tf.random.uniform([batch_size,1]),[1,env.n_nodes])
                    # sorted_ind : [[0,1,2,3..],[0,1,2,3..] , ]
                    sorted_ind = tf.cast(tf.tile(tf.expand_dims(tf.range(env.n_nodes),0),[batch_size,1]),tf.int64)
                    tmp = tf.multiply(tf.cast(tf.greater(prob_idx_cum,rand_uni),tf.int64), sorted_ind)+\
                        10000*tf.cast(tf.greater_equal(rand_uni,prob_idx_cum),tf.int64)

                    idx = tf.expand_dims(tf.argmin(input=tmp,axis=1),1)
                    return tmp, idx

                tmp, idx = my_multinomial()
                # check validity of tmp -> True or False -- True mean take a new sample
                # (true when at least one row consists of 10000 entries only, i.e.
                # the draw was not below any cumulative probability of that row)
                tmp_check = tf.cast(tf.reduce_sum(input_tensor=tf.cast(tf.greater(tf.reduce_sum(input_tensor=tmp,axis=1),(10000*env.n_nodes)-1),
                                                          tf.int32)),tf.bool)
                # the whole batch is re-sampled once when the check fires
                tmp , idx = tf.cond(pred=tmp_check,true_fn=my_multinomial,false_fn=lambda:(tmp,idx))

            elif decode_type == 'beam_search':
                if i==0:
                    # BatchBeamSeq: [batch_size*beam_width x 1]
                    # [0,1,2,3,...,127,0,1,...],
                    batchBeamSeq = tf.expand_dims(tf.tile(tf.cast(tf.range(batch_size), tf.int64),
                                                         [beam_width]),1)
                    beam_path  = []
                    log_beam_probs = []
                    # in the initial decoder step, we want to choose beam_width different branches
                    # log_beam_prob: [batch_size, sourceL]
                    log_beam_prob = tf.math.log(tf.split(prob,num_or_size_splits=beam_width, axis=0)[0])

                elif i > 0:
                    # accumulate the log probability of every hypothesis
                    log_beam_prob = tf.math.log(prob) + log_beam_probs[-1]
                    # log_beam_prob:[batch_size, beam_width*sourceL]
                    log_beam_prob = tf.concat(tf.split(log_beam_prob, num_or_size_splits=beam_width, axis=0),1)

                # topk_prob_val,topk_logprob_ind: [batch_size, beam_width]
                topk_logprob_val, topk_logprob_ind = tf.nn.top_k(log_beam_prob, beam_width)

                # topk_logprob_val , topk_logprob_ind: [batch_size*beam_width x 1]
                topk_logprob_val = tf.transpose(a=tf.reshape(
                    tf.transpose(a=topk_logprob_val), [1,-1]))

                topk_logprob_ind = tf.transpose(a=tf.reshape(
                    tf.transpose(a=topk_logprob_ind), [1,-1]))

                #idx,beam_parent: [batch_size*beam_width x 1]
                idx = tf.cast(topk_logprob_ind % env.n_nodes, tf.int64) # Which city in route.
                beam_parent = tf.cast(topk_logprob_ind // env.n_nodes, tf.int64) # Which hypothesis it came from.

                # batchedBeamIdx:[batch_size*beam_width]
                # row of the parent hypothesis inside the tiled batch
                batchedBeamIdx= batchBeamSeq + tf.cast(batch_size,tf.int64)*beam_parent
                prob = tf.gather_nd(prob,batchedBeamIdx)

                beam_path.append(beam_parent)
                log_beam_probs.append(topk_logprob_val)

            # advance the environment with the chosen nodes
            # NOTE(review): the returned state is not used in this method.
            state = env.step(idx,beam_parent)
            # (row, chosen node) pairs used to gather per-row values
            batched_idx = tf.concat([BatchSequence,idx],1)


            # the embedding of the chosen node is the next decoder input
            decoder_input = tf.expand_dims(tf.gather_nd(
                tf.tile(encoder_emb_inp,[beam_width,1,1]), batched_idx),1)

            # log probability of the chosen node (replaces the value returned by step)
            logprob = tf.math.log(tf.gather_nd(prob, batched_idx))
            probs.append(prob)
            idxs.append(idx)
            logprobs.append(logprob)

            # input features of the chosen node
            action = tf.gather_nd(tf.tile(input_pnt, [beam_width,1,1]), batched_idx )
            actions_tmp.append(action)

        if decode_type=='beam_search':
            # find paths of the beam search
            # walk backwards through the recorded parents so that every final
            # hypothesis gets the actions of its own ancestors
            tmplst = []
            tmpind = [BatchSequence]
            for k in reversed(range(len(actions_tmp))):

                tmplst = [tf.gather_nd(actions_tmp[k],tmpind[-1])] + tmplst
                tmpind += [tf.gather_nd(
                    (batchBeamSeq + tf.cast(batch_size,tf.int64)*beam_path[k]),tmpind[-1])]
            actions = tmplst

        else:
            actions = actions_tmp

        # the 'min_trucks' reward variant additionally receives the decode length,
        # the depot index and the depot features tiled over the beams
        if self.args['min_trucks']:
            tile_input_pt = tf.tile(input_pnt[:,env.n_nodes-1,:],[beam_width,1])
            R = self.reward_func(actions,args['decode_len'],self.args['n_nodes']-1,tile_input_pt)
        else:
            R = self.reward_func(actions)


        ### critic
        # the value estimate is only built for the stochastic (training) model
        v = tf.constant(0)
        if decode_type=='stochastic':
            with tf.compat.v1.variable_scope("Critic"):
                with tf.compat.v1.variable_scope("Encoder"):
                    # init states
                    initial_state = tf.zeros([args['rnn_layers'], 2, batch_size, args['hidden_dim']])
                    l = tf.unstack(initial_state, axis=0)
                    rnn_tuple_state = tuple([tf.compat.v1.nn.rnn_cell.LSTMStateTuple(l[idx][0],l[idx][1]) # index + corresponds to coord
                              for idx in range(args['rnn_layers'])])

                    # second element of the first layer's (zero) state tuple
                    hy = rnn_tuple_state[0][1]

                with tf.compat.v1.variable_scope("Process"):
                    # each process block attends over the embedded input and
                    # replaces hy by the attention-weighted sum
                    for i in range(args['n_process_blocks']):

                        process = self.clAttentionCritic(args['hidden_dim'],_name="P"+str(i))
                        e,logit = process(hy, encoder_emb_inp, env)

                        prob = tf.nn.softmax(logit)
                        # hy : [batch_size x 1 x sourceL] * [batch_size  x sourceL x hidden_dim]  ->
                        #[batch_size x h_dim ]
                        hy = tf.squeeze(tf.matmul(tf.expand_dims(prob,1), e ) ,1)

                with tf.compat.v1.variable_scope("Linear"):
                    # two dense layers (hidden_dim with relu, then 1 unit) give v
                    v = tf.squeeze(tf.compat.v1.layers.dense(tf.compat.v1.layers.dense(hy,args['hidden_dim']\
                                                               ,tf.nn.relu,name='L1'),1,name='L2'),1)


        return (R, v, logprobs, actions, idxs, env.input_pnt , probs)

In [None]:
class RLAgent( RLAgent ):
    def build_train_step(self):
        '''
        Assemble the training ops from the stochastic model in self.train_summary.

        Returns a list: the actor and critic update ops, followed by both losses,
        the unclipped (gradient, variable) pairs of both networks, R, v, logprobs,
        probs, actions and idxs.
        '''
        hparams = self.args

        R, v, logprobs, actions, idxs, batch, probs = self.train_summary

        # neither the critic output nor the reward passes gradients to the actor
        baseline = tf.stop_gradient(v)
        R = tf.stop_gradient(R)

        # losses: (R - baseline) weighted sum of step log-probs, and critic MSE
        advantage = R - baseline
        actor_loss = tf.reduce_mean(
            input_tensor=tf.multiply(advantage, tf.add_n(logprobs)), axis=0)
        critic_loss = tf.compat.v1.losses.mean_squared_error(R, v)

        # one Adam optimizer per network
        actor_optim = tf.compat.v1.train.AdamOptimizer(hparams['actor_net_lr'])
        critic_optim = tf.compat.v1.train.AdamOptimizer(hparams['critic_net_lr'])

        def variables_in(scope_name):
            # global variables registered under the given scope
            return tf.compat.v1.get_collection(
                tf.compat.v1.GraphKeys.GLOBAL_VARIABLES, scope=scope_name)

        # every network is only differentiated w.r.t. its own variables
        actor_gra_and_var = actor_optim.compute_gradients(actor_loss, variables_in('Actor'))
        critic_gra_and_var = critic_optim.compute_gradients(critic_loss, variables_in('Critic'))

        max_norm = hparams['max_grad_norm']

        def clip_each(pairs):
            # clip every gradient individually to the configured norm
            return [(tf.clip_by_norm(gradient, max_norm), variable)
                    for gradient, variable in pairs]

        clipped_actor = clip_each(actor_gra_and_var)
        clipped_critic = clip_each(critic_gra_and_var)

        # apply the clipped gradients
        actor_train_step = actor_optim.apply_gradients(clipped_actor)
        critic_train_step = critic_optim.apply_gradients(clipped_critic)

        return [actor_train_step,
                critic_train_step,
                actor_loss,
                critic_loss,
                actor_gra_and_var,
                critic_gra_and_var,
                R,
                v,
                logprobs,
                probs,
                actions,
                idxs]

In [None]:
class RLAgent( RLAgent ):
    def Initialize(self,sess):
        """
        Attach the session to the agent, initialise all global variables and
        then try to load a checkpoint via load_model().
        """
        init_op = tf.compat.v1.global_variables_initializer()
        self.sess = sess
        sess.run(init_op)
        self.load_model()

    def load_model(self):
        """
        Restore the latest checkpoint found in args['load_path'] into the
        session; do nothing when there is no checkpoint.
        """
        checkpoint_path = tf.train.latest_checkpoint(self.args['load_path'])
        if checkpoint_path is None:
            # nothing to restore: keep the freshly initialised variables
            return
        print("have load model")
        self.saver.restore(self.sess, checkpoint_path)


    def evaluate_single(self,eval_type='greedy'):
        """
        Run inference on the test problems one at a time, log rewards and sample
        decodings, and write all decoded routes with _output_results().

        :param eval_type: 'greedy' or 'beam_search'
        :raises ValueError: if eval_type is anything else
        """
        # Validate first: previously an unknown type left `summary` unbound and
        # only failed inside the evaluation loop.
        if eval_type == 'greedy':
            summary = self.val_summary_greedy
        elif eval_type == 'beam_search':
            summary = self.val_summary_beam
        else:
            raise ValueError(
                "eval_type must be 'greedy' or 'beam_search', got {!r}".format(eval_type))

        start_time = time.time()
        avg_reward = []
        all_output = []
        n_steps = 0
        embedding_graph = self.args['embedding_graph']
        depot_idx = self.env.n_nodes-1

        self.dataGen.reset()
        for step in range(self.dataGen.n_problems):

            data = self.dataGen.get_test_next()
            # normalise every input feature over all nodes of the batch, then
            # restore the original layout
            input_concat = np.concatenate(data)
            norm_by_feature = np.reshape(np.transpose(input_concat),(self.args['input_dim'],-1))
            norm_by_feature = normalize(norm_by_feature, axis=1)
            data_norm = np.reshape(np.transpose(norm_by_feature),(data.shape[0],data.shape[1],data.shape[2]))

            dict_to_feed = {self.env.input_data:data,
                            self.env.input_data_norm:data_norm,
                            self.decodeStep.dropout:0.0}
            if embedding_graph == 1:
                # NOTE(review): this feeds the return value of self.embedder_model
                # while __init__ builds self.embedder_graph for this mode - confirm
                # which embedder is intended here.
                dict_to_feed[self.env.embeded_data] = self.embedder_model(data)
            else:
                # the embedding placeholder is fed with zeros in these modes
                dict_to_feed[self.env.embeded_data] = np.zeros(
                    shape=(self.args['batch_size'],self.args['n_nodes'],self.args['embedding_dim']))
                if embedding_graph != 0:
                    # keep probability 1.0: no dropout in the graph embedder
                    dict_to_feed[self.embedder_model.drop_out] = 1.0

            R, v, logprobs, actions,idxs, batch, _= self.sess.run(summary,
                                         feed_dict=dict_to_feed)
            if eval_type=='greedy':
                avg_reward.append(R)
                R_ind0 = 0
            else:
                # R : [batch_size x beam_width]
                R = np.concatenate(np.split(np.expand_dims(R,1) ,self.args['beam_width'], axis=0),1 )
                R_val = np.amin(R,1, keepdims = False)
                # best beam of the first instance of the batch
                R_ind0 = np.argmin(R,1)[0]
                avg_reward.append(R_val)

            # route of the first instance (on its best beam), computed once and
            # used both for the results file and for the sample log below
            best_row = R_ind0*np.shape(batch)[0]
            decoded_route = [list(action[best_row]) for action in actions]

            # print decode in file data; we begin by the depot
            all_output.append([list(batch[0, depot_idx, :])] + decoded_route)

            # sample decode
            if step % int(self.args['log_interval']) == 0:
                example_input = [list(batch[0, i, :]) for i in range(self.env.n_nodes)]
                self.prt.print_out('\n\nVal-Step of {}: {}'.format(eval_type,step))
                self.prt.print_out('\nExample test input: {}'.format(example_input))
                self.prt.print_out('\nExample test output: {}'.format(decoded_route))
                self.prt.print_out('\nExample test reward: {} - best: {}'.format(R[0],R_ind0))

            n_steps += 1

        end_time = time.time() - start_time

        # Finished going through the iterator dataset.
        if avg_reward:
            mean_reward = np.mean(avg_reward)
            std_reward = np.sqrt(np.var(avg_reward))
        else:
            # empty data set: report nan instead of triggering numpy warnings
            mean_reward = std_reward = float('nan')
        self.prt.print_out('\nValidation overall avg_reward: {}'.format(mean_reward) )
        self.prt.print_out('Validation overall reward std: {}'.format(std_reward) )

        # n_steps is the number of evaluated problems; the loop variable is the
        # last index (one less) and is undefined for an empty data set
        self.prt.print_out("Finished evaluation with %d steps in %s." % (n_steps\
                           ,time.strftime("%H:%M:%S", time.gmtime(end_time))))

        # Ouputting the results
        self._output_results(all_output,eval_type)


    def _output_results(self,all_ouput,eval_type):
        """
        Output the decoding results obtained after a single inference and copy
        the corresponding test file next to them.

        One line is written per route: the leading fields of every visited node
        (2 for 'vrp', 4 for 'vrptw'), ending with the depot once all stops
        (n_nodes - 1 of them) have been seen.

        :param all_ouput: list of routes, in order; the first node of each route
                          is the depot
        :param eval_type: the type (greedy or beam_search), used in the file name
        :raises AssertionError: if args['task_name'] is neither 'vrp' nor 'vrptw'
        """
        task = self.args['task_name']
        n_nodes = self.env.n_nodes

        # create directory (exist_ok avoids the check-then-create race)
        results_dir = os.path.join(self.args['log_dir'],'results')
        os.makedirs(results_dir, exist_ok=True)

        # common prefix of the results file and of the test file to copy
        if self.args['ups']:
            prefix = '{}-ups-size-{}-len-{}'.format(task,self.args['test_size'], n_nodes)
        else:
            prefix = '{}-size-{}-len-{}'.format(task,self.args['test_size'], n_nodes)
        results_path = os.path.join(results_dir, '{}-results-{}.txt'.format(prefix,eval_type))

        # the context manager closes the file even if writing fails midway
        with open(results_path, 'w') as results_file:
            for output in all_ouput:
                depot_x = output[0][0]
                depot_y = output[0][1]

                nb_stop = 0
                for node in output:
                    if task == 'vrp':
                        results_file.write(str(node[0]) + " " + str(node[1]) + " ")
                    elif task =='vrptw':
                        results_file.write(str(node[0]) + " " + str(node[1]) + " " + str(node[2]) + " " + str(node[3]) + " ")
                    else:
                        # explicit raise: a bare assert is stripped under python -O
                        raise AssertionError('unsupported task_name: {!r}'.format(task))
                    # check if depot or stop: a node counts as a stop when its
                    # coordinates differ from the depot's
                    if abs(depot_x - node[0]) >= 0.001 or abs(depot_y - node[1]) >= 0.001:
                        nb_stop +=1

                    if nb_stop == n_nodes -1:
                        # we have found all the stops so write depot again and break
                        if task == 'vrp':
                            results_file.write(str(depot_x) + " " + str(depot_y))
                        elif task =='vrptw':
                            depot_b_tw = output[0][2]
                            depot_e_tw = output[0][3]
                            results_file.write(str(depot_x) + " " + str(depot_y) + " " + str(depot_b_tw) + " " + str(depot_e_tw))
                        break
                results_file.write("\n")

        # copy the input file
        copy_name = prefix + '-test.txt'
        old_loc = os.path.join(self.args['data_dir'], copy_name)
        new_loc = os.path.join(results_dir, copy_name)
        copyfile(old_loc,new_loc)



    def evaluate_batch(self,eval_type='greedy'):
        '''
        Evaluate the agent on the whole test set with a single session run.

        Inputs:
            eval_type: decoding strategy. 'greedy' uses a beam width of 1,
                'beam_search' uses args['beam_width'].
        Raises:
            ValueError: if eval_type is neither 'greedy' nor 'beam_search'.

        The mean and standard deviation of the reward (after keeping the best
        beam per instance) are logged through self.prt, and the mean is
        appended to self.out_avg_resul.
        '''
        # Fail fast: without this check an unsupported strategy would only
        # surface later as an UnboundLocalError on `summary`, after the
        # environment had been reset and the test data normalised.
        if eval_type not in ('greedy', 'beam_search'):
            raise ValueError(
                "Unknown eval_type {!r}: expected 'greedy' or 'beam_search'".format(eval_type))

        self.env.reset()
        if eval_type == 'greedy':
            summary = self.val_summary_greedy
            beam_width = 1
        else:
            summary = self.val_summary_beam
            beam_width = self.args['beam_width']

        # Normalise every input feature over the whole test set: the instances
        # are stacked, each feature becomes one row, sklearn's `normalize`
        # rescales each row, and the result is reshaped back to data's shape.
        data = self.dataGen.get_test_all()
        input_concat = np.concatenate(data)
        norm_by_feature = np.reshape(np.transpose(input_concat),(self.args['input_dim'],-1))
        norm_by_feature = normalize(norm_by_feature, axis=1)
        data_norm = np.reshape(np.transpose(norm_by_feature),(data.shape[0],data.shape[1],data.shape[2]))

        # NOTE(review): the zero placeholders below are sized with
        # args['batch_size'] although `data` is the full test set -- confirm
        # that env.embeded_data accepts (or ignores) that shape in these modes.
        # NOTE(review): decodeStep.dropout is fed 0.0 and embedder_model.drop_out
        # 1.0 here (0.8 during training); presumably a drop rate and a keep
        # probability respectively -- confirm.
        if self.args['embedding_graph'] == 0:
            dict_to_feed = {self.env.input_data:data,
                            self.env.input_data_norm:data_norm,
                            self.env.embeded_data: np.zeros(shape=(self.args['batch_size'],self.args['n_nodes'],self.args['embedding_dim'])),
                            self.decodeStep.dropout:0.0}
        elif self.args['embedding_graph'] == 1:
            # The embedding is computed outside the TF graph and fed in.
            dict_to_feed = {self.env.input_data:data,
                            self.env.input_data_norm:data_norm,
                            self.env.embeded_data:self.embedder_model(data),
                            self.decodeStep.dropout:0.0}
        else:
            dict_to_feed = {self.env.input_data:data,
                            self.env.input_data_norm:data_norm,
                            self.env.embeded_data: np.zeros(shape=(self.args['batch_size'],self.args['n_nodes'],self.args['embedding_dim'])),
                            self.embedder_model.drop_out: 1.0,
                            self.decodeStep.dropout:0.0}

        start_time = time.time()
        R, v, logprobs, actions,idxs, batch, _= self.sess.run(summary,
                                     feed_dict=dict_to_feed)

        # R is split into `beam_width` equal chunks along axis 0; the chunks
        # become columns, so each row holds one value per beam and the minimum
        # over the row is kept.
        R = np.concatenate(np.split(np.expand_dims(R,1) ,beam_width, axis=0),1 )
        R = np.amin(R,1, keepdims = False)

        end_time = time.time() - start_time
        self.prt.print_out('Average of {} in batch-mode: {} -- std {} -- time {} s'.format(eval_type,\
            np.mean(R),np.sqrt(np.var(R)),end_time))
        self.out_avg_resul.write(eval_type + '_' + str(np.mean(R)) + '\n')

    def inference(self, infer_type='batch'):
        '''
        Evaluate with greedy decoding and then with beam search, and finish
        by logging a separator line.

        Inputs:
            infer_type: 'batch' dispatches to evaluate_batch, 'single' to
                evaluate_single. Any other value runs no evaluation and only
                the separator line is printed.
        '''
        if infer_type == 'batch':
            evaluate = self.evaluate_batch
        elif infer_type == 'single':
            evaluate = self.evaluate_single
        else:
            evaluate = None

        if evaluate is not None:
            for decoding in ('greedy', 'beam_search'):
                evaluate(decoding)

        self.prt.print_out("##################################################################")


    def run_train_step(self):
        '''
        Fetch the next training batch, build the feed dict that matches
        args['embedding_graph'] and run self.train_step once.

        Returns:
            whatever self.sess.run returns for self.train_step.
        '''
        batch = self.dataGen.get_train_next()

        # Per-feature normalisation over the batch: stack the instances, put
        # each feature on its own row, rescale the rows, restore batch's shape.
        stacked = np.concatenate(batch)
        feature_rows = np.reshape(np.transpose(stacked), (self.args['input_dim'], -1))
        feature_rows = normalize(feature_rows, axis=1)
        batch_norm = np.reshape(np.transpose(feature_rows), batch.shape[:3])

        graph_mode = self.args['embedding_graph']
        feed = {self.env.input_data: batch,
                self.env.input_data_norm: batch_norm}

        if graph_mode == 1:
            # embedding computed outside the TF graph and fed in directly
            feed[self.env.embeded_data] = self.embedder_model(batch)
        else:
            feed[self.env.embeded_data] = np.zeros(
                shape=(self.args['batch_size'],
                       self.args['n_nodes'],
                       self.args['embedding_dim']))
            if graph_mode != 0:
                feed[self.embedder_model.drop_out] = 0.8

        feed[self.decodeStep.dropout] = self.args['dropout']

        return self.sess.run(self.train_step, feed_dict=feed)

In [None]:
class Embedding(object):
    '''
    Common parent of the classes that embed the input graph.

    It records the embedding type and dimension, and a total_time counter
    that the subclasses in this file add the duration of their calls to.
    '''
    def __init__(self,emb_type, embedding_dim):
        '''
        Input:
            emb_type: label of the embedding kind
            embedding_dim: embedding dimension
        '''
        self.emb_type, self.embedding_dim = emb_type, embedding_dim
        self.total_time = 0

    def __call__(self,input_pnt):
        # Child classes override this to return the embedded tensor; the base
        # implementation produces nothing.
        return None

In [None]:
class LinearEmbedding(Embedding):
    '''
    Linear embedding: a Conv1D layer with kernel size 1 that maps the input
    to a space with embedding_dim features.
    '''
    def __init__(self,embedding_dim,_scope=''):
        '''
        Input:
            embedding_dim: embedding dimension
            _scope: prefix prepended to the variable scope of the layer
        '''
        super().__init__('linear', embedding_dim)
        layer_scope = _scope + 'Embedding/conv1d'
        self.project_emb = tf.compat.v1.layers.Conv1D(
            embedding_dim, 1, _scope=layer_scope)

    def __call__(self,input_pnt):
        # The duration of the layer call is accumulated in total_time.
        started_at = time.time()
        # projected: [batch_size, max_time, embedding_dim]
        projected = self.project_emb(input_pnt)
        self.total_time += time.time() - started_at
        return projected

In [None]:
class GraphEmbedding(Embedding):
    """
    Graph embedding obtained by transfer learning: the underlying model has
    already been optimized on another task and is only restored and queried here.
    """
    def __init__(self,args,data_test):
        """
        :param args: configuration dict; the keys 'embedding_dim' (must be 30), 'n_nodes',
                     'task', 'log_dir' and 'test_size' are read.
        :param data_test: test data, embedded once at construction and kept in self.embedded_data.
        """
        # NOTE(review): 30 is presumably the node-representation size of the stored model -- confirm.
        assert args['embedding_dim'] == 30, args['embedding_dim']
        super(GraphEmbedding, self).__init__('graph',embedding_dim=args['embedding_dim'])

        self.n_nodes = args['n_nodes']
        self.embedding_dim =  args['embedding_dim']
        model_path = 'shared/graph_embedding/model_storage/' + args['task'] + '_model.pickle'
        result_dir = args['log_dir'] + '/embedding/'
        # exist_ok: a second GraphEmbedding (or a re-run with the same log_dir) must not
        # crash with FileExistsError just because the directory is already there.
        os.makedirs(result_dir, exist_ok=True)
        self.graph_model = self.restore(model_path, result_dir)
        self.graph_model.params['max_nodes_in_batch'] = args['test_size'] * self.n_nodes + 10 # We can process larger batches if we don't do training
        self.embedded_data = self(data_test)


    def __call__(self, input_data):
        """
        :param input_data: the input data as given by the env i.e. [batch_size x max_time x dim_task]
        :return: an embedding corresponding to the final node representation obtained via transfer
                 learning, reshaped to [-1 x n_nodes x embedding_dim].
        """
        time_init = time.time()
        embedded_data = self.graph_model.test(input_data)
        embedded_data = np.reshape(embedded_data,(-1,self.n_nodes,self.embedding_dim))
        # The duration of the call is accumulated in total_time.
        self.total_time += time.time() - time_init

        return embedded_data

    @staticmethod
    def restore(saved_model_path: str, result_dir: str, run_id: str = None):
        """
        Rebuild the GNN-FiLM model stored in a pickle snapshot.

        :param saved_model_path: path of the pickle; the keys 'model_params', 'task_params',
                                 'task_metadata' and 'weights' are read from it.
        :param result_dir: directory handed to the model constructor.
        :param run_id: identifier of the run; when None one is built from the task name, the
                       model name, the current time and the process id.
        :return: the model with its weights loaded.
        """
        print("Loading model from file %s." % saved_model_path)
        # SECURITY: pickle.load can execute arbitrary code -- only load snapshots from a
        # trusted source.
        with open(saved_model_path, 'rb') as in_file:
            data_to_load = pickle.load(in_file)

        # The model and task classes are fixed here rather than looked up from the snapshot.
        model_cls = GNN_FiLM_Model
        task_cls, additional_task_params = Nb_Vehicles_Task, {"data_kind":'transfer_learning'}

        if run_id is None:
            run_id = "_".join([task_cls.name(), model_cls.name(data_to_load['model_params']), time.strftime("%Y-%m-%d-%H-%M-%S"), str(os.getpid())])

        task = task_cls(data_to_load['task_params'])
        task.restore_from_metadata(data_to_load['task_metadata'])

        model = model_cls(data_to_load['model_params'], task, run_id, result_dir)
        model.load_weights(data_to_load['weights'])

        model.log_line("Loaded model from snapshot %s." % saved_model_path)

        return model

In [None]:
def test():
    '''
    Manual smoke test of GraphEmbedding: parse the parameters, print the shape
    of the test data, build the embedding on the test data and then embed one
    training batch.
    '''
    args, _ = ParseParams()
    generator = DataGenerator(args)
    print(generator.test_data.shape)

    embedder = GraphEmbedding(args, generator.test_data)
    embedder(generator.get_train_next())

In [None]:
    
def load_task_specific_components(task,ups):
    '''
    Import and return the components that depend on the task.

    Inputs:
        task: 'vrp' or 'vrptw'
        ups: when truthy the UPS variants are loaded instead of the standard ones
    Returns:
        DataGenerator, Env, reward_func, AttentionActor, AttentionCritic
    Raises:
        Exception: if the task is neither 'vrp' nor 'vrptw'.

    The imports are kept inside the branches so that only the modules of the
    selected variant are loaded.
    '''
    if not (task == 'vrp' or task == 'vrptw'):
        raise Exception('Task is not implemented')

    if task == 'vrp' and ups:
        from UPS.vrp_ups_utils import DataGenerator, Env, reward_func
        from UPS.vrp_ups_attention import AttentionVRP_UPS_Actor as AttentionActor
        from UPS.vrp_ups_attention import AttentionVRP_UPS_Critic as AttentionCritic
    elif task == 'vrp':
        from VRP.vrp_utils import DataGenerator, Env, reward_func
        from VRP.vrp_attention import AttentionVRPActor as AttentionActor
        from VRP.vrp_attention import AttentionVRPCritic as AttentionCritic
    elif ups:
        from UPS.vrptw_ups_utils import DataGenerator, Env, reward_func
        from UPS.vrptw_ups_attention import AttentionVRPTW_UPS_Actor as AttentionActor
        from UPS.vrptw_ups_attention import AttentionVRPTW_UPS_Critic as AttentionCritic
    else:
        from VRPTW.vrptw_utils import DataGenerator, Env, reward_func
        from VRPTW.vrptw_attention import AttentionVRPTWActor as AttentionActor
        from VRPTW.vrptw_attention import AttentionVRPTWCritic as AttentionCritic

    return DataGenerator, Env, reward_func, AttentionActor, AttentionCritic

In [None]:
def load_task_specific_eval(task):
    """
    Load the baseline evaluators of a task, depending on whether it has time
    windows ('vrptw') or not ('vrp').

    Returns a list of (evaluator class, evaluator name) pairs.
    Raises Exception if the task is neither 'vrp' nor 'vrptw'.
    """
    if not (task == 'vrp' or task == 'vrptw'):
        raise Exception('Task is not implemented')

    if task == 'vrp':
        from evaluation.eval_VRP import eval_google_or,eval_Clarke_Wright

        evaluators = [
            (eval_google_or.EvalGoogleOR, 'or_tools'),
            (eval_Clarke_Wright.EvalClarkeWright, 'Clarke_Wright'),
        ]
    else:
        from evaluation.eval_VRPTW import eval_tw_google_or,eval_I1_heuristics

        evaluators = [
            (eval_tw_google_or.EvalTWGoogleOR, 'or_tools_tw'),
            (eval_I1_heuristics.EvalI1Heuristics, 'I1_heuristic'),
        ]

    return evaluators

In [None]:
def _run_training(agent, sess, args, prt, al_file, cl_file, r_file):
    '''
    Train the agent for at most args['n_train'] steps.

    Every step appends the actor loss, the critic loss and the mean reward to
    al_file, cl_file and r_file. Training stops early once both mean losses
    moved by less than 1e-2 for 10 consecutive steps. Checkpoints, log lines
    and test-set inference are triggered every args['save_interval'],
    args['log_interval'] and args['test_interval'] steps respectively, and a
    final checkpoint is written after the loop.
    '''
    prev_actor_loss, prev_critic_loss = float('Inf'), float('Inf')
    actor_eps, critic_eps = 1e-2, 1e-2
    convergence_counter = 0
    # Stays None when args['n_train'] is 0, so the final save can be skipped
    # instead of failing on an undefined loop variable.
    step = None

    prt.print_out('Training started ...')
    train_time_beg = time.time()
    for step in range(args['n_train']):
        summary = agent.run_train_step()
        # Layout of the 12 results: two unused values, actor loss, critic loss,
        # actor and critic gradients/variables, R, v, logprobs, probs, actions, idxs.
        (_, _, actor_loss_val, critic_loss_val, _, _,
         R_val, v_val, _, _, _, _) = summary

        curr_actor_loss = np.mean(actor_loss_val)
        curr_critic_loss = np.mean(critic_loss_val)
        al_file.write( str(actor_loss_val) + '\n')
        cl_file.write(str(critic_loss_val) + '\n')
        r_file.write(str(np.mean(R_val)) + '\n')

        if abs(prev_actor_loss - curr_actor_loss) < actor_eps \
            and abs(prev_critic_loss - curr_critic_loss) < critic_eps:
            convergence_counter += 1
        else:
            convergence_counter = 0
        if convergence_counter == 10:
            prt.print_out('Converged at step {}'\
                  .format(step))
            train_time_end = time.time()-train_time_beg
            prt.print_out('Train Step: {} -- Time: {} -- Train reward: {} -- Value: {}'\
                  .format(step,time.strftime("%H:%M:%S", time.gmtime(\
                    train_time_end)),np.mean(R_val),np.mean(v_val)))
            prt.print_out('    actor loss: {} -- critic loss: {}'\
                  .format(curr_actor_loss,curr_critic_loss))
            break

        if step%args['save_interval'] == 0:
            agent.saver.save(sess,args['model_dir']+'/model.ckpt', global_step=step)

        if step%args['log_interval'] == 0:
            train_time_end = time.time()-train_time_beg
            prt.print_out('Train Step: {} -- Time: {} -- Embedding Time {} -- Train reward: {} -- Value: {}'\
                  .format(step,time.strftime("%H:%M:%S", time.gmtime(\
                    train_time_end)),time.strftime("%H:%M:%S", time.gmtime(\
                    agent.embedder_model.total_time)),np.mean(R_val),np.mean(v_val)))
            prt.print_out('    actor loss: {} -- critic loss: {}'\
                  .format(curr_actor_loss, curr_critic_loss))

            # Both timers are restarted, so the logged durations cover the
            # interval since the previous log line.
            train_time_beg = time.time()
            agent.embedder_model.total_time = 0
        if step%args['test_interval'] == 0:
            agent.inference(args['infer_type'])
        prev_actor_loss = curr_actor_loss
        prev_critic_loss = curr_critic_loss

    # Save the model at the end of the training (nothing to save if no step ran)
    if step is not None:
        agent.saver.save(sess,args['model_dir']+'/model.ckpt', global_step=step)


def _run_evaluation(agent, env, args, prt):
    '''
    Run the agent's inference, then every task-specific baseline evaluator,
    and finally benchmark beam search against those baselines.
    '''
    prt.print_out('Evaluation started ...')
    agent.inference(args['infer_type'])

    all_evaluator = load_task_specific_eval(args['task_name'])

    # perform the evaluation
    list_eval = ['beam_search'] #['greedy','beam_search']
    for evaluator_cls, evaluator_name in all_evaluator:
        list_eval.append(evaluator_name)

        object_eval = evaluator_cls(args,env,prt,args['min_trucks'])
        object_eval.perform_routing()

    benchmark_object = benchmark.Benchmark(args,env,prt)
    benchmark_object.perform_benchmark(list_eval=list_eval)


def main(args, prt):
    '''
    Build the environment and the RL agent for args['task_name'], then train
    it (args['is_train'] truthy) or evaluate it against the baselines.

    Inputs:
        args: configuration dict, see ParseParams.
        prt: print controller used for all log lines.

    actorLoss.txt, criticLoss.txt and reward.txt are (re)created in
    args['log_dir'] in both modes. The log files and the TensorFlow session
    are released even when training or evaluation raises.
    '''
    config = tf.compat.v1.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.compat.v1.Session(config=config)
    try:
        # load task specific classes
        DataGenerator, Env, reward_func, AttentionActor, AttentionCritic = \
            load_task_specific_components(args['task_name'],args['ups'])

        dataGen = DataGenerator(args)
        dataGen.reset()
        env = Env(args)
        # create an RL agent
        agent = RLAgent(args,
                        prt,
                        env,
                        dataGen,
                        reward_func,
                        AttentionActor,
                        AttentionCritic,
                        is_train=args['is_train'])
        agent.Initialize(sess)

        start_time = time.time()
        # `with` guarantees the three log files are flushed and closed even if
        # an exception interrupts the run.
        with open(args['log_dir']+"/actorLoss.txt", "w") as al_file, \
                open(args['log_dir']+"/criticLoss.txt", "w") as cl_file, \
                open(args['log_dir']+"/reward.txt", "w") as r_file:
            if args['is_train']:
                _run_training(agent, sess, args, prt, al_file, cl_file, r_file)
            else: # inference
                _run_evaluation(agent, env, args, prt)

            prt.print_out('Total time is {}'.format(time.strftime("%H:%M:%S", time.gmtime(time.time()-start_time))))
    finally:
        sess.close()

In [None]:
if __name__ == "__main__":
    args, prt = ParseParams()

    # Hard-coded overrides of the parsed configuration for this run.
    # Earlier experiments also overrode 'infer_type', 'data_dir', 'log_dir',
    # 'model_dir' and 'load_path' at this point.
    args['is_train'] = True
    args['test_size'] = 1000

    tf.compat.v1.disable_eager_execution()
    # Start from a fresh default graph BEFORE seeding: with eager execution
    # disabled, tf.random.set_seed stores the seed on the current default
    # graph, so resetting the graph afterwards would silently discard it.
    tf.compat.v1.reset_default_graph()

    # Random
    random_seed = args['random_seed']
    if random_seed is not None and random_seed > 0:
        prt.print_out("# Set random seed to %d" % random_seed)
        np.random.seed(random_seed)
        tf.random.set_seed(random_seed)

    main(args, prt)