In [16]:
import numpy as np
import gurobipy as gp
import torch

from graph import Graph
from bssrp_mip import BSSRPMIP
from nn_heuristic import NearestNeighboursHeuristic

from environment import Environment

In [17]:
# Numerical tolerance constant. No cell visible in this part of the notebook
# reads it — NOTE(review): confirm whether it is still needed.
tol = 1e-5

In [18]:
def demand_of_routes(mip):
    """Print every vehicle's route together with the demand at each stop.

    Args:
        mip: Object exposing ``routes`` and ``demands``. ``routes`` may be a
            mapping from vehicle id to a sequence of node indices, or a plain
            sequence of routes (vehicle ids are then the positions 0..n-1).
            ``demands`` must be indexable by node index.

    Returns:
        None. The report is written to stdout.
    """
    routes = mip.routes
    if hasattr(routes, "items"):
        # Walk the keys that actually exist instead of assuming they are
        # exactly 0..n-1; sorting keeps the original ascending print order.
        indexed_routes = [(vehicle, routes[vehicle]) for vehicle in sorted(routes)]
    else:
        indexed_routes = list(enumerate(routes))

    for i, route in indexed_routes:
        route_demand_order = [mip.demands[node] for node in route]
        print(f"Vehicle {i}:")
        print("    Route: ", route)
        print("    Demand:", route_demand_order)

In [19]:
def eval_mip_sol_in_env(mip, g):
    """Replay the MIP solution's routes in the environment and print both scores.

    A fresh ``Environment`` is built around ``g`` and reset to graph 0. Each
    route in ``mip.routes`` is then fed to ``env.step`` node by node, and the
    rewards are accumulated per route. Replay stops as soon as the
    environment reports ``done``.

    Args:
        mip: Solved model wrapper exposing ``routes`` (mapping of vehicle id
            to a sequence of node indices) and ``model.objVal``.
        g: Graph instance the MIP was built on.

    Returns:
        None. Prints the MIP objective next to the summed environment reward.
    """
    graph_dict = {0: g}
    env = Environment(graph_dict, "test")

    env.reset(0)

    route_reward = {}
    # Bound before the loops: a route with a single node takes no steps, and
    # the check after the inner loop would otherwise read an unassigned name.
    done = False
    for i, route in mip.routes.items():
        route_reward[i] = 0
        if len(route) == 0:
            continue

        # The first node of each route is never stepped to — presumably it is
        # the depot the environment already starts from; TODO confirm.
        for a in route[1:]:
            _, reward, done, _ = env.step(torch.tensor(a))
            route_reward[i] += reward
            if done:
                break
        if done:
            break

    total_reward = sum(route_reward.values())

    print(f"MIP Obj: {mip.model.objVal}, MDP reward: {total_reward}")

In [20]:
def eval_nn_in_env(g):
    """Run the nearest-neighbours heuristic on ``g`` and print its environment reward.

    A fresh ``Environment`` is built around ``g`` and reset to graph 0. The
    routes returned by ``NearestNeighboursHeuristic(g).run()`` are then fed
    to ``env.step`` node by node, and the rewards are accumulated per route.
    Replay stops as soon as the environment reports ``done``.

    Args:
        g: Graph instance to evaluate the heuristic on.

    Returns:
        None. Prints the summed environment reward.
    """
    graph_dict = {0: g}
    env = Environment(graph_dict, "test")

    env.reset(0)

    nn = NearestNeighboursHeuristic(g)
    routes = nn.run()

    route_reward = {}
    # Bound before the loops: a route with a single node takes no steps, and
    # the check after the inner loop would otherwise read an unassigned name.
    done = False
    for i, route in enumerate(routes):
        route_reward[i] = 0
        if len(route) == 0:
            continue

        # The first node of each route is never stepped to — presumably it is
        # the depot the environment already starts from; TODO confirm.
        for a in route[1:]:
            _, reward, done, _ = env.step(torch.tensor(a))
            route_reward[i] += reward
            if done:
                break
        if done:
            break

    total_reward = sum(route_reward.values())

    print(f"NN Reward: {total_reward}")

### Generate Graph

In [21]:
# Seed handed to g.seed() before the graph is generated.
seed = 12343

In [22]:
# Flags forwarded to BSSRPMIP as the keyword arguments of the same names.
use_penalties = True
no_bikes_leaving = True

In [23]:
# Instance parameters; every value below is passed to the Graph constructor.
# NOTE(review): units (time, distance, cost) are not visible here — confirm
# against the Graph class.
num_nodes = 10
num_vehicles = 3
time_limit = 30

# Penalty weights and per-bike handling time.
penalty_cost_demand = 2
penalty_cost_time = 5
bike_load_time = 0

speed = 30 

In [24]:
# Build the problem instance from the parameters configured above; k_nn is
# the only value fixed inline.
g = Graph(
    num_nodes=num_nodes,
    k_nn=2,
    num_vehicles=num_vehicles,
    penalty_cost_demand=penalty_cost_demand,
    penalty_cost_time=penalty_cost_time,
    speed=speed,
    bike_load_time=bike_load_time,
    time_limit=time_limit,
)

In [25]:
# Seed the graph before generation — presumably this makes the generated
# instance reproducible; confirm against Graph.seed.
g.seed(seed)

In [26]:
# Generate the instance on g (mutates g; no return value is used).
g.bss_graph_gen()

### Initialize MIP

In [27]:
# Build the MIP for the generated graph using the flags configured earlier.
mip = BSSRPMIP(g, use_penalties=use_penalties, no_bikes_leaving=no_bikes_leaving)
# Turn off solver logging (mip.model is presumably a gurobipy Model). The
# recorded "Set parameter TimeLimit" line was printed anyway — likely during
# construction, before this flag took effect.
mip.model.setParam('OutputFlag', 0)

Set parameter TimeLimit to value 100000


In [28]:
# Solve the model; eval_mip_sol_in_env reads mip.routes and mip.model.objVal afterwards.
mip.optimize()

In [29]:
# Replay the MIP routes in the environment. In the recorded run the reward is
# the negated objective, up to floating-point rounding.
eval_mip_sol_in_env(mip, g)

Tour:  [0, 1, 3, 9, 0, 2, 5, 6, 7, 4, 8, 0]
Tour Time:  83.84409916710437
Left Demand:  tensor(0.)
Node Visits:  12
Games Finished:  0
####################################################################################################
MIP Obj: 224.2723007894944, MDP reward: -224.2722930908203


In [30]:
# Baseline: evaluate the nearest-neighbours heuristic on the same graph.
eval_nn_in_env(g)

Tour:  [0, 1, 2, 5, 0, 3, 6, 0, 7, 0, 9, 8, 0, 4, 0]
Tour Time:  190.19088433251957
Left Demand:  tensor(-5.)
Node Visits:  15
Games Finished:  0
####################################################################################################
NN Reward: -470.7091369628906
