In [1]:
import numpy as np
import gurobipy as gp
import torch

import runner
import agent

from graph import Graph
from bssrp_mip import BSSRPMIP
from nn_heuristic import NearestNeighboursHeuristic

from environment import Environment

In [2]:
# Instantiate a Gurobi model up front. `m` is not referenced again in the
# visible cells; the recorded output of this cell is the Gurobi license
# banner. NOTE(review): presumably this only exists to trigger the license
# check once before the evaluation loop — confirm.
m = gp.Model()

Set parameter Username


2021-11-27 14:27:37,197:INFO:Set parameter Username


Academic license - for non-commercial use only - expires 2022-01-25


2021-11-27 14:27:37,198:INFO:Academic license - for non-commercial use only - expires 2022-01-25


In [3]:
# Absolute tolerance read by eval_mip_sol_in_env when it checks that the MIP
# objective value and the summed environment reward cancel each other out.
tol = 1e-3

In [4]:
def demand_of_routes(routes, demands):
    """Print each vehicle's route alongside the demand of every visited node.

    Args:
        routes: Container indexable by ``0 .. len(routes) - 1`` (a list, or a
            dict keyed by consecutive vehicle indices); each entry is an
            iterable of node identifiers.
        demands: Container indexable by node identifier.

    Returns:
        None. The report is written to standard output.
    """
    for vehicle in range(len(routes)):
        visited = routes[vehicle]
        # Demands listed in visiting order, one per node on the route.
        visited_demands = [demands[node] for node in visited]
        print(f"Vehicle {vehicle}:")
        print("    Route: ", visited)
        print("    Demand:", visited_demands)

In [5]:
def eval_mip_sol_in_env(mip, g):
    """Replay the routes of a solved MIP in the environment and total the reward.

    A fresh ``Environment`` is built around ``g`` and reset to instance 0.
    Every route in ``mip.routes`` is then fed to ``env.step`` node by node,
    skipping the first entry of each route (presumably the depot the
    environment already starts from — TODO confirm). Replay stops as soon as
    the environment reports ``done``.

    Args:
        mip: Solved model exposing ``routes`` (mapping of vehicle id to a
            sequence of node ids) and ``model.objVal``.
        g: Graph instance handed to the environment.

    Returns:
        The summed reward as a plain Python number.

    Raises:
        AssertionError: If ``mip.model.objVal + total_reward`` is not within
            the module-level ``tol`` of zero.
    """
    env = Environment({0: g}, "test", verbose=False)
    env.reset(0)

    route_reward = {}
    # Bound before the loop so that a route with nothing after its first
    # node (``route[1:]`` empty) cannot leave ``done`` undefined below.
    done = False
    for i, route in mip.routes.items():
        route_reward[i] = 0
        for a in route[1:]:
            _, reward, done, _ = env.step(torch.tensor(a))
            route_reward[i] += reward
            if done:
                break
        if done:
            break

    total_reward = sum(route_reward.values())
    # Rewards coming back from the environment expose ``.item()``; when no
    # step was taken at all the total is still the plain integer 0.
    if hasattr(total_reward, "item"):
        total_reward = total_reward.item()

    # The check expects the objective and the reward to have opposite signs
    # and cancel out.
    assert abs(mip.model.objVal + total_reward) < tol, (
        f"MIP objective {mip.model.objVal} does not match "
        f"environment reward {total_reward}"
    )

    return total_reward

In [6]:
def eval_nn_in_env(nn, g):
    """Replay the nearest-neighbour heuristic's routes in the environment.

    A fresh ``Environment`` is built around ``g`` and reset to instance 0.
    Every route in ``nn.routes`` is then fed to ``env.step`` node by node,
    skipping the first entry of each route (presumably the depot the
    environment already starts from — TODO confirm). Replay stops as soon as
    the environment reports ``done``.

    Args:
        nn: Heuristic object exposing ``routes``, an iterable of node-id
            sequences (one per vehicle).
        g: Graph instance handed to the environment.

    Returns:
        The summed reward as a plain Python number.
    """
    env = Environment({0: g}, "test", verbose=False)
    env.reset(0)

    route_reward = {}
    # Bound before the loop so that a route with nothing after its first
    # node (``route[1:]`` empty) cannot leave ``done`` undefined below.
    done = False
    for i, route in enumerate(nn.routes):
        route_reward[i] = 0
        for a in route[1:]:
            _, reward, done, _ = env.step(torch.tensor(a))
            route_reward[i] += reward
            if done:
                break
        if done:
            break

    total_reward = sum(route_reward.values())
    # Rewards coming back from the environment expose ``.item()``; when no
    # step was taken at all the total is still the plain integer 0.
    if hasattr(total_reward, "item"):
        total_reward = total_reward.item()

    return total_reward

In [7]:
def eval_agent_in_env(rl_agent, g, max_iters = 10000):
    """Run the RL agent on ``g`` and split its flat tour into per-vehicle routes.

    The agent is validated on instance 0 of a fresh ``Environment`` through
    ``runner.Runner.validate``, which returns a reward and one flat node
    sequence. That sequence is cut at every interior ``0`` so that each piece
    becomes a route starting and ending with ``0``.

    Args:
        rl_agent: Agent object handed to ``runner.Runner``.
        g: Graph instance handed to the environment.
        max_iters: Iteration cap forwarded to ``validate``.

    Returns:
        Tuple ``(reward, routes)`` where ``routes`` is a list of node lists.
    """
    env = Environment({0: g}, "test", verbose=False)
    reward, route = runner.Runner(env, rl_agent).validate(
        0, max_iters, verbose=False, return_route=True
    )

    # `current` is always the route under construction (the last in `routes`).
    current = [0]
    routes = [current]
    # The first and last entries of the flat tour are dropped; the leading 0
    # is already seeded above and the closing 0 is appended after the loop.
    for node in route[1:-1]:
        if node != 0:
            current.append(node)
            continue
        # An interior 0 closes the current route and opens the next one.
        current.append(0)
        current = [0]
        routes.append(current)
    current.append(0)

    return reward, routes

In [8]:
def evaluate(g, n_instances, seed, agent_path="tr_model.pt"):
    """Compare the MIP, nearest-neighbour and RL solutions on generated instances.

    ``g`` is seeded once, then for each of ``n_instances`` iterations a new
    instance is generated with ``g.bss_graph_gen()`` and solved three ways.
    Each solution's reward is obtained by replaying it through the
    environment helpers defined earlier in this notebook.

    Args:
        g: Graph object; it is reseeded and regenerated in place.
        n_instances: Number of instances to generate and evaluate.
        seed: Value passed to ``g.seed``.
        agent_path: File from which the RL agent's model is loaded.

    Returns:
        Dict with keys ``"mip"``, ``"nn"`` and ``"rl"``; each maps to a dict
        holding two parallel lists, ``"routes"`` and ``"cost"``, with one
        entry per instance.
    """
    g.seed(seed)
    results = {
        method: {"routes": [], "cost": []}
        for method in ("mip", "nn", "rl")
    }

    def record(method, routes, cost):
        # Store one instance's outcome for the given method.
        results[method]["routes"].append(routes)
        results[method]["cost"].append(cost)

    # NOTE(review): the meaning of the 'GATv2' and 0 arguments is not visible
    # from this notebook — confirm against agent.Agent.
    rl_agent = agent.Agent('GATv2', 0)
    rl_agent.load_model(agent_path)

    for idx in range(n_instances):
        completed = idx + 1
        # Progress line every tenth instance.
        if completed % 10 == 0:
            print(f"Instance: {completed}/{n_instances}")

        g.bss_graph_gen()

        # Exact solution from the MIP.
        mip = BSSRPMIP(g, use_penalties=True, no_bikes_leaving=True)
        mip.optimize()
        mip_routes = mip.get_minimal_routes()
        mip_reward = eval_mip_sol_in_env(mip, g)
        record("mip", mip_routes, mip_reward)

        # Nearest-neighbour heuristic.
        nn = NearestNeighboursHeuristic(g)
        nn_routes = nn.run()
        nn_reward = eval_nn_in_env(nn, g)
        record("nn", nn_routes, nn_reward)

        # Trained RL agent.
        rl_reward, rl_route = eval_agent_in_env(rl_agent, g)
        record("rl", rl_route, rl_reward)

    return results
    

### Generate Graph

In [9]:
# Seed passed to evaluate(), which forwards it to g.seed() before generating
# any instance.
seed = 12343

In [10]:
# NOTE(review): neither flag is read anywhere else in the visible notebook.
# evaluate() builds BSSRPMIP with use_penalties=True and no_bikes_leaving=True
# as literals, so changing these two variables has no effect on the run.
use_penalties = True
no_bikes_leaving = True

In [11]:
# Instance-generation settings; each one is forwarded by keyword to Graph(...)
# in the following cell. Units are not stated in this notebook — TODO confirm.
num_nodes = 10
num_vehicles = 5
time_limit = 30

# Penalty weights and per-bike loading time.
penalty_cost_demand = 2
penalty_cost_time = 5
bike_load_time = 0

speed = 30 

In [12]:
# Graph object shared by the whole evaluation: evaluate() seeds it once and
# calls g.bss_graph_gen() on it for every instance.
# k_nn is given as a literal here, unlike the other arguments, which come from
# the settings variables defined above.
g = Graph(
        num_nodes = num_nodes, 
        k_nn = 2, 
        num_vehicles = num_vehicles,
        penalty_cost_demand = penalty_cost_demand,
        penalty_cost_time = penalty_cost_time, 
        speed = speed,
        bike_load_time=bike_load_time,
        time_limit = time_limit)

### Evaluate

In [13]:
# Number of instances evaluate() generates and solves with each method.
n_instances = 1000

In [14]:
# Run the full comparison; agent_path is left at its default ("tr_model.pt").
# NOTE(review): the saved output of this cell is cut off after instance
# 530/1000 — confirm the run completed before trusting the means below.
results = evaluate(g, n_instances, seed)

Instance: 10/1000
Instance: 20/1000
Instance: 30/1000
Instance: 40/1000
Instance: 50/1000
Instance: 60/1000
Instance: 70/1000
Instance: 80/1000
Instance: 90/1000
Instance: 100/1000
Instance: 110/1000
Instance: 120/1000
Instance: 130/1000
Instance: 140/1000
Instance: 150/1000
Instance: 160/1000
Instance: 170/1000
Instance: 180/1000
Instance: 190/1000
Instance: 200/1000
Instance: 210/1000
Instance: 220/1000
Instance: 230/1000
Instance: 240/1000
Instance: 250/1000
Instance: 260/1000
Instance: 270/1000
Instance: 280/1000
Instance: 290/1000
Instance: 300/1000
Instance: 310/1000
Instance: 320/1000
Instance: 330/1000
Instance: 340/1000
Instance: 350/1000
Instance: 360/1000
Instance: 370/1000
Instance: 380/1000
Instance: 390/1000
Instance: 400/1000
Instance: 410/1000
Instance: 420/1000
Instance: 430/1000
Instance: 440/1000
Instance: 450/1000
Instance: 460/1000
Instance: 470/1000
Instance: 480/1000
Instance: 490/1000
Instance: 500/1000
Instance: 510/1000
Instance: 520/1000
Instance: 530/1000
In

In [15]:
# Report, per method, the mean of the per-instance values stored under "cost".
for label, method in (("MIP", "mip"), ("NN", "nn"), ("RL", "rl")):
    print(f"{label} mean reward:", np.mean(results[method]["cost"]))

MIP mean reward: -146.99421950531007
NN mean reward: -277.1387175827026
RL mean reward: -243.80993399062007
