In [1]:
import sys
import os
sys.path.append(os.path.abspath('..'))  # Add parent directory to path

from typing import List, Callable, Optional, Union, Tuple, Dict, Any
from DP_Knapsack import solve_knapsack_dp, solve_KP_instances_with_DP
from Greedy_Knapsack import solve_problem_instances_greedy
from KnapsackPPO import KnapsackPPOSolver
from KnapsackA2C import KnapsackA2C
from KnapsackQLearning import KnapsackDQN
from util.instance_gen import KnapsackInstanceGenerator

from util.metrics import evaluate_knapsack_performance
from KnapsackDRLSolver import KnapsackDRLSolver, run_KPSolver
from environment.knapsackgym import KnapsackEnv


This jupyter notebook is purely for testing and experimenting.

## Testing stuff

In [None]:

# TODO integrate the instance generator in this code

# Example usage
if __name__ == "__main__":
    

    N = 50
    M = 10
    gamma = 0.99
    t_max = 40000
    # t_max = None

    env:KnapsackEnv = KnapsackEnv(problem_instance=None, N=N)
    gen = KnapsackInstanceGenerator(seed=42)

    problem_instances = gen.generate('RI', M=M, N=N, R=100)
    print(problem_instances)

    KPSolver_A2C = KnapsackA2C(N=N, gamma=gamma, lr_policy=0.001, lr_value=0.001, verbose=False)
    KPSolver_PPO = KnapsackPPOSolver(N=N, gamma=gamma, policy_lr=0.001, value_lr=0.001, verbose=False)
    KPSolver_DQN = KnapsackDQN(N=N, gamma=gamma, lr=0.001, verbose=False)

    DP_sol_items, DP_value, DP_weight = solve_KP_instances_with_DP(problem_instances)
    Greedy_value_total, Greedy_selected, Greedy_weight_total = solve_problem_instances_greedy(problem_instances)
    
    
    _, A2C_Results = run_KPSolver(env=env, KPSolver=KPSolver_A2C, training_problem_instances=problem_instances, t_max=t_max)
    _, PPO_Results = run_KPSolver(env=env, KPSolver=KPSolver_PPO, training_problem_instances=problem_instances, t_max=t_max)
    _, DQN_Results = run_KPSolver(env=env, KPSolver=KPSolver_DQN, training_problem_instances=problem_instances, t_max=t_max)

   


[{'values': [9, 70, 21, 10, 53], 'weights': [78, 66, 44, 44, 86], 'capacity': 293}, {'values': [76, 20, 37, 47, 50, 5, 55, 16, 75, 69, 93, 75, 37, 97, 42, 33, 91, 38, 8, 47, 80, 19, 47, 13, 69, 48, 34, 23, 57, 67, 95, 44, 17, 84, 63, 71, 10], 'weights': [77, 72, 79, 52, 13, 84, 46, 51, 38, 19, 93, 79, 65, 41, 83, 55, 45, 46, 23, 10, 56, 89, 7, 86, 83, 28, 64, 17, 76, 71, 36, 7, 98, 45, 90, 68, 78], 'capacity': 100}, {'values': [58, 64, 57, 56, 10, 56, 80, 31, 61, 4, 35, 44, 99, 22, 28, 41, 100, 86, 4, 24, 83, 6, 86, 29, 92, 30, 44, 67, 13, 56, 51, 79, 100, 67, 41, 41, 42, 82, 33], 'weights': [84, 44, 81, 85, 39, 90, 29, 24, 69, 64, 14, 84, 20, 81, 1, 80, 79, 79, 67, 48, 71, 28, 79, 56, 46, 51, 57, 4, 14, 25, 12, 44, 67, 66, 48, 86, 57, 8, 77], 'capacity': 58}, {'values': [17, 39, 24, 31, 69, 64, 61, 37, 96, 9, 35, 12, 34, 97, 37, 91, 50], 'weights': [3, 11, 10, 78, 73, 70, 47, 72, 17, 91, 51, 94, 16, 50, 70, 50, 45], 'capacity': 213}, {'values': [55, 18, 47, 86, 2, 76, 50, 72, 67, 44, 

  states_tensor = torch.FloatTensor(states)
  value_loss = F.mse_loss(state_values, returns_tensor)


Iteration [1000/20000], Training KP Instance 0, Reward: -0.005276762817043792
Iteration [2000/20000], Training KP Instance 0, Reward: -0.004517794822580917
Iteration [3000/20000], Training KP Instance 0, Reward: -0.004517794822580917
Iteration [4000/20000], Training KP Instance 0, Reward: -0.004517794822580917
Iteration [5000/20000], Training KP Instance 0, Reward: -0.004517794822580917
Iteration [6000/20000], Training KP Instance 0, Reward: -0.004517794822580917
Iteration [7000/20000], Training KP Instance 0, Reward: -0.005276762817043792
Iteration [8000/20000], Training KP Instance 0, Reward: -0.04680122243024061
Iteration [9000/20000], Training KP Instance 0, Reward: -0.012955943704415804
Iteration [10000/20000], Training KP Instance 0, Reward: -0.04680122243024061
Iteration [11000/20000], Training KP Instance 0, Reward: -0.04680122243024061
Iteration [12000/20000], Training KP Instance 0, Reward: -0.04680122243024061
Iteration [13000/20000], Training KP Instance 0, Reward: -0.03489

In [3]:
 # print(A2C_Results, DP_value,  Greedy_value_total)
A2C_metric_results = evaluate_knapsack_performance(A2C_Results["instance_best_values"], DP_value, Greedy_value_total)
PPO_metric_results = evaluate_knapsack_performance(PPO_Results["instance_best_values"], DP_value, Greedy_value_total)
DQN_metric_results = evaluate_knapsack_performance(DQN_Results["instance_best_values"], DP_value, Greedy_value_total)

print("Optimal solution values:", DP_value)
print("Greedy solution values:", Greedy_value_total)
print("A2C Trained solution values:", A2C_Results["instance_best_values"])
print("PPO Trained solution values:", PPO_Results)
print("DQN Trained solution values:", DQN_Results)

print(A2C_metric_results)
print(PPO_metric_results)
print(DQN_metric_results)



Optimal solution values: [154.0, 354.0, 334.0, 459.0, 460.0, 453.0, 418.0, 304.0, 0.0, 307.0]
Greedy solution values: [154.0, 352.0, 327.0, 459.0, 460.0, 453.0, 405.0, 290.0, 0.0, 307.0]
A2C Trained solution values: [154. 249. 327. 459. 450. 453. 355. 290.   0. 307.]
PPO Trained solution values: {'instance_best_values': array([154., 302., 292., 459., 404., 453., 375., 304.,   0., 307.]), 'best_values_over_time': array([[142.,   0.,   0., ...,   0.,   0.,   0.],
       [142., 140.,   0., ...,   0.,   0.,   0.],
       [142., 140., 151., ...,   0.,   0.,   0.],
       ...,
       [154., 302., 292., ..., 304.,   0., 307.],
       [154., 302., 292., ..., 304.,   0., 307.],
       [154., 302., 292., ..., 304.,   0., 307.]]), 'best_sum_over_time': array([ 142.,  282.,  433., ..., 3050., 3050., 3050.]), 'avg_rewards_over_time': array([-1.29559437e-02, -4.59702381e-01, -8.01014957e-01, ...,
       -8.68762089e-01, -2.15555556e+00, -8.00034754e+01])}
DQN Trained solution values: {'instance_best

In [12]:
print(A2C_Results)

{'instance_best_values': [133.0, 186.0, 201.0, 134.0, 88.0, 242.0, 188.0, 93.0, 0, 258.0, 245.0, 121.0, 166.0, 172.0, 161.0, 259.0, 193.0, 126.0, 148.0, 142.0, 103.0, 294.0, 71.0, 123.0, 30.0, 185.0, 94.0, 94.0, 116.0, 200.0, 176.0, 119.0, 167.0, 163.0, 82.0, 244.0, 380.0, 198.0, 150.0, 98.0, 165.0, 104.0, 314.0, 344.0, 23.0, 152.0, 114.0, 178.0, 154.0, 169.0, 51.0, 204.0, 203.0, 211.0, 132.0, 241.0, 160.0, 225.0, 203.0, 164.0, 165.0, 147.0, 216.0, 135.0, 95.0, 307.0, 114.0, 309.0, 156.0, 167.0, 110.0, 325.0, 149.0, 111.0, 194.0, 107.0, 138.0, 171.0, 195.0, 74.0, 87.0, 237.0, 182.0, 132.0, 202.0, 90.0, 195.0, 95.0, 230.0, 134.0, 221.0, 278.0, 230.0, 215.0, 83.0, 123.0, 175.0, 138.0, 90.0, 118.0, 221.0, 174.0, 150.0, 280.0, 163.0, 98.0, 370.0, 159.0, 332.0, 134.0, 76.0, 124.0, 78.0, 78.0, 213.0, 288.0, 163.0, 134.0, 72.0, 162.0, 213.0, 129.0, 308.0, 164.0, 80.0, 149.0, 157.0, 218.0, 245.0, 146.0, 89.0, 83.0, 122.0, 218.0, 92.0, 68.0, 122.0, 295.0, 64.0, 76.0, 191.0, 129.0, 205.0, 87.0, 

In [None]:
print("Trained solution values:", A2C_Results)
print("Optimal solution values:", DP_value)
print("Greedy solution values:", Greedy_value_total)


Trained solution values: [144.0, 294.0, 283.0]
Optimal solution values: [154.0, 354.0, 334.0]
Greedy solution values: [154.0, 352.0, 327.0]
