In [None]:
from gym.knapsackgym import KnapsackEnv
from typing import List, Callable, Optional, Union, Tuple, Dict, Any
from models.DP_Knapsack import solve_knapsack_dp, solve_KP_instances_with_DP
from models.Greedy_Knapsack import solve_problem_instances_greedy
from models.KnapsackPPO import KnapsackPPOSolver
from models.KnapsackA2C import KnapsackA2C
from models.KnapsackQLearning import KnapsackDQN
from util.instance_gen import KnapsackInstanceGenerator
from util.metrics import evaluate_knapsack_performance
from models.KnapsackDRLSolver import KnapsackDRLSolver, run_KPSolver


## Testing: train the A2C, PPO and DQN knapsack solvers and compare them with the DP and greedy baselines

In [None]:

# TODO integrate the instance generator in this code

# Driver cell: generate the problem instances, solve them with the DP and
# greedy baselines, then run each RL solver on the same instance set.
if __name__ == "__main__":
    # Settings shared by every solver below.
    N = 50
    gamma = 0.99
    t_max = None  # previously-tried alternative: 20000
    learning_rate = 0.001  # used for every policy/value/Q learning rate
    instance_seed = 42

    env: KnapsackEnv = KnapsackEnv(problem_instance=None, N=N)
    gen = KnapsackInstanceGenerator(seed=instance_seed)

    # NOTE(review): M is presumably the number of instances and R a value
    # range for the generator -- confirm against KnapsackInstanceGenerator.
    problem_instances = gen.generate('RI', M=1000, N=N, R=100)
    print(problem_instances)

    # One solver per RL algorithm, all configured with the same N and gamma.
    KPSolver_A2C = KnapsackA2C(
        N=N,
        gamma=gamma,
        lr_policy=learning_rate,
        lr_value=learning_rate,
        verbose=False,
    )
    KPSolver_PPO = KnapsackPPOSolver(
        N=N,
        gamma=gamma,
        policy_lr=learning_rate,
        value_lr=learning_rate,
        verbose=False,
    )
    KPSolver_DQN = KnapsackDQN(N=N, gamma=gamma, lr=learning_rate, verbose=False)

    # Baselines that the RL results are later compared against.
    DP_sol_items, DP_value, DP_weight = solve_KP_instances_with_DP(problem_instances)
    Greedy_value_total, Greedy_selected, Greedy_weight_total = solve_problem_instances_greedy(
        problem_instances
    )

    # Every solver is run with the same environment, instances and t_max.
    shared_run_kwargs = dict(
        env=env,
        training_problem_instances=problem_instances,
        t_max=t_max,
    )
    A2C_Results = run_KPSolver(KPSolver=KPSolver_A2C, **shared_run_kwargs)
    PPO_Results = run_KPSolver(KPSolver=KPSolver_PPO, **shared_run_kwargs)
    DQN_Results = run_KPSolver(KPSolver=KPSolver_DQN, **shared_run_kwargs)

   


In [None]:
 # print(A2C_Results, DP_value,  Greedy_value_total)
# Compare each RL solver's per-instance values with the DP (optimal) and
# greedy baselines.
#
# A2C_Results is a dict whose per-instance values live under
# "instance_best_values". PPO_Results and DQN_Results are produced by the same
# run_KPSolver call, so they are assumed to share that layout (confirm against
# run_KPSolver); the same key is extracted for all three so that
# evaluate_knapsack_performance always receives the value list rather than
# the whole result dict.
BEST_VALUES_KEY = "instance_best_values"

A2C_best_values = A2C_Results[BEST_VALUES_KEY]
PPO_best_values = PPO_Results[BEST_VALUES_KEY]
DQN_best_values = DQN_Results[BEST_VALUES_KEY]

A2C_metric_results = evaluate_knapsack_performance(A2C_best_values, DP_value, Greedy_value_total)
PPO_metric_results = evaluate_knapsack_performance(PPO_best_values, DP_value, Greedy_value_total)
DQN_metric_results = evaluate_knapsack_performance(DQN_best_values, DP_value, Greedy_value_total)

print("Optimal solution values:", DP_value)
print("Greedy solution values:", Greedy_value_total)
print("A2C Trained solution values:", A2C_best_values)
print("PPO Trained solution values:", PPO_best_values)
print("DQN Trained solution values:", DQN_best_values)

print(A2C_metric_results)
print(PPO_metric_results)
print(DQN_metric_results)



Optimal solution values: [154.0, 354.0, 334.0, 459.0, 460.0, 453.0, 418.0, 304.0, 0.0, 307.0, 863.0, 643.0, 232.0, 560.0, 708.0, 663.0, 652.0, 648.0, 485.0, 353.0, 103.0, 481.0, 71.0, 181.0, 30.0, 188.0, 94.0, 94.0, 581.0, 304.0, 525.0, 502.0, 529.0, 248.0, 82.0, 563.0, 690.0, 232.0, 319.0, 159.0, 720.0, 176.0, 694.0, 760.0, 23.0, 152.0, 114.0, 272.0, 479.0, 698.0, 51.0, 763.0, 370.0, 428.0, 201.0, 424.0, 333.0, 570.0, 637.0, 164.0, 581.0, 651.0, 641.0, 464.0, 141.0, 580.0, 176.0, 552.0, 408.0, 234.0, 134.0, 535.0, 234.0, 155.0, 281.0, 385.0, 545.0, 181.0, 294.0, 74.0, 148.0, 1270.0, 462.0, 494.0, 289.0, 255.0, 540.0, 95.0, 879.0, 690.0, 221.0, 371.0, 466.0, 750.0, 164.0, 607.0, 268.0, 328.0, 335.0, 526.0, 430.0, 183.0, 314.0, 427.0, 538.0, 98.0, 760.0, 159.0, 518.0, 634.0, 592.0, 442.0, 440.0, 235.0, 445.0, 644.0, 163.0, 134.0, 304.0, 231.0, 227.0, 330.0, 308.0, 225.0, 80.0, 517.0, 724.0, 1095.0, 406.0, 577.0, 260.0, 83.0, 163.0, 558.0, 187.0, 93.0, 122.0, 1025.0, 170.0, 92.0, 822.0, 

In [12]:
# Debug dump of the raw result object that run_KPSolver returned for the A2C
# solver (a dict; the per-instance values are under "instance_best_values").
print(A2C_Results)

{'instance_best_values': [133.0, 186.0, 201.0, 134.0, 88.0, 242.0, 188.0, 93.0, 0, 258.0, 245.0, 121.0, 166.0, 172.0, 161.0, 259.0, 193.0, 126.0, 148.0, 142.0, 103.0, 294.0, 71.0, 123.0, 30.0, 185.0, 94.0, 94.0, 116.0, 200.0, 176.0, 119.0, 167.0, 163.0, 82.0, 244.0, 380.0, 198.0, 150.0, 98.0, 165.0, 104.0, 314.0, 344.0, 23.0, 152.0, 114.0, 178.0, 154.0, 169.0, 51.0, 204.0, 203.0, 211.0, 132.0, 241.0, 160.0, 225.0, 203.0, 164.0, 165.0, 147.0, 216.0, 135.0, 95.0, 307.0, 114.0, 309.0, 156.0, 167.0, 110.0, 325.0, 149.0, 111.0, 194.0, 107.0, 138.0, 171.0, 195.0, 74.0, 87.0, 237.0, 182.0, 132.0, 202.0, 90.0, 195.0, 95.0, 230.0, 134.0, 221.0, 278.0, 230.0, 215.0, 83.0, 123.0, 175.0, 138.0, 90.0, 118.0, 221.0, 174.0, 150.0, 280.0, 163.0, 98.0, 370.0, 159.0, 332.0, 134.0, 76.0, 124.0, 78.0, 78.0, 213.0, 288.0, 163.0, 134.0, 72.0, 162.0, 213.0, 129.0, 308.0, 164.0, 80.0, 149.0, 157.0, 218.0, 245.0, 146.0, 89.0, 83.0, 122.0, 218.0, 92.0, 68.0, 122.0, 295.0, 64.0, 76.0, 191.0, 129.0, 205.0, 87.0, 

In [None]:
# Side-by-side view of the A2C per-instance values and the two baselines.
# A2C_Results is a dict, so the value list is read from its
# "instance_best_values" entry (as in the evaluation cell) instead of printing
# the whole dict under the "Trained solution values" label.
print("Trained solution values:", A2C_Results["instance_best_values"])
print("Optimal solution values:", DP_value)
print("Greedy solution values:", Greedy_value_total)


Trained solution values: [144.0, 294.0, 283.0]
Optimal solution values: [154.0, 354.0, 334.0]
Greedy solution values: [154.0, 352.0, 327.0]
