In [10]:
# Imports go here
import sys
import os
sys.path.append(os.path.abspath('..'))  # Add parent directory to path

from environment.knapsackgym import KnapsackEnv, _1_positive_reward, _1_negative_reward, v_i_positive_reward, vr_i_positive_reward, w_i_negative_reward, wr_i_negative_reward
from typing import List, Callable, Optional, Union, Tuple, Dict, Any
from models.DP_Knapsack import solve_knapsack_dp, solve_KP_instances_with_DP
from models.Greedy_Knapsack import solve_problem_instances_greedy
from models.KnapsackPPO import KnapsackPPOSolver
from models.KnapsackA2C import KnapsackA2C
from models.KnapsackQLearning import KnapsackDQN
from util.instance_gen import KnapsackInstanceGenerator
from util.metrics import evaluate_knapsack_performance
from models.KnapsackDRLSolver import KnapsackDRLSolver, run_KPSolver
from models.StateAggregator import StateAggregator
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from typing import List, Dict, Any, Tuple, Callable
import time
import itertools
import pandas as pd


In [None]:
def run_experiment(
    KPSolver_A2C: Optional["KnapsackA2C"],
    KPSolver_DQN: Optional["KnapsackDQN"],
    KPSolver_PPO: Optional["KnapsackPPOSolver"],
    instance_type: str,
    N: int,
    t_max: int,
    verbose: bool = False,
    M: int = 1000,
    n_test_instances: int = 200,
    r_range: int = 100,
    seed: int = 42,
) -> Dict[str, Any]:
    """
    Train and evaluate every supplied solver, with and without state aggregation.

    For each non-None solver the function runs two experiments (state
    aggregation on, then off). Each experiment trains through ``run_KPSolver``
    on ``M`` generated instances and then solves ``n_test_instances`` freshly
    generated test instances with the returned solver.

    NOTE(review): the DP baseline is disabled in this notebook, so the greedy
    values are passed to ``evaluate_knapsack_performance`` in both reference
    slots. The "Val/Opt", "#opt" and error metrics are therefore presumably
    measured against greedy rather than the true optimum -- confirm against
    ``evaluate_knapsack_performance``.

    Args:
        KPSolver_A2C: A2C solver to test, or None to skip it.
        KPSolver_DQN: DQN solver to test, or None to skip it.
        KPSolver_PPO: PPO solver to test, or None to skip it.
        instance_type: One of "RI", "FI", "HI" or "SS"; selects which
            generator method of ``KnapsackInstanceGenerator`` is used.
        N: Number of items, forwarded to the generator and to ``KnapsackEnv``.
        t_max: Forwarded to ``run_KPSolver`` as ``t_max``.
        verbose: Print progress and per-experiment metrics.
        M: Number of training instances to generate.
        n_test_instances: Number of test instances to generate.
        r_range: Range argument forwarded to the generator (not used for "FI").
        seed: Seed for the training instances; test instances use ``seed + 100``.

    Returns:
        Dictionary with the keys 'training', 'test', 'metrics' (each nested as
        ``[model_name][aggregation description]``), 'config' and 'summary'
        (the value returned by ``create_summary_table``).

    Raises:
        ValueError: If ``instance_type`` is not one of the known types.
    """
    gen = KnapsackInstanceGenerator(seed=seed)

    if verbose:
        print(f"Generating {M} {instance_type} training instances with N={N}, R={r_range}")

    # One factory per instance family; "FI" is the only one that takes no range.
    instance_factories = {
        "RI": lambda count, s: gen.generate_random_instances(count, N, r_range, seed=s),
        "FI": lambda count, s: gen.generate_fixed_instances(count, N, seed=s),
        "HI": lambda count, s: gen.generate_hard_instances(count, N, r_range, seed=s),
        "SS": lambda count, s: gen.generate_subset_sum_instances(count, N, r_range, seed=s),
    }
    if instance_type not in instance_factories:
        raise ValueError(f"Unknown instance type: {instance_type}")

    make_instances = instance_factories[instance_type]
    training_instances = make_instances(M, seed)
    # A different seed keeps the test set distinct from the training set.
    test_instances = make_instances(n_test_instances, seed + 100)

    # Greedy baselines (the DP baseline is intentionally not computed here).
    if verbose:
        print("Computing Greedy solutions for training instances...")
    greedy_values_train, _, _ = solve_problem_instances_greedy(training_instances)

    if verbose:
        print("Computing Greedy solutions for test instances...")
    greedy_values_test, _, _ = solve_problem_instances_greedy(test_instances)

    # Only solvers that were actually supplied take part in the experiment.
    candidate_models = (
        ("A2C", KPSolver_A2C),
        ("DQN", KPSolver_DQN),
        ("PPO", KPSolver_PPO),
    )
    model_constructors = {
        name: model for name, model in candidate_models if model is not None
    }

    aggregation_constructors = [True, False]

    results = {
        'training': {},
        'test': {},
        'metrics': {},
        'config': {
            'num_instances': M,
            'instance_type': instance_type,
            'n_items': N,
            'r_range': r_range,
            'seed': seed,
            't_max': t_max,
        }
    }

    total_experiments = len(aggregation_constructors) * len(model_constructors)

    if verbose:
        print(f"Running {total_experiments} experiments...")

    experiment_counter = 0

    for model_name, model in model_constructors.items():
        results['training'][model_name] = {}
        results['test'][model_name] = {}
        results['metrics'][model_name] = {}

        for aggr_policy in aggregation_constructors:
            experiment_counter += 1
            aggr_desc = "with state aggregation" if aggr_policy else "without state aggregation"

            if verbose:
                print(f"\nExperiment {experiment_counter}/{total_experiments}: Testing {model_name} {aggr_desc}")

            env = KnapsackEnv(
                problem_instance=None,
                N=N
            )

            # NOTE(review): the same solver object is handed to both aggregation
            # runs. Whether run_KPSolver resets its learned state is not visible
            # here -- if it does not, the second run starts from the first run's
            # weights. Confirm, or pass a fresh solver per run.
            start_time = time.time()
            solver, solution_values = run_KPSolver(
                env=env,
                KPSolver=model,
                training_problem_instances=training_instances,
                t_max=t_max,
                use_state_aggregation=aggr_policy,
                verbose=verbose
            )
            training_time = time.time() - start_time

            results['training'][model_name][aggr_desc] = {
                'solution_values': solution_values,
                'training_time': training_time
            }

            # Evaluate the solver returned by training on the held-out instances.
            test_values = []
            for instance in test_instances:
                env.change_problem_instance(instance)
                value, _, _ = solver.solve(instance)
                test_values.append(value)

            # Greedy values fill both reference slots (see NOTE in the docstring).
            train_metrics = evaluate_knapsack_performance(
                solution_values['instance_best_values'],
                greedy_values_train,
                greedy_values_train
            )
            test_metrics = evaluate_knapsack_performance(
                test_values,
                greedy_values_test,
                greedy_values_test
            )

            results['test'][model_name][aggr_desc] = {
                'values': test_values,
                'metrics': test_metrics
            }
            results['metrics'][model_name][aggr_desc] = {
                'train': train_metrics,
                'test': test_metrics
            }

            if verbose:
                _print_experiment_metrics("Training", model_name, aggr_desc, train_metrics, M)
                _print_experiment_metrics("Test", model_name, aggr_desc, test_metrics, n_test_instances)

    # Summarise only after ALL models have run. Doing this inside the model
    # loop would return after the first model and drop the remaining ones.
    results['summary'] = create_summary_table(results)

    return results


def _print_experiment_metrics(label, model_name, aggr_desc, metrics, total):
    """Print the headline metrics of one experiment split ("Training" or "Test")."""
    # aggr_desc already starts with "with"/"without", so no extra "with" here.
    print(f"{label} metrics for {model_name} {aggr_desc}:")
    print(f"  Val/Opt Ratio: {metrics['ValOptRatio']:.2f}%")
    # Looked up outside the f-string: '#' inside an f-string expression is a
    # SyntaxError before Python 3.12.
    n_opt = metrics['#opt']
    print(f"  #opt: {n_opt}/{total}")
    print(f"  Mean percentage error: {metrics['mean_percentage_error']:.4f}")
    print(f"  Mean improvement over greedy: {metrics['mean_improvement_over_greedy']:.4f}")

def create_summary_table(results: Dict[str, Any]) -> pd.DataFrame:
    """
    Flatten the nested metrics of an experiment into one row per configuration.

    Args:
        results: Results dictionary as built by run_experiment. Only
            ``results['metrics']`` is read; it must be nested as
            ``[model_name][configuration name] -> {'train': ..., 'test': ...}``
            where both metric dicts contain the keys 'ValOptRatio', '#opt',
            'mean_absolute_error', 'mean_percentage_error' and
            'mean_improvement_over_greedy'.

    Returns:
        pd.DataFrame: One row per (model, configuration) pair, with the same
        five metrics reported for the train and the test split. The 'Reward'
        column holds the configuration name (the inner key of
        ``results['metrics'][model_name]``); the column name is kept for
        backward compatibility. An empty ``results['metrics']`` yields an
        empty frame that still carries all columns.
    """
    # (column suffix, metric key) pairs shared by the train and test splits,
    # so both splits always report the same set of metrics.
    metric_columns = [
        ('ValOptRatio', 'ValOptRatio'),
        ('#opt', '#opt'),
        ('MAE', 'mean_absolute_error'),
        ('MPE', 'mean_percentage_error'),
        ('vs_Greedy', 'mean_improvement_over_greedy'),
    ]
    splits = [('Train', 'train'), ('Test', 'test')]

    columns = ['Model', 'Reward'] + [
        f"{prefix}_{suffix}"
        for prefix, _ in splits
        for suffix, _ in metric_columns
    ]

    rows = []
    for model_name, per_config in results['metrics'].items():
        for config_name, metrics in per_config.items():
            row = {'Model': model_name, 'Reward': config_name}
            for prefix, split_key in splits:
                split_metrics = metrics[split_key]
                for suffix, metric_key in metric_columns:
                    row[f"{prefix}_{suffix}"] = split_metrics[metric_key]
            rows.append(row)

    # Passing the columns explicitly fixes their order and keeps the schema
    # stable even when there are no rows.
    return pd.DataFrame(rows, columns=columns)
    

In [14]:
# Experiment grid: every instance family is run at every problem size.
instances = ["RI", "FI", "HI", "SS"]
Ns = [50, 300, 500]
t_max = 100

# results[instance type][N] -> dictionary returned by run_experiment
results = {}
for instance in instances:
    results[instance] = {}
    for N in Ns:
        # A new A2C solver is built for each problem size; the DQN and PPO
        # slots are left empty, so only A2C is exercised in this sweep.
        KPSolver_A2C = KnapsackA2C(
            N=N,
            gamma=0.99,
            lr_policy=0.001,
            lr_value=0.001,
            verbose=True,
        )
        results[instance][N] = run_experiment(
            KPSolver_A2C=KPSolver_A2C,
            KPSolver_DQN=None,
            KPSolver_PPO=None,
            instance_type=instance,
            N=N,
            t_max=t_max,
            verbose=True,
        )

Generating 1000 RI training instances with N=50, R=100
Computing Greedy solutions for training instances...
Computing Greedy solutions for test instances...
Running 2 experiments...

Experiment 1/2: Testing A2C with state aggregation
Running Model <class 'models.KnapsackA2C.KnapsackA2C'>


  top = (bucket_size ** action) * (self.features.shape[0] - (bucket_size * action))


Training on 1000 KP Instances, with N=50, t_max=100
Iteration [0/100], Training KP Instance 0, Reward: -0.005276762817043792
Training metrics for A2C with with state aggregation:
  Val/Opt Ratio: 4.99%
  #opt: 20/1000
  Mean percentage error: 0.9337
  Mean improvement over greedy: -0.9328
Test metrics for A2C with with state aggregation:
  Val/Opt Ratio: 45.57%
  #opt: 1/5
  Mean percentage error: 0.3432
  Mean improvement over greedy: -0.3432

Experiment 2/2: Testing A2C without state aggregation
Running Model <class 'models.KnapsackA2C.KnapsackA2C'>
Training on 1000 KP Instances, with N=50, t_max=100
Iteration [0/100], Training KP Instance 0, Reward: -0.005276762817043792
Training metrics for A2C with without state aggregation:
  Val/Opt Ratio: 4.94%
  #opt: 20/1000
  Mean percentage error: 0.9348
  Mean improvement over greedy: -0.9348
Test metrics for A2C with without state aggregation:
  Val/Opt Ratio: 75.52%
  #opt: 1/5
  Mean percentage error: 0.1675
  Mean improvement over gree

In [17]:
# Show the summary table of every (instance type, problem size) run, in sweep order.
for instance, N in itertools.product(instances, Ns):
    print(instance, N)
    print(results[instance][N]["summary"])

RI 50
  Model                     Reward  Train_ValOptRatio  Train_#opt  Train_MAE  \
0   A2C     with state aggregation           4.990742          20    404.425   
1   A2C  without state aggregation           4.943981          20    404.528   

   Train_MPE  Train_vs_Greedy  Test_ValOptRatio  Test_#opt  Test_MAE  \
0   0.933705        -0.932848         45.572476          1     193.0   
1   0.934765        -0.934765         75.521715          1      96.8   

   Test_MPE  Test_vs_Greedy  
0  0.343203       -0.343203  
1  0.167463       -0.106228  
RI 300
  Model                     Reward  Train_ValOptRatio  Train_#opt  Train_MAE  \
0   A2C     with state aggregation           2.736754           2   1033.386   
1   A2C  without state aggregation           2.662399           2   1034.176   

   Train_MPE  Train_vs_Greedy  Test_ValOptRatio  Test_#opt  Test_MAE  \
0   0.966648        -0.966648         36.144578          1     328.6   
1   0.967859        -0.967859         42.013214       