In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from typing import List, Tuple, Dict
import time
import re

class KnapsackInstance:
    """
    Plain container for the data of one knapsack problem instance.

    The constructor performs no validation or conversion: every argument is
    stored unchanged on the attribute of the same name.
    """

    def __init__(
        self,
        name: str,
        num_items: int,
        num_knapsacks: int,
        capacities: List[int],
        profits: List[int],
        weights: np.ndarray,
        optimal_value: int = None,
    ):
        """
        Store the description of a knapsack instance.

        Args:
            name: Instance name/identifier.
            num_items: Number of items (n).
            num_knapsacks: Number of knapsacks / capacity constraints (m).
            capacities: One capacity per knapsack.
            profits: Profit/value of each item.
            weights: Weight matrix. The loaders in this file build it with
                one row per knapsack and one column per item.
            optimal_value: Known optimal objective value; defaults to None
                when not supplied.
        """
        # Identification and problem dimensions.
        self.name, self.num_items, self.num_knapsacks = (
            name,
            num_items,
            num_knapsacks,
        )
        # Problem data.
        self.capacities, self.profits, self.weights = capacities, profits, weights
        # Reference value for comparing solver results.
        self.optimal_value = optimal_value

In [None]:
class DataLoader:
    """
    Handles loading of knapsack instances from different file formats.

    Both loaders read the whole file into memory and build
    ``KnapsackInstance`` objects from its text.
    """

    def load_or_library(self, filepath, instance_names):
        """
        Load OR-Library instances from an mknap2.txt-style file.

        For each requested name the loader searches (case-insensitively) for
        the header ``problem <name>.DAT``. The first following line that is
        not blank, not a ``//`` comment and not a ``++++`` rule is read as
        ``m n``. Every integer token between that line and the next
        ``problem `` header is then collected and interpreted, in order, as
        n profits, m capacities and m rows of n weights; the LAST collected
        integer is taken as the known optimum.

        Instances that are missing or malformed are reported with a printed
        warning and skipped; they never abort the whole load.

        Args:
            filepath: Path of the text file to read.
            instance_names: Iterable of instance names (without ``.DAT``).

        Returns:
            list: ``KnapsackInstance`` objects for the instances that could
            be parsed, in the order of ``instance_names``.
        """
        # Read the entire file content
        with open(filepath, 'r') as file:
            content = file.read()

        def clean_line(line: str) -> str:
            """Drop a trailing ``//`` comment and surrounding whitespace."""
            return line.partition('//')[0].strip()

        instances = []
        content_lower = content.lower()
        for name in instance_names:
            # Find the start of the instance block by name
            idx = content_lower.find(f"problem {name}.DAT".lower())
            if idx == -1:
                print(f"Warning: Instance {name} not found")
                continue
            # Find the first line after the instance header
            start_pos = content.find('\n', idx)
            if start_pos == -1:
                print(f"Warning: Malformed block for {name}")
                continue
            start_pos += 1

            # Skip empty/comment/++++ lines to find the "m n" line. The
            # result is tracked in `header`, which is reset for every
            # instance, so running off the end of the file (or an empty
            # remainder) is detected instead of reusing a stale or
            # undefined value.
            header = ""
            line_end = start_pos
            while start_pos < len(content):
                line_end = content.find('\n', start_pos)
                if line_end == -1:
                    line_end = len(content)
                candidate = clean_line(content[start_pos:line_end])
                if candidate and not candidate.startswith('++++'):
                    header = candidate
                    break
                start_pos = line_end + 1
            if not header:
                print(f"Warning: Could not find m,n for {name}")
                continue

            # Parse m (number of knapsacks) and n (number of items)
            parts = header.split()
            if len(parts) < 2:
                print(f"Warning: Could not parse m,n for {name}")
                continue
            try:
                m = int(parts[0])
                n = int(parts[1])
            except ValueError:
                # Non-numeric header: treat like any other malformed block.
                print(f"Warning: Could not parse m,n for {name}")
                continue
            if m < 1 or n < 1:
                print(f"Warning: Could not parse m,n for {name}")
                continue

            # The data block runs from the line after "m n" to the next header
            data_start = line_end + 1
            next_problem_idx = content_lower.find("problem ", data_start)
            data_end = next_problem_idx if next_problem_idx != -1 else len(content)

            # Collect all integers in the data block, skipping
            # comments/empty/++++ lines and any non-integer token.
            numbers = []
            for line in content[data_start:data_end].split('\n'):
                cleaned = clean_line(line)
                if not cleaned or cleaned.startswith('++++'):
                    continue
                for token in cleaned.split():
                    try:
                        numbers.append(int(token))
                    except ValueError:
                        continue

            # Need profits, capacities, weights and the optimal value
            expected_count = n + m + (m * n) + 1
            if len(numbers) < expected_count:
                print(f"Warning: Not enough numbers for {name}. Expected {expected_count}, got {len(numbers)}")
                continue

            # The optimum is the last integer of the block.
            # NOTE(review): a stray integer after the optimum (e.g. in
            # trailing prose) would be taken as the optimum - confirm the
            # file never contains one.
            opt = numbers[-1]
            numbers = numbers[:-1]

            profits = numbers[:n]
            capacities = numbers[n:n + m]
            # Weights as m rows of n columns, stored right after the capacities
            weight_start = n + m
            weights = [
                numbers[weight_start + row * n:weight_start + (row + 1) * n]
                for row in range(m)
            ]

            instances.append(KnapsackInstance(
                name, n, m, capacities, profits, np.array(weights), opt
            ))

        return instances

    def load_pisinger(self, filepath, instance_name):
        """
        Load one Pisinger instance from a CSV-like text file.

        The block for ``instance_name`` starts on the line after the name and
        ends before the next line starting with ``knapPI_`` (or at end of
        file). Within the block, the second whitespace-separated token of the
        first three lines is read as n, capacity and optimal value; the
        fourth line is skipped; the next n lines are comma-separated items.

        Args:
            filepath: Path of the text file to read.
            instance_name: Name of the instance block to load.

        Returns:
            KnapsackInstance: Single-knapsack instance (one capacity, a
            1 x n weight matrix).

        Raises:
            ValueError: If the instance name is not found, a numeric field
                cannot be converted, or the block has fewer than n item lines.
        """
        # Read the file content
        with open(filepath, 'r') as file:
            content = file.read()
        # Escape the name so regex metacharacters in it are matched literally.
        pattern = rf"{re.escape(instance_name)}\n(.*?)(?=\nknapPI_|\Z)"
        match = re.search(pattern, content, re.DOTALL)
        if not match:
            raise ValueError(f"Instance {instance_name} not found")
        lines = match.group(1).strip().split('\n')
        n = int(lines[0].split()[1])
        capacity = int(lines[1].split()[1])
        optimal = int(lines[2].split()[1])

        # Item rows follow the four header lines.
        item_lines = lines[4:4 + n]
        if len(item_lines) < n:
            # Refuse to build an instance whose data disagrees with n.
            raise ValueError(
                f"Instance {instance_name}: expected {n} item lines, "
                f"found {len(item_lines)}"
            )

        # NOTE(review): field 1 is read as the weight and field 2 as the
        # profit - confirm this column order against the dataset description.
        weights, profits = [], []
        for line in item_lines:
            parts = line.split(',')
            weights.append(int(parts[1]))
            profits.append(int(parts[2]))

        return KnapsackInstance(
            instance_name, n, 1, [capacity], profits, np.array([weights]), optimal
        )

## Greedy baseline for comparison

In [None]:
def greedy_baseline(instance):
    """
    Greedy baseline algorithm for the knapsack problem.

    Strategy:
        - Rank items by profit divided by their average weight over all
          knapsacks (a small epsilon avoids division by zero), best first.
        - Walk the ranking once and keep every item that still fits in all
          knapsacks given the items already kept.

    The load of each knapsack is tracked incrementally, so each candidate
    item costs one vector addition instead of a full re-evaluation of the
    whole solution.

    Args:
        instance (KnapsackInstance): Knapsack instance to solve. Reads
            ``weights``, ``profits``, ``capacities``, ``num_items`` and
            ``num_knapsacks``.

    Returns:
        tuple: (solution, value)
            solution (np.ndarray): Binary solution vector (1=selected, 0=not selected)
            value: Total profit of the greedy solution (result of ``np.dot``)
    """
    weights = np.asarray(instance.weights)
    profits = np.asarray(instance.profits)

    # Profit-to-average-weight efficiency, most efficient item first
    avg_weights = np.mean(weights, axis=0)
    efficiencies = profits / (avg_weights + 1e-6)
    ranked_items = np.argsort(efficiencies)[::-1]

    # Only the first num_knapsacks rows act as capacity constraints
    num_constraints = instance.num_knapsacks
    constraint_weights = weights[:num_constraints]
    capacities = np.asarray(instance.capacities)[:num_constraints]

    solution = np.zeros(instance.num_items, dtype=int)
    loads = np.zeros(constraint_weights.shape[0], dtype=weights.dtype)

    for item in ranked_items:
        # Load of every knapsack if this item were added
        candidate_loads = loads + constraint_weights[:, item]
        if np.all(candidate_loads <= capacities):
            solution[item] = 1
            loads = candidate_loads

    # Total profit of the greedy solution
    value = np.dot(solution, profits)
    return solution, value

In [None]:
class BeesAlgorithmStandard:
    """
    Standard Bees Algorithm for the 0-1 knapsack problem with one or more
    capacity constraints.

    Each iteration:
    - the population is ranked by fitness;
    - the top ``num_selected_sites`` bees become sites, the first
      ``num_elite_sites`` of them elite;
    - foragers run a bit-flip local search around every site and only the
      fittest bee of each site survives;
    - the remaining ``num_scout_bees - num_selected_sites`` slots are filled
      with fresh random scouts (global exploration);
    - sites whose rank position did not change for ``stagnation_limit``
      iterations are abandoned and replaced by random scouts;
    - the neighborhood shrinks: a(t+1) = shrinking_factor * a(t), never
      below 1.

    The population therefore always holds exactly ``num_scout_bees`` bees.
    All random numbers come from the global ``np.random`` state.
    """

    def __init__(self, instance, num_scout_bees=50, num_selected_sites=10,
                 num_elite_sites=5, num_bees_elite=20, num_bees_selected=10,
                 max_iterations=100, initial_neighborhood_size=5,
                 shrinking_factor=0.8, stagnation_limit=10, random_seed=None):
        """
        Initialize the Bees Algorithm.

        Args:
            instance: Problem instance; the solver reads ``num_items``,
                ``profits``, ``weights`` and ``capacities``.
            num_scout_bees: Population size.
            num_selected_sites: Number of best bees exploited as sites.
            num_elite_sites: Number of selected sites treated as elite.
            num_bees_elite: Bees per elite site (the site bee plus
                ``num_bees_elite - 1`` foragers).
            num_bees_selected: Bees per non-elite selected site.
            max_iterations: Number of iterations of the main loop.
            initial_neighborhood_size: Starting neighborhood size (upper
                bound on the number of bits flipped per neighbor).
            shrinking_factor: Multiplier applied to the neighborhood size
                after every iteration.
            stagnation_limit: Unchanged iterations after which a site is
                abandoned.
            random_seed: If not None, seeds the global ``np.random`` state.

        Raises:
            ValueError: If the parameters are inconsistent.
        """
        # Validate parameters as per BA standard
        if num_scout_bees < 1:
            raise ValueError("num_scout_bees must be >= 1")
        if num_elite_sites > num_selected_sites:
            raise ValueError("num_elite_sites must be <= num_selected_sites")
        if num_selected_sites > num_scout_bees:
            raise ValueError("num_selected_sites must be <= num_scout_bees")
        if not (0 < shrinking_factor <= 1):
            raise ValueError("shrinking_factor must be in (0, 1]")
        if stagnation_limit < 1:
            raise ValueError("stagnation_limit must be >= 1")

        # Store core BA parameters
        self.instance = instance
        self.num_scout_bees = num_scout_bees
        self.num_selected_sites = num_selected_sites
        self.num_elite_sites = num_elite_sites
        self.num_bees_elite = num_bees_elite
        self.num_bees_selected = num_bees_selected
        self.max_iterations = max_iterations
        # Neighborhood shrinking parameters
        self.initial_neighborhood_size = initial_neighborhood_size
        self.shrinking_factor = shrinking_factor
        self.current_neighborhood_size = float(initial_neighborhood_size)
        # Site abandonment parameter; one counter per selected-site rank
        self.stagnation_limit = stagnation_limit
        self.stagnation_counter = np.zeros(num_selected_sites, dtype=np.int32)

        # Set random seed for reproducibility
        if random_seed is not None:
            np.random.seed(random_seed)
        # Tracking variables for results
        self.best_solution = None
        self.best_value = -np.inf
        self.best_iteration = 0
        self.convergence_history = []
        self.iteration_times = []

    def evaluate_fitness(self, solution):
        """
        Evaluate the fitness of a solution.

        A feasible solution scores its total profit. An infeasible one scores
        its profit minus 1000 times the summed capacity excess over all
        knapsacks.

        Args:
            solution (np.ndarray): Binary solution vector (1 = item selected).

        Returns:
            The fitness value (profit or penalized profit).
        """
        profit = np.dot(solution, self.instance.profits)
        if self.is_feasible(solution):
            return profit
        # Amount by which each knapsack is over capacity (0 when within it)
        violations = np.maximum(
            0, np.dot(solution, self.instance.weights.T) - self.instance.capacities
        )
        return profit - 1000 * np.sum(violations)

    def is_feasible(self, solution):
        """
        Check whether a solution satisfies all capacity constraints.

        Args:
            solution (np.ndarray): Binary solution vector.

        Returns:
            bool: True if the load of every knapsack is within its capacity.
        """
        loads = np.dot(solution, self.instance.weights.T)
        return bool(np.all(loads <= self.instance.capacities))

    def repair_solution(self, solution):
        """
        Repair an infeasible solution by removing the least efficient items.

        Efficiency is profit divided by the item's average weight across all
        knapsacks. Selected items are dropped one at a time, lowest
        efficiency first, until the solution is feasible or empty. Items are
        never added.

        Args:
            solution (np.ndarray): Binary solution vector to repair.

        Returns:
            np.ndarray: Repaired copy; the input is not modified.
        """
        repaired = np.array(solution, copy=True)
        # Efficiency depends only on the instance, not on which items are
        # selected, so it is computed once for all items.
        avg_weights = np.mean(np.asarray(self.instance.weights), axis=0)
        efficiencies = np.asarray(self.instance.profits) / (avg_weights + 1e-6)

        while not self.is_feasible(repaired):
            selected = np.flatnonzero(repaired)
            if selected.size == 0:
                # Nothing left to remove
                break
            worst = selected[np.argmin(efficiencies[selected])]
            repaired[worst] = 0

        return repaired

    def local_search(self, solution, num_neighbors):
        """
        Return the best of ``num_neighbors`` random bit-flip neighbors.

        Every neighbor flips ``flip_count`` distinct bits of ``solution`` and
        is then repaired. ``flip_count`` is the integer part of the current
        neighborhood size, capped at ``num_items // 10`` and at least 1, so
        it decreases as the neighborhood shrinks.

        Args:
            solution (np.ndarray): The current binary solution vector.
            num_neighbors (int): Number of neighbor solutions to evaluate.

        Returns:
            np.ndarray: The fittest neighbor, or a copy of ``solution`` when
            no neighbor is strictly better.
        """
        solution = np.asarray(solution)
        best_neighbor = solution.copy()
        best_fitness = self.evaluate_fitness(best_neighbor)

        num_items = self.instance.num_items
        if num_items == 0:
            # No bits to flip
            return best_neighbor

        flip_count = max(1, min(int(self.current_neighborhood_size),
                                num_items // 10))

        for _ in range(num_neighbors):
            neighbor = solution.copy()
            # Distinct positions, so every chosen bit is flipped exactly once
            flip_indices = np.random.choice(num_items, flip_count, replace=False)
            neighbor[flip_indices] = 1 - neighbor[flip_indices]
            neighbor = self.repair_solution(neighbor)
            fitness = self.evaluate_fitness(neighbor)
            if fitness > best_fitness:
                best_fitness = fitness
                best_neighbor = neighbor
        return best_neighbor

    def _shrink_neighborhood(self):
        """
        Multiply the neighborhood size by ``shrinking_factor``.

        The size never drops below 1.0, so local search always flips at
        least one bit.
        """
        self.current_neighborhood_size = max(
            1.0,
            self.current_neighborhood_size * self.shrinking_factor
        )

    def _check_and_abandon_sites(self, population, fitness_values, previous_best_sites, current_best_sites):
        """
        Replace stagnant sites with random scouts.

        Sites are compared by rank: row i of ``previous_best_sites`` against
        row i of ``current_best_sites``. A rank whose solution is unchanged
        has its counter incremented, otherwise reset. Ranks whose counter
        reaches ``stagnation_limit`` are overwritten in ``population`` (in
        place) with a repaired random solution and their counter is reset.

        Args:
            population (np.ndarray): Current population, ranked best first.
            fitness_values (np.ndarray): Fitness of each population row.
            previous_best_sites (np.ndarray): Top sites before local search.
            current_best_sites (np.ndarray): Top sites after local search.

        Returns:
            tuple: (updated_population, updated_fitness_values, num_abandoned)
        """
        # Which rank positions hold a different solution than before
        positions_changed = ~np.all(
            previous_best_sites == current_best_sites,
            axis=1
        )
        # Reset to 0 if the position changed, increment otherwise
        self.stagnation_counter = np.where(
            positions_changed,
            0,
            self.stagnation_counter + 1
        )
        sites_to_abandon = np.flatnonzero(
            self.stagnation_counter >= self.stagnation_limit
        )
        for site_idx in sites_to_abandon:
            new_solution = np.random.randint(2, size=self.instance.num_items)
            new_solution = self.repair_solution(new_solution)

            population[site_idx] = new_solution
            fitness_values[site_idx] = self.evaluate_fitness(new_solution)
            self.stagnation_counter[site_idx] = 0
        return population, fitness_values, len(sites_to_abandon)

    def _rank(self, population):
        """Evaluate every bee and return (population, fitness) sorted best first."""
        fitness_values = np.array([self.evaluate_fitness(sol) for sol in population])
        order = np.argsort(fitness_values)[::-1]
        return population[order], fitness_values[order]

    def _update_best(self, population, fitness_values, iteration):
        """Record the top-ranked bee as global best if it beats the current best."""
        if len(fitness_values) and fitness_values[0] > self.best_value:
            self.best_value = fitness_values[0]
            self.best_solution = population[0].copy()
            self.best_iteration = iteration

    def _build_next_population(self, population, fitness_values):
        """
        Exploit the selected sites and refill the rest with random scouts.

        For every selected site the foragers (``num_bees_elite - 1`` with 10
        neighbors each for elite sites, ``num_bees_selected - 1`` with 5
        neighbors each otherwise) search around the site and only the fittest
        of the site bee and its foragers is kept. Keeping one bee per site
        leaves ``num_scout_bees - num_selected_sites`` slots for scouts, so
        no search result has to be truncated away.
        """
        next_population = []
        for site in range(self.num_selected_sites):
            if site < self.num_elite_sites:
                num_foragers, num_neighbors = self.num_bees_elite - 1, 10
            else:
                num_foragers, num_neighbors = self.num_bees_selected - 1, 5

            site_best = population[site]
            site_best_fitness = fitness_values[site]
            for _ in range(num_foragers):
                forager = self.local_search(population[site], num_neighbors)
                forager_fitness = self.evaluate_fitness(forager)
                if forager_fitness > site_best_fitness:
                    site_best, site_best_fitness = forager, forager_fitness
            next_population.append(site_best)

        remaining = self.num_scout_bees - len(next_population)
        if remaining > 0:
            scout_solutions = np.random.randint(
                2, size=(remaining, self.instance.num_items)
            )
            next_population.extend(
                self.repair_solution(sol) for sol in scout_solutions
            )
        return np.array(next_population)

    def optimize(self):
        """
        Run the Bees Algorithm optimization process.

        Steps:
        1. Initialize and repair random scout bees, rank them and record the
           best one.
        2. For ``max_iterations`` iterations: exploit the selected sites,
           add scouts, rank, abandon stagnant sites, record the global best,
           shrink the neighborhood.

        The global best is recorded at the end of every iteration, so
        improvements found in the final iteration are part of the result.
        Progress is printed every 10 iterations.

        Returns:
            tuple: (best_solution, best_value, stats) where ``stats`` is a
            dict with the keys ``best_value``, ``best_iteration``,
            ``total_time``, ``avg_iteration_time``, ``final_weight``,
            ``convergence_history``, ``is_feasible``,
            ``final_neighborhood_size`` and ``sites_abandoned``.
        """
        start_time = time.time()

        # ====================================================================
        # INITIALIZATION: Generate random scout bees and repair
        # ====================================================================
        initial_solutions = np.random.randint(
            2,
            size=(self.num_scout_bees, self.instance.num_items)
        )
        population = np.array([self.repair_solution(sol) for sol in initial_solutions])
        population, fitness_values = self._rank(population)
        # Guarantees a result even when max_iterations is 0
        self._update_best(population, fitness_values, 0)
        total_abandoned = 0

        # ====================================================================
        # MAIN BA LOOP (population is ranked best first at the top of the loop)
        # ====================================================================
        for iteration in range(self.max_iterations):
            iter_start = time.time()

            # Sites as they stand before this iteration's local search
            previous_best_sites = population[:self.num_selected_sites].copy()

            # Local search around the sites plus fresh scouts, then re-rank
            population = self._build_next_population(population, fitness_values)
            population, fitness_values = self._rank(population)
            current_best_sites = population[:self.num_selected_sites].copy()

            # Site abandonment (check stagnation and replace)
            population, fitness_values, abandoned = self._check_and_abandon_sites(
                population, fitness_values, previous_best_sites, current_best_sites
            )
            total_abandoned += abandoned

            # Abandonment replaces rows in place; restore the ranking using
            # the already-updated fitness values.
            order = np.argsort(fitness_values)[::-1]
            population = population[order]
            fitness_values = fitness_values[order]

            # An abandoned site was unchanged since the previous update, so
            # updating after abandonment cannot miss a better solution.
            self._update_best(population, fitness_values, iteration)
            self.convergence_history.append(self.best_value)

            # Neighborhood shrinking
            self._shrink_neighborhood()
            self.iteration_times.append(time.time() - iter_start)

            # Progress logging (every 10 iterations)
            if (iteration + 1) % 10 == 0:
                print(
                    f"Iteration {iteration + 1}/{self.max_iterations}: "
                    f"Best = {self.best_value:.0f}, "
                    f"Ngh = {self.current_neighborhood_size:.2f}, "
                    f"Abandoned = {total_abandoned}"
                )

        # ====================================================================
        # FINALIZATION: Calculate statistics and return results
        # ====================================================================
        total_time = time.time() - start_time

        # Load of each knapsack for the best solution
        final_weight = np.dot(self.best_solution, self.instance.weights.T).tolist()

        stats = {
            'best_value': self.best_value,
            'best_iteration': self.best_iteration,
            'total_time': total_time,
            # Avoid NaN (and a warning) when no iteration was run
            'avg_iteration_time': (
                np.mean(self.iteration_times) if self.iteration_times else 0.0
            ),
            'final_weight': final_weight,
            'convergence_history': self.convergence_history,
            'is_feasible': self.is_feasible(self.best_solution),
            'final_neighborhood_size': self.current_neighborhood_size,
            'sites_abandoned': total_abandoned
        }

        return self.best_solution, self.best_value, stats

    def optimze(self):
        """Backward-compatible alias of :meth:`optimize` (original misspelled name)."""
        return self.optimize()

In [None]:
class ExperimentRunner:
    """
    Handles running experiments and collecting results.

    Every entry appended to ``self.results`` is a dictionary with two keys:
    ``'instance'`` (the instance object that was solved) and
    ``'run_results'`` (the list returned by ``run_multiple_seeds``).
    """

    def __init__(self, instances):
        """
        Initialize experiment runner.

        Args:
            instances: Collection of knapsack instances to experiment on.
        """
        self.instances = instances
        self.results = []

    def run_multiple_seeds(self, instance, num_runs=10, ba_params=None):
        """
        Run BA multiple times with different seeds.
        Also compute greedy baseline for comparison.

        Args:
            instance (KnapsackInstance): The knapsack instance to solve.
            num_runs (int): Number of runs; the run index (0..num_runs-1)
                is used as the random seed of each run.
            ba_params (dict): Keyword arguments forwarded to
                BeesAlgorithmStandard. ``None`` means no extra arguments.

        Returns:
            List of dictionaries (one per run) with the keys 'run',
            'best_value', 'optimal_value', 'gap', 'time', 'best_iteration',
            'solution', 'convergence', 'final_weight', 'baseline_value',
            'baseline_weights' and 'improvement'.
        """
        ba_params = ba_params or {}
        run_results = []

        # ================================================================
        # COMPUTE BASELINE: Run greedy baseline ONCE before all BA runs
        # ================================================================
        print(f"\nComputing greedy baseline for {instance.name}...")
        baseline_solution, baseline_value = greedy_baseline(instance)
        print(f"✓ Baseline value: {baseline_value}")
        # Weight consumed by the baseline solution in each knapsack
        baseline_weights = np.dot(baseline_solution, instance.weights.T).tolist()

        # ================================================================
        # RUN BEES ALGORITHM: Multiple runs with different seeds
        # ================================================================
        for run in range(num_runs):
            print(f"\n--- Run {run + 1}/{num_runs} for {instance.name} ---")
            # Create a BeesAlgorithmStandard instance seeded with the run index
            ba = BeesAlgorithmStandard(instance, random_seed=run, **ba_params)
            # NOTE(review): the method name is spelled 'optimze' here; confirm
            # it matches the method defined on BeesAlgorithmStandard.
            solution, value, stats = ba.optimze()

            # ================================================================
            # CALCULATE METRICS
            # ================================================================
            # Optimality gap in percent. The truthiness test skips both an
            # unknown optimum (None) and an optimum of 0, which would
            # otherwise divide by zero.
            gap = None
            if instance.optimal_value:
                gap = ((instance.optimal_value - value) / instance.optimal_value * 100)
            # Relative improvement over the greedy baseline in percent;
            # reported as 0 when the baseline value is not positive.
            improvement = ((value - baseline_value) / baseline_value * 100) if baseline_value > 0 else 0

            # Store all relevant results for this run
            run_results.append({
                'run': run,
                'best_value': value,
                'optimal_value': instance.optimal_value,
                'gap': gap,
                'time': stats['total_time'],
                'best_iteration': stats['best_iteration'],
                'solution': solution,
                'convergence': stats['convergence_history'],
                'final_weight': stats['final_weight'],
                'baseline_value': baseline_value,
                'baseline_weights': baseline_weights,
                'improvement': improvement,
            })
        return run_results

    def run_all_instances(self, num_runs=10, ba_params=None):
        """
        Run experiments on all instances and append them to ``self.results``.

        Args:
            num_runs (int): Number of runs per instance.
            ba_params (dict): Parameters for the BeesAlgorithm.
        """
        for instance in self.instances:
            print(f"\n{'='*60}")
            print(f"Solving: {instance.name} ({instance.num_items} items, "
                  f"{instance.num_knapsacks} knapsacks, optimal={instance.optimal_value})")
            print(f"{'='*60}")

            run_results = self.run_multiple_seeds(instance, num_runs, ba_params)
            self.results.append({'instance': instance, 'run_results': run_results})

    def plot_convergence(self, instance_name):
        """
        Plot convergence curves for a specific instance.

        Draws one faint curve per run, the per-iteration average across runs
        and, when the instance has a truthy ``optimal_value``, a horizontal
        reference line at that value. Prints a message and returns without
        plotting when the instance has no stored results or no runs.

        Args:
            instance_name: Name of the instance to plot.
        """
        # Find results for the given instance name
        result = next((r for r in self.results if r['instance'].name == instance_name), None)
        # An entry without any runs cannot be plotted (there is nothing to
        # average), so treat it the same way as a missing entry.
        if not result or not result['run_results']:
            print(f"No results found for {instance_name}")
            return

        runs = result['run_results']
        plt.figure(figsize=(12, 7))

        # Individual runs: light, thin lines so the average stands out
        for run in runs:
            plt.plot(run['convergence'], alpha=0.2, color='lightblue', linewidth=0.8)

        # Average convergence; at each iteration only the runs whose history
        # is long enough contribute to the mean.
        all_conv = [r['convergence'] for r in runs]
        max_len = max(len(c) for c in all_conv)
        avg_conv = [np.mean([c[i] for c in all_conv if i < len(c)]) for i in range(max_len)]
        plt.plot(avg_conv, color='blue', linewidth=3, label='Average', zorder=5)

        # Reference line at the optimal value, if one is known
        if result['instance'].optimal_value:
            plt.axhline(y=result['instance'].optimal_value,
                        color='red', linestyle='--', linewidth=2, label='Optimal', zorder=10)

        plt.xlabel('Iteration', fontsize=12)
        plt.ylabel('Best Value', fontsize=12)
        plt.title(f'Convergence: {instance_name}', fontsize=14, fontweight='bold')
        plt.legend(fontsize=11, loc='lower right')
        plt.grid(True, alpha=0.3, linestyle=':', linewidth=0.5)
        plt.tight_layout()  # Adjust layout to prevent label cutoff
        plt.show()

    def generate_summary_table(self):
        """
        Generate summary table with ONLY the metrics required by the task.

        Task requirements:
        1. Table columns: instance, n, W, best value, weight used, baseline value, improvement %
        2. Statistics: best, average, std dev, worst (over the stored runs)
        3. Secondary: runtime, iterations to best

        Instances whose ``run_results`` list is empty are skipped (with a
        printed notice) because no statistic can be computed for them.

        Returns:
            pd.DataFrame: One row per instance that has at least one run.
        """
        data = []

        for result in self.results:
            inst = result['instance']
            runs = result['run_results']

            # Without runs there is no baseline entry to read and nothing
            # to aggregate.
            if not runs:
                print(f"No runs recorded for {inst.name}; skipping summary row")
                continue

            # ================================================================
            # EXTRACT DATA from all runs
            # ================================================================
            values = [r['best_value'] for r in runs]
            improvements = [r['improvement'] for r in runs]
            final_weights = [r['final_weight'] for r in runs]
            runtimes = [r['time'] for r in runs]
            iters_to_best = [r['best_iteration'] for r in runs]

            # Baseline value (same for all runs)
            baseline_value = runs[0]['baseline_value']

            # ================================================================
            # CALCULATE STATISTICS (as required by task)
            # ================================================================
            best_value = max(values)
            avg_value = np.mean(values)
            std_dev = np.std(values)  # NumPy default: population std (ddof=0)
            worst_value = min(values)

            # Secondary metrics
            avg_runtime = np.mean(runtimes)
            avg_iter_to_best = np.mean(iters_to_best)

            # Weight used, averaged over runs: the single knapsack's weight,
            # or the sum over all knapsacks for multi-knapsack instances.
            if inst.num_knapsacks == 1:
                avg_weight_used = np.mean([w[0] for w in final_weights])
            else:
                avg_weight_used = np.mean([sum(w) for w in final_weights])

            # Total capacity
            total_capacity = sum(inst.capacities) if inst.num_knapsacks > 1 else inst.capacities[0]

            # ================================================================
            # BUILD ROW - ONLY columns required by task
            # ================================================================
            data.append({
                # Primary table columns
                'Instance': inst.name,
                'n': inst.num_items,
                'W': total_capacity,
                'Best Value': best_value,
                'Weight Used': avg_weight_used,             # Average over runs
                'Baseline': baseline_value,
                'Improvement (%)': np.mean(improvements),   # Average over runs

                # Statistics over the runs
                'Avg Value': avg_value,
                'Std Dev': std_dev,
                'Worst Value': worst_value,

                # Secondary metrics
                'Avg Runtime (s)': avg_runtime,
                'Avg Iter to Best': avg_iter_to_best,
            })

        return pd.DataFrame(data)

In [None]:
# ============================================================================
# MAIN EXECUTION
# ============================================================================
# Driver cell: load the benchmark instances, run the Bees Algorithm on each
# of them with a fixed parameter set, then print the summary table and show
# one convergence plot per instance.
rule = "=" * 140  # Separator line used between the report sections

print("Loading instances...")

loader = DataLoader()

# Six named problems from the OR-Library file plus one instance from a
# Pisinger CSV file.
or_instances = loader.load_or_library(
    "data/OR-Library/mknap2.txt",
    ['WEING1', 'WEING2', 'WEISH06', 'WEISH07', 'WEISH26', 'WEISH30'],
)
pisinger_instance = loader.load_pisinger(
    "data/PisingerHard/knapPI_11_50_1000.csv",
    'knapPI_11_50_1000_1',
)

all_instances = or_instances + [pisinger_instance]
print(f"Loaded {len(all_instances)} instances")
for inst in all_instances:
    print(f"  - {inst.name}: {inst.num_items} items, {inst.num_knapsacks} knapsacks")

# ============================================================================
# CONFIGURE AND RUN EXPERIMENTS (FIXED PARAMETERS)
# ============================================================================
# Keyword arguments forwarded unchanged to the algorithm for every run.
ba_params = dict(
    num_scout_bees=100,
    num_selected_sites=20,
    num_elite_sites=10,
    num_bees_elite=30,
    num_bees_selected=15,
    max_iterations=100,
    initial_neighborhood_size=5,
    shrinking_factor=0.8,
    stagnation_limit=15,
)

runner = ExperimentRunner(all_instances)
runner.run_all_instances(num_runs=10, ba_params=ba_params)

# ============================================================================
# DISPLAY RESULTS (Task-Required Table)
# ============================================================================
summary_df = runner.generate_summary_table()
print("\n" + rule)
print("RESULTS TABLE (Task Requirements: instance, n, W, best value, weight used, baseline, improvement %)")
print("Plus: Statistics over 10 runs (avg, std dev, worst) + Secondary metrics (runtime, iter to best)")
print(rule)
print(summary_df.to_string(index=False))

# ============================================================================
# PLOT CONVERGENCE CURVES (Task Requirement)
# ============================================================================
print("\n" + rule)
print("CONVERGENCE PLOTS (Task Requirement: iteration vs best value)")
print(rule)

for instance in all_instances:
    runner.plot_convergence(instance.name)

print("\n" + rule)
print("EXPERIMENTS COMPLETED!")
print(rule)