In [10]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from typing import List, Tuple, Dict
import time
import re

class KnapsackInstance:
    """
    Plain container holding the data of one knapsack problem instance.
    """

    def __init__(
        self,
        name: str,
        num_items: int,
        num_knapsacks: int,
        capacities: List[int],
        profits: List[int],
        weights: np.ndarray,
        optimal_value: int = None,
    ):
        """
        Store the given instance data on the object as-is (no copy, no validation).

        Args:
            name: Instance name/identifier
            num_items: Number of items (n)
            num_knapsacks: Number of knapsacks
            capacities: Capacity constraint of each knapsack
            profits: Profit/value of each item
            weights: Weight matrix
            optimal_value: Known optimal value (None when not supplied)
        """
        # Identification and problem size.
        self.name, self.num_items, self.num_knapsacks = name, num_items, num_knapsacks
        # Problem data; the very objects passed in are kept.
        self.capacities, self.profits, self.weights = capacities, profits, weights
        # Reference value for comparing solver results.
        self.optimal_value = optimal_value

In [11]:
class DataLoader:
    """
    Handles loading of knapsack instances from different file formats.

    ``load_or_library`` reads several named instances out of one OR-Library
    text file; ``load_pisinger`` reads a single named instance out of a
    Pisinger file.
    """

    @staticmethod
    def _strip_comment(line: str) -> str:
        """Return ``line`` without a trailing ``//`` comment and outer whitespace."""
        return line.split('//', 1)[0].strip()

    def load_or_library(self, filepath, instance_names):
        """Load OR-Library instances from mknap2.txt file.

        Each requested instance is located through its ``problem <name>.DAT``
        header (matched case-insensitively). The first non-empty line after
        the header that does not start with ``++++`` must hold ``m n``. Every
        integer token between that line and the next ``problem `` header is
        then consumed in this order: ``n`` profits, ``m`` capacities and
        ``m`` rows of ``n`` weights. The last integer of the block is taken
        as the known optimal value.

        Args:
            filepath: Path of the text file to read.
            instance_names: Iterable of instance names to extract.

        Returns:
            List of KnapsackInstance objects in the order requested.
            Instances that are missing or malformed are reported with a
            printed warning and skipped rather than raising.
        """
        # Read the entire file content
        with open(filepath, 'r') as file:
            content = file.read()
        content_lower = content.lower()
        total_len = len(content)

        instances = []
        for name in instance_names:
            # Find the start of the instance block by name
            header_idx = content_lower.find(f"problem {name}.DAT".lower())
            if header_idx == -1:
                print(f"Warning: Instance {name} not found")
                continue

            # Find the first line after the instance header
            newline_idx = content.find('\n', header_idx)
            if newline_idx == -1:
                print(f"Warning: Malformed block for {name}")
                continue

            # Skip empty/comment/++++ lines to find the m n line. Both names
            # are initialised up front so a header sitting on the very last
            # line of the file produces a warning instead of an unbound-name
            # error.
            dims_line = ""
            cursor = newline_idx + 1
            line_end = cursor
            while cursor < total_len:
                line_end = content.find('\n', cursor)
                if line_end == -1:
                    line_end = total_len
                candidate = self._strip_comment(content[cursor:line_end])
                if candidate and not candidate.startswith('++++'):
                    dims_line = candidate
                    break
                cursor = line_end + 1
            # If no valid m n line found, skip this instance
            if not dims_line:
                print(f"Warning: Could not find m,n for {name}")
                continue

            # Parse m (number of knapsacks) and n (number of items)
            parts = dims_line.split()
            if len(parts) < 2:
                print(f"Warning: Could not parse m,n for {name}")
                continue
            try:
                m, n = int(parts[0]), int(parts[1])
            except ValueError:
                # A non-numeric line here means the block is malformed; skip
                # it like the other malformed cases instead of aborting the
                # whole load.
                print(f"Warning: Could not parse m,n for {name}")
                continue

            # Find the data block for this instance: everything up to the
            # next "problem " header (or the end of the file)
            data_start = line_end + 1
            next_problem_idx = content_lower.find("problem ", data_start)
            data_end = next_problem_idx if next_problem_idx != -1 else total_len

            # Parse all numbers in the data block, skipping comments/empty/++++ lines
            numbers = []
            for raw_line in content[data_start:data_end].split('\n'):
                cleaned = self._strip_comment(raw_line)
                if not cleaned or cleaned.startswith('++++'):
                    continue
                for token in cleaned.split():
                    try:
                        numbers.append(int(token))
                    except ValueError:
                        # Tokens that are not integers are ignored
                        continue

            # Check if we have enough numbers for profits, capacities, weights, and optimal value
            expected_count = n + m + (m * n) + 1
            if len(numbers) < expected_count:
                print(f"Warning: Not enough numbers for {name}. Expected {expected_count}, got {len(numbers)}")
                continue

            # Extract optimal value (last number of the block)
            opt = numbers[-1]
            numbers = numbers[:-1]
            # Extract profits and capacities
            profits = numbers[:n]
            capacities = numbers[n:n + m]
            # Extract weights as 2d list (m rows, n columns)
            weight_start = n + m
            weights = [
                numbers[weight_start + row * n:weight_start + (row + 1) * n]
                for row in range(m)
            ]
            # Create and store the KnapsackInstance
            instances.append(KnapsackInstance(
                name, n, m, capacities, profits, np.array(weights), opt
            ))

        return instances


    def load_pisinger(self, filepath, instance_name):
        """
        Load Pisinger instance from CSV file.

        The block for ``instance_name`` starts on the line after the name and
        runs to the next line starting with ``knapPI_`` (or the end of the
        file). Its first three lines supply ``n``, the capacity and the
        optimal value (second whitespace-separated field of each line); the
        fourth line is skipped and the following ``n`` comma-separated lines
        are the items.

        Args:
            filepath: Path of the file to read.
            instance_name: Name of the instance block to extract.

        Returns:
            A KnapsackInstance with a single knapsack.

        Raises:
            ValueError: If the instance is not found, or its block holds
                fewer than ``n`` item rows.
        """
        # Read the file content
        with open(filepath, 'r') as file:
            content = file.read()
        # Find the block for the given instance name using regex. The name is
        # escaped so that regex metacharacters in it are matched literally.
        pattern = rf"{re.escape(instance_name)}\n(.*?)(?=\nknapPI_|\Z)"
        match = re.search(pattern, content, re.DOTALL)
        if not match:
            raise ValueError(f"Instance {instance_name} not found")
        lines = match.group(1).strip().split('\n')
        n = int(lines[0].split()[1])
        capacity = int(lines[1].split()[1])
        optimal = int(lines[2].split()[1])

        # Parse items (skip header, parse weight and profit).
        # NOTE(review): column 1 is read as weight and column 2 as profit.
        # Pisinger's instance files are commonly described as
        # "index,profit,weight,x" - confirm the column order of this data set.
        weights, profits = [], []
        for line in lines[4:4 + n]:
            parts = line.split(',')
            weights.append(int(parts[1]))
            profits.append(int(parts[2]))
        if len(weights) != n:
            # Slicing past the end is silent, so a truncated block would
            # otherwise yield an instance whose data disagrees with num_items.
            raise ValueError(
                f"Instance {instance_name}: expected {n} items, found {len(weights)}"
            )

        return KnapsackInstance(
            instance_name, n, 1, [capacity], profits, np.array([weights]), optimal
        )

In [12]:
import numpy as np


def test_dataloader():
    """
    Test DataLoader to verify it can read all required instances correctly.

    Loads six named OR-Library instances and one Pisinger instance from fixed
    relative paths, prints a report, and checks the loaded dimensions against
    the expected ones together with basic integrity assertions.

    Returns:
        True only when every requested instance was loaded, has the expected
        dimensions and passed the integrity assertions; False otherwise
        (including when loading raises or an assertion fails).
    """
    print("="*80)
    print("TESTING DATALOADER")
    print("="*80)

    loader = DataLoader()

    # Test OR-Library instances
    print("\n1. Testing OR-Library instances...")
    print("-" * 80)

    or_library_path = "data/OR-Library/mknap2.txt"
    instance_names = ['WEING1', 'WEING2', 'WEISH06', 'WEISH07', 'WEISH26', 'WEISH30']

    try:
        or_instances = loader.load_or_library(or_library_path, instance_names)
        print(f"✓ Successfully loaded {len(or_instances)} OR-Library instances")

        # Expected values
        expected = {
            'WEING1': {'items': 28, 'knapsacks': 2},
            'WEING2': {'items': 28, 'knapsacks': 2},
            'WEISH06': {'items': 40, 'knapsacks': 5},
            'WEISH07': {'items': 40, 'knapsacks': 5},
            'WEISH26': {'items': 90, 'knapsacks': 5},
            'WEISH30': {'items': 90, 'knapsacks': 5}
        }

        print("\nInstance Details:")
        all_correct = True
        for instance in or_instances:
            exp = expected.get(instance.name, {})
            dims_ok = (instance.num_items == exp.get('items')
                       and instance.num_knapsacks == exp.get('knapsacks'))

            status = "✓" if dims_ok else "✗"
            print(f"  {status} {instance.name}: "
                  f"{instance.num_items} items, "
                  f"{instance.num_knapsacks} knapsacks, "
                  f"optimal={instance.optimal_value}")

            # Verify data integrity
            assert instance.num_items > 0, f"{instance.name}: Invalid num_items"
            assert instance.num_knapsacks > 0, f"{instance.name}: Invalid num_knapsacks"
            assert len(instance.profits) == instance.num_items, f"{instance.name}: Profits length mismatch"
            assert instance.weights.shape == (instance.num_knapsacks, instance.num_items), \
                f"{instance.name}: Weights shape mismatch"
            assert len(instance.capacities) == instance.num_knapsacks, \
                f"{instance.name}: Capacities length mismatch"
            assert instance.optimal_value is not None, f"{instance.name}: Missing optimal value"

            if not dims_ok:
                all_correct = False
                print(f"    Expected: {exp.get('items')} items, {exp.get('knapsacks')} knapsacks")

        # The loader only warns about instances it cannot find or parse, so a
        # requested instance that is absent from the result must be detected here.
        loaded_names = {instance.name for instance in or_instances}
        missing = [name for name in instance_names if name not in loaded_names]
        if missing:
            all_correct = False
            print(f"✗ Missing OR-Library instances: {', '.join(missing)}")

        if all_correct:
            print("\n✓ All OR-Library instances have correct dimensions")

    except Exception as e:
        print(f"✗ Error loading OR-Library instances: {e}")
        return False

    # Test Pisinger instance
    print("\n" + "-" * 80)
    print("2. Testing Pisinger instance...")
    print("-" * 80)

    pisinger_path = "data/PisingerHard/knapPI_11_50_1000.csv"
    pisinger_name = 'knapPI_11_50_1000_1'

    try:
        pisinger_instance = loader.load_pisinger(pisinger_path, pisinger_name)
        print(f"✓ Successfully loaded Pisinger instance: {pisinger_name}")

        print("\nInstance Details:")
        print(f"  ✓ {pisinger_instance.name}: "
              f"{pisinger_instance.num_items} items, "
              f"{pisinger_instance.num_knapsacks} knapsack(s), "
              f"capacity={pisinger_instance.capacities[0]}, "
              f"optimal={pisinger_instance.optimal_value}")

        # Verify data integrity
        assert pisinger_instance.num_items == 50, "Expected 50 items"
        assert pisinger_instance.num_knapsacks == 1, "Expected 1 knapsack"
        assert len(pisinger_instance.profits) == 50, "Profits length mismatch"
        assert pisinger_instance.weights.shape == (1, 50), "Weights shape mismatch"
        assert len(pisinger_instance.capacities) == 1, "Capacities length mismatch"
        assert pisinger_instance.optimal_value is not None, "Missing optimal value"

        print("\n✓ Pisinger instance has correct dimensions")

    except Exception as e:
        print(f"✗ Error loading Pisinger instance: {e}")
        return False

    # Summary
    print("\n" + "="*80)
    print("SUMMARY")
    print("="*80)
    if not all_correct:
        # Do not report success when an OR-Library instance was skipped by
        # the loader or came back with unexpected dimensions.
        print("✗ OR-Library instances are missing or have unexpected dimensions")
        return False

    total_instances = len(or_instances) + 1
    print(f"✓ Successfully loaded and validated {total_instances} instances:")
    print(f"  - {len(or_instances)} OR-Library instances (2 small, 2 medium, 2 large)")
    print("  - 1 Pisinger instance")
    print("\n✓ All data integrity checks passed")
    print("✓ DataLoader is working correctly!")

    return True

# Run the test when this code is executed directly (not on import); the
# boolean returned by test_dataloader() is not used here.
if __name__ == "__main__":
    test_dataloader()

TESTING DATALOADER

1. Testing OR-Library instances...
--------------------------------------------------------------------------------
✓ Successfully loaded 6 OR-Library instances

Instance Details:
  ✓ WEING1: 28 items, 2 knapsacks, optimal=141278
  ✓ WEING2: 28 items, 2 knapsacks, optimal=130883
  ✓ WEISH06: 40 items, 5 knapsacks, optimal=5557
  ✓ WEISH07: 40 items, 5 knapsacks, optimal=5567
  ✓ WEISH26: 90 items, 5 knapsacks, optimal=9584
  ✓ WEISH30: 90 items, 5 knapsacks, optimal=11191

✓ All OR-Library instances have correct dimensions

--------------------------------------------------------------------------------
2. Testing Pisinger instance...
--------------------------------------------------------------------------------
✓ Successfully loaded Pisinger instance: knapPI_11_50_1000_1

Instance Details:
  ✓ knapPI_11_50_1000_1: 50 items, 1 knapsack(s), capacity=970, optimal=1428

✓ Pisinger instance has correct dimensions

SUMMARY
✓ Successfully loaded and validated 7 instance

In [None]:
class BeesAlgorithm: 
    """ 
    Bees Algorithm implementation for 0-1 Knapsack Problem.
    """
    def __init__(self,instance, num_scout_bees=50,num_selected_sites=10,
                 num_elite_site=5, num_bees_elite=20, num_bees_selected=10,
                 max_iterations=100, neighborhood_size=5, random_seed=None):
        """ 
        Initialize Bees Algorithm with parameters
        """
        self.instance = instance
        self.num_scout_bees = num_scout_bees
        self.num_selected_sites = num_selected_sites
        self.num_elite_site = num_elite_site
        self.num_bees_elit = num_bees_elite
        self.num_bees_selected = num_bees_selected
        self.max_iterations = max_iterations
        self.neighborhood_size = neighborhood_size

        if random_seed is not None:
            np.random.seed(random_seed)
        self.best_solution = None
        self.best_value = -np.inf 
        self.best_iteration = 0
        self.convergence_history = []
        self.iteration_times = []
    
    def initialize_solution(self):
        """ 
        Generate a random binary solution 
        """
        return np.random.randint(2,size=self.instance.num_items)
    
    def evaluate_solution(self,solution):
        """ 
        Evaluate a solution and return its total profit if feasible, else 0.
        """
        profit = np.dot(solution, self.instance.profits)

        if self.is_feasible(solution):
            return profit

        # Penalty for contraint violations
        violation = sum(max(0,np.dot(solution, self.instance.weights[k])- self.instance.capacities[k]))
        for k in range(self.instance.num_knapsacks):
            return profit - 1000 * violation
    