In [5]:
import re
import time
from typing import Dict, List, Optional, Tuple

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

class KnapsackInstance:
    """
    Class to store knapsack problem instance information.

    The constructor stores its arguments as attributes of the same name;
    nothing is validated, converted, or copied.
    """

    def __init__(self, name: str, num_items: int,
                 num_knapsacks: int, capacities: List[int], profits: List[int],
                 weights: np.ndarray, optimal_value: Optional[int] = None):
        """
        Initialize a knapsack instance.

        Args:
            name: Instance name/identifier.
            num_items: Number of items (n).
            num_knapsacks: Number of knapsacks.
            capacities: Capacity constraint of each knapsack.
            profits: Profit/value of each item.
            weights: Weight matrix. The loaders in this file build it with
                one row per knapsack and one column per item.
            optimal_value: Known optimal value, or None when it is unknown.
        """
        self.name = name
        self.num_items = num_items
        self.num_knapsacks = num_knapsacks
        self.capacities = capacities
        self.profits = profits
        self.weights = weights
        self.optimal_value = optimal_value

    def __repr__(self) -> str:
        """Return a short summary (the item data itself is omitted)."""
        return (
            f"{type(self).__name__}(name={self.name!r}, "
            f"num_items={self.num_items}, "
            f"num_knapsacks={self.num_knapsacks}, "
            f"optimal_value={self.optimal_value})"
        )

In [6]:
class DataLoader:
    """
    Handles loading of knapsack instances from different file formats.
    """

    def load_or_library(self, filepath: str,
                        instance_names: List[str]) -> "List[KnapsackInstance]":
        """
        Load OR-Library instances from mknap2.txt file.

        Each requested instance is located by searching for its name. The
        instance text runs from the name up to the next line that starts
        with an upper-case letter, or to the end of the file. Its first line
        is read as ``<name> <n> <m> <optimum>``; the remaining lines must
        hold whitespace-separated integers laid out as n profits, then m
        rows of n weights, then m capacities.

        Args:
            filepath: Path of the text file to read.
            instance_names: Names of the instances to extract. They are
                matched literally, not as regular expressions.

        Returns:
            One KnapsackInstance per name that was found, in the order
            requested. Names that are not found are skipped after printing
            a warning.

        Raises:
            ValueError: If a token after the header is not an integer, or
                if an instance holds fewer numbers than its header implies.
        """
        # The file must be read as one string: re.search() cannot operate on
        # the list of lines that readlines() returns.
        with open(filepath, 'r') as file:
            content = file.read()

        instances = []
        for name in instance_names:
            # Escape the name so characters such as '.' match literally.
            pattern = rf"{re.escape(name)}.*?(?=\n[A-Z]+|\Z)"
            match = re.search(pattern, content, re.DOTALL)
            if not match:
                print(f"Warning: Instance {name} not found")
                continue

            lines = match.group(0).strip().split('\n')
            header = lines[0].split()
            n, m, opt = int(header[1]), int(header[2]), int(header[3])

            # Parse all numbers after header
            numbers = [int(token) for line in lines[1:] for token in line.split()]

            # Fail loudly instead of silently building truncated sections.
            expected = n + m * n + m
            if len(numbers) < expected:
                raise ValueError(
                    f"Instance {name}: expected {expected} values "
                    f"after the header, found {len(numbers)}"
                )

            # Extract sections
            profits = numbers[:n]
            weights = [numbers[n + i * n: n + (i + 1) * n] for i in range(m)]
            capacities = numbers[n + m * n: expected]

            instances.append(KnapsackInstance(
                header[0], n, m, capacities, profits, np.array(weights), opt
            ))

        return instances

    def load_pisinger(self, filepath: str, instance_name: str) -> "KnapsackInstance":
        """
        Load Pisinger instance from CSV file.

        The instance is located by its name line. Its block runs up to the
        next line starting with ``knapPI_`` or to the end of the file, and
        is read as: ``n <items>``, ``c <capacity>``, ``z <optimum>``, one
        line that is skipped, then n comma-separated item rows.

        Args:
            filepath: Path of the CSV file to read.
            instance_name: Name of the instance to extract. It is matched
                literally, not as a regular expression.

        Returns:
            A single-knapsack KnapsackInstance whose weight matrix has
            shape (1, n).

        Raises:
            ValueError: If the instance is not found, if it holds fewer
                than n item rows, or if a numeric field is not an integer.
        """
        with open(filepath, 'r') as file:
            content = file.read()

        # Escape the name so characters such as '.' match literally.
        pattern = rf"{re.escape(instance_name)}\n(.*?)(?=\nknapPI_|\Z)"
        match = re.search(pattern, content, re.DOTALL)
        if not match:
            raise ValueError(f"Instance {instance_name} not found")
        lines = match.group(1).strip().split('\n')
        n = int(lines[0].split()[1])
        capacity = int(lines[1].split()[1])
        optimal = int(lines[2].split()[1])

        # Item rows start after the four header lines (n, c, z, and one
        # skipped line).
        item_lines = lines[4:4 + n]
        if len(item_lines) < n:
            raise ValueError(
                f"Instance {instance_name}: expected {n} item rows, "
                f"found {len(item_lines)}"
            )

        # Pisinger's published files list each item as
        # ``index,profit,weight,x``: profit is column 1 and weight is
        # column 2. NOTE(review): confirm against the data file in use.
        profits, weights = [], []
        for line in item_lines:
            parts = line.split(',')
            profits.append(int(parts[1]))
            weights.append(int(parts[2]))

        return KnapsackInstance(
            instance_name, n, 1, [capacity], profits, np.array([weights]), optimal
        )