In [1]:
#IMPORT NECESSARY LIBRARIES

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import os
import sys
import time
import gurobipy as gp
from gurobipy import GRB
import networkx as nx

In [2]:
# filepath = "C:/Users/20204018/OneDrive - TU Eindhoven/Documents/Master DSAI/YEAR 1 Q1/Optimization For DS/KidneyExchangeOptimization/Instance Files/Delorme_1000_NDD_Unit_0.txt"

In [3]:
def import_kidney_data(filepath):
    """
    Parse a kidney exchange instance file into a plain dictionary.

    Expected layout (as read by this parser):
        * lines 1-3: header lines whose third space-separated token is an
          integer (number of pairs, number of NDDs, number of arcs);
        * next ``num_pairs + num_ndd`` lines: five comma-separated integers
          ``id,is_ndd,donor_blood_type,patient_blood_type,patient_vpra``;
        * remaining lines: arcs written as ``(donor,patient),1,weight``.

    Args:
        filepath (str): The path to the text file containing the kidney exchange data.

    Returns:
        dict: A dictionary containing the following keys:
            - num_pairs (int): The number of donor-patient pairs in the instance.
            - num_ndd (int): The number of non-directed donors (NDDs) in the instance.
            - num_arcs (int): The number of arcs declared in the header.
            - pairs (list): One dictionary per pair/NDD with the keys:
                - id (int): The unique ID of the pair.
                - is_ndd (bool): True if the entry is an NDD, False otherwise.
                - donor_blood_type (int): Donor's blood type code (0 = A, 1 = B, 2 = AB, 3 = O).
                - patient_blood_type (int): Patient's blood type code (0 = A, 1 = B, 2 = AB, 3 = O).
                - patient_vpra (int): Patient's vPRA class (0 = below 0.5, 1 = between 0.5 and 0.85, 2 = above 0.85).
            - arcs (list): One dictionary per arc with the keys:
                - donor_id (int): ID of the donor pair.
                - patient_id (int): ID of the patient pair.
                - weight (int): Integer weight read from the last field of the arc line.

    Raises:
        ValueError: If a header, pair or non-blank arc line does not match the layout above.
        IndexError: If the file has fewer than three header lines.

    Example:
        data = import_kidney_data('path/to/data.txt')
        print(data['num_pairs'])  # Output: Number of pairs in the instance
    """
    # Read everything up front so the file handle is released before parsing.
    with open(filepath, 'r') as f:
        lines = f.readlines()

    # Header: the count is the third space-separated token of each line.
    num_pairs = int(lines[0].split(' ')[2])
    num_ndd = int(lines[1].split(' ')[2])
    num_arcs = int(lines[2].split(' ')[2])

    # Pairs and NDDs share one contiguous section right after the header.
    num_things = num_pairs + num_ndd

    pairs = []
    for line in lines[3:num_things + 3]:
        # `pair_id` rather than `id` so the builtin is not shadowed.
        pair_id, is_ndd, donor_blood_type, patient_blood_type, patient_vpra = map(int, line.strip().split(','))
        pairs.append({
            'id': pair_id,
            'is_ndd': bool(is_ndd),
            'donor_blood_type': donor_blood_type,
            'patient_blood_type': patient_blood_type,
            'patient_vpra': patient_vpra,
        })

    arcs = []
    for line in lines[num_things + 3:]:
        record = line.strip()
        if not record:
            # Blank lines (typically trailing ones) carry no arc; skipping them
            # avoids an unpacking error on otherwise valid files.
            continue

        arc, weight = record.split(',1,')

        # `arc` looks like "(donor,patient)": strip the parentheses while converting.
        donor_id, patient_id = arc.split(',')
        arcs.append({
            'donor_id': int(donor_id[1:]),
            'patient_id': int(patient_id[:-1]),
            'weight': int(weight.strip()),
        })

    return {
        'num_pairs': num_pairs,
        'num_ndd': num_ndd,
        'num_arcs': num_arcs,
        'pairs': pairs,
        'arcs': arcs,
    }


# Example usage of the function
#data = import_kidney_data(filepath)

In [4]:
def create_graph(data):
    """
    Build the directed compatibility graph for an instance.

    Parameters:
    data (dict): Instance dictionary that provides:
        - 'pairs': list of dictionaries, each with an 'id' that becomes a node.
        - 'arcs': list of dictionaries, each with 'donor_id' (tail node),
                  'patient_id' (head node) and 'weight' (stored as the edge
                  attribute 'weight').

    Returns:
    G (nx.DiGraph): The graph holding one node per entry of 'pairs' and one
                    weighted edge per entry of 'arcs'.
    """
    graph = nx.DiGraph()

    # Nodes first, so entries without any arc still appear in the graph.
    graph.add_nodes_from(entry['id'] for entry in data['pairs'])

    # Bulk-insert the arcs; the third tuple item is stored under 'weight'.
    graph.add_weighted_edges_from(
        (arc['donor_id'], arc['patient_id'], arc['weight']) for arc in data['arcs']
    )

    return graph


def findPaths(G, u, n):
    """
    Enumerate every walk with exactly n arcs that starts at node u.

    The result contains walks, not simple paths: a node may appear more than
    once in the same walk whenever the graph allows it.

    Parameters:
    G (nx.DiGraph): A directed graph (only ``G.neighbors`` is used).
    u (hashable): The node every walk starts from.
    n (int): The number of arcs in each walk.

    Returns:
    paths (list): All walks of n arcs from u, each as a list of n + 1 nodes,
                  ordered by the neighbour iteration order of the graph.
    """
    # Zero arcs left: the walk consists of the current node only.
    if n == 0:
        return [[u]]

    found = []
    for successor in G.neighbors(u):
        # Every shorter walk out of the successor extends to a walk out of u.
        for tail in findPaths(G, successor, n - 1):
            found.append([u, *tail])

    return found


def find_cycles(G, u, n):
    """
    Finds all simple cycles with exactly n arcs that start and end at node u.

    A cycle is simple when its n nodes are pairwise distinct; closed walks that
    pass through the same intermediate node twice (for example u, a, b, a, u)
    are not cycles and are not returned. The search is a depth-first expansion
    that abandons a branch as soon as it would revisit a node.

    Parameters:
    G (nx.DiGraph): A directed graph (only ``G.neighbors`` is used).
    u (hashable): The node where the cycle starts and ends.
    n (int): The length of the cycle (number of arcs in the cycle).

    Returns:
    cycles (list): All simple cycles of n arcs through u. Each cycle is a tuple
                   of n + 1 nodes whose first and last element are both u.
                   Returns an empty list when n < 1.
    """
    cycles = []
    if n < 1:
        # A cycle needs at least one arc.
        return cycles

    def _extend(walk, visited):
        tail = walk[-1]

        if len(walk) == n:
            # All n distinct nodes are placed; only the closing arc back to u is missing.
            if any(successor == u for successor in G.neighbors(tail)):
                cycles.append(tuple(walk) + (u,))
            return

        for successor in G.neighbors(tail):
            if successor in visited:
                # Revisiting a node (including u before the last step) cannot
                # produce a simple cycle, so prune this branch.
                continue
            visited.add(successor)
            walk.append(successor)
            _extend(walk, visited)
            walk.pop()
            visited.remove(successor)

    _extend([u], {u})
    return cycles


def set_weight(dict_list, donor_id, patient_id, weight_key="weight"):
    """
    Look up the weight stored for the arc from a donor to a patient.

    Despite its name, this function only reads: it scans the arcs in order and
    reports the value of the first one whose endpoints match.

    Args:
        dict_list (list): List of dictionaries representing arcs.
        donor_id (int): The ID of the donor.
        patient_id (int): The ID of the patient.
        weight_key (str): The key under which the weight is stored (default is "weight").

    Returns:
        int: The value stored under ``weight_key`` in the first matching arc,
        or None when no arc matches (or the matching arc lacks that key).
    """
    matching_weights = (
        arc.get(weight_key)
        for arc in dict_list
        if arc.get("donor_id") == donor_id and arc.get("patient_id") == patient_id
    )
    # The generator is lazy, so the scan stops at the first matching arc.
    return next(matching_weights, None)

In [5]:
def calculate_cycles_and_paths(G, data, k):
    """
    Enumerates the candidate cycles and NDD-initiated chains for one value of k.

    Cycles with 2..k arcs are collected through ``find_cycles`` for every node.
    Chains with 1..k-2 arcs are collected through ``findPaths`` for every node
    flagged as an NDD; walks that visit the same node twice are discarded,
    because a pair cannot take part in a chain more than once.

    Parameters:
    G (nx.DiGraph): The directed graph.
    data (dict): The kidney exchange data; 'pairs' entries provide 'id',
                 'is_ndd' and 'patient_vpra', 'arcs' entries provide
                 'donor_id', 'patient_id' and 'weight'.
    k (int): Upper bound on the number of arcs per cycle; chains use up to
             k - 2 arcs.

    Returns:
    c (list): One dict per cycle with keys 'id', 'cycle' (tuple of nodes as
              returned by ``find_cycles``), 'vpra_sum' and 'weight_sum'.
    p (list): One dict per chain with keys 'id', 'path' (list of nodes starting
              at the NDD), 'vpra_sum' and 'weight_sum'.
              IDs are unique across both lists (chains continue the cycle numbering).

    Raises:
    KeyError: If a node has no entry in data['pairs'] or an arc used by a
              cycle/chain is missing from data['arcs'].
    """
    # Look pairs up by their ID instead of relying on list position == ID.
    pairs_by_id = {pair['id']: pair for pair in data['pairs']}

    # One pass over the arcs replaces a linear scan per arc of every cycle/chain.
    # setdefault keeps the first occurrence, matching a first-match scan.
    arc_weights = {}
    for arc in data['arcs']:
        arc_weights.setdefault((arc.get('donor_id'), arc.get('patient_id')), arc.get('weight'))

    def _score(nodes):
        """Return (sum of recipient vPRA values, sum of arc weights) along `nodes`."""
        vpra_sum = 0
        weight_sum = 0
        for donor, patient in zip(nodes, nodes[1:]):
            vpra_sum += pairs_by_id[patient]['patient_vpra']
            weight_sum += arc_weights[(donor, patient)]
        return vpra_sum, weight_sum

    c = []  # List to store information about cycles
    p = []  # List to store information about paths
    id_count = 0

    # Cycles: lengths 2, 3, ..., k (number of arcs)
    for l in range(2, k+1):
        for node in G.nodes:
            for cyc in find_cycles(G, node, l):
                cyc_success, cyc_weight = _score(cyc)
                c.append({'id': id_count, 'cycle': cyc, 'vpra_sum': cyc_success, 'weight_sum': cyc_weight})
                id_count += 1

    # Chains: lengths 1, 2, ..., k-2 (number of arcs), starting at NDDs only
    for l in range(1, k-1):
        for node in G.nodes:
            if not pairs_by_id[node]['is_ndd']:
                continue
            for path in findPaths(G, node, l):
                if len(set(path)) != len(path):
                    # findPaths returns walks; one that revisits a node is not a valid chain.
                    continue
                path_success, path_weight = _score(path)
                p.append({'id': id_count, 'path': path, 'vpra_sum': path_success, 'weight_sum': path_weight})
                id_count += 1

    # Return cycles and paths
    return c, p



# Solve the cycle/chain selection model with Gurobi (branch-and-bound)


In [6]:
def solve_kidney_exchange(G, cycles, paths, k, time_limit=300, mip_gap = 0.001):
    """
    Solves the kidney exchange problem using Gurobi's branch-and-bound algorithm.

    One binary variable is created per candidate cycle and per candidate path.
    The objective maximises the summed 'weight_sum' of the selected candidates,
    subject to every node of G appearing in at most one selected candidate.

    Parameters:
    G (nx.DiGraph): A directed graph representing the kidney exchange network.
    cycles (list of dicts): List of cycles, where each dict contains:
        - 'id': Unique ID for the cycle.
        - 'cycle': Sequence of nodes in the cycle.
        - 'weight_sum': Sum of weights (or values) for the edges in the cycle.
    paths (list of dicts): List of paths (chains from NDD), where each dict contains:
        - 'id': Unique ID for the path.
        - 'path': Sequence of nodes in the path.
        - 'weight_sum': Sum of weights (or values) for the edges in the path.
    k (int): Cycles whose node sequence has more than k entries are forced to 0.
    time_limit (int): The time limit for solving the model (in seconds).
    mip_gap (float): The MIP optimality gap tolerance.

    Returns:
    dict: A summary with the keys 'total_score', 'selected_cycles',
          'selected_paths' and 'optimization_info'. When the solver ends without
          a usable solution (status other than OPTIMAL/TIME_LIMIT, or a time
          limit hit before any incumbent was found) the summary is returned
          with its initial empty values.
    """

    # Start timer for performance tracking
    start_time = time.time()

    # Initialize the model
    model = gp.Model("KidneyExchange")

    # Step 1: Define Binary decision variables for cycles and paths
    cycle_vars = {cycle['id']: model.addVar(vtype=GRB.BINARY, name=f"cycle_{cycle['id']}") for cycle in cycles}
    path_vars = {path['id']: model.addVar(vtype=GRB.BINARY, name=f"path_{path['id']}") for path in paths}

    # Step 2: Set objective function to maximize total weight of selected cycles and paths
    obj = gp.quicksum(cycle['weight_sum'] * cycle_vars[cycle['id']] for cycle in cycles) \
        + gp.quicksum(path['weight_sum'] * path_vars[path['id']] for path in paths)
    model.setObjective(obj, GRB.MAXIMIZE)

    # Step 3: Each vertex (pair or NDD) may be in at most one selected cycle or path.
    # Collect the variables touching each node in a single pass over the
    # candidates instead of scanning every candidate once per node.
    vars_by_node = {node: [] for node in G.nodes}
    for cycle in cycles:
        # set(): a node repeated in the sequence (e.g. the closing node) counts once.
        for node in set(cycle['cycle']):
            if node in vars_by_node:
                vars_by_node[node].append(cycle_vars[cycle['id']])
    for path in paths:
        for node in set(path['path']):
            if node in vars_by_node:
                vars_by_node[node].append(path_vars[path['id']])

    for node, node_vars in vars_by_node.items():
        model.addConstr(gp.quicksum(node_vars) <= 1, name=f"vertex_disjoint_{node}")

    # Step 4: Disable cycles whose node sequence is longer than k.
    # NOTE(review): if the sequence repeats its start node at the end (as the
    # tuples built by find_cycles do), a cycle with exactly k arcs has k + 1
    # entries and is disabled here as well — confirm this is intended.
    for cycle in cycles:
        if len(cycle['cycle']) > k:
            model.addConstr(cycle_vars[cycle['id']] == 0, name=f"max_cycle_size_{cycle['id']}")

    # Step 5: Set solver parameters
    model.Params.TimeLimit = time_limit  # Set a time limit (in seconds)
    model.Params.MIPGap = mip_gap       # Set a MIP optimality gap tolerance

    # Solve the model
    model.optimize()

    # Initialize solution dictionary
    solution = {
        'total_score': 0,
        'selected_cycles': [],
        'selected_paths': [],
        'optimization_info': {}
    }

    # Variable values exist only when the solver holds at least one solution;
    # a run stopped by the time limit may have none.
    has_solution = model.SolCount > 0

    if model.status in [GRB.OPTIMAL, GRB.TIME_LIMIT] and has_solution:
        if model.status == GRB.OPTIMAL:
            print("Optimal solution found!")
        elif model.status == GRB.TIME_LIMIT:
            print("Time limit reached. Returning best feasible solution found.")

        total_score = 0

        # Extract selected cycles
        for cycle in cycles:
            if cycle_vars[cycle['id']].X > 0.5:  # Binary value, tolerant of solver round-off
                total_score += cycle['weight_sum']
                solution['selected_cycles'].append({
                    'type': "Cycle",
                    'size': len(cycle['cycle']),  # Length of the stored node sequence
                    'nodes': cycle['cycle'],
                    'score': cycle['weight_sum']
                })

        # Extract selected paths (chains)
        for path in paths:
            if path_vars[path['id']].X > 0.5:
                total_score += path['weight_sum']
                solution['selected_paths'].append({
                    'type': "Chain",
                    'size': len(path['path']),  # Number of nodes, including the starting NDD
                    'nodes': path['path'],
                    'score': path['weight_sum']
                })

        solution['total_score'] = total_score

        # Capture optimization statistics
        total_time = time.time() - start_time
        # NOTE(review): the size-4 / size-3 counters below look at the length of
        # the stored node sequence of cycles only; with a repeated closing node
        # that length is one more than the number of distinct nodes — confirm.
        solution['optimization_info'] = {
            'optimal_solution_found': model.status == GRB.OPTIMAL,
            'total_time_s': total_time,
            'number_of_variables': model.NumVars,
            'number_of_constraints': model.NumConstrs,
            'number_of_non_zeros': model.NumNZs,
            'objective_1_max_cycles_and_chains': len(solution['selected_cycles']) + len(solution['selected_paths']),
            'objective_2_min_cycles_and_chains_of_size_4': len([x for x in solution['selected_cycles'] if len(x['nodes']) == 4]),
            'objective_3_min_cycles_chains_of_size_3': len([x for x in solution['selected_cycles'] if len(x['nodes']) == 3]),
            'objective_4_max_total_score_weight': total_score
        }
    else:
        print(f"No optimal or feasible solution found. Status: {model.status}")

    return solution



# Solve multiple instance files in one run and save each solution

In [7]:
def save_solution(filepath, solution, output_folder, k):
    """
    Writes a human-readable report of one solution to the output folder.

    The report is stored as ``<instance name>_solution_k<k-1>.txt`` and lists the
    selected cycles, the selected paths, the total score, a transplant count and
    the optimization statistics. The output folder is created when missing.

    Note that the file name and the report header use ``k - 1``, while the
    console message printed at the end reports ``k`` itself.

    Args:
        filepath (str): The path to the instance file; only its base name is used.
        solution (dict): Solution with the keys 'selected_cycles', 'selected_paths',
            'total_score' and 'optimization_info'.
        output_folder (str): The path to the folder where solutions should be saved.
        k (int): The k value the solution was computed with.
    """
    # Step 1: Extract the filename without the extension
    filename = os.path.basename(filepath)
    file_base_name = os.path.splitext(filename)[0]

    # Step 2: Create the solution file path inside the output folder, with k in the filename
    if output_folder:
        # An empty folder name means "current directory"; anything else must exist before open().
        os.makedirs(output_folder, exist_ok=True)
    solution_file_path = os.path.join(output_folder, f"{file_base_name}_solution_k{k-1}.txt")

    # Step 3: Transplant count = distinct nodes of every selected cycle plus the
    # stored size (number of nodes) of every selected path
    total_transplants = sum(len(set(cycle['nodes'])) for cycle in solution['selected_cycles']) + \
                        sum(path['size'] for path in solution['selected_paths'])

    # Step 4: Save the solution in the file
    with open(solution_file_path, 'w') as f:
        f.write(f"Running with k = {k-1}:\n\n")  # Add k to the output file

        f.write("Selected Cycles:\n")
        for idx, cycle in enumerate(solution['selected_cycles']):
            # A cycle may list its start node twice; report the distinct count.
            unique_cycle_size = len(set(cycle['nodes']))

            f.write(f"{idx + 1}:\n")
            f.write(f"Type: {cycle['type']}\n")
            f.write(f"Size: {unique_cycle_size}\n")
            f.write(f"Nodes: {', '.join(map(str, cycle['nodes']))}\n")
            f.write(f"Score: {cycle['score']}\n")
            f.write("-------------------------------------------\n")

        f.write("\nSelected Paths:\n")
        for idx, path in enumerate(solution['selected_paths']):
            f.write(f"{idx + 1}:\n")
            f.write(f"Type: {path['type']}\n")
            f.write(f"Size: {path['size']}\n")
            f.write(f"Nodes: {', '.join(map(str, path['nodes']))}\n")
            f.write(f"Score: {path['score']}\n")
            f.write("-------------------------------------------\n")

        # Step 5: Add the total score and transplants to the file
        f.write(f"Total score: {solution['total_score']}\n")
        f.write(f"Number of transplants: {total_transplants}\n")

        # Step 6: Add optimization information
        f.write("\nOptimization information:\n")
        for key, value in solution['optimization_info'].items():
            f.write(f"{key}: {value}\n")

    print(f"Solution saved for k = {k} in {solution_file_path}")



In [8]:

def process_files(directory, output_folder, k_values):
    """
    Solves every matching instance file in a directory for several k values.

    Files are selected when their name starts with one of the known generator
    prefixes and ends in '.txt'; they are processed in sorted order. For each
    file and each k the cycles/chains are enumerated, the model is solved and
    the solution is saved. k == 6 is only run on a fixed list of instances;
    other files are skipped for that k.

    Args:
        directory (str): The directory containing the input files.
        output_folder (str): The directory where the solutions should be saved.
        k_values (list): A list of k values to process.
    """
    prefixes = ["RandomSparse", "Delorme", "Saidman"]

    # List of specific files to process for k == 6
    files_for_k6 = [
        "Delorme_50_NDD_Weight_0.txt", "Delorme_200_NDD_Weight_0.txt",
        "Saidman_50_NDD_Weight_0.txt", "Saidman_200_NDD_Weight_0.txt",
        "RandomSparse_200_NDD_Weight_0.txt"
    ]

    # os.listdir order is arbitrary; sorting makes runs reproducible.
    files = sorted(
        f for f in os.listdir(directory)
        if any(f.startswith(prefix) for prefix in prefixes) and f.endswith('.txt')
    )

    for file in files:
        filepath = os.path.join(directory, file)

        # The parsed instance and its graph do not depend on k, so they are
        # built once per file, and only when some k actually needs them.
        data = None
        G = None

        for k in k_values:
            # k == 6 is restricted to the files listed above
            if k == 6 and file not in files_for_k6:
                print(f"Skipping file {file} for k = {k}")
                continue

            print(f"Calculating cycles and paths for k = {k}, file: {file}")

            if data is None:
                data = import_kidney_data(filepath)
                G = create_graph(data)

            # Calculate cycles and paths for the current k
            cycles, paths = calculate_cycles_and_paths(G, data, k)

            # Solve the kidney exchange problem
            solution = solve_kidney_exchange(G, cycles, paths, k)

            # Save the solution for the current k
            save_solution(filepath, solution, output_folder, k)
            print(f"Results saved for k = {k}, file: {file}\n")

# Define your input directory and the output folder - change the paths as needed.
# These are absolute, machine-specific paths; point them at your own instance
# folder and results folder before running this cell.
input_directory = "C:/Users/20204018/OneDrive - TU Eindhoven/Documents/Master DSAI/YEAR 1 Q1/Optimization For DS/KidneyExchangeOptimization/test"
output_folder = "C:/Users/20204018/OneDrive - TU Eindhoven/Documents/Master DSAI/YEAR 1 Q1/Optimization For DS/KidneyExchangeOptimization/exact method/Solutions_branch_and_bound"

# Example usage: running this cell starts the full batch immediately.
# save_solution labels its output with k - 1, so these values produce files
# tagged k3, k4 and k5; process_files runs k == 6 only on its built-in file list.
k_values = [4,5,6]
process_files(input_directory, output_folder, k_values)


Calculating cycles and paths for k = 4, file: Delorme_1000_NDD_Weight_0.txt
