# Preliminaries

In [4]:
# Import libraries
import networkx as nx
import os

# Define filepath
# The instance location can be overridden through the KIDNEY_INSTANCE_FILE
# environment variable, so the notebook also runs on machines where the
# original local path does not exist. Without the variable the original
# path is used unchanged.
filepath = os.environ.get(
    'KIDNEY_INSTANCE_FILE',
    '/Users/Martijn/Library/Mobile Documents/com~apple~CloudDocs/School/Year 5/Q1/Optimization for Data Science [2AMS50]/Project/Instance Files/Delorme_50_NDD_Unit_0.txt',
)

# Strip filename from filepath (used only for display in later cells)
filename = os.path.basename(filepath)


# Read in data

In [11]:
def import_kidney_data(filepath):
    """
    Imports kidney exchange data from a text file and structures it into a dictionary.

    The file is expected to consist of three header lines of the form
    ``<name> = <value>`` (number of pairs, number of NDDs, number of arcs),
    followed by one comma-separated line per pair/NDD, followed by one line
    per arc of the form ``(donor,patient),1,weight``. Blank lines anywhere in
    the file (for example a trailing empty line) are ignored.

    Args:
        filepath (str): The path to the text file containing the kidney exchange data.

    Returns:
        dict: A dictionary containing the following keys:
            - num_pairs (int): The number of pairs (donor-patient pairs) in the instance.
            - num_ndd (int): The number of non-directed donors (NDDs) in the instance.
            - num_arcs (int): The total number of arcs as declared in the header.
            - pairs (list): A list of dictionaries, each representing a pair or NDD with the following keys:
                - id (int): The unique ID of the pair.
                - is_ndd (bool): True if the pair is an NDD, False otherwise.
                - donor_blood_type (int): Donor's blood type (0 = A, 1 = B, 2 = AB, 3 = O).
                - patient_blood_type (int): Patient's blood type (0 = A, 1 = B, 2 = AB, 3 = O).
                - patient_vpra (int): Patient's vPRA score (0 = below 0.5, 1 = between 0.5 and 0.85, 2 = above 0.85).
            - arcs (list): A list of dictionaries representing the arcs between pairs with the following keys:
                - donor_id (int): ID of the donor pair.
                - patient_id (int): ID of the patient pair.
                - weight (int): The weight of the arc.

    Raises:
        IndexError: If the file has fewer than three header lines or a header
            line has fewer than three whitespace-separated tokens.
        ValueError: If a numeric field cannot be converted to int or a
            pair/arc line does not have the expected number of fields.

    Example:
        data = import_kidney_data('path/to/data.txt')
        print(data['num_pairs'])  # Output: Number of pairs in the instance
    """
    # Read the file once, dropping blank lines so that a trailing newline or
    # an accidental empty line does not shift the section boundaries or break
    # the tuple unpacking further down.
    with open(filepath, 'r') as f:
        lines = [raw.strip() for raw in f if raw.strip()]

    # Header: the value is the third whitespace-separated token of each line.
    # split() without arguments tolerates repeated spaces and tabs.
    num_pairs = int(lines[0].split()[2])
    num_ndd = int(lines[1].split()[2])
    num_arcs = int(lines[2].split()[2])

    data = {
        'num_pairs': num_pairs,
        'num_ndd': num_ndd,
        'num_arcs': num_arcs,
    }

    # Pairs and NDDs share one section directly after the header
    first_arc_line = 3 + num_pairs + num_ndd

    pairs = []
    for line in lines[3:first_arc_line]:
        pair_id, is_ndd, donor_blood_type, patient_blood_type, patient_vpra = map(int, line.split(','))
        pairs.append({
            'id': pair_id,
            'is_ndd': bool(is_ndd),
            'donor_blood_type': donor_blood_type,
            'patient_blood_type': patient_blood_type,
            'patient_vpra': patient_vpra,
        })
    data['pairs'] = pairs

    # Everything after the pair section is an arc line
    arcs = []
    for line in lines[first_arc_line:]:
        # The ',1,' separator sits between the parenthesised arc and its weight
        arc, weight = line.split(',1,')
        donor_id, patient_id = arc.split(',')
        arcs.append({
            'donor_id': int(donor_id[1:]),      # drop the leading '('
            'patient_id': int(patient_id[:-1]),  # drop the trailing ')'
            'weight': int(weight),
        })
    data['arcs'] = arcs

    return data


# Parse the instance at `filepath` into the module-level `data` dict that the cells below read
data = import_kidney_data(filepath)


In [10]:
# Display the number of pairs, NDDs, and arcs
print(f"Kidney exchange data from: {filename}")
print(f"Number of pairs: {data['num_pairs']}")
print(f"Number of NDDs: {data['num_ndd']}")
print(f"Number of arcs: {data['num_arcs']}")
print('')

# Access information about a specific arc.
# Arcs carry no ID of their own, so "Arc ID" here is the position in data['arcs'].
arc_id = 205
arc = data['arcs'][arc_id]
print(f"Arc ID: {arc_id}")
print(f"Donor ID: {arc['donor_id']}")
print(f"Patient ID: {arc['patient_id']}")
print(f"Weight: {arc['weight']}")
print('')

# Access information about a specific pair (based on pair ID).
# Look the pair up by its 'id' field rather than by list position, so the
# right record is shown even if the file does not list pairs in ID order.
pair_id = 49
pair_lookup = {entry['id']: entry for entry in data['pairs']}
pair = pair_lookup[pair_id]
print(f"Pair ID: {pair_id}")
print(f"Is NDD: {pair['is_ndd']}")
print(f"Donor blood type: {pair['donor_blood_type']}")
print(f"Patient blood type: {pair['patient_blood_type']}")
print(f"Patient vPRA: {pair['patient_vpra']}")
print('')


Kidney exchange data from: Delorme_50_NDD_Unit_0.txt
Number of pairs: 48
Number of NDDs: 2
Number of arcs: 294

Arc ID: 205
Donor ID: 32
Patient ID: 17
Weight: 1

Pair ID: 49
Is NDD: True
Donor blood type: 0
Patient blood type: 0
Patient vPRA: 2



# Define graph functions

In [13]:
def create_graph(data):
    """
    Builds the directed graph described by a parsed instance.

    Parameters:
    data (dict): A dictionary providing at least:
        - 'pairs': A list of dictionaries; each 'id' becomes a node.
        - 'arcs': A list of dictionaries; each one becomes a directed edge
                  from 'donor_id' to 'patient_id' carrying 'weight' as the
                  edge attribute ``weight``.

    Returns:
    G (nx.DiGraph): The resulting directed graph.
    """
    graph = nx.DiGraph()

    # Register every pair first so that nodes without any arc are still present
    for entry in data['pairs']:
        graph.add_node(entry['id'])

    # One directed, weighted edge per arc: donor -> patient
    for entry in data['arcs']:
        tail, head = entry['donor_id'], entry['patient_id']
        graph.add_edge(tail, head, weight=entry['weight'])

    return graph


def findPaths(G, u, n):
    """
    Enumerates every walk of exactly n edges that starts at node u.

    Nodes are not tracked while walking, so a returned sequence may visit the
    same node more than once (including u itself).

    Parameters:
    G (nx.DiGraph): A directed graph; only ``G.neighbors`` is used.
    u (hashable): The node every walk starts from.
    n (int): The number of edges in each walk.

    Returns:
    paths (list): One list of n + 1 nodes per walk, ordered by the neighbor
                  iteration order of G. For n == 0 this is ``[[u]]``.
    """
    # A walk with no edges is just the start node itself
    if n == 0:
        return [[u]]

    walks = []
    for successor in G.neighbors(u):
        # Prepend u to every shorter walk that continues from this successor
        for tail in findPaths(G, successor, n - 1):
            walks.append([u, *tail])
    return walks


def find_cycles(G, u, n):
    """
    Finds all simple cycles of length n starting and ending at node u.

    A cycle is simple when no node other than the start/end node u is
    repeated. Closed walks that revisit an intermediate node, such as
    (u, a, b, a, u) or (u, a, a, u), are therefore not returned. The search
    is a depth-first traversal that abandons a branch as soon as it would
    revisit a node, so it does not enumerate every walk of length n.

    Parameters:
    G (nx.DiGraph): A directed graph; only ``G.neighbors`` is used.
    u (hashable): The node where the cycle starts and ends.
    n (int): The length of the cycle (number of edges in the cycle).

    Returns:
    cycles (list): A list of all simple cycles of length n starting and ending
                   at node u, in neighbor iteration order. Each cycle is a
                   tuple of n + 1 nodes whose first and last entries are u.
                   A self-loop on u is reported as ``(u, u)`` for n == 1.
                   The list is empty when n < 1.
    """
    if n < 1:
        return []

    cycles = []
    walk = [u]        # nodes on the current branch, starting at u
    on_walk = {u}     # same nodes as a set for O(1) membership tests

    def _extend():
        edges_left = n - (len(walk) - 1)
        for successor in G.neighbors(walk[-1]):
            if edges_left == 1:
                # Last edge: it only closes a cycle if it leads back to u
                if successor == u:
                    cycles.append(tuple(walk) + (u,))
            elif successor not in on_walk:
                walk.append(successor)
                on_walk.add(successor)
                _extend()
                on_walk.discard(successor)
                walk.pop()

    _extend()
    return cycles


def set_weight(dict_list, donor_id, patient_id, weight_key="weight"):
    """
    Look up the weight stored for the arc from a donor to a patient.

    Despite the name, nothing is modified: the arc list is scanned in order
    and the value of the first matching arc is returned.

    Args:
        dict_list (list): List of dictionaries representing arcs.
        donor_id (int): The ID of the donor.
        patient_id (int): The ID of the patient.
        weight_key (str): The key to access the weight in the dictionary (default is "weight").

    Returns:
        int: The value under `weight_key` of the first matching arc, or None
        when no arc matches (or the matching arc lacks that key).
    """
    matching_weights = (
        entry.get(weight_key)
        for entry in dict_list
        if entry.get("donor_id") == donor_id and entry.get("patient_id") == patient_id
    )
    # Only the first match is consumed; None signals "no such arc"
    return next(matching_weights, None)

In [16]:
G = create_graph(data)  # Create a directed graph from the data

# Length bound: the loops below enumerate cycles with 1..k-1 arcs and NDD paths with 1..k-2 arcs.
# NOTE(review): cycles with exactly k arcs are therefore not enumerated - confirm this is the intended bound.
k = 3
c = []  # List to store information about cycles
p = []  # List to store information about paths

# Look pairs up by their 'id' field instead of by list position, so the
# scoring below stays correct even if the pairs are not stored in ID order.
pairs_by_id = {pair['id']: pair for pair in data['pairs']}


def _score_walk(walk):
    """Return (vpra_sum, weight_sum) accumulated over the arcs of `walk`.

    For every consecutive (donor, patient) step, the patient's vPRA category
    and the weight of the connecting arc are added up. The first node of the
    walk only ever acts as a donor, so its own vPRA is counted only if the
    walk returns to it (as a cycle does).
    """
    vpra_sum = 0
    weight_sum = 0
    for donor, patient in zip(walk, walk[1:]):
        vpra_sum += pairs_by_id[patient]['patient_vpra']
        weight_sum += set_weight(data['arcs'], donor, patient)
    return vpra_sum, weight_sum


id_count = 0  # Running ID shared by cycles and paths

# Cycles with 1 to k-1 arcs, enumerated from every node of the graph.
# Each cycle is found once per node it contains (one entry per rotation).
for length in range(1, k):
    for node in G.nodes:
        for cyc in find_cycles(G, node, length):
            cyc_success, cyc_weight = _score_walk(cyc)
            c.append({'id': id_count, 'cycle': cyc, 'vpra_sum': cyc_success, 'weight_sum': cyc_weight})
            id_count += 1

# Paths with 1 to k-2 arcs, starting only from non-directed donors (NDDs)
for length in range(1, k - 1):
    for node in G.nodes:
        if not pairs_by_id[node]['is_ndd']:
            continue
        for path in findPaths(G, node, length):
            path_success, path_weight = _score_walk(path)
            p.append({'id': id_count, 'path': path, 'vpra_sum': path_success, 'weight_sum': path_weight})
            id_count += 1

# Print the results
print("Cycles:", c)
print("Paths:", p)

Cycles: [{'cycle': (6, 24, 6), 'vpra_sum': 1, 'weight_sum': 2}, {'cycle': (12, 24, 12), 'vpra_sum': 1, 'weight_sum': 2}, {'cycle': (24, 6, 24), 'vpra_sum': 1, 'weight_sum': 2}, {'cycle': (24, 12, 24), 'vpra_sum': 1, 'weight_sum': 2}, {'cycle': (24, 38, 24), 'vpra_sum': 1, 'weight_sum': 2}, {'cycle': (24, 45, 24), 'vpra_sum': 1, 'weight_sum': 2}, {'cycle': (25, 43, 25), 'vpra_sum': 2, 'weight_sum': 2}, {'cycle': (38, 24, 38), 'vpra_sum': 1, 'weight_sum': 2}, {'cycle': (43, 25, 43), 'vpra_sum': 2, 'weight_sum': 2}, {'cycle': (45, 24, 45), 'vpra_sum': 1, 'weight_sum': 2}]
Paths: [{'path': [48, 22], 'vpra_sum': 1, 'weight_sum': 1}, {'path': [48, 33], 'vpra_sum': 0, 'weight_sum': 1}, {'path': [49, 0], 'vpra_sum': 0, 'weight_sum': 1}, {'path': [49, 5], 'vpra_sum': 0, 'weight_sum': 1}, {'path': [49, 6], 'vpra_sum': 0, 'weight_sum': 1}, {'path': [49, 7], 'vpra_sum': 0, 'weight_sum': 1}, {'path': [49, 14], 'vpra_sum': 0, 'weight_sum': 1}, {'path': [49, 17], 'vpra_sum': 0, 'weight_sum': 1}, {'pa