# Kidney Exchange Instance Exploration

First-pass inspection of the raw XML instances stored in `instances_large/`.

In [3]:
from pathlib import Path
import pandas as pd

from instance_analysis import load_instance, summarize_instances

# Directory holding the raw XML instances; change this to point at another dataset.
INSTANCE_DIR = Path("instances_large")

# Sorted so the instance order (and therefore instance_paths[0], used further
# down for the per-donor drill-down) is stable from run to run.
instance_paths = sorted(INSTANCE_DIR.glob("*.xml"))
if not instance_paths:
    # Fail early with a readable message. Without this guard an empty or
    # misspelled directory would likely only surface later as a less obvious
    # error (e.g. a missing "instance" column, or an IndexError on
    # instance_paths[0]).
    raise FileNotFoundError(
        f"No .xml instance files found in {INSTANCE_DIR.resolve()}"
    )

summaries = summarize_instances(instance_paths)

# One row per instance, keyed by the "instance" field of each summary record.
summary_df = pd.DataFrame(summaries).set_index("instance")
summary_df

Unnamed: 0_level_0,donors_total,donors_altruistic,donors_paired,donors_without_matches,recipients_total,matches_total,avg_matches_per_donor,avg_matches_altruistic,avg_matches_paired,avg_match_score,donor_bloodtype_counts,recipient_bloodtype_counts,altruistic_donor_ids,donor_ids_without_matches
instance,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
genxml-0.xml,1053,53,1000,0,1000,112958,107.272555,102.339623,107.534,45.417775,"{'A': 451, 'O': 367, 'AB': 122, 'B': 113}","{'O': 420, 'A': 377, 'B': 102, 'AB': 101}","[1191, 1192, 1193, 1194, 1195, 1196, 1197, 119...",[]
genxml-1.xml,1053,53,1000,0,1000,100627,95.562203,100.301887,95.311,45.639928,"{'AB': 125, 'A': 448, 'B': 127, 'O': 353}","{'B': 93, 'A': 377, 'O': 443, 'AB': 87}","[1199, 1200, 1201, 1202, 1203, 1204, 1205, 120...",[]
genxml-2.xml,1053,53,1000,0,1000,115231,109.431149,104.358491,109.7,45.515512,"{'O': 375, 'A': 432, 'B': 114, 'AB': 132}","{'O': 423, 'A': 384, 'B': 101, 'AB': 92}","[1230, 1231, 1232, 1233, 1234, 1235, 1236, 123...",[]
genxml-3.xml,1053,53,1000,0,1000,100135,95.094967,97.433962,94.971,45.579568,"{'A': 447, 'O': 356, 'B': 117, 'AB': 133}","{'B': 110, 'O': 425, 'A': 377, 'AB': 88}","[1200, 1201, 1202, 1203, 1204, 1205, 1206, 120...",[]
genxml-4.xml,1053,53,1000,0,1000,110058,104.518519,112.566038,104.092,45.505979,"{'A': 449, 'O': 387, 'AB': 116, 'B': 101}","{'AB': 75, 'O': 449, 'A': 366, 'B': 110}","[1215, 1216, 1217, 1218, 1219, 1220, 1221, 122...",[]
genxml-5.xml,1053,53,1000,0,1000,111403,105.795821,121.622642,104.957,45.447223,"{'A': 423, 'O': 357, 'AB': 156, 'B': 117}","{'B': 106, 'A': 387, 'O': 422, 'AB': 85}","[1171, 1172, 1173, 1174, 1175, 1176, 1177, 117...",[]
genxml-6.xml,1053,53,1000,0,1000,110105,104.563153,121.962264,103.641,45.496027,"{'B': 113, 'AB': 136, 'O': 365, 'A': 439}","{'O': 459, 'B': 103, 'A': 351, 'AB': 87}","[1198, 1199, 1200, 1201, 1202, 1203, 1204, 120...",[]
genxml-7.xml,1053,53,1000,0,1000,120373,114.31434,117.283019,114.157,45.477657,"{'O': 403, 'AB': 101, 'A': 427, 'B': 122}","{'AB': 93, 'A': 359, 'B': 120, 'O': 428}","[1217, 1218, 1219, 1220, 1221, 1222, 1223, 122...",[]
genxml-8.xml,1053,53,1000,0,1000,112866,107.185185,99.264151,107.605,45.496509,"{'A': 422, 'O': 393, 'B': 142, 'AB': 96}","{'A': 382, 'O': 426, 'B': 105, 'AB': 87}","[1205, 1206, 1207, 1208, 1209, 1210, 1211, 121...",[]
genxml-9.xml,1053,53,1000,0,1000,115236,109.435897,119.924528,108.88,45.577597,"{'O': 393, 'A': 430, 'B': 114, 'AB': 116}","{'AB': 88, 'A': 401, 'O': 392, 'B': 119}","[1186, 1187, 1188, 1189, 1190, 1191, 1192, 119...",[]


## Aggregate Metrics

High-level totals and averages across all instances.

In [4]:
# Column-wise totals of the raw count columns over every row of summary_df.
counts = summary_df.loc[
    :,
    [
        "donors_total",
        "donors_altruistic",
        "donors_paired",
        "recipients_total",
        "matches_total",
    ],
].sum(axis=0)

# Show the totals as a one-column table labelled "sum".
counts.rename("sum").to_frame()

Unnamed: 0,sum
donors_total,10530
donors_altruistic,530
donors_paired,10000
recipients_total,10000
matches_total,1108992


In [4]:
# Unweighted mean of the per-instance average columns (each row of summary_df
# contributes equally, regardless of how many donors it contains).
averages = summary_df.loc[
    :,
    [
        "avg_matches_per_donor",
        "avg_matches_altruistic",
        "avg_matches_paired",
        "avg_match_score",
    ],
].mean(axis=0)

# Show the means as a one-column table labelled "mean".
averages.rename("mean").to_frame()

Unnamed: 0,mean
avg_matches_per_donor,16.239241
avg_matches_altruistic,18.825
avg_matches_paired,16.101333
avg_match_score,46.108862


## Donor-Level Details

Take a closer look at the first instance to understand altruistic donors, match distributions, and recipient blood-type compatibility.

In [5]:
# Drill into a single instance: the first path in instance_paths.
example_instance = load_instance(instance_paths[0])

# One record per donor; the keyword names become the DataFrame columns.
donor_rows = [
    dict(
        donor_id=d.donor_id,
        bloodtype=d.bloodtype,
        is_altruistic=d.is_altruistic,
        num_matches=d.num_matches,
    )
    for d in example_instance.donors
]

# Index by donor_id so later cells can filter and group on the other columns.
donors_df = pd.DataFrame(donor_rows).set_index("donor_id")
donors_df.head()

Unnamed: 0_level_0,bloodtype,is_altruistic,num_matches
donor_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,A,False,9
1,O,False,32
2,A,False,14
3,A,False,12
4,A,False,12


In [6]:
# Altruistic donors only, ordered from most to fewest matches.
(
    donors_df
    .loc[donors_df["is_altruistic"]]
    .sort_values(by="num_matches", ascending=False)
)

Unnamed: 0_level_0,bloodtype,is_altruistic,num_matches
donor_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
187,O,True,29
183,O,True,27
182,O,True,26
184,O,True,23
185,O,True,19
181,A,True,11
188,A,True,11
186,AB,True,2


In [7]:
# Summary statistics of num_matches, split by the is_altruistic flag.
(
    donors_df
    .groupby(by="is_altruistic")["num_matches"]
    .describe()
)

Unnamed: 0_level_0,count,mean,std,min,25%,50%,75%,max
is_altruistic,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
False,150.0,13.606667,9.335576,0.0,7.0,10.0,23.75,34.0
True,8.0,18.5,9.591663,2.0,11.0,21.0,26.25,29.0


In [8]:
# Output column name -> attribute read from each recipient object.
# Only "cPRA" differs from its attribute name (c_pra).
recipient_attr_by_column = {
    "recipient_id": "recipient_id",
    "bloodtype": "bloodtype",
    "cPRA": "c_pra",
    "has_blood_compatible_donor": "has_blood_compatible_donor",
}

# One record per recipient of the example instance.
recipient_rows = [
    {column: getattr(r, attr) for column, attr in recipient_attr_by_column.items()}
    for r in example_instance.recipients
]
recipients_df = pd.DataFrame(recipient_rows).set_index("recipient_id")

# Tally recipients by their has_blood_compatible_donor flag.
recipients_df["has_blood_compatible_donor"].value_counts()

has_blood_compatible_donor
True     82
False    68
Name: count, dtype: int64