## Imports

In [None]:
import pandas as pd
import os
from typing import List
import re
import numpy as np
import sys
import math
import time
import logging

In [None]:
sys.path.insert(0, "../..")

from local_testing_utilities.notebook_utils.pairing_data import parse_pairing_data
from local_testing_utilities.notebook_utils.survival_data import parse_survival_data
from txmatching.utils.blood_groups import BloodGroup
from txmatching.utils.country_enum import Country
from txmatching.patients.patient import Donor, Recipient
from txmatching.patients.patient_parameters import PatientParameters
from tests.test_utilities.hla_preparation_utils import (create_antibodies, create_antibody,
                                                        create_hla_typing)
from tests.test_utilities.prepare_app_for_tests import DbTests
from txmatching.patients.patient import TxmEvent
from txmatching.utils.enums import TxmEventState
from txmatching.configuration.configuration import Configuration
from txmatching.solve_service.solve_from_configuration import solve_from_configuration

## Load data

In [None]:
# Parse the pairing data from the processed KDP directory together with the
# recipient-ID mapping CSV.
# NOTE(review): remove_single_donors=True presumably drops donors that have no
# paired recipient — confirm in parse_pairing_data.
df_all_patients = parse_pairing_data(
    'data/KDP-processed',
    'data/patients_list_recipientID.csv',
    remove_single_donors=True,
)

In [None]:
# Parse the survival export, then keep only the columns that the rest of the
# notebook works with (RecipientID is the join key used further down).
df_survival = parse_survival_data('data/LD_kidney_survival.csv')
df_survival_summary = df_survival[[
    'RecipientID',
    'StartDate',
    'LastVisitDate',
    'EndDate',
    'delay',
    'EndReason',
    'NoVisits',
    'donor_typization',
    'recipient_typization',
]]

In [None]:
# Raw recipient-ID mapping file, loaded as-is (same CSV that is handed to
# parse_pairing_data when the pairing data is loaded).
df_patients_with_recipient_id = pd.read_csv(
    filepath_or_buffer='data/patients_list_recipientID.csv',
)

### Patients that were seen in a given event but not in the next event

In [None]:
# A patient is "last seen" in an event when no row exists for the same
# recipient_name in the directly following event (txm_event + 1).
#
# All (txm_event, recipient_name) pairs are collected once into a set, so each
# row needs a single hash lookup instead of a scan over the whole frame (a
# nested row-by-row DataFrame.apply is quadratic in the number of rows).
#
# Rows with a missing event or name are left out of the set, so they are never
# treated as "seen in the following event" (NaN never compares equal to
# anything) and such rows are always kept.
_known_event_patient_rows = df_all_patients[['txm_event', 'recipient_name']].dropna()
_seen_event_patient_pairs = set(zip(
    _known_event_patient_rows['txm_event'],
    _known_event_patient_rows['recipient_name'],
))
_is_last_event = [
    (event + 1, name) not in _seen_event_patient_pairs
    for event, name in zip(df_all_patients['txm_event'], df_all_patients['recipient_name'])
]

# Boolean mask is positionally aligned with df_all_patients (one flag per row).
df_patients_last_event = df_all_patients.loc[_is_last_event]
print(len(df_patients_last_event.index))
df_patients_last_event.head()

Number of patients that were seen in a given event but were not seen in the next event:

In [None]:
# Per txm event: number of last-seen rows with a non-null recipient_name.
(
    df_patients_last_event
    .groupby('txm_event')['recipient_name']
    .count()
    .plot()
)

### Join patients last events with survival data

In [None]:
# Attach the survival summary (keyed by RecipientID) to every last-seen row;
# clashing column names coming from the survival side get the '_surv' suffix.
df_patients_last_event_with_surv = df_patients_last_event.join(
    df_survival_summary.set_index('RecipientID'),
    on='recipient_id',
    rsuffix='_surv',
)
df_patients_last_event_with_surv

# Per txm event, compare three non-null counts:
#   ended                        -> rows with a recipient_name
#   ended with transplant found  -> rows that also have a survival `delay`
#   ended without transplant     -> the difference of the two
(
    df_patients_last_event_with_surv
    .groupby('txm_event')
    .count()
    .pipe(lambda counts: pd.DataFrame({
        'ended': counts.recipient_name,
        'ended with transplant found': counts.delay,
        'ended without transplant': counts.recipient_name - counts.delay,
    }))
    .plot(
        title='Number of patients that were lastly seen in the given txm event versus those that were mapped to transplant date',
        figsize=(12, 5),
    )
)

In [None]:
# One dot per last-seen patient: survival StartDate against the txm event in
# which the patient was last seen.
df_patients_last_event_with_surv.plot(
    x='txm_event',
    y='StartDate',
    style=".",
    title='Transplant dates for patients that was lastly seen in th given txm event',
    figsize=(10, 7),
)

### Join last seen patients with their summary

In [None]:
# For every recipient_id, a comma-separated string of all txm events in which
# the recipient appears (in frame order).
df_patient_to_event_list = (
    df_all_patients
    .groupby(['recipient_id'])[['txm_event']]
    .agg(lambda events: ",".join(map(str, events)))
)

In [None]:
# Survival summary re-keyed by RecipientID so it can be joined on recipient_id.
df_patient_to_survival_summary = df_survival_summary.set_index(keys='RecipientID')

In [None]:
# Enrich each (txm_event, recipient_id) last-seen row with, in order:
#   1. the raw recipient list entry,
#   2. the list of all events the recipient took part in (suffix "_1" on clashes,
#      which yields the `txm_event_1` column),
#   3. the survival summary (suffix "_2" on clashes).
df_patients_last_event_with_info = (
    df_patients_last_event[['txm_event', 'recipient_id']]
    .join(df_patients_with_recipient_id.set_index('recipient_id'), on='recipient_id')
    .join(df_patient_to_event_list, on='recipient_id', rsuffix="_1")
    .join(df_patient_to_survival_summary, on='recipient_id', rsuffix="_2")
)

Export

In [None]:
# Write out the last-seen patients that have no survival StartDate, i.e. no
# matching survival record was joined for them.
(
    df_patients_last_event_with_info
    .loc[lambda frame: frame['StartDate'].isna()]
    [['txm_event', 'recipient_id', 'recipient_name', 'recipient_year_of_birth', 'txm_event_1']]
    .to_csv('data/ended_patients_without_transplant.csv', index=False)
)

List of patients that were not found in the next event and were not transplanted:

In [None]:
# df_patients_last_event_with_info.loc[lambda r: r.StartDate.isnull()]

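Namátkově jsem porovnal pacienty, co v párování skončili, s xls sheety s konečnými variantami. Pro každý event ti pacienti, pro které jsme našli survival data (byli transplantovaní), odpovídají těm ze sheetu s konečnými variantami plus pár transplantací s originálním donorem.

*(English: I spot-checked the patients who dropped out of the pairing against the xls sheets with the final variants. For each event, the patients for whom we found survival data (i.e. who were transplanted) correspond to those in the final-variants sheet, plus a few transplants with the original donor.)*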

## Run solver on data with various granularity

First we load the patients in old txm events and join survival data to them

In [None]:
# NOTE(review): this rebinds df_survival_summary. Earlier cells use that name
# for a frame where RecipientID is a regular column and call
# .set_index('RecipientID') on it; from here on RecipientID is the index, so
# re-running those earlier cells after this one will fail.
# NOTE(review): read_pickle can execute arbitrary code while loading — only
# point it at a trusted file.
df_survival_summary = (
    pd.read_pickle('data/survival_summary.pkl')
    .set_index('RecipientID')
)

In [None]:
# Left-join survival data onto every patient row (clashing survival columns get
# the '_r' suffix), flag rows that have a survival `delay` as transplanted, and
# drop rows without a recipient_id.
df_all_patients_with_survival = (
    df_all_patients
    .join(df_survival_summary, on='recipient_id', rsuffix='_r')
    .assign(has_transplant=lambda joined: joined['delay'].notna())
    .loc[lambda joined: joined['recipient_id'].notna()]
)
#df_all_patients_with_survival

Now we define a function that returns the patients that would take part in a txm event at a given granularity.

For a given granularity, each event contains the patients from the original event plus the patients from the $granularity - 1$ preceding events that have been transplanted.

In [None]:
def get_patients_for_granularity(txm_event, granularity, df_patients=None):
    """Return the patients that take part in ``txm_event`` at the given granularity.

    The selection consists of
      * every row whose ``txm_event`` equals ``txm_event``, and
      * every row from a strictly earlier event with
        ``txm_event > txm_event - granularity`` whose ``has_transplant`` flag is set
        (i.e. the ``granularity - 1`` directly preceding events).

    Args:
        txm_event: number of the event to build the patient set for.
        granularity: how many consecutive events are merged into one
            (1 means "only the event itself").
        df_patients: frame with at least the columns ``txm_event``,
            ``has_transplant`` (boolean) and ``recipient_id``. Defaults to the
            notebook-level ``df_all_patients_with_survival`` so existing calls
            keep working; passing it explicitly avoids the hidden dependency on
            notebook state.

    Returns:
        The selected rows, de-duplicated on ``recipient_id`` (the first
        occurrence in frame order is kept) and indexed by ``recipient_id``.
    """
    if df_patients is None:
        # Looked up at call time, exactly like the global was before.
        df_patients = df_all_patients_with_survival

    in_current_event = df_patients.txm_event == txm_event
    in_previous_window = (
        (df_patients.txm_event > txm_event - granularity)
        & (df_patients.txm_event < txm_event)
    )
    # Patients from earlier events are only carried over if they were transplanted.
    carried_over = in_previous_window & df_patients.has_transplant

    selected = df_patients.loc[in_current_event | carried_over]
    return selected.drop_duplicates(subset=['recipient_id']).set_index('recipient_id')

# get_patients_for_granularity(txm_event=26, granularity=2)

Before we run the solver, we need to set up some configuration.

In [None]:
# Initialize db
# DbTests is imported from tests.test_utilities.prepare_app_for_tests; setUp()
# is invoked once and the instance is kept in `test` for the rest of the notebook.
# NOTE(review): presumably this prepares the test application/database that the
# solver needs — confirm in DbTests. No matching tearDown() is executed here.
test = DbTests()
test.setUp()

In [None]:
# Raise the root logger's threshold so that only warnings and above get through.
logger = logging.getLogger()
logger.setLevel(level='WARN')

In [None]:
# test.tearDown()

Now we can run the solver. We define several functions for that.

In [None]:
def _split_codes(raw_codes):
    """Split a whitespace-separated code string into a list; missing values give []."""
    # After a left join, missing values show up as NaN/None rather than '',
    # and calling .split on those would raise AttributeError.
    if not isinstance(raw_codes, str):
        return []
    # split() without an argument ignores leading/trailing/repeated whitespace,
    # so no empty-string codes are produced.
    return raw_codes.split()


def row_to_patient_pair(db_id, row):
    """Build a (Donor, Recipient) pair from one patient row.

    Args:
        db_id: id used as ``db_id`` of both donor and recipient, as the
            cross-reference between them, and inside their ``medical_id``.
        row: object exposing ``donor_typization``, ``recipient_typization`` and
            ``recipient_luminex_2`` as whitespace-separated code strings.

    Returns:
        ``(donor, recipient)``, or ``None`` when the donor typing, the recipient
        typing or the recipient antibodies are missing (empty, whitespace-only,
        NaN or ``None``).
    """
    donor_typing = _split_codes(row.donor_typization)
    recipient_typing = _split_codes(row.recipient_typization)
    recipient_antibodies = _split_codes(row.recipient_luminex_2)

    if not donor_typing or not recipient_typing:
        return None

    if not recipient_antibodies:
        # TODO: currently we ignore this patients because we do not have antibodies data, we need to have them probably
        return None

    # NOTE(review): blood group is fixed to A for donor and recipient, and
    # acceptable_blood_groups is empty — presumably placeholders because the
    # source data carries no blood groups; confirm this is intended.
    donor = Donor(
        db_id=db_id,
        medical_id=f'donor_{db_id}',
        related_recipient_db_id=db_id,
        parameters=PatientParameters(
            blood_group=BloodGroup.A,
            country_code=Country.CZE,
            hla_typing=create_hla_typing(
                donor_typing
            )
        )
    )

    recipient = Recipient(
        db_id=db_id,
        acceptable_blood_groups=[],
        related_donor_db_id=db_id,
        medical_id=f'recipient_{db_id}',
        parameters=PatientParameters(
            blood_group=BloodGroup.A,
            country_code=Country.CZE,
            hla_typing=create_hla_typing(recipient_typing)
        ),
        # NOTE(review): both numeric arguments are fixed at 2000 for every
        # antibody — presumably MFI and cutoff; confirm against create_antibody.
        hla_antibodies=create_antibodies([create_antibody(raw_code, 2000, 2000) for raw_code in recipient_antibodies])
    )

    return donor, recipient

In [None]:
def compute_for_patients(df_patients):
    """Run the solver on the given patients and report the size of its first matching.

    Every row of ``df_patients`` (index value converted to ``int`` and used as
    the pair id) is turned into a donor/recipient pair; rows for which
    ``row_to_patient_pair`` returns ``None`` are skipped.

    Returns:
        ``(matching_pairs_count, donor_count, recipient_count)`` where
        ``matching_pairs_count`` is the number of donor-recipient pairs in the
        first calculated matching, or 0 when no matching was calculated.
    """
    converted_pairs = (
        row_to_patient_pair(int(pair_id), patient_row)
        for pair_id, patient_row in df_patients.iterrows()
    )
    usable_pairs = [pair for pair in converted_pairs if pair is not None]
    donors = [donor for donor, _ in usable_pairs]
    recipients = [recipient for _, recipient in usable_pairs]

    # Single throw-away event holding all pairs; only one matching is requested.
    event = TxmEvent(1, 'sample_event', None, TxmEventState.OPEN, donors, recipients)
    solver_config = Configuration(max_number_of_matchings=1)
    pairing_result = solve_from_configuration(solver_config, txm_event=event)

    matchings = pairing_result.calculated_matchings_list
    matching_pairs_count = (
        len(matchings[0].get_donor_recipient_pairs()) if len(matchings) > 0 else 0
    )

    return matching_pairs_count, len(donors), len(recipients)

#df_patients = get_patients_for_granularity(txm_event=12, granularity=1)
#compute_for_patients(df_patients)

In [None]:
# Sweep txm events 10..30 and granularities 1..4, run the solver for every
# combination and collect one result record per run.
d = []

for txm_event in range(10, 31):
    for granularity in range(1, 5):
        df_patients = get_patients_for_granularity(txm_event=txm_event, granularity=granularity)
        patients_count = df_patients.shape[0]

        print(f"Computing matching for txm_event {txm_event} and granularity {granularity} ({patients_count} patients)", end=" ")

        # Wall-clock time of the solver run only.
        start = time.time()
        matching_pairs_count, valid_donors, valid_recipients = compute_for_patients(df_patients)
        elapsed_time = time.time() - start

        print(f"-> {matching_pairs_count} transplants found ({elapsed_time:.2f} seconds)")

        d.append(dict(
            txm_event=txm_event,
            granularity=granularity,
            patients_count=patients_count,
            valid_donors=valid_donors,
            valid_recipients=valid_recipients,
            matching_pairs_count=matching_pairs_count,
            # Pairs found per merged event, to compare granularities fairly.
            matching_pairs_count_normalized=matching_pairs_count / granularity,
            elapsed_time=elapsed_time,
        ))

df_granularity_results = pd.DataFrame(d)
df_granularity_results

### Results

Everything is computed now. Let's show some plots.

In [None]:
# One line chart per metric: x axis = txm event, one line per granularity.
# Each entry is (column of df_granularity_results to plot, y-axis label).
# Driving the plots from this list replaces five copy-pasted statements and
# keeps the pivot/plot logic in a single place; as the cell no longer ends in
# an expression, the stray Axes repr is not echoed below the figures.
_granularity_metrics_to_plot = [
    ('patients_count', 'Patients count'),
    ('valid_donors', 'Valid donors'),
    ('matching_pairs_count', 'matching_pairs_count'),
    ('matching_pairs_count_normalized', 'matching_pairs_count_normalized'),
    ('elapsed_time', 'elapsed_time (s)'),
]

for _metric, _ylabel in _granularity_metrics_to_plot:
    df_granularity_results.pivot_table(
        index='txm_event',
        columns='granularity',
        values=_metric,
    ).plot(ylabel=_ylabel)