## Imports

In [None]:
import pandas as pd
import os
from typing import List
import re
import numpy as np
import sys
import math

In [None]:
# Put the directory two levels up at the front of the module search path.
# The path is relative, so it is resolved against the current working directory.
# NOTE(review): presumably this is where the `local_testing_utilities` package
# lives, which is why it has to run before the imports below - confirm.
sys.path.insert(0, "../..")

# Project-local parsers used by the "Load data" cells.
from local_testing_utilities.notebook_utils.pairing_data import parse_pairing_data
from local_testing_utilities.notebook_utils.survival_data import parse_survival_data

## Load data

In [None]:
# Pairing data for all patients, built by the project helper from the processed
# KDP directory and the patient list that carries recipient IDs.
# `remove_single_donors=True` is forwarded as-is; its exact effect is defined
# in `parse_pairing_data`.
df_all_patients = parse_pairing_data(
    'data/KDP-processed',
    'data/patients_list_recipientID.csv',
    remove_single_donors=True,
)

In [None]:
# Full survival table as returned by the project helper.
df_survival = parse_survival_data('data/LD_kidney_survival.csv')

# Narrow view holding only the columns the rest of the notebook joins on or inspects.
df_survival_summary = df_survival[
    [
        'RecipientID',
        'StartDate',
        'LastVisitDate',
        'EndDate',
        'delay',
        'EndReason',
        'NoVisits',
        'donor_typization',
        'recipient_typization',
    ]
]

In [None]:
# Raw patient list (same CSV that is handed to `parse_pairing_data`), read
# without any further processing.
df_patients_with_recipient_id = pd.read_csv(
    filepath_or_buffer='data/patients_list_recipientID.csv',
)

### Patients that were seen in a given event but were not seen in the next event

In [None]:
# A row is a patient's "last event" when the same recipient_name does not occur
# in the directly following event (txm_event + 1).
#
# All (txm_event, recipient_name) pairs are collected once, so each row needs
# only one set lookup. The previous nested row-wise `apply` rescanned the whole
# frame for every row, i.e. it was quadratic in the number of rows.
seen_event_name_pairs = set(
    zip(df_all_patients['txm_event'], df_all_patients['recipient_name'])
)

is_last_event = [
    # Missing values never compare equal to anything, so rows with a missing
    # event or name can never have a "next event" match and are always kept.
    pd.isna(txm_event)
    or pd.isna(recipient_name)
    or (txm_event + 1, recipient_name) not in seen_event_name_pairs
    for txm_event, recipient_name in zip(
        df_all_patients['txm_event'], df_all_patients['recipient_name']
    )
]

# Boolean list is positionally aligned with the rows of df_all_patients.
df_patients_last_event = df_all_patients.loc[is_last_event]

print(len(df_patients_last_event.index))
df_patients_last_event.head()

How many patients were seen in a given event but were not seen in the next event

In [None]:
# Number of last-event rows (non-null recipient_name) per txm_event, as a line plot.
df_patients_last_event.groupby('txm_event')['recipient_name'].count().plot()

### Join patients last events with survival data

In [None]:
# Attach the survival summary to every last-event row by matching
# `recipient_id` against `RecipientID`. `join` defaults to a left join, so rows
# without a survival record are kept and get missing values in the survival
# columns. Survival columns whose names clash with existing ones get the
# `_surv` suffix.
df_patients_last_event_with_surv = df_patients_last_event.join(
    df_survival_summary.set_index('RecipientID'),
    on='recipient_id',
    rsuffix='_surv',
)
df_patients_last_event_with_surv

# Per txm_event, compare two non-null counts:
#   'ended'                       - rows with a recipient_name (all last-event patients)
#   'ended with transplant found' - rows with a `delay` value, i.e. rows for
#                                   which the survival join found a record
# Selecting and renaming the two count columns replaces the former row-wise
# `apply` that rebuilt a Series for every group; the resulting frame is the same.
(
    df_patients_last_event_with_surv
    .groupby('txm_event')[['recipient_name', 'delay']]
    .count()
    .rename(columns={
        'recipient_name': 'ended',
        'delay': 'ended with transplant found',
    })
    .plot(
        title='Number of patients that were lastly seen in the given txm event versus those that were mapped to transplant date',
        figsize=(12, 5),
    )
)

In [None]:
# One dot per last-event row: txm_event on the x axis, the joined `StartDate`
# on the y axis. Rows without a survival record have no StartDate to plot.
# The title wording was corrected ("was lastly seen in th given" -> "were last
# seen in the given").
df_patients_last_event_with_surv.plot(
    x='txm_event',
    y='StartDate',
    style=".",
    figsize=(10, 7),
    title='Transplant dates for patients that were last seen in the given txm event',
)

### Join last seen patients with their summary

In [None]:
# For every recipient_id, collapse all txm_event values the patient appears in
# into one comma-separated string (in the row order of df_all_patients).
df_patient_to_event_list = (
    df_all_patients
    .groupby(['recipient_id'])[['txm_event']]
    .agg(lambda events: ",".join(map(str, events)))
)

In [None]:
# Survival summary indexed by RecipientID so it can be joined on `recipient_id`.
df_patient_to_survival_summary = df_survival_summary.set_index(keys='RecipientID')

In [None]:
# Start from (txm_event, recipient_id) of each last-event row and enrich it,
# always matching on `recipient_id` with left joins:
df_patients_last_event_with_info = (
    df_patients_last_event[['txm_event', 'recipient_id']]
    # 1) columns of the raw patient list
    .join(df_patients_with_recipient_id.set_index('recipient_id'), on='recipient_id')
    # 2) comma-separated list of all events of the patient (clashing names get "_1")
    .join(df_patient_to_event_list, on='recipient_id', rsuffix="_1")
    # 3) survival summary (clashing names get "_2")
    .join(df_patient_to_survival_summary, on='recipient_id', rsuffix="_2")
)

List patients that were not found in the next event but were not transplanted:

In [None]:
# Keep only rows whose StartDate is missing, i.e. rows for which the survival
# join found no record.
df_patients_last_event_with_info.loc[
    df_patients_last_event_with_info['StartDate'].isna()
]

Namátkově jsem porovnal pacienty, kteří v párování skončili, s xls sheety s konečnými variantami. Pro každý event ti pacienti, pro které jsme našli survival data (byli transplantováni), odpovídají těm ze sheetu s konečnými variantami plus pár transplantacím s originálním donorem.