## Imports

In [None]:
import pandas as pd
import os
from typing import List
import re
import numpy as np
import sys
import math
import time
import logging

In [None]:
# Put the directory two levels up at the front of the module search path.
# NOTE(review): presumably this is the repository root that contains the
# `local_testing_utilities` package imported below — confirm; the relative path
# also assumes the kernel's working directory is the notebook's own directory.
sys.path.insert(0, "../..")

from local_testing_utilities.notebook_utils.pairing_data import parse_pairing_data
from local_testing_utilities.notebook_utils.survival_data import parse_survival_data

## Load data

In [None]:
# Parse the pairing data into one frame; `remove_single_donors` is switched off.
df_all_patients = parse_pairing_data(
    'data/KDP-processed',
    'data/patients_list_recipientID.csv',
    remove_single_donors=False,
)

In [None]:
# Parse the survival file, then keep a fixed subset of its columns for the joins below.
df_survival = parse_survival_data('data/LD_kidney_survival.csv')
df_survival_summary = df_survival[
    [
        'RecipientID',
        'StartDate',
        'LastVisitDate',
        'EndDate',
        'delay',
        'EndReason',
        'NoVisits',
        'donor_typization',
        'recipient_typization',
    ]
]

In [None]:
# Raw patient list; it is indexed by `recipient_id` when joined further below.
df_patients_with_recipient_id = pd.read_csv(
    filepath_or_buffer='data/patients_list_recipientID.csv',
)

### Patients that were seen in a given event but were not seen in the next event

#### Recipients

In [None]:
# A row is a recipient's "last event" when no row with the same recipient_name
# exists in the directly following event (txm_event + 1). Rows with an empty
# recipient_name are dropped.
#
# All (txm_event, recipient_name) pairs are collected into a set once, so each row
# needs a single membership test instead of a scan over the whole frame
# (linear instead of quadratic in the number of rows).
#
# Rows with a missing event or name are left out of the set: a missing (NaN) value
# never compares equal to anything, so such rows can never be "seen in the next event".
known_recipient_rows = df_all_patients[['txm_event', 'recipient_name']].dropna()
recipient_event_pairs = set(
    zip(known_recipient_rows.txm_event, known_recipient_rows.recipient_name)
)

recipient_seen_in_next_event = pd.Series(
    [
        (event + 1, name) in recipient_event_pairs
        for event, name in zip(df_all_patients.txm_event, df_all_patients.recipient_name)
    ],
    index=df_all_patients.index,
    dtype=bool,
)

df_recipients_last_event = df_all_patients.loc[
    ~recipient_seen_in_next_event & df_all_patients.recipient_name.ne('')
]
print(len(df_recipients_last_event.index))
#df_recipients_last_event.head()

#### Donors

In [None]:
# A row is a donor's "last event" when no row with the same donor_name exists in
# the directly following event (txm_event + 1).
#
# All (txm_event, donor_name) pairs are collected into a set once, so each row
# needs a single membership test instead of a scan over the whole frame
# (linear instead of quadratic in the number of rows).
#
# Rows with a missing event or name are left out of the set: a missing (NaN) value
# never compares equal to anything, so such rows can never be "seen in the next event".
known_donor_rows = df_all_patients[['txm_event', 'donor_name']].dropna()
donor_event_pairs = set(zip(known_donor_rows.txm_event, known_donor_rows.donor_name))

donor_seen_in_next_event = pd.Series(
    [
        (event + 1, name) in donor_event_pairs
        for event, name in zip(df_all_patients.txm_event, df_all_patients.donor_name)
    ],
    index=df_all_patients.index,
    dtype=bool,
)

df_donors_last_event = df_all_patients.loc[~donor_seen_in_next_event]
print(len(df_donors_last_event.index))
#df_donors_last_event.head()

How many patients were seen in a given event but were not seen in the next event

In [None]:
# Per-event number of non-null recipient and donor names among the "last event" rows.
# The donor counts are left-joined onto the recipient counts, so only events that
# appear in the recipient counts are plotted.
(
    df_recipients_last_event
    .groupby('txm_event')[['recipient_name']]
    .count()
    .join(
        df_donors_last_event
        .groupby('txm_event')[['donor_name']]
        .count()
    )
    .plot()
)

## Donors that ended in given txm event

In [None]:
# Event number inspected by the cells below; their `.loc` filters compare the
# `txm_event` column against this notebook-level value.
txm_event = 24

In [None]:
# Donors whose last event is `txm_event`, with the first six characters of the
# donor typization exposed as a separate column.
(
    df_donors_last_event
    .loc[lambda donors: donors.txm_event == txm_event]
    [['txm_event', 'donor_name', 'donor_typization', 'recipient_name']]
    .assign(typization_substr=lambda donors: donors.donor_typization.str.slice(stop=6))
)

### Join patients last events with survival data

#### Recipients

In [None]:
# Attach the survival summary to every "last event" recipient row (left join on the
# recipient id); survival columns that clash by name receive the `_surv` suffix.
df_recipients_last_event_with_surv = df_recipients_last_event.join(
    df_survival_summary.set_index('RecipientID'),
    on='recipient_id',
    rsuffix='_surv',
)
#df_recipients_last_event_with_surv

#### Donors

##### Transplants in the given txm event

In [None]:
# Recipients last seen in `txm_event` that have survival data (non-null `delay`),
# matched to the donors that ended in the same event via the first six characters
# of the donor typization.
#
# The donor lookup is built inline. The cell used to join against a variable `a`
# that no visible cell assigns, so it raised NameError on a fresh kernel.
# NOTE(review): the lookup mirrors the one used by the CSV export cell below —
# confirm this is what `a` originally held.
(
    df_recipients_last_event_with_surv
    .loc[lambda df: df.delay.notnull()]
    .loc[lambda df: df.txm_event == txm_event]
    [['txm_event', 'donor_typization_surv', 'recipient_id']]
    .assign(typization_substr=lambda df: df.donor_typization_surv.str[:6])
    .join(
        df_donors_last_event
        [['txm_event', 'donor_name', 'donor_typization', 'recipient_name']]
        .assign(typization_substr=lambda df: df.donor_typization.str[:6])
        .set_index(['txm_event', 'typization_substr']),
        on=['txm_event', 'typization_substr'],
    )
)

In [None]:
# Same matching as for the single event, but across all events: recipients with
# survival data are joined to ended donors on (txm_event, first six characters of
# the donor typization) and the result is written out as a draft CSV.
(
    df_recipients_last_event_with_surv
    .loc[lambda recipients: recipients.delay.notnull()]
    [['txm_event', 'donor_typization_surv', 'recipient_id']]
    .assign(
        typization_substr=lambda recipients: recipients.donor_typization_surv.str.slice(stop=6)
    )
    .join(
        (
            df_donors_last_event
            [['txm_event', 'donor_name', 'donor_typization', 'recipient_name']]
            .assign(
                typization_substr=lambda donors: donors.donor_typization.str.slice(stop=6)
            )
            .set_index(['txm_event', 'typization_substr'])
        ),
        on=['txm_event', 'typization_substr'],
    )
    .to_csv('data/transplanted_donors_draft.csv')
)

I modified the output by hand, resulting in `transplanted_donors.xlsx`, which contains the mapping from each donor to its transplant.

In [None]:
# Load the donor-to-transplant mapping from the Excel file and preview its first rows.
df_transplanted_donors = pd.read_excel(
    io='data/transplanted_donors.xlsx',
    index_col=None,
)
df_transplanted_donors.head(5)

In [None]:
# Two left joins: first look up each donor's `target_recipient_id` by donor name,
# then attach that recipient's survival summary (clashing columns get `_surv`).
df_donors_last_event_with_surv = (
    df_donors_last_event
    .join(
        df_transplanted_donors.set_index('donor_name')['target_recipient_id'],
        on='donor_name',
    )
    .join(
        df_survival_summary.set_index('RecipientID'),
        on='target_recipient_id',
        rsuffix='_surv',
    )
)
#df_donors_last_event_with_surv.head()

#### Plot

In [None]:
# Per-event non-null counts for recipients and donors (donor columns carry the
# `_donors` suffix after the join). A non-null `delay` marks a row that was matched
# to survival data, so name count minus delay count gives the unmatched rows.
(
    df_recipients_last_event_with_surv
    .groupby('txm_event')
    .count()
    .join(
        df_donors_last_event_with_surv.groupby('txm_event').count(),
        rsuffix='_donors',
    )
    .apply(
        lambda counts: pd.Series(
            {
                'Recipients ended': counts['recipient_name'],
                'Recipients ended with transplant found': counts['delay'],
                'Recipients ended without transplant': counts['recipient_name'] - counts['delay'],
                'Donors ended': counts['donor_name_donors'],
                'Donors ended with transplant found': counts['delay_donors'],
                'Donors ended without transplant': counts['donor_name_donors'] - counts['delay_donors'],
            }
        ),
        axis=1,
    )
    .plot(
        style=['b-', 'g-', 'r-', 'b--', 'g--', 'r--'],
        title='Number of patients that were lastly seen in the given txm event versus those that were mapped to transplant date',
        figsize=(14, 7),
    )
)

In [None]:
# One dot per row: the event in which the patient was last seen (x) against the
# StartDate taken from the survival data (y) — recipients first, then donors.
df_recipients_last_event_with_surv.plot(
    x='txm_event',
    y='StartDate',
    style=".",
    figsize=(10, 4),
    title='Transplant dates for recipients that was lastly seen in th given txm event',
)
df_donors_last_event_with_surv.plot(
    x='txm_event',
    y='StartDate',
    style=".",
    figsize=(10, 4),
    title='Transplant dates for donors that was lastly seen in th given txm event',
)

### Join last seen patients with their summary

In [None]:
# For every recipient id, a comma-separated string of the events it appears in.
df_recipient_to_event_list = (
    df_all_patients
    .groupby(['recipient_id'])[['txm_event']]
    .agg(lambda events: ",".join(map(str, events)))
)

#df_recipient_to_event_list.reset_index().loc[lambda df: df.recipient_id == 1179062.0]

In [None]:
# For every donor name, a comma-separated string of the events it appears in.
df_donor_to_event_list = (
    df_all_patients
    .groupby(['donor_name'])[['txm_event']]
    .agg(lambda events: ",".join(map(str, events)))
)

# Spot check for one donor (name replaced by a placeholder to keep personal data out of the notebook):
#df_donor_to_event_list.reset_index().loc[lambda df: df.donor_name == "<donor name>"]

In [None]:
#df_recipient_to_event_list

In [None]:
#df_recipients_last_event_with_surv[['txm_event', 'recipient_id', 'delay']]

In [None]:
# Survival summary keyed by recipient id, ready to be joined on `recipient_id`.
df_patient_to_survival_summary = df_survival_summary.set_index(keys='RecipientID')

In [None]:
# Enrich each "last event" recipient row, always left-joining on `recipient_id`:
#   1. the patient list entry,
#   2. the comma-separated list of events the recipient appears in (suffix `_1`),
#   3. the survival summary (suffix `_2`).
df_recipients_last_event_with_info = (
    df_recipients_last_event[['txm_event', 'recipient_id']]
    .join(
        df_patients_with_recipient_id.set_index('recipient_id'),
        on='recipient_id',
    )
    .join(
        df_recipient_to_event_list,
        on='recipient_id',
        rsuffix="_1",
    )
    .join(
        df_patient_to_survival_summary,
        on='recipient_id',
        rsuffix="_2",
    )
)

In [None]:
# Display the enriched frame as the cell output.
# NOTE(review): this renders the whole frame, which may include patient names —
# consider `.head()` and clearing outputs before sharing.
df_recipients_last_event_with_info

Export

In [None]:
#df_recipients_last_event_with_info.\
#    loc[lambda r: r.StartDate.isnull()]\
#    [['txm_event', 'recipient_id', 'recipient_name', 'recipient_year_of_birth', 'txm_event_1']]\
#    .to_csv('data/ended_patients_without_transplant.csv', index=False)

List patients that were not found in next event but were not transplanted:

In [None]:
# df_recipients_last_event_with_info.loc[lambda r: r.StartDate.isnull()]

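Namátkově jsem porovnal pacienty, co v párování skončili, s xls sheety s konečnými variantami. Pro každý event ti pacienti, pro které jsme našli survival data (byli transplantováni), odpovídají těm ze sheetu s konečnými variantami plus pár transplantacím s originálním donorem.

*English: I spot-checked the patients who ended in the pairing against the xls sheets with the final variants. For each event, the patients for whom we found survival data (i.e. who were transplanted) correspond to those in the sheet with the final variants, plus a few transplants with the original donor.*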