## Imports

In [2]:
import pandas as pd
from typing import *
from datetime import datetime, timedelta

## Functions

In [3]:
def to_dates(df: pd.DataFrame, cols: List[str]) -> pd.DataFrame:
    """Gets dataframe and soecific columns and convert the columns type to DateTime"""
    for col in cols:
        df[col] = pd.to_datetime(df[col], format='mixed')
    return df

## Patients table

In [4]:
patients_df = pd.read_csv('data/patients.csv')
patients_df = to_dates(patients_df, ['admission_time', 'discharge_time', 'birth_date', 'death_date'])

In [5]:
patients_df = patients_df.loc[patients_df['patient_id'].notna()]

> patients table statistics: there are 9,707 unique patients and total of 16,764 cases.
> After removing rows with patients ID equals to NaN there are 9,706 unique patients IDs.

In [6]:
patients_df['case_no'].nunique()

16764

## Labs

In [7]:
labs1_df = pd.read_csv('data/labs1.csv')
labs2_df = pd.read_csv('data/labs2.csv')
labs3_df = pd.read_csv('data/labs3.csv')

  labs1_df = pd.read_csv('data/labs1.csv')
  labs2_df = pd.read_csv('data/labs2.csv')


In [8]:
labs_df = pd.concat([labs1_df, labs2_df])
labs_df = pd.concat([labs_df, labs3_df])
labs_df = to_dates(labs_df, ['LAB_TIME', 'LAB_DATE'])

## defining the target

> Merge labs table with patients table to calculate creatinine after admission.
> After merging, there are 16,764 cases numbers, 9,706 patients. When reducing the table only for creatinine-BL, there are 16,709 cases, 9,684 cases.

In [10]:
labs_patients_df = pd.merge(labs_df, patients_df, on=['case_no', 'patient_id'], how='right')

In [12]:
creatinine_df = labs_patients_df.loc[labs_patients_df['LAB_EXAMINATION_NAME'] == 'Creatinine-BL']

In [None]:
times_df = creatinine_df[['case_no', 'patient_id', 'admission_time', 'LAB_TIME', 'LAB_DATE', 'LAB_HOUR', 'exam_numeric_result']]

In [None]:
times_df['difference'] = times_df['LAB_TIME'] - times_df['admission_time']
times_df = times_df.loc[times_df['difference'].dt.components['days'] < 1]

In [18]:
# taking the first lab for each case number after the admission
first_creatinine_lab = times_df.groupby('case_no')['LAB_TIME'].first().reset_index()

In [None]:
first_creatinine_res = pd.merge(first_creatinine_lab, times_df, on=['case_no', 'LAB_TIME'], how='left')
first_creatinine_res.drop_duplicates(subset='case_no', keep='first', inplace=True)
first_creatinine_res.rename(columns={'exam_numeric_result': 'first_creatinine_test'}, inplace=True)
first_creatinine_res = first_creatinine_res[['case_no', 'first_creatinine_test']]

In [None]:
creatinine_tests_df = pd.merge(first_creatinine_res, creatinine_df, on='case_no', how='left')

In [None]:
creatinine_tests_df['creatinine_diff'] = creatinine_tests_df['exam_numeric_result'] - creatinine_tests_df['first_creatinine_test']

In [23]:
creatinine_tests_df = creatinine_tests_df.loc[creatinine_tests_df['creatinine_diff'].notna()]

In [25]:
aki_patients = creatinine_tests_df.loc[creatinine_tests_df['creatinine_diff'] >= 0.3]
no_aki_patients = creatinine_tests_df.loc[~creatinine_tests_df['case_no'].isin(aki_patients['case_no'])]

In [26]:
aki_patients['case_no'].nunique()

4608

In [27]:
no_aki_patients['case_no'].nunique()

11878

In [28]:
aki_admission_df = pd.merge(aki_patients, patients_df[['case_no', 'admission_time']], on=['case_no', 'admission_time'], how='left')[['case_no', 'admission_time']].drop_duplicates()
no_aki_admission_df = pd.merge(no_aki_patients, patients_df[['case_no', 'admission_time']], on=['case_no', 'admission_time'], how='left')[['case_no', 'admission_time']].drop_duplicates()

In [29]:
no_aki_admission_df.to_csv('labels/no_aki_admission.csv')
aki_admission_df.to_csv('labels/aki_admission.csv')