In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import os

In [None]:
# Load the per-patient table for the IBD cohort and preview its first rows.
data_dir, csv_name = 'data', 'patients_ibd.csv'
file_path = os.path.join(data_dir, csv_name)

patients_ibd = pd.read_csv(filepath_or_buffer=file_path)
patients_ibd.head(n=5)

In [None]:
# Mortality: split the cohort on whether `dod` is filled in.
# Rows with a non-null `dod` are treated as dead, all others as alive.
has_dod = patients_ibd['dod'].notna()
dead_patients = patients_ibd[has_dod]
live_patients = patients_ibd[~has_dod]

n_live = len(live_patients)
n_dead = len(dead_patients)
print('Number of live patients:', n_live)
print('Number of dead patients:', n_dead)

# Fraction of the whole cohort that has a recorded `dod`.
mortality_rate = n_dead / (n_live + n_dead)
print('Mortality rate:', round(mortality_rate, 2))

In [None]:
def describe_each_attr(df: pd.DataFrame, not_needed=['subject_id', 'anchor_age', 'anchor_year', 'dod']) -> None:
    """Print the value counts of every column of ``df`` that is not excluded.

    Args:
        df: Frame whose columns are summarised.
        not_needed: Column names to skip. The default list is only read,
            never modified, so sharing it between calls is harmless.

    Returns:
        None. Each summary is written to stdout, followed by a blank line.
    """
    selected = [name for name in df.columns if name not in not_needed]
    for name in selected:
        # The doubled newline reproduces the blank separator line between columns.
        print(df[name].value_counts(), end='\n\n')

# Summarise the columns of the full cohort (default exclusions apply).
describe_each_attr(df=patients_ibd)

In [None]:
# How long did the patients who eventually died live after their anchor year?
# Both columns are parsed to timestamps; a bare year parses to January 1st of
# that year, so the difference is a Timedelta counted from the start of the
# anchor year.
death_date = pd.to_datetime(dead_patients.dod, format='%Y-%m-%d')
anchor_start = pd.to_datetime(dead_patients.anchor_year, format='%Y')
days_since_anchor = death_date - anchor_start

# `dead_patients` is a boolean-indexed slice of `patients_ibd`, so writing a
# column into it with `.loc` raises SettingWithCopyWarning. `assign` returns a
# new frame instead, and re-running this cell stays idempotent.
dead_patients = dead_patients.assign(days_since_anchor=days_since_anchor)

In [None]:
# Helper for the group-wise means reported below.
def groupby_a_from_b(df, a, b):
    """Return the mean of column ``b`` within each group of column ``a``.

    Only the two named columns are kept before grouping, so every other
    column of ``df`` is ignored.

    Args:
        df: Source frame containing both columns.
        a: Name of the column to group by; it becomes the result's index.
        b: Name of the column to average.

    Returns:
        A DataFrame indexed by the distinct values of ``a`` with the single
        column ``b`` holding the per-group mean.
    """
    pair = df[[a, b]]
    grouped = pair.groupby(by=a)
    return grouped.mean()

# Mean `days_since_anchor` of the dead patients, broken down by each attribute in turn.
for group_col in ('gender', 'anchor_year_group', 'anchor_age'):
    print(groupby_a_from_b(dead_patients, group_col, 'days_since_anchor'))

In [None]:
# Bar chart: mean time from anchor year to death for each anchor age.
groupby_age_from_days_since_anchor = groupby_a_from_b(dead_patients, 'anchor_age', 'days_since_anchor')
x = groupby_age_from_days_since_anchor.index
# The group means are Timedeltas; `.dt.days` keeps their whole-day component.
y = groupby_age_from_days_since_anchor.days_since_anchor.dt.days

# Font settings go BEFORE the figure is built: text picks up rcParams when it
# is created, so setting them afterwards would only affect a later re-run of
# the cell and make the first run after a kernel restart look different.
# `font.serif` takes font family names, not file paths, so the machine-specific
# Windows path is replaced by the family name.
plt.rcParams['font.family'] = 'Times New Roman'
plt.rcParams['font.serif'] = ['Times New Roman']

fig, ax = plt.subplots(figsize=(10, 5), dpi=300)
ax.bar(x, y, width=1)
ax.set_title('Mean time from anchor year to death by anchor age')
ax.set_ylabel('mean days since anchor', fontsize=12)
ax.set_xlabel('anchor age', fontsize=12)
plt.show()