In [1]:
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
import seaborn as sns
import datetime
from scipy.stats import mannwhitneyu
from scipy.stats import chisquare
from scipy.interpolate import InterpolatedUnivariateSpline 
from scipy.stats import chi2_contingency

In [2]:
def chi_square_test(data1,data2):
    MIMIC = data1
    EPIC = data2
    all_value = sorted(np.unique(list(MIMIC)+list(EPIC)))
    MIMIC_list = []
    EPIC_list = []
    for value in all_value:
        EPIC_count = list(EPIC).count(value)
        MIMIC_count = list(MIMIC).count(value)
        MIMIC_list.append(MIMIC_count)
        EPIC_list.append(EPIC_count)

    obs = [MIMIC_list,EPIC_list]
    chi2, p, dof, ex = chi2_contingency(obs)
    print(f'Test statistics is {chi2} with degree of freedom as {dof} and p value is {p}')

### Continuous value
https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.mannwhitneyu.html
- using Mann-Whitney U test
- pretransfusion
- value diff
- icu stay time in days 

### Ordinal value
https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.chisquare.html
- using chi-square test tests 
-  blood transfusion volume
- blood transfusion counts
- 24H HB measurements



### pre transfusion 

#### for only first blood transfusion

In [3]:
EPIC_pre_transfusion = np.load('EPIC_first_pre_transfusion.npy')
MIMIC_pre_transfusion = np.load('MIMIC_first_pre_transfusion.npy')
# https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.mannwhitneyu.html
result = mannwhitneyu(MIMIC_pre_transfusion,EPIC_pre_transfusion, use_continuity=False,method="asymptotic")
print(result)

MannwhitneyuResult(statistic=13666.0, pvalue=8.15380726455595e-23)


In [4]:
MIMIC_pre_transfusion_male = np.load('MIMIC_first_pre_transfusion_male.npy')
MIMIC_pre_transfusion_female = np.load('MIMIC_first_pre_transfusion_female.npy')
result = mannwhitneyu(MIMIC_pre_transfusion_male,MIMIC_pre_transfusion_female, use_continuity=False,method="asymptotic")
print(result)

MannwhitneyuResult(statistic=5332.0, pvalue=0.6099233301635223)


In [5]:
EPIC_pre_transfusion_male = np.load('EPIC_first_pre_transfusion_male.npy')
EPIC_pre_transfusion_female = np.load('EPIC_first_pre_transfusion_female.npy')
result = mannwhitneyu(EPIC_pre_transfusion_male,EPIC_pre_transfusion_female, use_continuity=False,method="asymptotic")
print(result)

MannwhitneyuResult(statistic=428.5, pvalue=0.25231490861765615)


In [6]:
result = mannwhitneyu(MIMIC_pre_transfusion_male,EPIC_pre_transfusion_male, use_continuity=False,method="asymptotic")
print(result)

MannwhitneyuResult(statistic=7932.0, pvalue=7.262532455562567e-17)


In [7]:
result = mannwhitneyu(MIMIC_pre_transfusion_female,EPIC_pre_transfusion_female, use_continuity=False,method="asymptotic")
print(result)

MannwhitneyuResult(statistic=775.5, pvalue=1.432289664946833e-07)


### for all transfusion

In [8]:
EPIC_pre_transfusion = np.load('EPIC_pre_transfusion.npy')
MIMIC_pre_transfusion = np.load('MIMIC_pre_transfusion.npy')
# https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.mannwhitneyu.html
result = mannwhitneyu(MIMIC_pre_transfusion,EPIC_pre_transfusion, use_continuity=False,method="asymptotic")
print(result)

MannwhitneyuResult(statistic=84020.5, pvalue=1.4240130545444609e-44)


In [9]:
MIMIC_pre_transfusion_male = np.load('MIMIC_pre_transfusion_male.npy')
MIMIC_pre_transfusion_female = np.load('MIMIC_pre_transfusion_female.npy')
result = mannwhitneyu(MIMIC_pre_transfusion_male,MIMIC_pre_transfusion_female, use_continuity=False,method="asymptotic")
print(result)

MannwhitneyuResult(statistic=24479.0, pvalue=0.814504289266886)


In [10]:
EPIC_pre_transfusion_male = np.load('EPIC_pre_transfusion_male.npy')
EPIC_pre_transfusion_female = np.load('EPIC_pre_transfusion_female.npy')
result = mannwhitneyu(EPIC_pre_transfusion_male,EPIC_pre_transfusion_female, use_continuity=False,method="asymptotic")
print(result)

MannwhitneyuResult(statistic=3719.5, pvalue=0.20588318541663164)


In [11]:
result = mannwhitneyu(MIMIC_pre_transfusion_male,EPIC_pre_transfusion_male, use_continuity=False,method="asymptotic")
print(result)

MannwhitneyuResult(statistic=49043.0, pvalue=1.649227792347509e-33)


In [12]:
result = mannwhitneyu(MIMIC_pre_transfusion_female,EPIC_pre_transfusion_female, use_continuity=False,method="asymptotic")
print(result)

MannwhitneyuResult(statistic=4676.0, pvalue=1.2391430801785419e-12)


### value diff 

In [13]:
EPIC_value_diff = np.load('EPIC_value_diff.npy')
MIMIC_value_diff = np.load('MIMIC_value_diff.npy')
result = mannwhitneyu(MIMIC_value_diff,EPIC_value_diff, use_continuity=False,method="asymptotic")
print(result)

MannwhitneyuResult(statistic=53875.5, pvalue=0.08748466635362102)


In [14]:
MIMIC_value_diff_male = np.load('MIMIC_value_diff_male.npy')
MIMIC_value_diff_female = np.load('MIMIC_value_diff_female.npy')
result = mannwhitneyu(MIMIC_value_diff_male,MIMIC_value_diff_female, use_continuity=False,method="asymptotic")
print(result)

MannwhitneyuResult(statistic=18655.5, pvalue=0.00019795118933318805)


In [15]:
EPIC_value_diff_male = np.load('EPIC_value_diff_male.npy')
EPIC_value_diff_female = np.load('EPIC_value_diff_female.npy')
result = mannwhitneyu(EPIC_value_diff_male,EPIC_value_diff_female, use_continuity=False,method="asymptotic")
print(result)

MannwhitneyuResult(statistic=3026.0, pvalue=0.38771319246498737)


In [16]:
result = mannwhitneyu(MIMIC_value_diff_male,EPIC_value_diff_male, use_continuity=False,method="asymptotic")
print(result)

MannwhitneyuResult(statistic=30558.0, pvalue=0.3970595478967751)


In [17]:
result = mannwhitneyu(MIMIC_value_diff_female,EPIC_value_diff_female, use_continuity=False,method="asymptotic")
print(result)

MannwhitneyuResult(statistic=3306.5, pvalue=0.03384262478384122)


### ICU stay time

In [18]:
MIMIC_icu_stay_time_days = np.load('MIMIC_icu_stay_time_days.npy')
EPIC_icu_stay_time_days = np.load('EPIC_icu_stay_time_days.npy')
result = mannwhitneyu(MIMIC_icu_stay_time_days,EPIC_icu_stay_time_days, use_continuity=False,method="asymptotic")
print(result)

MannwhitneyuResult(statistic=74305.5, pvalue=0.4174034494274639)


In [19]:
MIMIC_icu_stay_time_days_male = np.load('MIMIC_icu_stay_time_days_male.npy')
MIMIC_icu_stay_time_days_female = np.load('MIMIC_icu_stay_time_days_female.npy')
result = mannwhitneyu(MIMIC_icu_stay_time_days_male,MIMIC_icu_stay_time_days_female, use_continuity=False,method="asymptotic")
print(result)

MannwhitneyuResult(statistic=43440.0, pvalue=0.0006363906358060763)


In [20]:
EPIC_icu_stay_time_days_male = np.load('EPIC_icu_stay_time_days_male.npy')
EPIC_icu_stay_time_days_female = np.load('EPIC_icu_stay_time_days_female.npy')
result = mannwhitneyu(EPIC_icu_stay_time_days_male,EPIC_icu_stay_time_days_female, use_continuity=False,method="asymptotic")
print(result)

MannwhitneyuResult(statistic=1760.5, pvalue=0.4013939847344221)


In [21]:
result = mannwhitneyu(MIMIC_icu_stay_time_days_male,EPIC_icu_stay_time_days_male, use_continuity=False,method="asymptotic")
print(result)

MannwhitneyuResult(statistic=65416.5, pvalue=0.14570243513947814)


In [22]:
result = mannwhitneyu(MIMIC_icu_stay_time_days_female,EPIC_icu_stay_time_days_female, use_continuity=False,method="asymptotic")
print(result)

MannwhitneyuResult(statistic=1722.0, pvalue=0.8243207029489524)


### Time between first blood transfusion and pretransfusion

In [23]:
EPIC_time_first_pre_transfusion = np.load('EPIC_time_first_pre_transfusion.npy')
MIMIC_time_first_pre_transfusion = np.load('MIMIC_time_first_pre_transfusion.npy')
result = mannwhitneyu(MIMIC_time_first_pre_transfusion,EPIC_time_first_pre_transfusion, use_continuity=False,method="asymptotic")
print(result)

MannwhitneyuResult(statistic=11377.5, pvalue=9.483276188455221e-10)


In [24]:
MIMIC_time_first_pre_transfusion_male = np.load('MIMIC_time_first_pre_transfusion_male.npy')
MIMIC_time_first_pre_transfusion_female = np.load('MIMIC_time_first_pre_transfusion_female.npy')
result = mannwhitneyu(MIMIC_time_first_pre_transfusion_male,MIMIC_time_first_pre_transfusion_female, use_continuity=False,method="asymptotic")
print(result)

MannwhitneyuResult(statistic=4716.0, pvalue=0.38549952794848363)


In [25]:
EPIC_time_first_pre_transfusion_male = np.load('EPIC_time_first_pre_transfusion_male.npy')
EPIC_time_first_pre_transfusion_female = np.load('EPIC_time_first_pre_transfusion_female.npy')
result = mannwhitneyu(EPIC_time_first_pre_transfusion_male,EPIC_time_first_pre_transfusion_female, use_continuity=False,method="asymptotic")
print(result)

MannwhitneyuResult(statistic=310.0, pvalue=0.45316932757962924)


In [26]:
result = mannwhitneyu(MIMIC_time_first_pre_transfusion_male,EPIC_time_first_pre_transfusion_male, use_continuity=False,method="asymptotic")
print(result)

MannwhitneyuResult(statistic=6714.5, pvalue=6.985801945637139e-08)


In [27]:
result = mannwhitneyu(MIMIC_time_first_pre_transfusion_female,EPIC_time_first_pre_transfusion_female, use_continuity=False,method="asymptotic")
print(result)

MannwhitneyuResult(statistic=596.0, pvalue=0.006861876533284971)


### first blood transfusion volumn

In [28]:
MIMIC_first_blood_transfusion_volumn = np.load('MIMIC_first_blood_transfusion_volumn.npy')
EPIC_first_blood_transfusion_volumn = np.load('EPIC_first_blood_transfusion_volumn.npy')
chi_square_test(MIMIC_first_blood_transfusion_volumn,EPIC_first_blood_transfusion_volumn)

Test statistics is 293.10039787798405 with degree of freedom as 69 and p value is 2.1056045684382066e-29


In [29]:
MIMIC_blood_transfusion_volumn_male = np.load('MIMIC_blood_transfusion_volumn_male.npy')
MIMIC_blood_transfusion_volumn_female = np.load('MIMIC_blood_transfusion_volumn_female.npy')
chi_square_test(MIMIC_blood_transfusion_volumn_male,MIMIC_blood_transfusion_volumn_female)

Test statistics is 43.63381438894149 with degree of freedom as 24 and p value is 0.008412754355844314


In [30]:
EPIC_blood_transfusion_volumn_male = np.load('EPIC_blood_transfusion_volumn_male.npy')
EPIC_blood_transfusion_volumn_female = np.load('EPIC_blood_transfusion_volumn_female.npy')
chi_square_test(EPIC_blood_transfusion_volumn_male,EPIC_blood_transfusion_volumn_female)

Test statistics is 53.954248366013076 with degree of freedom as 51 and p value is 0.3621158753705871


In [31]:
chi_square_test(MIMIC_blood_transfusion_volumn_male,EPIC_blood_transfusion_volumn_male)

Test statistics is 450.9999999999999 with degree of freedom as 61 and p value is 7.272578480645389e-61


In [32]:
chi_square_test(MIMIC_blood_transfusion_volumn_female,EPIC_blood_transfusion_volumn_female)

Test statistics is 129.620720188902 with degree of freedom as 28 and p value is 5.076201246034779e-15


### blood transfusion counts

In [33]:
MIMIC_blood_transfusion_count = np.load('MIMIC_blood_transfusion_count.npy')
EPIC_blood_transfusion_count = np.load('EPIC_blood_transfusion_count.npy')
chi_square_test(MIMIC_blood_transfusion_count,EPIC_blood_transfusion_count)

Test statistics is 156.78857082510612 with degree of freedom as 12 and p value is 2.3673674145266455e-27


In [34]:
MIMIC_blood_transfusion_count_male = np.load('MIMIC_blood_transfusion_count_male.npy')
MIMIC_blood_transfusion_count_female = np.load('MIMIC_blood_transfusion_count_female.npy')
chi_square_test(MIMIC_blood_transfusion_count_male,MIMIC_blood_transfusion_count_female)

Test statistics is 12.61449861363754 with degree of freedom as 7 and p value is 0.08207699495325177


In [35]:
EPIC_blood_transfusion_count_male = np.load('EPIC_blood_transfusion_count_male.npy')
EPIC_blood_transfusion_count_female = np.load('EPIC_blood_transfusion_count_female.npy')
chi_square_test(EPIC_blood_transfusion_count_male,EPIC_blood_transfusion_count_female)

Test statistics is 126.82844308839731 with degree of freedom as 10 and p value is 2.0697128194397147e-22


In [36]:
chi_square_test(MIMIC_blood_transfusion_count_female,EPIC_blood_transfusion_count_male)

Test statistics is 79.05297665019967 with degree of freedom as 10 and p value is 7.695662344797563e-13


In [37]:
chi_square_test(MIMIC_blood_transfusion_count_female,EPIC_blood_transfusion_count_female)

Test statistics is 99.39037910515134 with degree of freedom as 7 and p value is 1.441494078019351e-18


### hemoglobin measurements in 24 H

In [38]:
MIMIC_hemoglobin_24h_Hb_count = np.load('MIMIC_hemoglobin_24h_Hb_count.npy')
EPIC_hemoglobin_24h_Hb_count = np.load('EPIC_hemoglobin_24h_Hb_count.npy')
chi_square_test(MIMIC_hemoglobin_24h_Hb_count,EPIC_hemoglobin_24h_Hb_count)

Test statistics is 407.03276424057464 with degree of freedom as 20 and p value is 7.097911669011485e-74


In [39]:
MIMIC_hemoglobin_24h_Hb_count_male = np.load('MIMIC_hemoglobin_24h_Hb_count_male.npy')
MIMIC_hemoglobin_24h_Hb_count_female = np.load('MIMIC_hemoglobin_24h_Hb_count_female.npy')
chi_square_test(MIMIC_hemoglobin_24h_Hb_count_male,MIMIC_hemoglobin_24h_Hb_count_female)

Test statistics is 17.315421621902072 with degree of freedom as 14 and p value is 0.2397583786939397


In [40]:
EPIC_hemoglobin_24h_Hb_count_male = np.load('EPIC_hemoglobin_24h_Hb_count_male.npy')
EPIC_hemoglobin_24h_Hb_count_female = np.load('EPIC_hemoglobin_24h_Hb_count_female.npy')
chi_square_test(EPIC_hemoglobin_24h_Hb_count_male,EPIC_hemoglobin_24h_Hb_count_female)

Test statistics is 15.523718641469866 with degree of freedom as 19 and p value is 0.6887809001539269


In [41]:
chi_square_test(MIMIC_hemoglobin_24h_Hb_count_male,EPIC_hemoglobin_24h_Hb_count_male)

Test statistics is 338.4010802563972 with degree of freedom as 20 and p value is 1.0878755690068122e-59


In [42]:
chi_square_test(MIMIC_hemoglobin_24h_Hb_count_female,EPIC_hemoglobin_24h_Hb_count_female)

Test statistics is 82.32293659590836 with degree of freedom as 14 and p value is 1.0467952482368525e-11
