In [None]:
import matplotlib.pyplot as plt
import numpy as np
from scipy import interp, stats
import pandas as pd
import os
import datetime
import pytz
import sys
import json
from tqdm import tqdm
import math

from scipy.stats import chi2_contingency
from scipy.stats import chi2, kstest, ranksums
import seaborn as sns

import warnings
warnings.filterwarnings('ignore')

from edm.utils.measures import perf_measure, calculate_output_statistics

pd.set_option('display.max_columns', None)

In [None]:
def print_statistics(df_pos, df_neg):
    """Print a comma-separated comparison table for two outcome cohorts.

    Prints the cohort sizes, a header row, and then one row per characteristic:
    ``median [Q1-Q3]`` plus a Wilcoxon rank-sum p-value for numeric columns, and
    ``count (percent)`` plus an uncorrected chi-square p-value for two-valued
    columns. Each row also reports how many values of the column are missing
    (NaN) in each cohort.

    Args:
        df_pos: DataFrame of visits with outcome = 1.
        df_neg: DataFrame of visits with outcome = 0.

    Both frames must contain every column named in the calls at the bottom of
    this function (Age, Gender, Acuity_*, Admitted, Triage_* and ED_LOS).

    Returns:
        None. All output is written to stdout.
    """
    n_pos = df_pos.shape[0]
    n_neg = df_neg.shape[0]
    n_total = n_pos + n_neg
    # Two empty cohorts would otherwise raise ZeroDivisionError here.
    prevalence = n_pos / n_total if n_total else float("nan")
    print(f"Shape {df_pos.shape} for outcome = 1 {prevalence}")
    print(f"Shape {df_neg.shape} for outcome = 0")
    print(f"Shape {n_total} for total")
    print("---")

    def format_p(pval_raw):
        """Format a p-value to three decimals, collapsing tiny values to '< 0.001'."""
        if pval_raw < 0.001:
            return "< 0.001"
        return f"{pval_raw:.3f}"

    def median_iqr(values):
        """Return 'median [Q1-Q3]' for a numeric Series; quantile() skips NaN."""
        q1, median, q3 = values.quantile([0.25, 0.5, 0.75])
        return f"{median} [{q1}-{q3}]"

    def percent(count, total):
        """Return count/total as a percentage rounded to 2 decimals (NaN if total is 0)."""
        return round(100 * count / total, 2) if total else float("nan")

    def print_numeric(col):
        """Print median [IQR], missing counts and the rank-sum p-value for a numeric column."""
        pos_values, neg_values = df_pos[col], df_neg[col]
        pos_observed, neg_observed = pos_values.dropna(), neg_values.dropna()
        # ranksums does not drop NaN by default, so a single missing value would
        # otherwise turn the p-value into NaN; test the observed values only.
        if len(pos_observed) and len(neg_observed):
            _, pval_raw = ranksums(pos_observed, neg_observed)
        else:
            pval_raw = float("nan")
        print(
            f"{col}, {median_iqr(pos_values)}, {pos_values.isna().sum()}, "
            f"{median_iqr(neg_values)}, {neg_values.isna().sum()}, {format_p(pval_raw)}"
        )

    def print_binary_col(col, val_1, val_2):
        """Print count (percent) rows for both values of a two-valued column.

        Percentages use the number of rows equal to val_1 or val_2 as the
        denominator, so missing or other values are excluded from them.
        """
        pos_col, neg_col = df_pos[col], df_neg[col]
        pos_1 = int((pos_col == val_1).sum())
        pos_2 = int((pos_col == val_2).sum())
        neg_1 = int((neg_col == val_1).sum())
        neg_2 = int((neg_col == val_2).sum())

        # Missing values are counted on the whole column. Counting them after
        # filtering to rows equal to val_1 / val_2 can only ever give zero.
        pos_missing = pos_col.isna().sum()
        neg_missing = neg_col.isna().sum()

        contingency_table = [[pos_2, neg_2], [pos_1, neg_1]]
        try:
            _, pval_raw, _, _ = chi2_contingency(contingency_table, correction=False)
            p = format_p(pval_raw)
        except ValueError:
            # chi2_contingency rejects tables with a zero expected frequency
            # (e.g. a value that occurs in neither cohort).
            p = "n/a"

        pos_total = pos_1 + pos_2
        neg_total = neg_1 + neg_2
        print(f"{col} - {val_1}, {pos_1} ({percent(pos_1, pos_total)}), {pos_missing}, {neg_1} ({percent(neg_1, neg_total)}), {neg_missing}, {p}")
        print(f"{col} - {val_2}, {pos_2} ({percent(pos_2, pos_total)}), {pos_missing}, {neg_2} ({percent(neg_2, neg_total)}), {neg_missing},")

    # ---
    # NOTE(review): the header labels the two groups "Included"/"Excluded", while the
    # rows describe the outcome = 1 and outcome = 0 cohorts -- confirm the intended wording.
    print("Characteristic, Included Statistic, Included Missing, Excluded Statistic, Excluded Missing, p-value")
    print_numeric("Age")
    print_binary_col("Gender", "F", "M")
    print_binary_col("Acuity_high", True, False)
    print_binary_col("Acuity_1", True, False)
    print_binary_col("Acuity_2", True, False)
    print_binary_col("Acuity_3", True, False)
    print_binary_col("Acuity_4", True, False)
    print_binary_col("Acuity_5", True, False)
    print_binary_col("Admitted", True, False)

    # Triage_acuity labels behind the Acuity_* flags:
    #   '1-Resuscitation', '2-Emergent', '3-Urgent', '4-Semi-Urgent', '5-Non-Urgent'
    print_numeric("Triage_SpO2")
    print_numeric("Triage_RR")
    print_numeric("Triage_HR")
    print_numeric("Triage_Temp")
    print_numeric("Triage_SBP")
    print_numeric("Triage_DBP")
    print_numeric("Triage_MAP")
    print_numeric("ED_LOS")


In [None]:
# ED disposition values that count as an admission when deriving the "Admitted" flag.
# Despite the name this is a list; it is only used for membership tests.
admission_set = [
    "Transfer to LPCH/PEC",
    "Admit to Inpatient",
    "Place in Observation",
    "Place in Observation-CDU",
    "Transfer to Psych",
    "Transfer to Outside Facility/Hospital",
    "Decision to Admit",
    "Place in Outpatient Procedure",
    "Send to Labor and Delivery",
]

# 60 Min

## Load Files

In [None]:
# Load the ID splits for the 60-minute task (read-only text mode is open()'s default).
with open("/deep/group/ed-monitor-self-supervised/test_models_v1/final_ptid_splits_noabnormalities_task_all_60min.json") as f:
    data_ids = json.load(f)
    

In [None]:
data_ids.keys()

In [None]:
print(len(data_ids["train_ids"]))
print(len(data_ids["val_ids"]))
print(len(data_ids["test_ids"]))

In [None]:
# Pool the IDs of all three splits into one list, in train / val / test order.
final_ids = [
    *data_ids["train_ids"],
    *data_ids["val_ids"],
    *data_ids["test_ids"],
]
print(len(final_ids))

In [None]:
# Read the 60-minute label table and keep only the rows whose CSN is in the pooled split IDs.
df_labels = pd.read_csv(
    "/deep/group/ed-monitor-self-supervised/test_models_v1/final_60min_labels_053022.csv"
).loc[lambda labels: labels["CSN"].isin(final_ids)]
print(df_labels.shape)
df_labels.head(2)

In [None]:
# Label rows with HR above 110 define the tachycardia-positive visits; keep their CSNs as a set.
df_tachy = df_labels.loc[df_labels["HR"].gt(110)]
print(df_tachy.shape)
df_tachy_pos = set(df_tachy["CSN"].tolist())

In [None]:
# Label rows with SPO2 below 90 define the hypoxia-positive visits; keep their CSNs as a set.
df_hypoxic = df_labels.loc[df_labels["SPO2"].lt(90)]
print(df_hypoxic.shape)
df_hypoxic_pos = set(df_hypoxic["CSN"].tolist())
print(len(df_hypoxic_pos))

In [None]:
# Label rows with MAP below 65 define the hypotension-positive visits; keep their CSNs as a set.
df_hypotensive = df_labels.loc[df_labels["MAP"].lt(65)]
print(df_hypotensive.shape)
df_hypotensive_pos = set(df_hypotensive["CSN"].tolist())
print(len(df_hypotensive_pos))

In [None]:
# Read the visit-level table and keep only the visits whose CSN is in the pooled split IDs.
df = pd.read_csv(
    "/deep/group/ed-monitor-self-supervised/v4/visits_ssl_2022_05_23.csv"
).loc[lambda visits: visits["CSN"].isin(final_ids)]
print(df.shape)
df.head(3)

In [None]:
df.groupby(["Triage_acuity"]).describe()

In [None]:
# High acuity = triage level "1-Resuscitation" or "2-Emergent".
df["Acuity_high"] = df["Triage_acuity"].isin(["1-Resuscitation", "2-Emergent"])

In [None]:
# One boolean indicator column per triage acuity level.
df["Acuity_1"] = df["Triage_acuity"].eq("1-Resuscitation")
df["Acuity_2"] = df["Triage_acuity"].eq("2-Emergent")
df["Acuity_3"] = df["Triage_acuity"].eq("3-Urgent")
df["Acuity_4"] = df["Triage_acuity"].eq("4-Semi-Urgent")
df["Acuity_5"] = df["Triage_acuity"].eq("5-Non-Urgent")

In [None]:
df["Acuity_high"]

In [None]:
set(df["Triage_acuity"].tolist())

In [None]:
# A visit counts as admitted when its ED disposition is one of the admission_set values.
# Series.isin does the membership test in one vectorised call instead of a Python-level loop.
df["Admitted"] = df["ED_dispo"].isin(admission_set)
df["Admitted"]

In [None]:
# Flag each visit (1/0) by whether its CSN is in the corresponding positive-label set.
# Series.isin replaces the row-wise DataFrame.apply(axis=1): same flags, no per-row Python
# call, and it still yields a column when the frame is empty.
df["outcome_tachycardic"] = df["CSN"].isin(df_tachy_pos).astype(int)
df["outcome_hypotensive"] = df["CSN"].isin(df_hypotensive_pos).astype(int)
df["outcome_hypoxic"] = df["CSN"].isin(df_hypoxic_pos).astype(int)
df.head(3)

In [None]:
# Add the two offset columns so the alignment offset is expressed relative to arrival
# (interpretation taken from the column names).
df["Align_from_arrival"] = df["Arrival_to_roomed"].add(df["Align_from_roomed"])
df.head(1)

## Statistics

In [None]:
# CI difference in populations
# https://www.dummies.com/education/math/statistics/creating-a-confidence-interval-for-the-difference-of-two-means-with-known-standard-deviations/

def diff_in_cols(df_pos, df_neg, col, center="median"):
    """Print the difference in `col` between two cohorts with an approximate 95% interval.

    The point estimate is ``center(df_pos[col]) - center(df_neg[col])`` and the
    half-width is ``1.96 * sqrt(s_pos**2 / n_pos + s_neg**2 / n_neg)``, where ``s`` is
    the sample standard deviation and ``n`` the number of non-missing values.

    Args:
        df_pos: DataFrame supplying the first (minuend) cohort.
        df_neg: DataFrame supplying the second (subtrahend) cohort.
        col: Name of a numeric column present in both frames.
        center: "median" (default, the historical behaviour) or "mean".

    Raises:
        ValueError: If `center` is neither "median" nor "mean".

    Note:
        The half-width is the normal-approximation standard error of a difference
        of *means*. Pass ``center="mean"`` to get an interval that matches its
        point estimate; with the default, the interval is only a rough guide.
    """
    if center not in ("median", "mean"):
        raise ValueError(f"center must be 'median' or 'mean', got {center!r}")

    pos_values = df_pos[col].dropna()
    neg_values = df_neg[col].dropna()
    # Use the number of observed values, not the row count: std() already ignores
    # NaN, so dividing by the row count would make the interval too narrow.
    n_pos, n_neg = len(pos_values), len(neg_values)

    if n_pos == 0 or n_neg == 0:
        ci_diff = float("nan")
    else:
        ci_diff = 1.96 * math.sqrt((pos_values.std() ** 2) / n_pos + (neg_values.std() ** 2) / n_neg)

    if center == "mean":
        diff = float(pos_values.mean() - neg_values.mean())
    else:
        diff = float(pos_values.median() - neg_values.median())
    print(f"{col} difference = {round(diff, 3)} [{round(diff - ci_diff, 3)}-{round(diff + ci_diff, 3)}]")


### Tachycardic

In [None]:
# Partition the visits on the tachycardia outcome flag and print the cohort comparison table.
df_pos = df.loc[df["outcome_tachycardic"].eq(1)]
df_neg = df.loc[df["outcome_tachycardic"].eq(0)]
print_statistics(df_pos, df_neg)

In [None]:
# The cohorts are passed in reverse order here, so the printed difference is
# outcome-0 minus outcome-1 (unlike the other columns in this section).
diff_in_cols(df_neg, df_pos, "Age")

In [None]:
diff_in_cols(df_pos, df_neg, "ED_LOS")

In [None]:
diff_in_cols(df_pos, df_neg, "Triage_HR")

In [None]:
diff_in_cols(df_pos, df_neg, "Triage_MAP")

In [None]:
diff_in_cols(df_pos, df_neg, "Triage_RR")

### Hypoxic

In [None]:
# Partition the visits on the hypoxia outcome flag and print the cohort comparison table.
df_pos = df.loc[df["outcome_hypoxic"].eq(1)]
df_neg = df.loc[df["outcome_hypoxic"].eq(0)]
print_statistics(df_pos, df_neg)

In [None]:
diff_in_cols(df_pos, df_neg, "Age")

In [None]:
diff_in_cols(df_pos, df_neg, "ED_LOS")

In [None]:
diff_in_cols(df_pos, df_neg, "Triage_HR")

In [None]:
# The cohorts are passed in reverse order here, so the printed difference is
# outcome-0 minus outcome-1 (unlike the other columns in this section).
diff_in_cols(df_neg, df_pos, "Triage_SpO2")

In [None]:
diff_in_cols(df_pos, df_neg, "Triage_MAP")

In [None]:
diff_in_cols(df_pos, df_neg, "Triage_RR")

### Hypotensive

In [None]:
# Partition the visits on the hypotension outcome flag and print the cohort comparison table.
df_pos = df.loc[df["outcome_hypotensive"].eq(1)]
df_neg = df.loc[df["outcome_hypotensive"].eq(0)]
print_statistics(df_pos, df_neg)

In [None]:
diff_in_cols(df_pos, df_neg, "ED_LOS")

In [None]:
diff_in_cols(df_pos, df_neg, "Triage_HR")

In [None]:
# The cohorts are passed in reverse order here, so the printed difference is
# outcome-0 minus outcome-1 (unlike the other columns in this section).
diff_in_cols(df_neg, df_pos, "Triage_MAP")

In [None]:
diff_in_cols(df_pos, df_neg, "Triage_RR")

# 90 Min

## Load Files

In [None]:
# Load the ID splits for the 90-minute task (read-only text mode is open()'s default).
with open("/deep/group/ed-monitor-self-supervised/test_models_v1/final_ptid_splits_noabnormalities_task_all_90min.json") as f:
    data_ids = json.load(f)
    

In [None]:
data_ids.keys()

In [None]:
print(len(data_ids["train_ids"]))
print(len(data_ids["val_ids"]))
print(len(data_ids["test_ids"]))

In [None]:
# Pool the IDs of all three splits into one list, in train / val / test order.
final_ids = [
    *data_ids["train_ids"],
    *data_ids["val_ids"],
    *data_ids["test_ids"],
]
print(len(final_ids))

In [None]:
# Read the 90-minute label table and keep only the rows whose CSN is in the pooled split IDs.
df_labels = pd.read_csv(
    "/deep/group/ed-monitor-self-supervised/test_models_v1/final_90min_labels_053022.csv"
).loc[lambda labels: labels["CSN"].isin(final_ids)]
print(df_labels.shape)
df_labels.head(2)

In [None]:
# Read the 60-minute label table restricted to the 90-minute cohort's CSNs.
# NOTE(review): in the visible notebook, df_labels_60 is referenced only by the next cell.
df_labels_60 = pd.read_csv(
    "/deep/group/ed-monitor-self-supervised/test_models_v1/final_60min_labels_053022.csv"
).loc[lambda labels: labels["CSN"].isin(final_ids)]
print(df_labels_60.shape)
df_labels_60.head(2)

In [None]:
# NOTE(review): df_tachy and df_tachy_pos are built here from the 60-minute labels, but the
# next cell reassigns both from the 90-minute labels. When the cells run in order, this cell
# therefore only contributes the printed shape -- confirm whether it can be dropped.
df_tachy = df_labels_60[df_labels_60["HR"] > 110]
print(df_tachy.shape)
df_tachy_pos = set(df_tachy["CSN"].tolist())

In [None]:
# Label rows with HR above 110 define the tachycardia-positive visits; keep their CSNs as a set.
df_tachy = df_labels.loc[df_labels["HR"].gt(110)]
print(df_tachy.shape)
df_tachy_pos = set(df_tachy["CSN"].tolist())

In [None]:
# Label rows with SPO2 below 90 define the hypoxia-positive visits; keep their CSNs as a set.
df_hypoxic = df_labels.loc[df_labels["SPO2"].lt(90)]
print(df_hypoxic.shape)
df_hypoxic_pos = set(df_hypoxic["CSN"].tolist())
print(len(df_hypoxic_pos))

In [None]:
# Label rows with MAP below 65 define the hypotension-positive visits; keep their CSNs as a set.
df_hypotensive = df_labels.loc[df_labels["MAP"].lt(65)]
print(df_hypotensive.shape)
df_hypotensive_pos = set(df_hypotensive["CSN"].tolist())
print(len(df_hypotensive_pos))

In [None]:
# Read the visit-level table and keep only the visits whose CSN is in the pooled split IDs.
df = pd.read_csv(
    "/deep/group/ed-monitor-self-supervised/v4/visits_ssl_2022_05_23.csv"
).loc[lambda visits: visits["CSN"].isin(final_ids)]
print(df.shape)
df.head(3)

In [None]:
df.groupby(["Triage_acuity"]).describe()

In [None]:
# High acuity = triage level "1-Resuscitation" or "2-Emergent".
df["Acuity_high"] = df["Triage_acuity"].isin(["1-Resuscitation", "2-Emergent"])

In [None]:
# One boolean indicator column per triage acuity level.
df["Acuity_1"] = df["Triage_acuity"].eq("1-Resuscitation")
df["Acuity_2"] = df["Triage_acuity"].eq("2-Emergent")
df["Acuity_3"] = df["Triage_acuity"].eq("3-Urgent")
df["Acuity_4"] = df["Triage_acuity"].eq("4-Semi-Urgent")
df["Acuity_5"] = df["Triage_acuity"].eq("5-Non-Urgent")

In [None]:
df["Acuity_high"]

In [None]:
set(df["Triage_acuity"].tolist())

In [None]:
# A visit counts as admitted when its ED disposition is one of the admission_set values.
# Series.isin does the membership test in one vectorised call instead of a Python-level loop.
df["Admitted"] = df["ED_dispo"].isin(admission_set)
df["Admitted"]

In [None]:
# Flag each visit (1/0) by whether its CSN is in the corresponding positive-label set.
# Series.isin replaces the row-wise DataFrame.apply(axis=1): same flags, no per-row Python
# call, and it still yields a column when the frame is empty.
df["outcome_tachycardic"] = df["CSN"].isin(df_tachy_pos).astype(int)
df["outcome_hypotensive"] = df["CSN"].isin(df_hypotensive_pos).astype(int)
df["outcome_hypoxic"] = df["CSN"].isin(df_hypoxic_pos).astype(int)
df.head(3)

In [None]:
# Add the two offset columns so the alignment offset is expressed relative to arrival
# (interpretation taken from the column names).
df["Align_from_arrival"] = df["Arrival_to_roomed"].add(df["Align_from_roomed"])
df.head(1)

## Statistics

In [None]:
# CI difference in populations
# https://www.dummies.com/education/math/statistics/creating-a-confidence-interval-for-the-difference-of-two-means-with-known-standard-deviations/

def diff_in_cols(df_pos, df_neg, col, center="median"):
    """Print the difference in `col` between two cohorts with an approximate 95% interval.

    The point estimate is ``center(df_pos[col]) - center(df_neg[col])`` and the
    half-width is ``1.96 * sqrt(s_pos**2 / n_pos + s_neg**2 / n_neg)``, where ``s`` is
    the sample standard deviation and ``n`` the number of non-missing values.

    Args:
        df_pos: DataFrame supplying the first (minuend) cohort.
        df_neg: DataFrame supplying the second (subtrahend) cohort.
        col: Name of a numeric column present in both frames.
        center: "median" (default, the historical behaviour) or "mean".

    Raises:
        ValueError: If `center` is neither "median" nor "mean".

    Note:
        The half-width is the normal-approximation standard error of a difference
        of *means*. Pass ``center="mean"`` to get an interval that matches its
        point estimate; with the default, the interval is only a rough guide.
    """
    if center not in ("median", "mean"):
        raise ValueError(f"center must be 'median' or 'mean', got {center!r}")

    pos_values = df_pos[col].dropna()
    neg_values = df_neg[col].dropna()
    # Use the number of observed values, not the row count: std() already ignores
    # NaN, so dividing by the row count would make the interval too narrow.
    n_pos, n_neg = len(pos_values), len(neg_values)

    if n_pos == 0 or n_neg == 0:
        ci_diff = float("nan")
    else:
        ci_diff = 1.96 * math.sqrt((pos_values.std() ** 2) / n_pos + (neg_values.std() ** 2) / n_neg)

    if center == "mean":
        diff = float(pos_values.mean() - neg_values.mean())
    else:
        diff = float(pos_values.median() - neg_values.median())
    print(f"{col} difference = {round(diff, 3)} [{round(diff - ci_diff, 3)}-{round(diff + ci_diff, 3)}]")


### Tachycardic

In [None]:
# Partition the visits on the tachycardia outcome flag and print the cohort comparison table.
df_pos = df.loc[df["outcome_tachycardic"].eq(1)]
df_neg = df.loc[df["outcome_tachycardic"].eq(0)]
print_statistics(df_pos, df_neg)

In [None]:
# The cohorts are passed in reverse order here, so the printed difference is
# outcome-0 minus outcome-1 (unlike the other columns in this section).
diff_in_cols(df_neg, df_pos, "Age")

In [None]:
diff_in_cols(df_pos, df_neg, "ED_LOS")

In [None]:
diff_in_cols(df_pos, df_neg, "Triage_HR")

In [None]:
diff_in_cols(df_pos, df_neg, "Triage_MAP")

In [None]:
diff_in_cols(df_pos, df_neg, "Triage_RR")

### Hypoxic

In [None]:
# Partition the visits on the hypoxia outcome flag and print the cohort comparison table.
df_pos = df.loc[df["outcome_hypoxic"].eq(1)]
df_neg = df.loc[df["outcome_hypoxic"].eq(0)]
print_statistics(df_pos, df_neg)

In [None]:
diff_in_cols(df_pos, df_neg, "Age")

In [None]:
diff_in_cols(df_pos, df_neg, "ED_LOS")

In [None]:
diff_in_cols(df_pos, df_neg, "Triage_HR")

In [None]:
# The cohorts are passed in reverse order here, so the printed difference is
# outcome-0 minus outcome-1 (unlike the other columns in this section).
diff_in_cols(df_neg, df_pos, "Triage_SpO2")

In [None]:
diff_in_cols(df_pos, df_neg, "Triage_MAP")

In [None]:
diff_in_cols(df_pos, df_neg, "Triage_RR")

### Hypotensive

In [None]:
# Partition the visits on the hypotension outcome flag and print the cohort comparison table.
df_pos = df.loc[df["outcome_hypotensive"].eq(1)]
df_neg = df.loc[df["outcome_hypotensive"].eq(0)]
print_statistics(df_pos, df_neg)

In [None]:
diff_in_cols(df_pos, df_neg, "ED_LOS")

In [None]:
diff_in_cols(df_pos, df_neg, "Triage_HR")

In [None]:
# The cohorts are passed in reverse order here, so the printed difference is
# outcome-0 minus outcome-1 (unlike the other columns in this section).
diff_in_cols(df_neg, df_pos, "Triage_MAP")

In [None]:
diff_in_cols(df_pos, df_neg, "Triage_RR")

# 120 Min

## Load Files

In [None]:
# Load the ID splits for the 120-minute task (read-only text mode is open()'s default).
with open("/deep/group/ed-monitor-self-supervised/test_models_v1/final_ptid_splits_noabnormalities_task_all_120min.json") as f:
    data_ids = json.load(f)
    

In [None]:
data_ids.keys()

In [None]:
print(len(data_ids["train_ids"]))
print(len(data_ids["val_ids"]))
print(len(data_ids["test_ids"]))

In [None]:
# Pool the IDs of all three splits into one list, in train / val / test order.
final_ids = [
    *data_ids["train_ids"],
    *data_ids["val_ids"],
    *data_ids["test_ids"],
]
print(len(final_ids))

In [None]:
# Read the 120-minute label table and keep only the rows whose CSN is in the pooled split IDs.
df_labels = pd.read_csv(
    "/deep/group/ed-monitor-self-supervised/test_models_v1/final_120min_labels_053022.csv"
).loc[lambda labels: labels["CSN"].isin(final_ids)]
print(df_labels.shape)
df_labels.head(2)

In [None]:
# Label rows with HR above 110 define the tachycardia-positive visits; keep their CSNs as a set.
df_tachy = df_labels.loc[df_labels["HR"].gt(110)]
print(df_tachy.shape)
df_tachy_pos = set(df_tachy["CSN"].tolist())

In [None]:
# Label rows with SPO2 below 90 define the hypoxia-positive visits; keep their CSNs as a set.
df_hypoxic = df_labels.loc[df_labels["SPO2"].lt(90)]
print(df_hypoxic.shape)
df_hypoxic_pos = set(df_hypoxic["CSN"].tolist())
print(len(df_hypoxic_pos))

In [None]:
# Label rows with MAP below 65 define the hypotension-positive visits; keep their CSNs as a set.
df_hypotensive = df_labels.loc[df_labels["MAP"].lt(65)]
print(df_hypotensive.shape)
df_hypotensive_pos = set(df_hypotensive["CSN"].tolist())
print(len(df_hypotensive_pos))

In [None]:
# Read the visit-level table and keep only the visits whose CSN is in the pooled split IDs.
df = pd.read_csv(
    "/deep/group/ed-monitor-self-supervised/v4/visits_ssl_2022_05_23.csv"
).loc[lambda visits: visits["CSN"].isin(final_ids)]
print(df.shape)
df.head(3)

In [None]:
df.groupby(["Triage_acuity"]).describe()

In [None]:
# High acuity = triage level "1-Resuscitation" or "2-Emergent".
df["Acuity_high"] = df["Triage_acuity"].isin(["1-Resuscitation", "2-Emergent"])

In [None]:
# One boolean indicator column per triage acuity level.
df["Acuity_1"] = df["Triage_acuity"].eq("1-Resuscitation")
df["Acuity_2"] = df["Triage_acuity"].eq("2-Emergent")
df["Acuity_3"] = df["Triage_acuity"].eq("3-Urgent")
df["Acuity_4"] = df["Triage_acuity"].eq("4-Semi-Urgent")
df["Acuity_5"] = df["Triage_acuity"].eq("5-Non-Urgent")

In [None]:
df["Acuity_high"]

In [None]:
set(df["Triage_acuity"].tolist())

In [None]:
# A visit counts as admitted when its ED disposition is one of the admission_set values.
# Series.isin does the membership test in one vectorised call instead of a Python-level loop.
df["Admitted"] = df["ED_dispo"].isin(admission_set)
df["Admitted"]

In [None]:
# Flag each visit (1/0) by whether its CSN is in the corresponding positive-label set.
# Series.isin replaces the row-wise DataFrame.apply(axis=1): same flags, no per-row Python
# call, and it still yields a column when the frame is empty.
df["outcome_tachycardic"] = df["CSN"].isin(df_tachy_pos).astype(int)
df["outcome_hypotensive"] = df["CSN"].isin(df_hypotensive_pos).astype(int)
df["outcome_hypoxic"] = df["CSN"].isin(df_hypoxic_pos).astype(int)
df.head(3)

In [None]:
# Add the two offset columns so the alignment offset is expressed relative to arrival
# (interpretation taken from the column names).
df["Align_from_arrival"] = df["Arrival_to_roomed"].add(df["Align_from_roomed"])
df.head(1)

## Statistics

In [None]:
# CI difference in populations
# https://www.dummies.com/education/math/statistics/creating-a-confidence-interval-for-the-difference-of-two-means-with-known-standard-deviations/

def diff_in_cols(df_pos, df_neg, col, center="median"):
    """Print the difference in `col` between two cohorts with an approximate 95% interval.

    The point estimate is ``center(df_pos[col]) - center(df_neg[col])`` and the
    half-width is ``1.96 * sqrt(s_pos**2 / n_pos + s_neg**2 / n_neg)``, where ``s`` is
    the sample standard deviation and ``n`` the number of non-missing values.

    Args:
        df_pos: DataFrame supplying the first (minuend) cohort.
        df_neg: DataFrame supplying the second (subtrahend) cohort.
        col: Name of a numeric column present in both frames.
        center: "median" (default, the historical behaviour) or "mean".

    Raises:
        ValueError: If `center` is neither "median" nor "mean".

    Note:
        The half-width is the normal-approximation standard error of a difference
        of *means*. Pass ``center="mean"`` to get an interval that matches its
        point estimate; with the default, the interval is only a rough guide.
    """
    if center not in ("median", "mean"):
        raise ValueError(f"center must be 'median' or 'mean', got {center!r}")

    pos_values = df_pos[col].dropna()
    neg_values = df_neg[col].dropna()
    # Use the number of observed values, not the row count: std() already ignores
    # NaN, so dividing by the row count would make the interval too narrow.
    n_pos, n_neg = len(pos_values), len(neg_values)

    if n_pos == 0 or n_neg == 0:
        ci_diff = float("nan")
    else:
        ci_diff = 1.96 * math.sqrt((pos_values.std() ** 2) / n_pos + (neg_values.std() ** 2) / n_neg)

    if center == "mean":
        diff = float(pos_values.mean() - neg_values.mean())
    else:
        diff = float(pos_values.median() - neg_values.median())
    print(f"{col} difference = {round(diff, 3)} [{round(diff - ci_diff, 3)}-{round(diff + ci_diff, 3)}]")


### Tachycardic

In [None]:
# Partition the visits on the tachycardia outcome flag and print the cohort comparison table.
df_pos = df.loc[df["outcome_tachycardic"].eq(1)]
df_neg = df.loc[df["outcome_tachycardic"].eq(0)]
print_statistics(df_pos, df_neg)

In [None]:
# The cohorts are passed in reverse order here, so the printed difference is
# outcome-0 minus outcome-1 (unlike the other columns in this section).
diff_in_cols(df_neg, df_pos, "Age")

In [None]:
diff_in_cols(df_pos, df_neg, "ED_LOS")

In [None]:
diff_in_cols(df_pos, df_neg, "Triage_HR")

In [None]:
diff_in_cols(df_pos, df_neg, "Triage_MAP")

In [None]:
diff_in_cols(df_pos, df_neg, "Triage_RR")

### Hypoxic

In [None]:
# Partition the visits on the hypoxia outcome flag and print the cohort comparison table.
df_pos = df.loc[df["outcome_hypoxic"].eq(1)]
df_neg = df.loc[df["outcome_hypoxic"].eq(0)]
print_statistics(df_pos, df_neg)

In [None]:
diff_in_cols(df_pos, df_neg, "Age")

In [None]:
diff_in_cols(df_pos, df_neg, "ED_LOS")

In [None]:
diff_in_cols(df_pos, df_neg, "Triage_HR")

In [None]:
# The cohorts are passed in reverse order here, so the printed difference is
# outcome-0 minus outcome-1 (unlike the other columns in this section).
diff_in_cols(df_neg, df_pos, "Triage_SpO2")

In [None]:
diff_in_cols(df_pos, df_neg, "Triage_MAP")

In [None]:
diff_in_cols(df_pos, df_neg, "Triage_RR")

### Hypotensive

In [None]:
# Partition the visits on the hypotension outcome flag and print the cohort comparison table.
df_pos = df.loc[df["outcome_hypotensive"].eq(1)]
df_neg = df.loc[df["outcome_hypotensive"].eq(0)]
print_statistics(df_pos, df_neg)

In [None]:
diff_in_cols(df_pos, df_neg, "ED_LOS")

In [None]:
diff_in_cols(df_pos, df_neg, "Triage_HR")

In [None]:
# The cohorts are passed in reverse order here, so the printed difference is
# outcome-0 minus outcome-1 (unlike the other columns in this section).
diff_in_cols(df_neg, df_pos, "Triage_MAP")

In [None]:
diff_in_cols(df_pos, df_neg, "Triage_RR")