In [0]:
%pip install cloudpickle==1.6.0 keras==2.9 tensorflow==2.9 tensorflow-addons[tensorflow] scikit-learn==0.24.1

## ED Insights performance calculations

In [0]:
#  keras==2.8.0
import sklearn
import pickle
import pandas as pd
import numpy as np
# import shap
import json
import mlflow
from mlflow.tracking import MlflowClient
from datetime import datetime
import os
import keras
from pyspark.sql import functions as F

In [0]:
X_train = spark.read.table('bhe.vae_knn_v42_train').toPandas()
X_test = spark.read.table('bhe.vae_knn_v42_test').toPandas()

In [0]:
# prod data comes from separate table
# filter by dates for consistency with past analyses
X_prod = spark.read.table("bhe.vae_knn_v42_prod_results").toPandas()
X_prod['date'] = pd.to_datetime(X_prod['date'])
X_prod = X_prod[
  (X_prod['date'] > '2022-04-11')
  & (X_prod['date'] < '2022-06-19')
]

# labels stored in separate table
labels = spark.read.table('monitoring_observability_gold.eri_n3_predictions_labels').filter(
  F.col('model_version') == 35
).toPandas()
labels['date'] = pd.to_datetime(labels['date'])

X_prod['date'] = pd.to_datetime(X_prod['date'])
labels['date'] = pd.to_datetime(labels['date'])

# prod features and labels
X_prod = X_prod.merge(labels[['mrn', 'date', 'label']], on=['mrn', 'date'], how='left')

In [0]:
# load artifacts from trained model

model_name = 'ED Predictions'
model_version = 42

def load_artifacts(model_name='ED Prediction', model_version=42):
    """
    Grab and return relevant artifacts for the given model + version.
    """
    client = mlflow.tracking.MlflowClient()

    artifact_dir = f"./model-{model_version}"
    if not os.path.exists(artifact_dir):
        os.mkdir(artifact_dir)
    
    loaded_model = mlflow.pyfunc.load_model(model_uri=f"models:/{model_name}/{model_version}")
    local_path = client.download_artifacts(loaded_model._model_meta.run_id, "model", artifact_dir)
    sklearn_model = loaded_model._model_impl.python_model.model
    
    X_train = pd.read_pickle(f'{artifact_dir}/model/artifacts/train_df.pkl')
    X_test = pd.read_pickle(f'{artifact_dir}/model/artifacts/test_df.pkl')

    with open(local_path + '/metadata.json') as f:
        metadata = json.load(f)
    
    return {"model": sklearn_model, "metadata": metadata, "train": X_train, "test": X_test}
  
artifacts = load_artifacts()
metadata = artifacts['metadata']

In [0]:
# race remapping
# some racial groups are very sparse so they are collapsed into the "other" category

def race_vector_remap(df):
  df['remap_race_vector_white_or_caucasian'] = df['race_vector_white_or_caucasian']
  df['rempap_race_vector_black_or_african_american'] = df['race_vector_black_or_african_american']
  df['remap_race_vector_asian'] = df['race_vector_asian']
  df['remap_race_vector_other'] = ~df['race_vector_asian'].astype(bool) \
    & ~df['race_vector_black_or_african_american'].astype(bool) \
    & ~df['race_vector_white_or_caucasian'].astype(bool)
  
  return df

X_train = race_vector_remap(X_train)
X_test = race_vector_remap(X_test)
X_prod = race_vector_remap(X_prod)

### Incident rates

In [0]:
X_train[metadata['label_col']].value_counts().loc[1], X_train[metadata['label_col']].value_counts(normalize=True).loc[1].round(4)

In [0]:
X_test[metadata['label_col']].value_counts().loc[1], X_test[metadata['label_col']].value_counts(normalize=True).loc[1].round(4)

In [0]:
X_prod['label'].value_counts().loc[1], X_prod['label'].value_counts(normalize=True).loc[1].round(4)

### Dataset size

In [0]:
len(X_train), len(X_test), len(X_prod)

In [0]:
X_train['mrn'].nunique(), X_test['mrn'].nunique(), X_prod['mrn'].nunique()

In [0]:
(X_prod['date'].max() - X_prod['date'].min())

### Demographic data

In [0]:
from collections import defaultdict
def get_demo_by_like(df_train, df_test, df_prod, like):
  """Get counts of rows in different dataframes by filtering subtext."""
  train, test, prod = defaultdict(list), defaultdict(list), defaultdict(list)
  for c in [i for i in df_train.columns if like in i]:
    train[c].append(len(df_train[df_train[c] == 1]))
    test[c].append(len(df_test[df_test[c] == 1]))
    prod[c].append(len(df_prod[df_prod[c] == 1]))
  result = pd.concat([
    pd.DataFrame(train),
    pd.DataFrame(test),
    pd.DataFrame(prod)
  ], ignore_index=True)
  result.index = ['train', 'test', 'prod']
  
  return result.T

In [0]:
get_demo_by_like(X_train ,X_test, X_prod, 'birthsex')

Unnamed: 0,train,test,prod
birthsex_vector_unk,199,13,14
birthsex_vector_m,77919,18611,6933
birthsex_vector_f,68711,18450,7516


In [0]:
get_demo_by_like(X_train, X_test, X_prod, 'remap_race_vector')

Unnamed: 0,train,test,prod
remap_race_vector_white_or_caucasian,110697,27251,11254
remap_race_vector_asian,6498,1863,657
remap_race_vector_other,13522,3607,1132


In [0]:
get_demo_by_like(X_train, X_test, X_prod, 'ethnicity_vector')

Unnamed: 0,train,test,prod
ethnicity_vector_unknown,3253,638,325
ethnicity_vector_not_hispanic_or_latino,122234,30442,12236
ethnicity_vector_hispanic_or_latino,20850,5851,1902


In [0]:
get_demo_by_like(X_train, X_test, X_prod, 'cancer_type')

Unnamed: 0,train,test,prod
cancer_type_breast-cancer,22551,6361,2231
cancer_type_prostate_cancer_combined,8006,1824,676
cancer_type_kidney_cancer_combined,7114,1693,519
cancer_type_bladder_cancer_combined,6106,1854,397
cancer_type_urethral_cancer_combined,2072,714,344
cancer_type_testicular_cancer_combined,1594,199,88
cancer_type_ureter_cancer_combined,822,194,38
cancer_type_penile_cancer_combined,366,61,25


In [0]:
def cancer_type_enrich(df):
  """Enrich dataframe with additional cancer type info"""
  
  tmp_df = spark.createDataFrame(df)

  tmp_cancer_enriched = spark.read.table(
    'lauren_kerr_ad_hoc.enriched_cancer_type_from_conditions_problems'
  ).select(
    'mrn', F.col('cancer_group').alias('cancer_type')
  ).filter(
    (F.col('cancer_type') != 'AllCancerDiagnoses')
  ).drop_duplicates()

  tmp_df = tmp_df.join(tmp_cancer_enriched, on='mrn', how='left')
  
  return tmp_df.toPandas()

In [0]:
X_train_cancer_enriched = cancer_type_enrich(X_train)
X_test_cancer_enriched = cancer_type_enrich(X_test)
X_prod_cancer_enriched = cancer_type_enrich(X_prod)

In [0]:
# departmental count
pd.DataFrame(
  X_train_cancer_enriched['cancer_type'].value_counts(dropna=False)
).rename(columns={'cancer_type': 'train'}).join(
  pd.DataFrame(
    X_test_cancer_enriched['cancer_type'].value_counts(dropna=False)
  ).rename(columns={'cancer_type': 'test'})
).join(
  pd.DataFrame(
    X_prod_cancer_enriched['cancer_type'].value_counts(dropna=False)
  ).rename(columns={'cancer_type': 'prod'})
)

Unnamed: 0,train,test,prod
GU Cancer,74215,17744,5837
Breast Cancer,51507,14027,5626
,22693,5614,3101


### Lab/medical info

In [0]:
get_demo_by_like(X_train, X_test, X_prod, 'm_value_pathologic_vector')

Unnamed: 0,train,test,prod
m_value_pathologic_vector_1,6862,1754,777
m_value_pathologic_vector_0,22935,5955,2707


In [0]:
get_demo_by_like(X_train, X_test, X_prod, 'm_value_clinical_vector')

Unnamed: 0,train,test,prod
m_value_clinical_vector_1,16605,4155,1541
m_value_clinical_vector_0,29260,7361,3831


In [0]:
get_demo_by_like(X_train, X_test, X_prod, 't_value_pathologic_vector')

Unnamed: 0,train,test,prod
t_value_pathologic_vector_4,1515,523,302
t_value_pathologic_vector_3,5080,1548,572
t_value_pathologic_vector_2,7216,1495,786
t_value_pathologic_vector_1,10300,2729,1136
t_value_pathologic_vector_0,1849,490,279


In [0]:
get_demo_by_like(X_train, X_test, X_prod, 't_value_clinical_vector')

Unnamed: 0,train,test,prod
t_value_clinical_vector_4,6019,1616,664
t_value_clinical_vector_3,7671,1823,956
t_value_clinical_vector_2,13553,3313,1685
t_value_clinical_vector_1,9651,2528,1113
t_value_clinical_vector_0,969,297,184


In [0]:
get_demo_by_like(X_train, X_test, X_prod, 'n_value_pathologic_vector')

Unnamed: 0,train,test,prod
n_value_pathologic_vector_3,1993,669,299
n_value_pathologic_vector_2,2995,630,440
n_value_pathologic_vector_1,7465,1667,889
n_value_pathologic_vector_0,13673,3807,1460


In [0]:
get_demo_by_like(X_train, X_test, X_prod, 'n_value_clinical_vector')

Unnamed: 0,train,test,prod
n_value_clinical_vector_3,4880,1165,649
n_value_clinical_vector_2,2297,562,259
n_value_clinical_vector_1,11790,2779,1390
n_value_clinical_vector_0,19758,5099,2496


In [0]:
get_demo_by_like(X_train, X_test, X_prod, 'comorbidity')

Unnamed: 0,train,test,prod
comorbidity_blood_loss_anemia,18271,4809,1688
comorbidity_fluid-electrolyte_disorder,15267,3850,1317
comorbidity_hypertension,18484,4916,1609
comorbidity_weight_loss,9074,1972,758
comorbidity_cardiac_arrhythmia,7578,1874,688
comorbidity_anemia,6933,1817,732
comorbidity_diabetes_uncomplicated,6467,1749,699
comorbidity_renal_failure,6540,1628,525
comorbidity_hypothyroidism,7183,1926,723
comorbidity_depression,5896,1526,528


### Performance

In [0]:
# test & prod performance
sklearn.metrics.roc_auc_score(
  X_test[metadata['label_col']], X_test['prediction']
), sklearn.metrics.roc_auc_score(
  X_prod['label'], X_prod['candidate_predicted_risk']
)

In [0]:
def get_demo_perf_by_like(df_test, df_prod, like):
  """Calcualte AUC on test and prod data filtering columns by subtext"""
  test, prod = defaultdict(list), defaultdict(list)
  for c in [i for i in df_test.columns if like in i]:
    tmp_test = df_test[df_test[c] == 1]
    try:
      auc_test = sklearn.metrics.roc_auc_score(tmp_test[metadata['label_col']], tmp_test['prediction'])
    except ValueError:
      auc_test = -99
      
    tmp_prod = df_prod[df_prod[c] == 1]
    try:
      auc_prod = sklearn.metrics.roc_auc_score(tmp_prod['label'], tmp_prod['candidate_predicted_risk'])
    except ValueError:
      auc_prod = -99
      
    test[c].append(auc_test)
    prod[c].append(auc_prod)
    
  result = pd.concat([
    pd.DataFrame(test),
    pd.DataFrame(prod)
  ], ignore_index=True)
  result.index = ['test', 'prod']
  return result.T


def get_demo_perf_by_like_bootstrap(df_test, df_prod, like):
  """Calcualte bootstrapped AUC on test and prod data filtering columns by subtext"""
  test, prod = defaultdict(list), defaultdict(list)
  for c in [i for i in df_test.columns if like in i]:
    tmp_test = df_test[df_test[c] == 1]
    tmp_prod = df_prod[df_prod[c] == 1]
    
    tests = []
    prods = []
    
    for _ in range(2000):
      tmp_test_bs = tmp_test.sample(frac=1, replace=True)
      tmp_prod_bs = tmp_prod.sample(frac=1, replace=True)
      try:
        tests.append(sklearn.metrics.roc_auc_score(tmp_test_bs[metadata['label_col']], tmp_test_bs['prediction']))
      except ValueError:
        tests.append(np.nan)
      try:
        prods.append(sklearn.metrics.roc_auc_score(tmp_prod_bs['label'], tmp_prod_bs['candidate_predicted_risk']))
      except ValueError:
        prods.append(np.nan)
      
    test[c].append(f"{np.round(np.mean(tests) - (np.std(tests) * 2), 2), np.round(np.mean(tests) + (np.std(tests) * 2), 2)}")
    prod[c].append(f"{np.round(np.mean(prods) - (np.std(prods) * 2), 2), np.round(np.mean(prods) + (np.std(prods) * 2), 2)}")
    
  result = pd.concat([
    pd.DataFrame(test),
    pd.DataFrame(prod)
  ], ignore_index=True)
  result.index = ['test', 'prod']
  return result.T
  
    # print(f"{c:<80} {auc_test:>20} {auc_prod:>20}")

In [0]:
get_demo_perf_by_like(X_test, X_prod, 'birthsex')

Unnamed: 0,test,prod
birthsex_vector_unk,-99.0,0.416667
birthsex_vector_m,0.794758,0.776841
birthsex_vector_f,0.793777,0.739818


In [0]:
get_demo_perf_by_like_bootstrap(X_test, X_prod, 'birthsex')

Unnamed: 0,test,prod
birthsex_vector_unk,"(nan, nan)","(nan, nan)"
birthsex_vector_m,"(0.78, 0.81)","(0.75, 0.8)"
birthsex_vector_f,"(0.78, 0.81)","(0.72, 0.76)"


In [0]:
get_demo_perf_by_like(X_test, X_prod, 'remap_race_vector')

Unnamed: 0,test,prod
remap_race_vector_white_or_caucasian,0.788626,0.755048
remap_race_vector_asian,0.768082,0.777283
remap_race_vector_other,0.796017,0.763102


In [0]:
get_demo_perf_by_like_bootstrap(X_test, X_prod, 'remap_race_vector')

Unnamed: 0,test,prod
remap_race_vector_white_or_caucasian,"(0.78, 0.8)","(0.74, 0.77)"
remap_race_vector_asian,"(0.73, 0.81)","(0.71, 0.85)"
remap_race_vector_other,"(0.77, 0.82)","(0.71, 0.82)"


In [0]:
get_demo_perf_by_like(X_test, X_prod, 'ethnicity_vector')

Unnamed: 0,test,prod
ethnicity_vector_unknown,0.769402,0.819435
ethnicity_vector_not_hispanic_or_latino,0.793106,0.761831
ethnicity_vector_hispanic_or_latino,0.807038,0.711785


In [0]:
get_demo_perf_by_like_bootstrap(X_test, X_prod, 'ethnicity_vector')

Unnamed: 0,test,prod
ethnicity_vector_unknown,"(0.69, 0.84)","(0.74, 0.9)"
ethnicity_vector_not_hispanic_or_latino,"(0.78, 0.8)","(0.74, 0.78)"
ethnicity_vector_hispanic_or_latino,"(0.79, 0.83)","(0.66, 0.77)"


In [0]:
get_demo_perf_by_like(X_test, X_prod, 'cancer_type')

Unnamed: 0,test,prod
cancer_type_breast-cancer,0.791149,0.725873
cancer_type_prostate_cancer_combined,0.820376,0.728067
cancer_type_kidney_cancer_combined,0.726157,0.715468
cancer_type_bladder_cancer_combined,0.68493,0.687032
cancer_type_urethral_cancer_combined,0.710586,0.621979
cancer_type_testicular_cancer_combined,0.76676,0.52907
cancer_type_ureter_cancer_combined,0.527155,0.338542
cancer_type_penile_cancer_combined,-99.0,1.0


In [0]:
from collections import defaultdict
def groupby_auc(df_test, df_prod, col):
  """Calculate AUC using groupby"""
  test = defaultdict(list)
  for name, group in df_test.groupby(col):
    test[col].append(name)
    test['auc_test'].append(
      sklearn.metrics.roc_auc_score(group[metadata['label_col']], group['prediction'])
    )
    
  prod = defaultdict(list)
  for name, group in df_prod.groupby(col):
    prod[col].append(name)
    prod['auc_prod'].append(
      sklearn.metrics.roc_auc_score(group['label'], group['candidate_predicted_risk'])
    )
  
  return pd.DataFrame(test).set_index(col).join(
    pd.DataFrame(prod).set_index(col)
  ).rename(columns={'auc_test': 'test', 'auc_prod': 'prod'})

In [0]:
groupby_auc(X_test_cancer_enriched, X_prod_cancer_enriched, 'cancer_type')

Unnamed: 0_level_0,test,prod
cancer_type,Unnamed: 1_level_1,Unnamed: 2_level_1
Breast Cancer,0.799973,0.733487
GU Cancer,0.779924,0.762549


In [0]:
from collections import defaultdict
def groupby_auc_bootstrap(df_test, df_prod, col):
  """Calculate bootsrapped AUC using groupby"""
  test = defaultdict(list)
  for name, group in df_test.groupby(col):
    test[col].append(name)
    
    tests = []
    for _ in range(2000):
      tmp_test_bs = group.sample(frac=1, replace=True)
      try:
        tests.append(sklearn.metrics.roc_auc_score(tmp_test_bs[metadata['label_col']], tmp_test_bs['prediction']))
      except ValueError:
        tests.append(np.nan)
        
    test['auc_test'].append(
      f"{np.round(np.mean(tests) - (np.std(tests) * 2), 2), np.round(np.mean(tests) + (np.std(tests) * 2), 2)}"
    )
    
  prod = defaultdict(list)
  for name, group in df_prod.groupby(col):
    prod[col].append(name)
    
    prods = []
    for _ in range(2000):
      tmp_prob_bs = group.sample(frac=1, replace=True)
      try:
        prods.append(sklearn.metrics.roc_auc_score(tmp_prob_bs['label'], tmp_prob_bs['candidate_predicted_risk']))
      except ValueError:
        prods.append(np.nan)
        
    test['auc_prod'].append(
      f"{np.round(np.mean(prods) - (np.std(prods) * 2), 2), np.round(np.mean(prods) + (np.std(prods) * 2), 2)}"
    )
  
  return pd.DataFrame(test).set_index(col).join(
    pd.DataFrame(prod).set_index(col)
  ).rename(columns={'auc_test': 'test', 'auc_prod': 'prod'})

In [0]:
groupby_auc_bootstrap(X_test_cancer_enriched, X_prod_cancer_enriched, 'cancer_type')

Unnamed: 0_level_0,test,prod
cancer_type,Unnamed: 1_level_1,Unnamed: 2_level_1
Breast Cancer,"(0.79, 0.81)","(0.71, 0.76)"
GU Cancer,"(0.77, 0.79)","(0.74, 0.79)"


In [0]:
X_test_cancer_enriched['dummy_pivot'] = 1
X_prod_cancer_enriched['dummy_pivot'] = 1
groupby_auc_bootstrap(X_test_cancer_enriched, X_prod_cancer_enriched, 'dummy_pivot')

Unnamed: 0_level_0,test,prod
dummy_pivot,Unnamed: 1_level_1,Unnamed: 2_level_1
1,"(0.79, 0.8)","(0.74, 0.77)"
