In [1]:
import os 
import re
import json
import icd10
import pickle
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt 
import seaborn as sns
from itertools import product
from tqdm import tqdm
import time

from sksurv.linear_model import CoxPHSurvivalAnalysis, CoxnetSurvivalAnalysis
from sksurv.ensemble import RandomSurvivalForest, GradientBoostingSurvivalAnalysis

from sksurv.metrics import cumulative_dynamic_auc, concordance_index_censored, integrated_brier_score

from sklearn.model_selection import GridSearchCV, KFold, train_test_split
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

from scipy.stats import sem

import importlib.util
spec = importlib.util.spec_from_file_location('script_utils', '/PHShome/jpc91/clinical_text_project/python_scripts/script_utils.py') 
script_utils = importlib.util.module_from_spec(spec)	 
spec.loader.exec_module(script_utils)

import warnings
warnings.filterwarnings("ignore")

def find_icd_code(code):
    """Translate an event code into its ICD-10 description.

    Parameters
    ----------
    code
        Event identifier to look up.

    Returns
    -------
    The ``description`` attribute of ``icd10.find(code)`` when
    ``icd10.exists(code)`` is true; otherwise ``code`` is returned unchanged.
    """
    # Codes icd10 does not recognise are passed through untouched.
    if not icd10.exists(code):
        return code
    return icd10.find(code).description

def find_icd_block_description(code):
    """Map an event code to a coarse group label.

    Parameters
    ----------
    code
        Event identifier to classify.

    Returns
    -------
    * ``icd10.find(code).block_description`` when ``icd10.exists(code)`` is
      true, or ``None`` if that lookup raises an ordinary exception;
    * ``'Metastasis'`` for the metastasis site labels listed below;
    * ``code`` itself for ``'death'`` and ``'vte'``;
    * ``None`` for anything else.
    """
    metastasis_codes = ('brainM', 'boneM', 'adrenalM', 'liverM', 'lungM', 'nodeM', 'peritonealM')
    passthrough_codes = ('death', 'vte')

    if icd10.exists(code):
        try:
            return icd10.find(code).block_description
        except Exception:
            # Best-effort lookup: a code without a usable block description is
            # reported as None. Only ordinary errors are absorbed, so
            # KeyboardInterrupt / SystemExit still propagate.
            return None
    if code in metastasis_codes:
        return 'Metastasis'
    if code in passthrough_codes:
        return code
    return None

# Root data directory; every other location below is derived from it.
data_path = '/data/gusev/USERS/jpconnor/clinical_text_project/data/'
surv_path = f'{data_path}survival_data/'
notes_path = f'{data_path}batched_datasets/VTE_data/processed_datasets/'
results_path = f'{data_path}survival_data/results/ICD_predictions_v3/'
summary_path = f'{surv_path}results/stage_type_ICD_results/'
# Every entry found directly under the results directory.
events = os.listdir(results_path)

In [6]:
# Peek at the files stored for the first listed event.
os.listdir(f'{results_path}{events[0]}')

['coxPH_no_embeddings_full_data.csv', 'coxPH_decayed_embeddings_full_data.csv']

In [2]:
events = os.listdir(results_path)

models = ['coxPH']
met_events = ['brainM', 'boneM', 'adrenalM', 'liverM', 'lungM', 'nodeM', 'peritonealM']
metrics_col = ['mean_c_index', 'sem_c_index', 'mean_auc(t)', 'sem_auc(t)', 'mean_ibs', 'sem_ibs']

# For every (event, model) pair, keep the metrics of the result row with the
# highest mean_auc(t).
model_performance = []
for event, model in product(events, models):
    res_df = pd.read_csv(
        f'{results_path}{event}/{model}_decayed_embeddings_full_data.csv',
        index_col=0,
    ).sort_values(by='mean_auc(t)', ascending=False)
    top_metrics = res_df[metrics_col].iloc[0].tolist()
    model_performance.append([event, model, *top_metrics])

full_res_df = pd.DataFrame(model_performance, columns=['event', 'model', *metrics_col])

In [4]:
# Rank events by mean AUC(t), best first, then drop rows with any missing value.
without_na = (
    full_res_df
    .sort_values(by='mean_auc(t)', ascending=False)
    .dropna()
)

In [8]:
# Inspect the metrics recorded for the 'death' event.
is_death = without_na['event'].eq('death')
without_na.loc[is_death]

Unnamed: 0,event,model,mean_c_index,sem_c_index,mean_auc(t),sem_auc(t),mean_ibs,sem_ibs
7,death,coxPH,0.851435,0.003639,0.908637,0.003315,12532.750054,6888.061505


In [9]:
# Attach a readable description and a coarse group label to each event.
for new_col, describe in (
    ('event_descr', find_icd_code),
    ('event_block_descr', find_icd_block_description),
):
    without_na[new_col] = without_na['event'].apply(describe)

In [12]:
# Average mean AUC(t) across the events labelled 'Metastasis'.
is_metastasis = without_na['event_block_descr'] == 'Metastasis'
without_na.loc[is_metastasis, 'mean_auc(t)'].mean()

np.float64(0.8669348406364122)

In [15]:
# NOTE(review): the saved output of this cell is `KeyError: 'mean_auc(t)'`, which
# the visible cells do not reproduce -- re-run from a fresh kernel to confirm.
AUC_THRESHOLD = 0.7
NON_ICD_GROUPS = ['Metastasis', 'death', 'vte']

# Drop the rows labelled Metastasis / death / vte; every other row is kept,
# including rows whose block description is missing.
only_icd = without_na.loc[~without_na['event_block_descr'].isin(NON_ICD_GROUPS)]

# Share of the remaining events whose mean AUC(t) reaches the threshold.
# The mean of the boolean mask equals count / total, and yields NaN instead of
# raising ZeroDivisionError when `only_icd` is empty.
(only_icd['mean_auc(t)'] >= AUC_THRESHOLD).mean()

KeyError: 'mean_auc(t)'