In [1]:
# Import required libraries
import pandas as pd
from sklearn.metrics import roc_curve, auc, plot_roc_curve, roc_auc_score, accuracy_score, precision_recall_curve
from sklearn.model_selection import cross_validate, train_test_split, GridSearchCV
from sklearn.linear_model import LogisticRegressionCV
from sklearn.ensemble import RandomForestClassifier
import numpy as np
import matplotlib.pyplot as plt
import seaborn
from sklearn.model_selection import cross_val_score
from sklearn.metrics import classification_report, confusion_matrix
from random_forest_functions import *

In [2]:
# Random seed for this notebook; the hyperparameter-tuning calls further down
# are run with seed 51.
random_seed = 51

In [3]:
# Load the medical claims data.
# NOTE(review): the saved output of this cell contains a warning fragment --
# possibly pandas' mixed-dtype warning from one of these reads. If so, consider
# passing an explicit dtype= mapping for the affected columns.
medical_claims = pd.read_csv('Data/med_full_final_melted.csv')

# Load the pharmacy claims data.
rx_claims = pd.read_csv('Data/rx_full_final.csv')

# Load the edges file and the exclusions file, then collect the 'Base Code'
# column of the exclusions into a plain list (passed to get_predictor_codes and
# hyperparameter_tuning_timebound in later cells).
edges = pd.read_csv('Data/edges.csv')
exclusions = pd.read_csv('Data/exclusions.csv')
exclusion_list = exclusions['Base Code'].to_list()

  exec(code_obj, self.user_global_ns, self.user_ns)


In [4]:
## Read the "all data" file that includes ages and is not time-bound.
## This frame is the input to the hyperparameter_tuning(...) cells below.
all_data_with_ages_without_timebound = pd.read_csv('./Data/all_data_with_ages.csv')

In [5]:
# Build the predictor codes for every target disease with one loop, so that
# adding or removing a disease is a one-line change to DISEASE_CODES.
DISEASE_CODES = ['E11', 'C18', 'C50', 'I10', 'I25', 'N18']

predictor_codes_by_disease = {}
for disease_code in DISEASE_CODES:
    # NOTE(review): medical_claims_timebound is reassigned on every pass, so
    # after the loop it only holds the frame for the last code ('N18'). Nothing
    # else in this cell reads it -- confirm whether the per-disease frames are
    # needed, and if not, whether these calls can be dropped.
    medical_claims_timebound = create_time_bound_claims(medical_claims, disease_code)
    (predictor_codes, code_descriptions, weights) = get_predictor_codes(edges, exclusion_list, disease_code)
    predictor_codes_by_disease[disease_code] = predictor_codes

# Per-disease names used by the hyperparameter-tuning cells below.
E11_predictor_codes = predictor_codes_by_disease['E11']
C18_predictor_codes = predictor_codes_by_disease['C18']
C50_predictor_codes = predictor_codes_by_disease['C50']
I10_predictor_codes = predictor_codes_by_disease['I10']
I25_predictor_codes = predictor_codes_by_disease['I25']
N18_predictor_codes = predictor_codes_by_disease['N18']

# Report the selected codes, one line per disease, in DISEASE_CODES order.
for disease_code in DISEASE_CODES:
    print("{0}_top_20_predictor_codes are {1}".format(disease_code, predictor_codes_by_disease[disease_code]))

E11_top_20_predictor_codes are ['I10', 'E78', 'M25', 'M54', 'M79', 'E66', 'H25', 'G47', 'I25', 'K21', 'E03', 'B35', 'H40', 'N39', 'M47', 'Z86', 'D64', 'M17', 'M19', 'F41']
C18_top_20_predictor_codes are ['I10', 'E78', 'K63', 'D12', 'M79', 'Z86', 'D64', 'M25', 'C78', 'K57', 'Z45', 'K62', 'K76', 'M54', 'N39', 'K21', 'E11', 'I25', 'Z90', 'E66']
C50_top_20_predictor_codes are ['I10', 'E78', 'M25', 'M79', 'M54', 'N63', 'Z90', 'Z80', 'N64', 'K21', 'L82', 'H25', 'D05', 'E03', 'M85', 'F41', 'Z86', 'E11', 'N39', 'D48']
I10_top_20_predictor_codes are ['E78', 'M25', 'M54', 'E11', 'M79', 'E66', 'K21', 'H25', 'G47', 'I25', 'F41', 'L82', 'E03', 'M47', 'M17', 'Z86', 'N39', 'M19', 'L57', 'J30']
I25_top_20_predictor_codes are ['I10', 'E78', 'M25', 'E11', 'M79', 'M54', 'Z95', 'H25', 'K21', 'G47', 'L82', 'Z86', 'I48', 'L57', 'M47', 'D64', 'I50', 'I51', 'I49', 'M19']
N18_top_20_predictor_codes are ['I10', 'E78', 'E11', 'I12', 'M25', 'M79', 'M54', 'I25', 'E87', 'D64', 'N17', 'N28', 'H25', 'K21', 'N39', 'I5

In [6]:
### Run this only when the hyperparameters need to be tuned. The predictor codes for E11
### are the top 20 ICD codes associated with this disease (based on weight).

# Snapshot of the codes for reference; the live value is E11_predictor_codes from the cell above.
#E11_predictor_codes=['I10','E78','M25','M54','M79','E66','H25','G47','I25','K21','E03','B35','H40','N39','M47','Z86','D64','M17','M19','F41']
E11_random_forest_hyper_params_with_ages_without_timebound = hyperparameter_tuning(
    all_data_with_ages_without_timebound,
    'E11',
    E11_predictor_codes,
    random_seed=random_seed,  # notebook-level seed rather than a repeated literal
)
print("E11 Hyper Parameters:", E11_random_forest_hyper_params_with_ages_without_timebound)

E11 Hyper Parameters: {'max_depth': 8, 'n_estimators': 400}


In [7]:
### Run this only when the hyperparameters need to be tuned. The predictor codes for C18
### are the top 20 ICD codes associated with this disease (based on weight).

# Snapshot of the codes for reference; the live value is C18_predictor_codes from the cell above.
#C18_predictor_codes=['I10','E78','K63','D12','M79','Z86','D64','M25','C78','K57','Z45','K62','K76','M54','N39','K21','E11','I25','Z90','E66']
C18_random_forest_hyper_params_with_ages_without_timebound = hyperparameter_tuning(
    all_data_with_ages_without_timebound,
    'C18',
    C18_predictor_codes,
    random_seed=random_seed,  # notebook-level seed rather than a repeated literal
)
print("C18_random_forest_hyper_params_with_ages_without_timebound:", C18_random_forest_hyper_params_with_ages_without_timebound)

C18_random_forest_hyper_params_with_ages_without_timebound: {'max_depth': 2, 'n_estimators': 100}


In [8]:
### Run this only when the hyperparameters need to be tuned. The predictor codes for C50
### are the top 20 ICD codes associated with this disease (based on weight).

# Snapshot of the codes for reference; the live value is C50_predictor_codes from the cell above.
#C50_predictor_codes=['I10','E78','M25','M79','M54','N63','Z90','Z80','N64','K21','L82','H25','D05','E03','M85','F41','Z86','E11','N39','D48']
C50_random_forest_hyper_params_with_ages_without_timebound = hyperparameter_tuning(
    all_data_with_ages_without_timebound,
    'C50',
    C50_predictor_codes,
    random_seed=random_seed,  # notebook-level seed rather than a repeated literal
)
print("C50_random_forest_hyper_params_with_ages_without_timebound:", C50_random_forest_hyper_params_with_ages_without_timebound)

C50_random_forest_hyper_params_with_ages_without_timebound: {'max_depth': 8, 'n_estimators': 200}


In [9]:
### Run this only when the hyperparameters need to be tuned. The predictor codes for I10
### are the top 20 ICD codes associated with this disease (based on weight).

# Snapshot of the codes for reference; the live value is I10_predictor_codes from the cell above.
#I10_predictor_codes=['E78','M25','M54','E11','M79','E66','K21','H25','G47','I25','F41','L82','E03','M47','M17','Z86','N39','M19','L57','J30']
I10_random_forest_hyper_params_with_ages_without_timebound = hyperparameter_tuning(
    all_data_with_ages_without_timebound,
    'I10',
    I10_predictor_codes,
    random_seed=random_seed,  # notebook-level seed rather than a repeated literal
)
print("I10_random_forest_hyper_params_with_ages_without_timebound:", I10_random_forest_hyper_params_with_ages_without_timebound)

I10_random_forest_hyper_params_with_ages_without_timebound: {'max_depth': 8, 'n_estimators': 200}


In [10]:
### Run this only when the hyperparameters need to be tuned. The predictor codes for I25
### are the top 20 ICD codes associated with this disease (based on weight).

# Snapshot of the codes for reference; the live value is I25_predictor_codes from the cell above.
#I25_predictor_codes=['I10','E78','M25','E11','M79','M54','Z95','H25','K21','G47','L82','Z86','I48','L57','M47','D64','I50','I51','I49','M19']
I25_random_forest_hyper_params_with_ages_without_timebound = hyperparameter_tuning(
    all_data_with_ages_without_timebound,
    'I25',
    I25_predictor_codes,
    random_seed=random_seed,  # notebook-level seed rather than a repeated literal
)
print("I25_random_forest_hyper_params_with_ages_without_timebound:", I25_random_forest_hyper_params_with_ages_without_timebound)

I25_random_forest_hyper_params_with_ages_without_timebound: {'max_depth': 8, 'n_estimators': 300}


In [6]:
### Run this only when the hyperparameters need to be tuned. The predictor codes for N18
### are the top 20 ICD codes associated with this disease (based on weight).

# Snapshot of the codes for reference; the live value is N18_predictor_codes from the cell above.
#N18_predictor_codes=['I10','E78','E11','I12','M25','M79','M54','I25','E87','D64','N17','N28','H25','K21','N39','I50','G47','Z86','I51','B35']
N18_random_forest_hyper_params_with_ages_without_timebound = hyperparameter_tuning(
    all_data_with_ages_without_timebound,
    'N18',
    N18_predictor_codes,
    random_seed=random_seed,  # notebook-level seed rather than a repeated literal
)
print("N18_random_forest_hyper_params_with_ages_without_timebound:", N18_random_forest_hyper_params_with_ages_without_timebound)

N18_random_forest_hyper_params_with_ages_without_timebound: {'max_depth': 6, 'n_estimators': 100}


In [7]:
# Tune the E11 model with the time-bound variant of the tuning helper, which is
# given the claims frames, the edges and the exclusion list directly.
E11_random_forest_hyper_params_with_ages_with_timebound = hyperparameter_tuning_timebound(
    medical_claims,
    rx_claims,
    'E11',
    edges,
    exclusion_list,
    random_seed=random_seed,  # notebook-level seed rather than a repeated literal
)
print("E11_random_forest_hyper_params_with_ages_with_timebound",E11_random_forest_hyper_params_with_ages_with_timebound)

E11_random_forest_hyper_params_with_ages_with_timebound {'max_depth': 2, 'n_estimators': 100}


In [8]:
# Tune the C18 model with the time-bound variant of the tuning helper, which is
# given the claims frames, the edges and the exclusion list directly.
C18_random_forest_hyper_params_with_ages_with_timebound = hyperparameter_tuning_timebound(
    medical_claims,
    rx_claims,
    'C18',
    edges,
    exclusion_list,
    random_seed=random_seed,  # notebook-level seed rather than a repeated literal
)
print("C18_random_forest_hyper_params_with_ages_with_timebound",C18_random_forest_hyper_params_with_ages_with_timebound)

C18_random_forest_hyper_params_with_ages_with_timebound {'max_depth': 2, 'n_estimators': 100}


In [9]:
# Tune the C50 model with the time-bound variant of the tuning helper, which is
# given the claims frames, the edges and the exclusion list directly.
C50_random_forest_hyper_params_with_ages_with_timebound = hyperparameter_tuning_timebound(
    medical_claims,
    rx_claims,
    'C50',
    edges,
    exclusion_list,
    random_seed=random_seed,  # notebook-level seed rather than a repeated literal
)
print("C50_random_forest_hyper_params_with_ages_with_timebound",C50_random_forest_hyper_params_with_ages_with_timebound)

C50_random_forest_hyper_params_with_ages_with_timebound {'max_depth': 8, 'n_estimators': 200}


In [10]:
# Tune the I10 model with the time-bound variant of the tuning helper, which is
# given the claims frames, the edges and the exclusion list directly.
I10_random_forest_hyper_params_with_ages_with_timebound = hyperparameter_tuning_timebound(
    medical_claims,
    rx_claims,
    'I10',
    edges,
    exclusion_list,
    random_seed=random_seed,  # notebook-level seed rather than a repeated literal
)
print("I10_random_forest_hyper_params_with_ages_with_timebound",I10_random_forest_hyper_params_with_ages_with_timebound)

I10_random_forest_hyper_params_with_ages_with_timebound {'max_depth': 8, 'n_estimators': 200}


In [11]:
# Tune the I25 model with the time-bound variant of the tuning helper, which is
# given the claims frames, the edges and the exclusion list directly.
I25_random_forest_hyper_params_with_ages_with_timebound = hyperparameter_tuning_timebound(
    medical_claims,
    rx_claims,
    'I25',
    edges,
    exclusion_list,
    random_seed=random_seed,  # notebook-level seed rather than a repeated literal
)
print("I25_random_forest_hyper_params_with_ages_with_timebound",I25_random_forest_hyper_params_with_ages_with_timebound)

I25_random_forest_hyper_params_with_ages_with_timebound {'max_depth': 8, 'n_estimators': 200}


In [12]:
# Tune the N18 model with the time-bound variant of the tuning helper, which is
# given the claims frames, the edges and the exclusion list directly.
N18_random_forest_hyper_params_with_ages_with_timebound = hyperparameter_tuning_timebound(
    medical_claims,
    rx_claims,
    'N18',
    edges,
    exclusion_list,
    random_seed=random_seed,  # notebook-level seed rather than a repeated literal
)
print("N18_random_forest_hyper_params_with_ages_with_timebound",N18_random_forest_hyper_params_with_ages_with_timebound)

N18_random_forest_hyper_params_with_ages_with_timebound {'max_depth': 2, 'n_estimators': 100}
