In [4]:
import numpy as np
import pandas as pd
import operator
import pickle

In [2]:
data_dir = "../Data/"
patients = pd.read_csv(data_dir+"PATIENTS.csv.gz", compression="gzip")

In [3]:
narcotic_drugs = ['oxymorphone', 'oxycodone', 'morphine', 'meperidine', 
                  'hydromorphone', 'hydrocodone', 'fentanyl', 'codeine', 'buprenorphine', 'levorphanol', 'methadone']
anti_narcotic_drugs = ['methadone', 'naloxone', 'nalorphine']
both_drugs = narcotic_drugs + anti_narcotic_drugs

### Prescriptions

In [4]:
with open('../Data/Opioids/Prescriptions/prescriptions_with_narcotic_drugs.pickle', 'rb') as pickler:
    prescriptions_with_narcotic_drugs = pickle.load(pickler)
with open('../Data/Opioids/Prescriptions/prescriptions_with_anti_narcotic_drugs.pickle', 'rb') as pickler:
    prescriptions_with_anti_narcotic_drugs = pickle.load(pickler)
with open('../Data/Opioids/Prescriptions/prescriptions_with_mixed_drugs.pickle', 'rb') as pickler:
    prescriptions_with_mixed_drugs = pickle.load(pickler)
with open('../Data/Opioids/Prescriptions/prescriptions_with_both_drugs.pickle', 'rb') as pickler:
    prescriptions_with_both_drugs = pickle.load(pickler)

### Subjects

In [5]:
with open('../Data/Opioids/Subjects/narcotic_subject_df.pickle', 'rb') as pickler:
    narcotic_subject_df = pickle.load(pickler)
with open('../Data/Opioids/Subjects/anti_narcotic_subject_df.pickle', 'rb') as pickler:
    anti_narcotic_subject_df = pickle.load(pickler)

In [2]:
with open('../Data/Opioids/Subjects/subjects_age_df.pickle', 'rb') as pickler:
    subjects_age_df = pickle.load(pickler)

In [6]:
total_subjects = set(narcotic_subject_df["SUBJECT_ID"]).union(set(anti_narcotic_subject_df["SUBJECT_ID"]))
len(total_subjects)

29991

In [7]:
subjects_list = list(total_subjects)
len(subjects_list)

29991

In [8]:
subjects_df = patients[patients["SUBJECT_ID"].isin(total_subjects)]
subjects_df.head()

Unnamed: 0,ROW_ID,SUBJECT_ID,GENDER,DOB,DOD,DOD_HOSP,DOD_SSN,EXPIRE_FLAG
0,234,249,F,2075-03-13 00:00:00,,,,0
1,235,250,F,2164-12-27 00:00:00,2188-11-22 00:00:00,2188-11-22 00:00:00,,1
3,237,252,M,2078-03-06 00:00:00,,,,0
5,239,255,M,2109-08-05 00:00:00,,,,0
6,240,256,M,2086-07-31 00:00:00,,,,0


### Diagnoses

In [9]:
with open('../Data/Opioids/Diagnoses/diagnoses_with_effects_df.pickle', 'rb') as pickler:
    diagnoses_with_effects_df = pickle.load(pickler)
with open('../Data/Opioids/Diagnoses/subjects_with_effects.pickle', 'rb') as pickler:
    subjects_with_effects = pickle.load(pickler)

In [10]:
narcotic_subject_df.head()

Unnamed: 0,ROW_ID,SUBJECT_ID,HADM_ID,ICUSTAY_ID,STARTDATE,ENDDATE,DRUG_TYPE,DRUG,DRUG_NAME_POE,DRUG_NAME_GENERIC,FORMULARY_DRUG_CD,GSN,NDC,PROD_STRENGTH,DOSE_VAL_RX,DOSE_UNIT_RX,FORM_VAL_DISP,FORM_UNIT_DISP,ROUTE,NO_OF_DAYS_PRESCRIBED
42,2122405.0,13.0,143045.0,263738.0,2167-01-09 00:00:00,2167-01-12 00:00:00,MAIN,oxycodone-acetaminophen,Oxycodone-Acetaminophen,Oxycodone-Acetaminophen,PERC,4222,54465025.0,5mg/325mg Tab,1-2,TAB,1-2,TAB,PO,4.0
45,2122400.0,13.0,143045.0,263738.0,2167-01-09 00:00:00,2167-01-12 00:00:00,MAIN,morphine sulfate,Morphine Sulfate,Morphine Sulfate,MORP2I,4070,74176201.0,2mg Syringe,0.5-4.0,mg,0.25-2,SYR,IV,4.0
46,2122399.0,13.0,143045.0,263738.0,2167-01-09 00:00:00,2167-01-12 00:00:00,MAIN,meperidine,Meperidine,Meperidine,MEPE50I,4051,54354563.0,50 mg Syringe,12.5,mg,0.25,ml,IV,4.0
127,1213835.0,4.0,185777.0,294638.0,2191-03-16 00:00:00,2191-03-23 00:00:00,MAIN,guaifenesin-codeine phosphate,Guaifenesin-Codeine Phosphate,Guaifenesin-Codeine Phosphate,GGAC5L,45667,31867412.0,5ML UDCUP,5-10,ml,1-2,UDCUP,PO,8.0
173,2214000.0,6.0,107064.0,,2175-05-30 00:00:00,2175-06-01 00:00:00,MAIN,morphine sulfate,Morphine Sulfate,Morphine PCA,MORP50PCA,4067,338268975.0,50mg/50mL Syringe,1,mg,0.02,SYR,IVPCA,3.0


### Total Prescriptions - Narcotic/Anti-Narcotic - For Each Subject

In [11]:
prescriptions_with_narcotic_drugs_grouped = prescriptions_with_narcotic_drugs.groupby('SUBJECT_ID').apply(
    lambda drug: drug[["DRUG"]].apply(lambda y: y.str.contains("|".join(narcotic_drugs)).sum()))
prescriptions_with_narcotic_drugs_grouped.head()

Unnamed: 0_level_0,DRUG
SUBJECT_ID,Unnamed: 1_level_1
4,1
6,4
11,6
12,3
13,4


In [12]:
prescriptions_with_anti_narcotic_drugs_grouped = prescriptions_with_anti_narcotic_drugs.groupby('SUBJECT_ID').apply(
    lambda drug: drug[["DRUG"]].apply(lambda y: y.str.contains("|".join(anti_narcotic_drugs)).sum()))
prescriptions_with_anti_narcotic_drugs_grouped.head()

Unnamed: 0_level_0,DRUG
SUBJECT_ID,Unnamed: 1_level_1
36,1
64,1
78,6
94,2
109,4


In [13]:
narcotic_temp_df = prescriptions_with_narcotic_drugs_grouped["DRUG"]
narcotic_temp_df.to_csv("prescriptions_with_narcotic_drugs_grouped.csv")

In [14]:
anti_narcotic_temp_df = prescriptions_with_anti_narcotic_drugs_grouped["DRUG"]
anti_narcotic_temp_df.to_csv("prescriptions_with_anti_narcotic_drugs_grouped.csv")

In [15]:
prescriptions_with_narcotic_drugs_grouped = pd.read_csv("prescriptions_with_narcotic_drugs_grouped.csv", header=None)
prescriptions_with_narcotic_drugs_grouped.columns=["SUBJECT_ID", "TOTAL_NARCOTIC_PRESCRIPTONS"]
prescriptions_with_narcotic_drugs_grouped.head()

Unnamed: 0,SUBJECT_ID,TOTAL_NARCOTIC_PRESCRIPTONS
0,4,1
1,6,4
2,11,6
3,12,3
4,13,4


In [17]:
prescriptions_with_anti_narcotic_drugs_grouped = pd.read_csv("prescriptions_with_anti_narcotic_drugs_grouped.csv", header=None)
prescriptions_with_anti_narcotic_drugs_grouped.columns=["SUBJECT_ID", "TOTAL_ANTI_NARCOTIC_PRESCRIPTONS"]
prescriptions_with_anti_narcotic_drugs_grouped.head()

Unnamed: 0,SUBJECT_ID,TOTAL_ANTI_NARCOTIC_PRESCRIPTONS
0,36,1
1,64,1
2,78,6
3,94,2
4,109,4


In [60]:
subject_details = pd.DataFrame(columns=["SUBJECT_ID", "GENDER", "AGE", "ICU_STAY", "TOTAL_NARCOTIC_PRESCRIPTONS", "TOTAL_ANTI_NARCOTIC_PRESCRIPTONS", "OXYMORPHONE", "OXYCODONE", "MORPHINE", "MEPERIDINE", 
                  "HYDROMORPHONE", "HYDROCONE", "FENTANYL", "CODEINE", "BUPRENORPHINE", "LEVORPHANOL", "METHADONE", "NALOXONE", "NALORPHINE", "NARCOTIC", "ANTI-NARCOTIC", "EXPIRE_FLAG"])
subject_details

Unnamed: 0,SUBJECT_ID,GENDER,AGE,ICU_STAY,TOTAL_NARCOTIC_PRESCRIPTONS,TOTAL_ANTI_NARCOTIC_PRESCRIPTONS,OXYMORPHONE,OXYCODONE,MORPHINE,MEPERIDINE,...,FENTANYL,CODEINE,BUPRENORPHINE,LEVORPHANOL,METHADONE,NALOXONE,NALORPHINE,NARCOTIC,ANTI-NARCOTIC,EXPIRE_FLAG


In [61]:
subject_details = subjects_df[["SUBJECT_ID", "GENDER", "EXPIRE_FLAG"]].copy()
print(len(subject_details))
subject_details.head()

29991


Unnamed: 0,SUBJECT_ID,GENDER,EXPIRE_FLAG
0,249,F,0
1,250,F,1
3,252,M,0
5,255,M,0
6,256,M,0


In [62]:
subject_details = pd.merge(subject_details, prescriptions_with_narcotic_drugs_grouped, on="SUBJECT_ID", how='left')
subject_details = pd.merge(subject_details, prescriptions_with_anti_narcotic_drugs_grouped, on="SUBJECT_ID", how='left')
subject_details.head()

Unnamed: 0,SUBJECT_ID,GENDER,EXPIRE_FLAG,TOTAL_NARCOTIC_PRESCRIPTONS,TOTAL_ANTI_NARCOTIC_PRESCRIPTONS
0,249,F,0,8.0,
1,250,F,1,6.0,
2,252,M,0,4.0,
3,255,M,0,2.0,
4,256,M,0,22.0,


In [63]:
subject_details.fillna(0, inplace=True)
subject_details.isnull().sum()
subject_details.head()

Unnamed: 0,SUBJECT_ID,GENDER,EXPIRE_FLAG,TOTAL_NARCOTIC_PRESCRIPTONS,TOTAL_ANTI_NARCOTIC_PRESCRIPTONS
0,249,F,0,8.0,0.0
1,250,F,1,6.0,0.0
2,252,M,0,4.0,0.0
3,255,M,0,2.0,0.0
4,256,M,0,22.0,0.0


In [64]:
subject_details["GENDER"] = subject_details["GENDER"].map({'F': 0, 'M': 1})
subject_details.head()

Unnamed: 0,SUBJECT_ID,GENDER,EXPIRE_FLAG,TOTAL_NARCOTIC_PRESCRIPTONS,TOTAL_ANTI_NARCOTIC_PRESCRIPTONS
0,249,0,0,8.0,0.0
1,250,0,1,6.0,0.0
2,252,1,0,4.0,0.0
3,255,1,0,2.0,0.0
4,256,1,0,22.0,0.0


In [65]:
subject_details["NARCOTIC"] = np.where(subject_details["TOTAL_NARCOTIC_PRESCRIPTONS"] > 0, 1, 0)
subject_details['ANTI-NARCOTIC'] = np.where(subject_details["TOTAL_ANTI_NARCOTIC_PRESCRIPTONS"] > 0, 1, 0)
subject_details.head()

Unnamed: 0,SUBJECT_ID,GENDER,EXPIRE_FLAG,TOTAL_NARCOTIC_PRESCRIPTONS,TOTAL_ANTI_NARCOTIC_PRESCRIPTONS,NARCOTIC,ANTI-NARCOTIC
0,249,0,0,8.0,0.0,1,0
1,250,0,1,6.0,0.0,1,0
2,252,1,0,4.0,0.0,1,0
3,255,1,0,2.0,0.0,1,0
4,256,1,0,22.0,0.0,1,0


### ICU STAY ID

In [25]:
icu_temp_df = prescriptions_with_both_drugs.groupby("SUBJECT_ID").apply(
    lambda prescription: prescription[["SUBJECT_ID", "ICUSTAY_ID"]])
icu_temp_df.head()

Unnamed: 0,SUBJECT_ID,ICUSTAY_ID
42,13,263738.0
45,13,263738.0
46,13,263738.0
127,4,294638.0
173,6,


In [26]:
subject_icu_dict = dict()
for subject in total_subjects:
    subject_icu_dict[subject] = set(icu_temp_df[icu_temp_df["SUBJECT_ID"] == subject]["ICUSTAY_ID"])
subject_icu_dict

{65537.0: {234193.0},
 4.0: {294638.0},
 65541.0: {269604.0},
 6.0: {nan, nan, 228232.0, nan},
 65544.0: {293457.0},
 65547.0: {nan},
 12.0: {nan, 232669.0},
 13.0: {263738.0},
 17.0: {nan, nan, nan, nan, nan, nan, 257980.0, 277042.0},
 65554.0: {nan},
 65555.0: {213945.0},
 20.0: {nan, nan, 264490.0, nan, nan},
 21.0: {nan, 216859.0, 217847.0},
 65558.0: {nan, nan, nan, nan, nan, 271141.0, nan, nan, 214877.0, 283002.0},
 23.0: {227807.0},
 24.0: {nan},
 26.0: {244882.0},
 28.0: {225559.0},
 65565.0: {nan, nan, nan, nan, nan, nan, nan, 276116.0},
 31.0: {254478.0},
 65569.0: {nan, nan, nan, nan, 228839.0},
 34.0: {290505.0},
 35.0: {282039.0},
 36.0: {nan,
  nan,
  nan,
  241249.0,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  211200.0,
  280987.0},
 37.0: {213503.0},
 38.0: {248910.0},
 43697.0: {229941.0},
 41.0: {237024.0, nan, 261027.0, nan, nan},
 42.0: {210828.0},
 43.0: {nan, nan, 225852.0},
 44.0: {nan, 291554.0},
 65582.0: {nan, 231719.0, 279995.0, 297063.0},
 49.0

In [27]:
for subject in subject_icu_dict:
    subject_icu_dict[subject] = pd.Series(list(subject_icu_dict[subject])).unique()
#for subject in subject_icu_dict:
 #   subject_icu_dict[subject] = list(subject_icu_dict[subject])

In [28]:
subject_icu_dict

{65537.0: array([ 234193.]),
 4.0: array([ 294638.]),
 65541.0: array([ 269604.]),
 6.0: array([     nan,  228232.]),
 65544.0: array([ 293457.]),
 65547.0: array([ nan]),
 12.0: array([     nan,  232669.]),
 13.0: array([ 263738.]),
 17.0: array([     nan,  277042.,  257980.]),
 65554.0: array([ nan]),
 65555.0: array([ 213945.]),
 20.0: array([     nan,  264490.]),
 21.0: array([     nan,  216859.,  217847.]),
 65558.0: array([     nan,  271141.,  283002.,  214877.]),
 23.0: array([ 227807.]),
 24.0: array([ nan]),
 26.0: array([ 244882.]),
 28.0: array([ 225559.]),
 65565.0: array([     nan,  276116.]),
 31.0: array([ 254478.]),
 65569.0: array([     nan,  228839.]),
 34.0: array([ 290505.]),
 35.0: array([ 282039.]),
 36.0: array([     nan,  241249.,  211200.,  280987.]),
 37.0: array([ 213503.]),
 38.0: array([ 248910.]),
 43697.0: array([ 229941.]),
 41.0: array([ 237024.,      nan,  261027.]),
 42.0: array([ 210828.]),
 43.0: array([     nan,  225852.]),
 44.0: array([     nan, 

In [29]:
subject_icu_binary_dict = dict()
for subject in subject_icu_dict:
    if (np.isnan(subject_icu_dict[subject]).all()):
        subject_icu_binary_dict[subject] = 0
    else:
        subject_icu_binary_dict[subject] = 1
subject_icu_binary_dict

{65537.0: 1,
 4.0: 1,
 65541.0: 1,
 6.0: 1,
 65544.0: 1,
 11.0: 1,
 12.0: 1,
 13.0: 1,
 17.0: 1,
 65554.0: 0,
 65555.0: 1,
 20.0: 1,
 21.0: 1,
 65558.0: 1,
 23.0: 1,
 24.0: 0,
 26.0: 1,
 28.0: 1,
 65565.0: 1,
 31.0: 1,
 65569.0: 1,
 34.0: 1,
 35.0: 1,
 36.0: 1,
 37.0: 1,
 38.0: 1,
 41.0: 1,
 42.0: 1,
 43.0: 1,
 44.0: 1,
 65582.0: 1,
 49.0: 1,
 52.0: 1,
 65589.0: 1,
 55.0: 1,
 56.0: 1,
 65594.0: 1,
 61.0: 1,
 64.0: 1,
 65.0: 1,
 65547.0: 0,
 68.0: 1,
 72.0: 1,
 65609.0: 1,
 75.0: 1,
 65613.0: 1,
 78.0: 1,
 79.0: 1,
 80.0: 1,
 81.0: 1,
 85.0: 1,
 86.0: 1,
 65623.0: 1,
 65627.0: 1,
 93.0: 1,
 94.0: 1,
 95.0: 1,
 96.0: 1,
 97.0: 1,
 65634.0: 1,
 103.0: 1,
 106.0: 1,
 107.0: 1,
 109.0: 1,
 65646.0: 1,
 111.0: 1,
 112.0: 1,
 113.0: 1,
 65650.0: 1,
 115.0: 1,
 65652.0: 1,
 117.0: 1,
 65654.0: 1,
 65656.0: 1,
 65556.0: 1,
 123.0: 1,
 124.0: 1,
 125.0: 1,
 65666.0: 0,
 133.0: 1,
 134.0: 1,
 135.0: 1,
 136.0: 0,
 137.0: 1,
 138.0: 1,
 65675.0: 1,
 140.0: 1,
 141.0: 1,
 142.0: 1,
 143.0: 1,
 144.

In [66]:
subject_icu_df = pd.DataFrame(list(subject_icu_binary_dict.items()), columns=["SUBJECT_ID", "ICU_STAY"])
subject_details = pd.merge(subject_details, subject_icu_df, on="SUBJECT_ID", how='left')
subject_details.head()

Unnamed: 0,SUBJECT_ID,GENDER,EXPIRE_FLAG,TOTAL_NARCOTIC_PRESCRIPTONS,TOTAL_ANTI_NARCOTIC_PRESCRIPTONS,NARCOTIC,ANTI-NARCOTIC,ICU_STAY
0,249,0,0,8.0,0.0,1,0,1
1,250,0,1,6.0,0.0,1,0,1
2,252,1,0,4.0,0.0,1,0,1
3,255,1,0,2.0,0.0,1,0,0
4,256,1,0,22.0,0.0,1,0,1


In [67]:
'''subject_details.fillna(0, inplace=True)
subject_details["ICU_STAY"] = np.where(subject_details["ICUSTAY_ID"] > 0, 1, 0)
len(subject_details[subject_details["ICU_STAY"] == 0])'''
len(subject_details)

29991

In [68]:
#subject_details.drop(["ICUSTAY_ID"], axis=1, inplace=True)
subject_details.head()

Unnamed: 0,SUBJECT_ID,GENDER,EXPIRE_FLAG,TOTAL_NARCOTIC_PRESCRIPTONS,TOTAL_ANTI_NARCOTIC_PRESCRIPTONS,NARCOTIC,ANTI-NARCOTIC,ICU_STAY
0,249,0,0,8.0,0.0,1,0,1
1,250,0,1,6.0,0.0,1,0,1
2,252,1,0,4.0,0.0,1,0,1
3,255,1,0,2.0,0.0,1,0,0
4,256,1,0,22.0,0.0,1,0,1


### Count For Each Drug

In [33]:
both_drugs

['oxymorphone',
 'oxycodone',
 'morphine',
 'meperidine',
 'hydromorphone',
 'hydrocodone',
 'fentanyl',
 'codeine',
 'buprenorphine',
 'levorphanol',
 'methadone',
 'methadone',
 'naloxone',
 'nalorphine']

In [35]:
oxymorphone_df = narcotic_subject_df[narcotic_subject_df["DRUG"].str.contains("oxymorphone")]
print("oxymorphone: %i" %len(oxymorphone_df))
oxymorphone_df = oxymorphone_df.groupby('SUBJECT_ID').apply(lambda row: row[["NO_OF_DAYS_PRESCRIBED"]].apply(lambda y: y.sum()))
oxymorphone_df = oxymorphone_df["NO_OF_DAYS_PRESCRIBED"]
oxymorphone_df.to_csv("oxymorphone_df.csv")
oxymorphone_df = pd.read_csv("oxymorphone_df.csv", header=None)
oxymorphone_df.columns=["SUBJECT_ID", "OXYMORPHONE"]
oxymorphone_df.to_csv("oxymorphone_df.csv")
oxymorphone_df.head()

oxymorphone: 4


Unnamed: 0,SUBJECT_ID,OXYMORPHONE
0,69650.0,8.0


In [36]:
oxycodone_df = narcotic_subject_df[narcotic_subject_df["DRUG"].str.contains("oxycodone")]
print("oxycodone: %i" %len(oxycodone_df))
oxycodone_df = oxycodone_df.groupby('SUBJECT_ID').apply(lambda row: row[["NO_OF_DAYS_PRESCRIBED"]].apply(lambda y: y.sum()))
oxycodone_df = oxycodone_df["NO_OF_DAYS_PRESCRIBED"]
oxycodone_df.to_csv("oxycodone_df.csv")
oxycodone_df = pd.read_csv("oxycodone_df.csv", header=None)
oxycodone_df.columns=["SUBJECT_ID", "OXYCODONE"]
oxycodone_df.to_csv("oxycodone_df.csv")
oxycodone_df.head()

oxycodone: 44538


Unnamed: 0,SUBJECT_ID,OXYCODONE
0,6.0,15.0
1,11.0,30.0
2,13.0,8.0
3,17.0,11.0
4,20.0,7.0


In [37]:
morphine_df = narcotic_subject_df[narcotic_subject_df["DRUG"].str.contains("morphine")]
print("morphine: %i" %len(morphine_df))
morphine_df = morphine_df.groupby('SUBJECT_ID').apply(lambda row: row[["NO_OF_DAYS_PRESCRIBED"]].apply(lambda y: y.sum()))
morphine_df = morphine_df["NO_OF_DAYS_PRESCRIBED"]
morphine_df.to_csv("morphine_df.csv")
morphine_df = pd.read_csv("morphine_df.csv", header=None)
morphine_df.columns=["SUBJECT_ID", "MORPHINE"]
morphine_df.to_csv("morphine_df.csv")
morphine_df.head()

morphine: 68045


Unnamed: 0,SUBJECT_ID,MORPHINE
0,6.0,11.0
1,11.0,1.0
2,12.0,8.0
3,13.0,4.0
4,17.0,9.0


In [38]:
meperidine_df = narcotic_subject_df[narcotic_subject_df["DRUG"].str.contains("meperidine")]
print("meperidine: %i" %len(meperidine_df))
meperidine_df = meperidine_df.groupby('SUBJECT_ID').apply(lambda row: row[["NO_OF_DAYS_PRESCRIBED"]].apply(lambda y: y.sum()))
meperidine_df = meperidine_df["NO_OF_DAYS_PRESCRIBED"]
meperidine_df.to_csv("meperidine_df.csv")
meperidine_df = pd.read_csv("meperidine_df.csv", header=None)
meperidine_df.columns=["SUBJECT_ID", "MEPERIDINE"]
meperidine_df.to_csv("meperidine_df.csv")
meperidine_df.head()

meperidine: 6622


Unnamed: 0,SUBJECT_ID,MEPERIDINE
0,11.0,1.0
1,13.0,4.0
2,17.0,5.0
3,20.0,2.0
4,23.0,4.0


In [39]:
hydromorphone_df = narcotic_subject_df[narcotic_subject_df["DRUG"].str.contains("hydromorphone")]
print("hydromorphone: %i" %len(hydromorphone_df))
hydromorphone_df = hydromorphone_df.groupby('SUBJECT_ID').apply(lambda row: row[["NO_OF_DAYS_PRESCRIBED"]].apply(lambda y: y.sum()))
hydromorphone_df = hydromorphone_df["NO_OF_DAYS_PRESCRIBED"]
hydromorphone_df.to_csv("hydromorphone_df.csv")
hydromorphone_df = pd.read_csv("hydromorphone_df.csv", header=None)
hydromorphone_df.columns=["SUBJECT_ID", "HYDROMORPHONE"]
hydromorphone_df.to_csv("hydromorphone_df.csv")
hydromorphone_df.head()

hydromorphone: 53202


Unnamed: 0,SUBJECT_ID,HYDROMORPHONE
0,6.0,5.0
1,11.0,19.0
2,17.0,8.0
3,21.0,16.0
4,36.0,10.0


In [40]:
hydrocodone_df = narcotic_subject_df[narcotic_subject_df["DRUG"].str.contains("hydrocodone")]
print("hydrocodone: %i" %len(hydrocodone_df))
hydrocodone_df = hydrocodone_df.groupby('SUBJECT_ID').apply(lambda row: row[["NO_OF_DAYS_PRESCRIBED"]].apply(lambda y: y.sum()))
hydrocodone_df = hydrocodone_df["NO_OF_DAYS_PRESCRIBED"]
hydrocodone_df.to_csv("hydrocodone_df.csv")
hydrocodone_df = pd.read_csv("hydrocodone_df.csv", header=None)
hydrocodone_df.columns=["SUBJECT_ID", "HYDROCODONE"]
hydrocodone_df.to_csv("hydrocodone_df.csv")
hydrocodone_df.head()

hydrocodone: 1998


Unnamed: 0,SUBJECT_ID,HYDROCODONE
0,109.0,4.0
1,178.0,3.0
2,335.0,2.0
3,352.0,5.0
4,402.0,36.0


In [42]:
fentanyl_df = narcotic_subject_df[narcotic_subject_df["DRUG"].str.contains("fentanyl")]
print("fentanyl: %i" %len(fentanyl_df))
fentanyl_df = fentanyl_df.groupby('SUBJECT_ID').apply(lambda row: row[["NO_OF_DAYS_PRESCRIBED"]].apply(lambda y: y.sum()))
fentanyl_df = fentanyl_df["NO_OF_DAYS_PRESCRIBED"]
fentanyl_df.to_csv("fentanyl_df.csv")
fentanyl_df = pd.read_csv("fentanyl_df.csv", header=None)
fentanyl_df.columns=["SUBJECT_ID", "FENTANYL"]
fentanyl_df.to_csv("fentanyl_df.csv")
fentanyl_df.head()

fentanyl: 35734


Unnamed: 0,SUBJECT_ID,FENTANYL
0,12.0,6.0
1,24.0,1.0
2,34.0,1.0
3,36.0,14.0
4,38.0,23.0


In [43]:
codeine_df = narcotic_subject_df[narcotic_subject_df["DRUG"].str.contains("codeine")]
print("codeine: %i" %len(codeine_df))
codeine_df = codeine_df.groupby('SUBJECT_ID').apply(lambda row: row[["NO_OF_DAYS_PRESCRIBED"]].apply(lambda y: y.sum()))
codeine_df = codeine_df["NO_OF_DAYS_PRESCRIBED"]
codeine_df.to_csv("codeine_df.csv")
codeine_df = pd.read_csv("codeine_df.csv", header=None)
codeine_df.columns=["SUBJECT_ID", "CODEINE"]
codeine_df.to_csv("codeine_df.csv")
codeine_df.head()

codeine: 2356


Unnamed: 0,SUBJECT_ID,CODEINE
0,4.0,8.0
1,85.0,6.0
2,103.0,3.0
3,111.0,4.0
4,165.0,2.0


In [45]:
buprenorphine_df = narcotic_subject_df[narcotic_subject_df["DRUG"].str.contains("buprenorphine")]
print("buprenorphine_df: %i" %len(buprenorphine_df))
buprenorphine_df = buprenorphine_df.groupby('SUBJECT_ID').apply(lambda row: row[["NO_OF_DAYS_PRESCRIBED"]].apply(lambda y: y.sum()))
buprenorphine_df = buprenorphine_df["NO_OF_DAYS_PRESCRIBED"]
buprenorphine_df.to_csv("buprenorphine_df.csv")
buprenorphine_df = pd.read_csv("buprenorphine_df.csv", header=None)
buprenorphine_df.columns=["SUBJECT_ID", "BUPRENORPHINE"]
buprenorphine_df.to_csv("buprenorphine_df.csv")
buprenorphine_df.head()

buprenorphine_df: 50


Unnamed: 0,SUBJECT_ID,BUPRENORPHINE
0,14779.0,15.0
1,19412.0,39.0
2,22424.0,4.0
3,25876.0,10.0
4,26027.0,8.0


In [52]:
methadone_df = narcotic_subject_df[narcotic_subject_df["DRUG"].str.contains("methadone")]
print("methadone: %i" %len(methadone_df))
methadone_df = methadone_df.groupby('SUBJECT_ID').apply(lambda row: row[["NO_OF_DAYS_PRESCRIBED"]].apply(lambda y: y.sum()))
methadone_df = methadone_df["NO_OF_DAYS_PRESCRIBED"]
methadone_df.to_csv("methadone_df.csv")
methadone_df = pd.read_csv("methadone_df.csv", header=None)
methadone_df.columns=["SUBJECT_ID", "METHADONE"]
methadone_df.to_csv("methadone_df.csv")
methadone_df.head()

methadone: 4398


Unnamed: 0,SUBJECT_ID,METHADONE
0,64.0,13.0
1,78.0,13.0
2,124.0,8.0
3,135.0,15.0
4,143.0,43.0


In [48]:
levorphanol_df = narcotic_subject_df[narcotic_subject_df["DRUG"].str.contains("levorphanol")]
print("levorphanol: %i" %len(levorphanol_df))
'''levorphanol_df = levorphanol_df.groupby('SUBJECT_ID').apply(lambda row: row[["NO_OF_DAYS_PRESCRIBED"]].apply(lambda y: y.sum()))
levorphanol_df = levorphanol_df["NO_OF_DAYS_PRESCRIBED"]
levorphanol_df.to_csv("levorphanol_df.csv")
levorphanol_df = pd.read_csv("levorphanol_df.csv", header=None)
levorphanol_df.columns=["SUBJECT_ID", "LEVORPHANOL"]
levorphanol_df.to_csv("levorphanol_df.csv")
levorphanol_df.head()'''

levorphanol: 0


'levorphanol_df = levorphanol_df.groupby(\'SUBJECT_ID\').apply(lambda row: row[["NO_OF_DAYS_PRESCRIBED"]].apply(lambda y: y.sum()))\nlevorphanol_df = levorphanol_df["NO_OF_DAYS_PRESCRIBED"]\nlevorphanol_df.to_csv("levorphanol_df.csv")\nlevorphanol_df = pd.read_csv("levorphanol_df.csv", header=None)\nlevorphanol_df.columns=["SUBJECT_ID", "LEVORPHANOL"]\nlevorphanol_df.to_csv("levorphanol_df.csv")\nlevorphanol_df.head()'

In [49]:
naloxone_df = anti_narcotic_subject_df[anti_narcotic_subject_df["DRUG"].str.contains("naloxone")]
print("naloxone: %i" %len(naloxone_df))
naloxone_df = naloxone_df.groupby('SUBJECT_ID').apply(lambda row: row[["NO_OF_DAYS_PRESCRIBED"]].apply(lambda y: y.sum()))
naloxone_df = naloxone_df["NO_OF_DAYS_PRESCRIBED"]
naloxone_df.to_csv("naloxone_df.csv")
naloxone_df = pd.read_csv("naloxone_df.csv", header=None)
naloxone_df.columns=["SUBJECT_ID", "NALOXONE"]
naloxone_df.to_csv("naloxone_df.csv")
naloxone_df.head()

naloxone: 1274


Unnamed: 0,SUBJECT_ID,NALOXONE
0,36.0,2.0
1,78.0,10.0
2,94.0,3.0
3,109.0,7.0
4,135.0,3.0


In [51]:
nalorphine_df = anti_narcotic_subject_df[anti_narcotic_subject_df["DRUG"].str.contains("nalorphine")]
print("nalorphine: %i" %len(nalorphine_df))
'''nalorphine_df = nalorphine_df.groupby('SUBJECT_ID').apply(lambda row: row[["NO_OF_DAYS_PRESCRIBED"]].apply(lambda y: y.sum()))
nalorphine_df = nalorphine_df["NO_OF_DAYS_PRESCRIBED"]
nalorphine_df.to_csv("nalorphine_df.csv")
nalorphine_df = pd.read_csv("nalorphine_df.csv", header=None)
nalorphine_df.columns=["SUBJECT_ID", "NALORPHINE"]
nalorphine_df.to_csv("nalorphine_df.csv")
nalorphine_df.head()'''

nalorphine: 0


'nalorphine_df = nalorphine_df.groupby(\'SUBJECT_ID\').apply(lambda row: row[["NO_OF_DAYS_PRESCRIBED"]].apply(lambda y: y.sum()))\nnalorphine_df = nalorphine_df["NO_OF_DAYS_PRESCRIBED"]\nnalorphine_df.to_csv("nalorphine_df.csv")\nnalorphine_df = pd.read_csv("nalorphine_df.csv", header=None)\nnalorphine_df.columns=["SUBJECT_ID", "NALORPHINE"]\nnalorphine_df.to_csv("nalorphine_df.csv")\nnalorphine_df.head()'

In [69]:
subject_details = pd.merge(subject_details, oxymorphone_df, on="SUBJECT_ID", how='left')
subject_details = pd.merge(subject_details, oxycodone_df, on="SUBJECT_ID", how='left')
subject_details = pd.merge(subject_details, morphine_df, on="SUBJECT_ID", how='left')
subject_details = pd.merge(subject_details, meperidine_df, on="SUBJECT_ID", how='left')
subject_details = pd.merge(subject_details, hydromorphone_df, on="SUBJECT_ID", how='left')
subject_details = pd.merge(subject_details, hydrocodone_df, on="SUBJECT_ID", how='left')
subject_details = pd.merge(subject_details, fentanyl_df, on="SUBJECT_ID", how='left')
subject_details = pd.merge(subject_details, codeine_df, on="SUBJECT_ID", how='left')
subject_details = pd.merge(subject_details, buprenorphine_df, on="SUBJECT_ID", how='left')
#subject_details = pd.merge(subject_details, levorphanol_df, on="SUBJECT_ID", how='left')
subject_details = pd.merge(subject_details, methadone_df, on="SUBJECT_ID", how='left')
subject_details = pd.merge(subject_details, naloxone_df, on="SUBJECT_ID", how='left')
#subject_details = pd.merge(subject_details, nalorphine_df, on="SUBJECT_ID", how='left')
subject_details.head()

Unnamed: 0,SUBJECT_ID,GENDER,EXPIRE_FLAG,TOTAL_NARCOTIC_PRESCRIPTONS,TOTAL_ANTI_NARCOTIC_PRESCRIPTONS,NARCOTIC,ANTI-NARCOTIC,ICU_STAY,OXYMORPHONE,OXYCODONE,MORPHINE,MEPERIDINE,HYDROMORPHONE,HYDROCODONE,FENTANYL,CODEINE,BUPRENORPHINE,METHADONE,NALOXONE
0,249,0,0,8.0,0.0,1,0,1,,4.0,17.0,,12.0,,,,,,
1,250,0,1,6.0,0.0,1,0,1,,,2.0,,,,15.0,,,,
2,252,1,0,4.0,0.0,1,0,1,,,,2.0,,,14.0,,,,
3,255,1,0,2.0,0.0,1,0,0,,4.0,4.0,,,,,,,,
4,256,1,0,22.0,0.0,1,0,1,,31.0,4.0,,5.0,,9.0,,,,


In [71]:
subject_details.fillna(0, inplace=True)
subject_details.isnull().sum()

SUBJECT_ID                          0
GENDER                              0
EXPIRE_FLAG                         0
TOTAL_NARCOTIC_PRESCRIPTONS         0
TOTAL_ANTI_NARCOTIC_PRESCRIPTONS    0
NARCOTIC                            0
ANTI-NARCOTIC                       0
ICU_STAY                            0
OXYMORPHONE                         0
OXYCODONE                           0
MORPHINE                            0
MEPERIDINE                          0
HYDROMORPHONE                       0
HYDROCODONE                         0
FENTANYL                            0
CODEINE                             0
BUPRENORPHINE                       0
METHADONE                           0
NALOXONE                            0
dtype: int64

In [73]:
subject_details

Unnamed: 0,SUBJECT_ID,GENDER,EXPIRE_FLAG,TOTAL_NARCOTIC_PRESCRIPTONS,TOTAL_ANTI_NARCOTIC_PRESCRIPTONS,NARCOTIC,ANTI-NARCOTIC,ICU_STAY,OXYMORPHONE,OXYCODONE,MORPHINE,MEPERIDINE,HYDROMORPHONE,HYDROCODONE,FENTANYL,CODEINE,BUPRENORPHINE,METHADONE,NALOXONE
0,249,0,0,8.0,0.0,1,0,1,0.0,4.0,17.0,0.0,12.0,0.0,0.0,0.0,0.0,0.0,0.0
1,250,0,1,6.0,0.0,1,0,1,0.0,0.0,2.0,0.0,0.0,0.0,15.0,0.0,0.0,0.0,0.0
2,252,1,0,4.0,0.0,1,0,1,0.0,0.0,0.0,2.0,0.0,0.0,14.0,0.0,0.0,0.0,0.0
3,255,1,0,2.0,0.0,1,0,0,0.0,4.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,256,1,0,22.0,0.0,1,0,1,0.0,31.0,4.0,0.0,5.0,0.0,9.0,0.0,0.0,0.0,0.0
5,262,1,0,2.0,0.0,1,0,1,0.0,3.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,263,1,1,3.0,0.0,1,0,1,0.0,5.0,10.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,265,1,0,7.0,0.0,1,0,1,0.0,8.0,10.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,266,0,0,1.0,0.0,1,0,0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,267,0,0,11.0,0.0,1,0,1,0.0,1.0,2.0,0.0,19.0,0.0,7.0,0.0,0.0,0.0,0.0


In [84]:
check_subjects = set(narcotic_subject_df["SUBJECT_ID"]).union(set(anti_narcotic_subject_df["SUBJECT_ID"]))
check = check_subjects.intersection(subjects_with_effects)
len(check)

749

In [85]:
subjects_with_effects_dict = dict()
for subject in subjects_with_effects:
    subjects_with_effects_dict[subject] = 1
subjects_with_effects_df = pd.DataFrame(list(subjects_with_effects_dict.items()), columns=["SUBJECT_ID", "SIDE_EFFECTS_FLAG"])
subjects_with_effects_df.head()

Unnamed: 0,SUBJECT_ID,SIDE_EFFECTS_FLAG
0,25259,1
1,10246,1
2,63496,1
3,6828,1
4,23895,1


In [90]:
subject_details = pd.merge(subject_details, subjects_with_effects_df, on="SUBJECT_ID", how='left')
subject_details.head()

Unnamed: 0,SUBJECT_ID,GENDER,EXPIRE_FLAG,TOTAL_NARCOTIC_PRESCRIPTONS,TOTAL_ANTI_NARCOTIC_PRESCRIPTONS,NARCOTIC,ANTI-NARCOTIC,ICU_STAY,OXYMORPHONE,OXYCODONE,MORPHINE,MEPERIDINE,HYDROMORPHONE,HYDROCODONE,FENTANYL,CODEINE,BUPRENORPHINE,METHADONE,NALOXONE,SIDE_EFFECTS_FLAG
0,249,0,0,8.0,0.0,1,0,1,0.0,4.0,17.0,0.0,12.0,0.0,0.0,0.0,0.0,0.0,0.0,
1,250,0,1,6.0,0.0,1,0,1,0.0,0.0,2.0,0.0,0.0,0.0,15.0,0.0,0.0,0.0,0.0,
2,252,1,0,4.0,0.0,1,0,1,0.0,0.0,0.0,2.0,0.0,0.0,14.0,0.0,0.0,0.0,0.0,
3,255,1,0,2.0,0.0,1,0,0,0.0,4.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,
4,256,1,0,22.0,0.0,1,0,1,0.0,31.0,4.0,0.0,5.0,0.0,9.0,0.0,0.0,0.0,0.0,


In [92]:
#len(subject_details[subject_details["SIDE_EFFECTS_FLAG"] == 1])
subject_details.isnull().sum()
subject_details.fillna(0, inplace=True)

In [93]:
#subject_details.drop(["SIDE_EFFECTS_FLAG"], axis=1, inplace=True)
subject_details.head()

Unnamed: 0,SUBJECT_ID,GENDER,EXPIRE_FLAG,TOTAL_NARCOTIC_PRESCRIPTONS,TOTAL_ANTI_NARCOTIC_PRESCRIPTONS,NARCOTIC,ANTI-NARCOTIC,ICU_STAY,OXYMORPHONE,OXYCODONE,MORPHINE,MEPERIDINE,HYDROMORPHONE,HYDROCODONE,FENTANYL,CODEINE,BUPRENORPHINE,METHADONE,NALOXONE,SIDE_EFFECTS_FLAG
0,249,0,0,8.0,0.0,1,0,1,0.0,4.0,17.0,0.0,12.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,250,0,1,6.0,0.0,1,0,1,0.0,0.0,2.0,0.0,0.0,0.0,15.0,0.0,0.0,0.0,0.0,0.0
2,252,1,0,4.0,0.0,1,0,1,0.0,0.0,0.0,2.0,0.0,0.0,14.0,0.0,0.0,0.0,0.0,0.0
3,255,1,0,2.0,0.0,1,0,0,0.0,4.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,256,1,0,22.0,0.0,1,0,1,0.0,31.0,4.0,0.0,5.0,0.0,9.0,0.0,0.0,0.0,0.0,0.0


In [94]:
len(subject_details[subject_details["SIDE_EFFECTS_FLAG"]==1])

749

In [96]:
subject_details = pd.merge(subject_details, subjects_age_df, on="SUBJECT_ID", how='left')
print(len(subject_details))
subject_details.head()

29991


Unnamed: 0,SUBJECT_ID,GENDER,EXPIRE_FLAG,TOTAL_NARCOTIC_PRESCRIPTONS,TOTAL_ANTI_NARCOTIC_PRESCRIPTONS,NARCOTIC,ANTI-NARCOTIC,ICU_STAY,OXYMORPHONE,OXYCODONE,...,MEPERIDINE,HYDROMORPHONE,HYDROCODONE,FENTANYL,CODEINE,BUPRENORPHINE,METHADONE,NALOXONE,SIDE_EFFECTS_FLAG,AGE
0,249,0,0,8.0,0.0,1,0,1,0.0,4.0,...,0.0,12.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,81.0
1,250,0,1,6.0,0.0,1,0,1,0.0,0.0,...,0.0,0.0,0.0,15.0,0.0,0.0,0.0,0.0,0.0,23.0
2,252,1,0,4.0,0.0,1,0,1,0.0,0.0,...,2.0,0.0,0.0,14.0,0.0,0.0,0.0,0.0,0.0,55.0
3,255,1,0,2.0,0.0,1,0,0,0.0,4.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,77.0
4,256,1,0,22.0,0.0,1,0,1,0.0,31.0,...,0.0,5.0,0.0,9.0,0.0,0.0,0.0,0.0,0.0,84.0


In [97]:
with open('../Data/Opioids/Model_Data/subject_details.pickle', 'wb') as pickler:
    pickle.dump(subject_details, pickler, protocol=pickle.HIGHEST_PROTOCOL)

In [3]:
with open('../Data/Opioids/Model_Data/subject_details.pickle', 'rb') as pickler:
    subject_details = pickle.load(pickler)

In [4]:
subject_details["AGE_1"] = 0
subject_details["AGE_2"] = 0
subject_details["AGE_3"] = 0
subject_details["AGE_4"] = 0
subject_details["AGE_5"] = 0
subject_details["AGE_6"] = 0
subject_details["AGE_7"] = 0
subject_details["AGE_8"] = 0

In [5]:
subject_details.head()

Unnamed: 0,SUBJECT_ID,GENDER,EXPIRE_FLAG,TOTAL_NARCOTIC_PRESCRIPTONS,TOTAL_ANTI_NARCOTIC_PRESCRIPTONS,NARCOTIC,ANTI-NARCOTIC,ICU_STAY,OXYMORPHONE,OXYCODONE,...,SIDE_EFFECTS_FLAG,AGE,AGE_1,AGE_2,AGE_3,AGE_4,AGE_5,AGE_6,AGE_7,AGE_8
0,249,0,0,8.0,0.0,1,0,1,0.0,4.0,...,0.0,81.0,0,0,0,0,0,0,0,0
1,250,0,1,6.0,0.0,1,0,1,0.0,0.0,...,0.0,23.0,0,0,0,0,0,0,0,0
2,252,1,0,4.0,0.0,1,0,1,0.0,0.0,...,0.0,55.0,0,0,0,0,0,0,0,0
3,255,1,0,2.0,0.0,1,0,0,0.0,4.0,...,0.0,77.0,0,0,0,0,0,0,0,0
4,256,1,0,22.0,0.0,1,0,1,0.0,31.0,...,0.0,84.0,0,0,0,0,0,0,0,0


In [3]:
for index, row in subject_details_age.iterrows():
    age = row["AGE"]
    if (age < 13):
        subject_details_age.iloc[index]["AGE_1"] = 1
    elif (age >= 13 and age < 20):
        subject_details_age.iloc[index]["AGE_2"] = 1
    elif (age >= 20 and age < 41):
        subject_details_age.iloc[index]["AGE_3"] = 1
    elif (age >= 41 and age < 51):
        subject_details_age.iloc[index]["AGE_4"] = 1
    elif (age >= 51 and age < 66):
        subject_details_age.iloc[index]["AGE_5"] = 1
    elif (age >= 66 and age < 76):
        subject_details_age.iloc[index]["AGE_6"] = 1
    elif (age >= 76 and age < 86):
        subject_details_age.iloc[index]["AGE_7"] = 1
    elif (age >= 86):
        subject_details_age.iloc[index]["AGE_8"] = 1
    if (index%100 == 0):
        print("Rows iterated: %i" %index)
subject_details_age.head()

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


Rows iterated: 0


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


Rows iterated: 100
Rows iterated: 200
Rows iterated: 300
Rows iterated: 400
Rows iterated: 500
Rows iterated: 600
Rows iterated: 700
Rows iterated: 800
Rows iterated: 900
Rows iterated: 1000
Rows iterated: 1100
Rows iterated: 1200
Rows iterated: 1300
Rows iterated: 1400
Rows iterated: 1500
Rows iterated: 1600
Rows iterated: 1700
Rows iterated: 1800
Rows iterated: 1900
Rows iterated: 2000
Rows iterated: 2100
Rows iterated: 2200
Rows iterated: 2300
Rows iterated: 2400
Rows iterated: 2500
Rows iterated: 2600
Rows iterated: 2700
Rows iterated: 2800
Rows iterated: 2900
Rows iterated: 3000
Rows iterated: 3100
Rows iterated: 3200
Rows iterated: 3300
Rows iterated: 3400
Rows iterated: 3500
Rows iterated: 3600
Rows iterated: 3700
Rows iterated: 3800
Rows iterated: 3900
Rows iterated: 4000
Rows iterated: 4100
Rows iterated: 4200
Rows iterated: 4300
Rows iterated: 4400
Rows iterated: 4500
Rows iterated: 4600
Rows iterated: 4700
Rows iterated: 4800
Rows iterated: 4900
Rows iterated: 5000
Rows iter

Unnamed: 0,SUBJECT_ID,GENDER,EXPIRE_FLAG,TOTAL_NARCOTIC_PRESCRIPTONS,TOTAL_ANTI_NARCOTIC_PRESCRIPTONS,NARCOTIC,ANTI-NARCOTIC,ICU_STAY,OXYMORPHONE,OXYCODONE,...,SIDE_EFFECTS_FLAG,AGE,AGE_1,AGE_2,AGE_3,AGE_4,AGE_5,AGE_6,AGE_7,AGE_8
0,249,0,0,8.0,0.0,1,0,1,0.0,4.0,...,0.0,81.0,0,0,0,0,0,0,0,0
1,250,0,1,6.0,0.0,1,0,1,0.0,0.0,...,0.0,23.0,0,0,0,0,0,0,0,0
2,252,1,0,4.0,0.0,1,0,1,0.0,0.0,...,0.0,55.0,0,0,0,0,0,0,0,0
3,255,1,0,2.0,0.0,1,0,0,0.0,4.0,...,0.0,77.0,0,0,0,0,0,0,0,0
4,256,1,0,22.0,0.0,1,0,1,0.0,31.0,...,0.0,84.0,0,0,0,0,0,0,0,0


In [8]:
with open('../Data/Opioids/Model_Data/subject_details_age.pickle', 'wb') as pickler:
    pickle.dump(subject_details, pickler, protocol=pickle.HIGHEST_PROTOCOL)

In [5]:
with open('../Data/Opioids/Model_Data/subject_details_age.pickle', 'rb') as pickler:
    subject_details_age = pickle.load(pickler)
subject_details_age.head()

Unnamed: 0,SUBJECT_ID,GENDER,EXPIRE_FLAG,TOTAL_NARCOTIC_PRESCRIPTONS,TOTAL_ANTI_NARCOTIC_PRESCRIPTONS,NARCOTIC,ANTI-NARCOTIC,ICU_STAY,OXYMORPHONE,OXYCODONE,...,SIDE_EFFECTS_FLAG,AGE,AGE_1,AGE_2,AGE_3,AGE_4,AGE_5,AGE_6,AGE_7,AGE_8
0,249,0,0,8.0,0.0,1,0,1,0.0,4.0,...,0.0,81.0,0,0,0,0,0,0,0,0
1,250,0,1,6.0,0.0,1,0,1,0.0,0.0,...,0.0,23.0,0,0,0,0,0,0,0,0
2,252,1,0,4.0,0.0,1,0,1,0.0,0.0,...,0.0,55.0,0,0,0,0,0,0,0,0
3,255,1,0,2.0,0.0,1,0,0,0.0,4.0,...,0.0,77.0,0,0,0,0,0,0,0,0
4,256,1,0,22.0,0.0,1,0,1,0.0,31.0,...,0.0,84.0,0,0,0,0,0,0,0,0


In [8]:
subjects_age_grp_dict = {1:0, 2:0, 3:0, 4:0, 5:0, 6:0, 7:0, 8:0}
def age_group(row):
    age = row["AGE"]
    if (age < 13):
        row["AGE_1"] = 1
    elif (age >= 13 and age < 20):
        row["AGE_2"] = 1
    elif (age >= 20 and age < 41):
        row["AGE_3"] = 1
    elif (age >= 41 and age < 51):
        row["AGE_4"] = 1
    elif (age >= 51 and age < 66):
        row["AGE_5"] = 1
    elif (age >= 66 and age < 76):
        row["AGE_6"] = 1
    elif (age >= 76 and age < 86):
        row["AGE_7"] = 1
    elif (age >= 86):
        row["AGE_8"] = 1
    return row

In [19]:
def age_group(age):
    group = 0
    if (age < 13):
        group = 1
    elif (age >= 13 and age < 20):
        group = 2
    elif (age >= 20 and age < 41):
        group = 3
    elif (age >= 41 and age < 51):
        group = 4
    elif (age >= 51 and age < 66):
        group = 5
    elif (age >= 66 and age < 76):
        group = 6
    elif (age >= 76 and age < 86):
        group = 7
    elif (age >= 86):
        group = 8
    return group

In [14]:
subjects_age_grp_dict = {1:0, 2:0, 3:0, 4:0, 5:0, 6:0, 7:0, 8:0}
def age_group():
    for index, row in subject_details_age.iterrows():
        age = row["AGE"]
        if (age < 13):
            subject_details_age["AGE_1"][row.AGE<13] = 1
        elif (age >= 13 and age < 20):
            subject_details_age["AGE_2"][row.AGE >= 13 and row.AGE < 20] = 1
        elif (age >= 20 and age < 41):
            subject_details_age["AGE_3"][row.AGE >= 20 and row.AGE < 41] = 1
        elif (age >= 41 and age < 51):
            subject_details_age["AGE_4"][row.AGE >= 41 and row.AGE < 51] = 1
        elif (age >= 51 and age < 66):
            subject_details_age["AGE_5"][row.AGE >= 51 and row.AGE < 66] = 1
        elif (age >= 66 and age < 76):
            subject_details_age["AGE_6"][row.AGE >= 66 and row.AGE < 76] = 1
        elif (age >= 76 and age < 86):
            subject_details_age["AGE_7"][row.AGE >= 76 and row.AGE < 86] = 1
        elif (age >= 86):
            subject_details_age["AGE_8"][row.AGE >= 86] = 1
        if (index%100 == 0):
            print("Rows iterated: %i" %index)
    return subject_details_age

In [20]:
subject_details_age_new["AGE_GROUP"] = subject_details_age_new["AGE"].map(age_group)
subject_details_age_new.head()

Unnamed: 0,SUBJECT_ID,GENDER,EXPIRE_FLAG,TOTAL_NARCOTIC_PRESCRIPTONS,TOTAL_ANTI_NARCOTIC_PRESCRIPTONS,NARCOTIC,ANTI-NARCOTIC,ICU_STAY,OXYMORPHONE,OXYCODONE,...,AGE,AGE_1,AGE_2,AGE_3,AGE_4,AGE_5,AGE_6,AGE_7,AGE_8,AGE_GROUP
0,249,0,0,8.0,0.0,1,0,1,0.0,4.0,...,81.0,0,0,0,0,0,0,0,0,7
1,250,0,1,6.0,0.0,1,0,1,0.0,0.0,...,23.0,0,0,0,0,0,0,0,0,3
2,252,1,0,4.0,0.0,1,0,1,0.0,0.0,...,55.0,0,0,0,0,0,0,0,0,5
3,255,1,0,2.0,0.0,1,0,0,0.0,4.0,...,77.0,0,0,0,0,0,0,0,0,7
4,256,1,0,22.0,0.0,1,0,1,0.0,31.0,...,84.0,0,0,0,0,0,0,0,0,7


In [24]:
subject_details_age_new.drop(["AGE_1", "AGE_2","AGE_3","AGE_4","AGE_5","AGE_6","AGE_7","AGE_8"], axis=1, inplace=True)
subject_details_age_new.head()

Unnamed: 0,SUBJECT_ID,GENDER,EXPIRE_FLAG,TOTAL_NARCOTIC_PRESCRIPTONS,TOTAL_ANTI_NARCOTIC_PRESCRIPTONS,NARCOTIC,ANTI-NARCOTIC,ICU_STAY,OXYMORPHONE,OXYCODONE,...,HYDROMORPHONE,HYDROCODONE,FENTANYL,CODEINE,BUPRENORPHINE,METHADONE,NALOXONE,SIDE_EFFECTS_FLAG,AGE,AGE_GROUP
0,249,0,0,8.0,0.0,1,0,1,0.0,4.0,...,12.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,81.0,7
1,250,0,1,6.0,0.0,1,0,1,0.0,0.0,...,0.0,0.0,15.0,0.0,0.0,0.0,0.0,0.0,23.0,3
2,252,1,0,4.0,0.0,1,0,1,0.0,0.0,...,0.0,0.0,14.0,0.0,0.0,0.0,0.0,0.0,55.0,5
3,255,1,0,2.0,0.0,1,0,0,0.0,4.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,77.0,7
4,256,1,0,22.0,0.0,1,0,1,0.0,31.0,...,5.0,0.0,9.0,0.0,0.0,0.0,0.0,0.0,84.0,7


In [25]:
with open('../Data/Opioids/Model_Data/subject_details_age.pickle', 'wb') as pickler:
    pickle.dump(subject_details_age_new, pickler, protocol=pickle.HIGHEST_PROTOCOL)

In [None]:
Gender:
    Female 0
    Male 1
ICU STAY:
    NAN - No stay - 0
    If atleast one value - Stay - 1
EXPIRE_FLAG:
    Expired - 1
    Alive - 0
AGE:
    GRP 1: < 13
    GRP 2: 13 - 19
    GRP 3: 20 - 40
    GRP 4: 41 - 50
    GRP 5: 51 - 65
    GRP 6: 66 - 75
    GRP 7: 76 - 86
    GRP 8: >= 86