In [1]:
import pandas as pd
import sqlite3
from sqlite3 import Error
import requests
from IPython.display import display

In [2]:

def get_drug_name(classId, relaSource):
    my_url = 'https://rxnav.nlm.nih.gov/REST/rxclass/classMembers.json?classId=' + classId + '&relaSource=' + relaSource

    r = requests.get(url = my_url)
    data = r.json()
    terms = data['drugMemberGroup']['drugMember']
    lst = []

    for n, term in enumerate(terms):
        rxcui = terms[n]['minConcept']['name']
        lst.append(rxcui)
    
    return lst



def View(df, rows=False):
    '''Views all the columns of a dataframe '''
    if rows==True:
        with pd.option_context("display.max_columns", None, "display.max_rows", None):
            display(df)
    else:
        with pd.option_context("display.max_columns", None):
            display(df)

In [3]:
opiods = get_drug_name('N02A','ATC')
benzos = get_drug_name('D001569','DAILYMED')
dihyds = get_drug_name('D004095','DAILYMED')

In [4]:
conn = sqlite3.connect('mimic.db')
cursor = conn.cursor()

### Creating tables

In [5]:
cursor.execute("DROP TABLE IF EXISTS Patients")
cursor.execute("DROP TABLE IF EXISTS Admissions")
cursor.execute("DROP TABLE IF EXISTS Diagnoses_icd")
cursor.execute("DROP TABLE IF EXISTS D_icd_diagnoses")
cursor.execute("DROP TABLE IF EXISTS Prescriptions")


sql ='''CREATE TABLE Patients (
    row_id int  NOT NULL,
    subject_id int  NOT NULL,
    gender varchar(5)  NOT NULL,
    dob datetime  NOT NULL,
    dod datetime  NOT NULL,
    dod_hosp datetime  NULL,
    dod_ssn datetime  NULL,
    expire_flag varchar(5)  NOT NULL,
    PRIMARY KEY (subject_id)
  );'''

cursor.execute(sql)


sql ='''CREATE TABLE Admissions (
    row_id int  NOT NULL,
    subject_id int  NOT NULL,
    hadm_id int  NOT NULL,
    admittime datetime  NOT NULL,
    dischtime datetime  NOT NULL,
    deathtime datetime   NULL,
    admission_type varchar(50)  NOT NULL,
    admission_location varchar(50)  NOT NULL,
    discharge_location varchar(50)  NOT NULL,
    insurance varchar(255)  NOT NULL,
    language varchar(10)   NULL,
    religion varchar(50)   NULL,
    marital_status varchar(50)   NULL,
    ethnicity varchar(200)  NOT NULL,
    edregtime datetime   NULL,
    edouttime datetime   NULL,
    diagnosis varchar(300)  NOT NULL,
    hospital_expire_flag int  NOT NULL,
    has_chartevents_data int  NOT NULL,
    PRIMARY KEY (hadm_id),
    FOREIGN KEY (subject_id) REFERENCES Patients(subject_id)
  );'''

cursor.execute(sql)

sql ='''CREATE TABLE D_icd_diagnoses (
    row_id int  NOT NULL,
    icd9_code varchar(10) NOT NULL,
    short_title varchar(50) NOT NULL,
    long_title varchar(300) NOT NULL,
    PRIMARY KEY (icd9_code) 
  );'''
cursor.execute(sql)


sql ='''CREATE TABLE Diagnoses_icd (
    row_id int  NOT NULL,
    subject_id int  NOT NULL,
    hadm_id int  NOT NULL,
     seq_num int NOT NULL,
     icd9_code varchar(10) NOT NULL,
    PRIMARY KEY (hadm_id),
    FOREIGN KEY (icd9_code) REFERENCES D_icd_diagnoses(icd9_code)
  );'''
cursor.execute(sql)


 #insert code to create Prescriptions table
sql ='''CREATE TABLE Prescriptions (
    row_id int  NOT NULL,
    subject_id int  NOT NULL,
    hadm_id int  NOT NULL,
    startdate datetime NULL,
    enddate datetime NULL,
    drug_type varchar(50) NULL,
    drug varchar(300) NULL,
    drug_name_poe varchar(300) NULL,
    drug_name_generic varchar(300) NULL,
    formulary_drug_cd varchar(300) NULL,
    gsn int NULL,
    ndc real NULL,
    prod_strength varchar(300) NULL,
    dose_val_rx varchar(50) NULL,
    dose_unit_rx varchar(50) NULL,
    form_val_disp varchar(50) NULL,
    form_unit_disp varchar(50) NULL,
    route varchar(50) NULL,
    FOREIGN KEY (hadm_id) REFERENCES Diagnoses_icd(hadm_id),
    FOREIGN KEY (subject_id) REFERENCES Patients(subject_id)
  );'''
cursor.execute(sql)



print("Table(s) created successfully........")
conn.commit()

Table(s) created successfully........


### Data Cleaning 

In [6]:
patients = pd.read_csv('../../mimic-iii-clinical-database-1.4/PATIENTS.csv.gz', compression='gzip')

In [7]:
admissions = pd.read_csv('../../mimic-iii-clinical-database-1.4/ADMISSIONS.csv.gz', compression='gzip')

In [8]:
d_icd_diagnoses = pd.read_csv('../../mimic-iii-clinical-database-1.4/D_ICD_DIAGNOSES.csv.gz', compression='gzip')

In [9]:
diagnoses_icd = pd.read_csv('../../mimic-iii-clinical-database-1.4/DIAGNOSES_ICD.csv.gz', compression='gzip')

In [10]:
prescriptions = pd.read_csv('../../mimic-iii-clinical-database-1.4/PRESCRIPTIONS.csv.gz', compression='gzip', low_memory=False)

In [11]:
def find_drug(drug):
    return prescriptions.DRUG[prescriptions.DRUG.str.contains(drug, case=False, regex=True)].value_counts().to_frame()

In [12]:
def clean_drug(drug):
    return prescriptions.DRUG[prescriptions.DRUG.str.replace(drug, drug, case=False, regex=True)]

In [13]:
opiod_data = []
for opiod in opiods:
    data = find_drug(opiod)
    data['name'] = opiod
    data['class'] = 'opiods'
    opiod_data.append(data)
opiod_data = pd.concat(opiod_data)

In [14]:
benzo_data = []
for benzo in benzos:
    data = find_drug(benzo)
    data['name'] = benzo
    data['class'] = 'benzodiazepines'
    benzo_data.append(data)
benzo_data = pd.concat(benzo_data)

In [15]:
dihyd_data = []
for dihyd in dihyds:
    data = find_drug(dihyd)
    data['name'] = dihyd
    data['class'] = 'dihydropyridines'
    dihyd_data.append(data)
dihyd_data = pd.concat(dihyd_data)

In [16]:
opiod_data.reset_index(inplace=True)
benzo_data.reset_index(inplace=True)
dihyd_data.reset_index(inplace=True)

In [17]:
drug_data = pd.concat([opiod_data, benzo_data, dihyd_data], ignore_index=True)

In [18]:
drug_data = drug_data.drop(["DRUG"],axis=1)

In [19]:
drug_data.columns = ['drug_id','name','class']

In [20]:
drug_data

Unnamed: 0,drug_id,name,class
0,TraMADOL (Ultram),tramadol,opiods
1,traMADOL,tramadol,opiods
2,Buprenorphine-Naloxone (8mg-2mg),buprenorphine,opiods
3,Buprenorphine,buprenorphine,opiods
4,Buprenorphine HCl,buprenorphine,opiods
...,...,...,...
163,nifedipine,nifedipine,dihydropyridines
164,*NF* Nifedipine XL,nifedipine,dihydropyridines
165,Nifedipine (Bulk),nifedipine,dihydropyridines
166,Nimodipine,nimodipine,dihydropyridines


In [21]:
sql ='''CREATE TABLE Drugs (
    drug_id varchar(300)  NOT NULL,
    name varchar(300)  NOT NULL,
    class varchar(300)  NOT NULL,
    FOREIGN KEY (drug_id) REFERENCES Prescriptions(drug)
  );'''
cursor.execute(sql)


OperationalError: table Drugs already exists

###  Loading Cleaned Data into Database

In [None]:
patients.to_sql('Patients', conn, if_exists='replace', index = False)
conn.commit()

admissions.to_sql('Admissions', conn, if_exists='replace', index = False)
conn.commit()

d_icd_diagnoses.to_sql('D_icd_diagnoses', conn, if_exists='replace', index = False)
conn.commit()

diagnoses_icd.to_sql('Diagnoses_icd', conn, if_exists='replace', index = False)
conn.commit()

prescriptions.to_sql('Prescriptions', conn, if_exists='replace', index = False)
conn.commit()

drug_data.to_sql('Drugs', conn, if_exists='replace', index = False)
conn.commit()

### Querying the DIAGNOSES_ICD and D_ICD_DIAGNOSES tables

Number of patients with delirium

In [None]:
 sql = '''select count (distinct d.subject_id) as count_delirium
from Diagnoses_icd d 
join D_icd_diagnoses dd ON d.icd9_code = dd.icd9_code
where long_title like "%delirium%"''' 


pd.read_sql(sql, conn)

Identifying subject_IDs with delirium

In [None]:
 sql = '''select distinct d.subject_id 
from Diagnoses_icd d 
join D_icd_diagnoses dd ON d.icd9_code = dd.icd9_code
where long_title like "%delirium%"''' 


pd.read_sql(sql, conn)

Identifying ICD9_Codes with delirium

In [None]:
 sql = '''select distinct d.icd9_code 
from Diagnoses_icd d 
join D_icd_diagnoses dd ON d.icd9_code = dd.icd9_code
where long_title like "%delirium%"''' 


pd.read_sql(sql, conn)

In [None]:
 sql = '''select distinct d.icd9_code, dd.long_title
from Diagnoses_icd d 
join D_icd_diagnoses dd ON d.icd9_code = dd.icd9_code
where long_title like "%delirium%"''' 


pd.read_sql(sql, conn)

Number of patients with delirium subtypes

In [None]:
sql = '''select distinct d.subject_id id, d.icd9_code icd9, dd.long_title subtype
from Diagnoses_icd d 
join D_icd_diagnoses dd ON d.icd9_code = dd.icd9_code
where long_title like "%delirium%"'''

pd.read_sql(sql, conn)

In [None]:
sql = '''select subtype, count(*),
ROUND(count(*)*100.00/sum(count(subtype)) over(),1) as percent 
from (select distinct d.subject_id id, d.icd9_code icd9, dd.long_title subtype
from Diagnoses_icd d 
join D_icd_diagnoses dd ON d.icd9_code = dd.icd9_code
where long_title like "%delirium%")
group by subtype
order by 2 desc
'''

pd.read_sql(sql, conn)

In [None]:
1398 + 361 + 292 + 32 + 25

In [None]:
sql = '''select id, count(*) 
from (select distinct d.subject_id id, d.icd9_code icd9, dd.long_title subtype
from Diagnoses_icd d 
join D_icd_diagnoses dd ON d.icd9_code = dd.icd9_code
where long_title like "%delirium%")
group by id 
having count(*) > 1 
order by 2 desc'''

pd.read_sql(sql, conn)

### Querying the Prescriptions Table

In [None]:
sql = '''select * from prescriptions limit 5'''

pd.read_sql(sql, conn)

Number of patients with prescriptions

In [None]:
sql = '''select count (distinct subject_id) from prescriptions'''

pd.read_sql(sql, conn)

Number of patients with delirium with prescriptions

In [None]:
sql = '''select n as n, ROUND(n*100.00/2046,1) as percentage
from (select count (distinct subject_id) as n
from prescriptions
where subject_id in 
(SELECT distinct d.subject_id
from Diagnoses_icd d 
join D_icd_diagnoses dd ON d.icd9_code = dd.icd9_code
WHERE long_title LIKE "%delirium%"))'''

pd.read_sql(sql, conn)

List of all drugs of interest

In [None]:
sql = '''select distinct drug_id from drugs'''

pd.read_sql(sql, conn)

Creating a "delrium" column in the prescriptions table

In [None]:
sql = '''select subject_id, case when subject_id in (select distinct d.subject_id 
from Diagnoses_icd d 
join D_icd_diagnoses dd ON d.icd9_code = dd.icd9_code
where long_title like "%delirium%") then 1 else 0 end as delirium, drug
from prescriptions
where drug in (select distinct drug_id from drugs)'''

pd.read_sql(sql, conn)

In [None]:
sql = '''select delirium, count(drug) 
from (select subject_id, case when subject_id in (select distinct d.subject_id 
from Diagnoses_icd d 
join D_icd_diagnoses dd ON d.icd9_code = dd.icd9_code
where long_title like "%delirium%") then 1 else 0 end as delirium, drug
from prescriptions
where drug in (select distinct drug_id from drugs)) 
group by delirium
order by 2 desc'''

pd.read_sql(sql, conn)

In [None]:
sql = '''select delirium, count(drug) 
from (select subject_id, case when subject_id in (select distinct d.subject_id 
from Diagnoses_icd d 
join D_icd_diagnoses dd ON d.icd9_code = dd.icd9_code
where long_title like "%delirium%") then 1 else 0 end as delirium, drug
from prescriptions
where drug in (select distinct drug_id from drugs where class="opiods")) 
group by delirium
order by 2 desc'''

pd.read_sql(sql, conn)