# Build a dataset of patients taking hypertension and diabetes drugs, #
# together with their related blood pressure and HbA1c measurements #

In [1]:
import sys
import os

import numpy as np
import pandas as pd

# Load dataset #

In [6]:
# Input locations: both gzipped CSV tables are looked up relative to the
# directory the notebook is run from.
prescriptions_path, labevents_path = (
    os.path.join(os.getcwd(), relative_file)
    for relative_file in (
        'mimic-4-1.0/hosp/prescriptions.csv.gz',
        'mimic-4-1.0/hosp/labevents.csv.gz',
    )
)
# Echo one resolved path as a sanity check of the working directory.
print(prescriptions_path)

/Users/apple/Desktop/MIMIC_database/mimic-4-1.0/hosp/prescriptions.csv.gz


In [2]:
def load_gz(input_path):
    """Read a comma-separated file into one DataFrame, parsing it piecewise.

    The file is consumed in pieces of 5000 rows, and the pieces are
    concatenated once the whole file has been read. Compression handling is
    left to ``pandas.read_csv``.

    Parameters
    ----------
    input_path
        Location of the CSV file; forwarded unchanged to ``pandas.read_csv``.

    Returns
    -------
    pandas.DataFrame
        All rows of the file, in file order.
    """
    rows_per_piece = 5000
    reader = pd.read_csv(
        input_path,
        sep=',',
        chunksize=rows_per_piece,
        low_memory=False,
    )
    # Drain the chunk iterator, then stitch the pieces together in one go.
    pieces = list(reader)
    return pd.concat(pieces)

def load_sql_table(table_name, db_url="postgresql://postgres:1030@localhost/postgres"):
    """Load an entire database table into a DataFrame.

    Parameters
    ----------
    table_name
        Name of the table to read.
    db_url
        SQLAlchemy connection URL. Defaults to the local Postgres instance
        this notebook was written against.
        NOTE(security): the default embeds a user name and password in the
        source; prefer passing ``db_url`` from configuration or the
        environment.

    Returns
    -------
    pandas.DataFrame
        The table contents as returned by ``pandas.read_sql_table``.
    """
    # Imported lazily so the rest of the notebook works without SQLAlchemy.
    from sqlalchemy import create_engine

    engine = create_engine(db_url)
    try:
        # The context manager closes the connection even if the read fails;
        # previously the connection was opened and never closed.
        with engine.connect() as con:
            return pd.read_sql_table(table_name, con=con, index_col=False)
    finally:
        # Release the engine's pooled connections; a fresh engine is built on
        # every call, so nothing else can be using this pool.
        engine.dispose()

In [25]:
# Hypertension drug names of interest. The list mixes plain drug names with
# entries that also carry a strength or formulation suffix; names are kept in
# their original mixed case here.
# NOTE(review): 'Amlodipine + Valsatan 5/80MG' may be a misspelling of
# "Valsartan" — left untouched; confirm against the source of these names.
hypertension_drugs = [
    "Atenolol",
    "Metoprolol",
    "Propranolol",
    "Bisoprolol",
    "Niften",
    "Carvedilol",
    "Concor",
    "Lisinopril",
    "Enalapril",
    "Enalapril Maleate",
    "Perindopril",
    "Perindopril Erbumine",
    "Perindopril(R)",
    "Captopril",
    "Candesartan",
    "Irbesartan",
    "Losartan",
    "Losartan 100MG",
    "Valsartan",
    "Telmisartan",
    "Amlodipine",
    "Amlodipine + Valsatan 5/80MG",
    "Adalat LA",
    "Nifedipine LA",
    "Diltiazem",
    "Diltiazem SR",
    "Prazosin",
    "Hydralazine",
    "Hydrochlorothiazide",
    "Hydrochlorothiazide 25MG",
    "Co-Aprovel 300/12.5",
    "Aldactone",
    "Spironolactone",
    "Telmisartan + HCTZ 80/12.5MG",
]

In [4]:
# Diabetes drug names of interest, written in lower case.
# NOTE(review): 'gliclaziden' / 'gliclaziden mr' look like misspellings of
# "gliclazide" — left unchanged; confirm against the source of these names.
diabetes_drugs = [
    'gliclaziden',
    'gliclaziden mr',
    'glipizide',
    'diamicron mr',
    'metformin',
    'metformin xr',
    'acarbose',
    'januvia',
    'sitagliptin',
    'linagliptin',
    'empagliflozin',
    'dapagliflozin',
    'actrapid',
    'novomix',
    'novorapid',
    'insulin actrapid',
    'insulin aspart',
    'insulin detemir',
    'janumet 50/850',
    # A comma was missing after the next entry, so Python's implicit string
    # concatenation fused it with 'mixtard 30' into one bogus name.
    'sitagliptin + metformin 50/850',
    'mixtard 30',
]
# Reference only (commented out, never executed): numeric ids mapped to
# hemoglobin-related labels.
# NOTE(review): presumably lab item ids from the MIMIC lab dictionary — confirm.
# hemoglobin_items = {
#     50805 : 'Carboxyhemoglobin',
#     50811 : 'Hemoglobin',
#     50814 : 'Methemoglobin',
#     50852 : 'Hemoglobin A1c',
#     50855 : 'Absolute Hemoglobin',
#     51212 : 'Fetal Hemoglobin',
#     51222 : 'Hemoglobin',
#     51223 : 'Hemoglobin A2',
#     51224 : 'Hemoglobin C',
#     51225 : 'Hemoglobin F',
#     51285 : 'Reticulocyte, Cellular Hemoglobin',
#     51631 : 'Glycated Hemoglobin',
#     51640 : 'Hemoglobin',
#     51641 : 'Hemoglobin  A',
#     51642 : 'Hemoglobin  A1',
#     51643 : 'Hemoglobin  A2',
#     51644 : 'Hemoglobin  C',
#     51645 : 'Hemoglobin, Calculated',
#     51646 : 'Hemoglobin  F',
#     51647 : 'Hemoglobin  S',
#     52032 : 'P50 of Hemoglobin',
#     52128 : 'Hemoglobin H Inclusion',
#     52129 : 'Hemoglobin Other',
#     52144 : 'Methemoglobin',
#     52157 : 'Plasma Hemoglobin',
# }
# NOTE(review): the line below is a bare tuple expression that is not bound to
# any name, so it has no effect beyond being displayed as the notebook cell's
# value. It looks like a hand-picked subset of the names above — confirm
# whether it should be assigned to a variable.
'glipizide','metformin','acarbose','januvia','sitagliptin','linagliptin','empagliflozin','dapagliflozin','insulin detemir','sitagliptin','metformin'

# Hypertension #

In [28]:
# Show the hypertension drug names in lower case.
print(list(map(str.lower, hypertension_drugs)))

['atenolol', 'metoprolol', 'propranolol', 'bisoprolol', 'niften', 'carvedilol', 'concor', 'lisinopril', 'enalapril', 'enalapril maleate', 'perindopril', 'perindopril erbumine', 'perindopril(r)', 'captopril', 'candesartan', 'irbesartan', 'losartan', 'losartan 100mg', 'valsartan', 'telmisartan', 'amlodipine', 'amlodipine + valsatan 5/80mg', 'adalat la', 'nifedipine la', 'diltiazem', 'diltiazem sr', 'prazosin', 'hydralazine', 'hydrochlorothiazide', 'hydrochlorothiazide 25mg', 'co-aprovel 300/12.5', 'aldactone', 'spironolactone', 'telmisartan + hctz 80/12.5mg']


In [29]:
# Lower-case subset of the hypertension drug names.
# 'captopril' was previously written as 'Captopril', the only capitalised
# entry, which would not match in a case-sensitive comparison against
# lower-cased names.
has = (
    'atenolol',
    'metoprolol',
    'propranolol',
    'bisoprolol',
    'carvedilol',
    'lisinopril',
    'enalapril',
    'enalapril maleate',
    'captopril',
    'candesartan',
    'irbesartan',
    'losartan',
    'valsartan',
    'telmisartan',
    'amlodipine',
    'diltiazem',
    'prazosin',
    'hydralazine',
    'hydrochlorothiazide',
    'aldactone',
    'spironolactone',
)

In [36]:
# Output root for the hypertension exports: a `hypertension` folder under the
# directory the notebook is run from.
hypertension_save_path = os.path.join(
    os.getcwd(),
    'hypertension',
)

In [35]:
# Read the `inter_4` table from the database and report its
# (rows, columns) shape.
hypertension_patients = load_sql_table(table_name='inter_4')
print(hypertension_patients.shape)

(305468, 38)


In [37]:
# Cast both dosing columns to float64.
hypertension_patients = hypertension_patients.astype(
    dict.fromkeys(('doses_per_24_hrs', 'dose_val_rx'), 'float64')
)
# Preview the first five converted dose values (shown as the cell's value).
hypertension_patients['dose_val_rx'].head()

0    40.0
1     5.0
2     5.0
3    40.0
4    40.0
Name: dose_val_rx, dtype: float64

In [39]:
# Daily dose = number of doses per 24 hours x prescribed dose value.
hypertension_patients['daily_dose'] = hypertension_patients['doses_per_24_hrs'].mul(
    hypertension_patients['dose_val_rx']
)

In [40]:
# One group of prescription rows per patient (`subject_id`).
grouped = hypertension_patients.groupby(by='subject_id')

In [45]:
# Write one CSV per patient who has at least one drug recorded at four or
# more distinct daily doses, then print how many patients were exported.

# Create the output folder up front: `to_csv` does not create missing
# directories and would otherwise fail on a fresh checkout.
os.makedirs(os.path.join(hypertension_save_path, 'hypertension_dosages'), exist_ok=True)

count = 0
for name, group in grouped:
    # True as soon as any single drug shows >= 4 distinct daily-dose values
    # (any() stops at the first such drug).
    save = any(
        len(group.loc[group['drug'] == drug, 'daily_dose'].unique()) >= 4
        for drug in group['drug'].unique()
    )
    if save:
        # The whole patient group is exported, not only the qualifying drug.
        group.to_csv(f'{hypertension_save_path}/hypertension_dosages/{name}.csv', index=False)
        count += 1
print(count)

5883


# Diabetes #

In [3]:
# Output root for the diabetes exports: a `diabetes` folder under the
# directory the notebook is run from.
diabetes_save_path = os.path.join(
    os.getcwd(),
    'diabetes',
)

In [17]:
# Read the HbA1c measurement table and the diabetes prescription table from
# the database, then report each table's (rows, columns) shape on its own line.
hba1c_measurements = load_sql_table(table_name='hba1_measurements_5')
diabetes_patients = load_sql_table(table_name='diabetes_3')
print(hba1c_measurements.shape, diabetes_patients.shape, sep='\n')

(1648, 16)
(733, 38)


In [18]:
# Cast both dosing columns to float64.
diabetes_patients = diabetes_patients.astype(
    dict.fromkeys(('doses_per_24_hrs', 'dose_val_rx'), 'float64')
)

In [19]:
# Preview the first ten dose values (shown as the cell's value).
diabetes_patients['dose_val_rx'].head(n=10)

0     5.0
1     5.0
2    10.0
3     5.0
4     5.0
5     5.0
6     5.0
7     5.0
8     5.0
9     5.0
Name: dose_val_rx, dtype: float64

In [20]:
# Daily dose = number of doses per 24 hours x prescribed dose value.
# Note the column is named `daily_doses` here (plural).
diabetes_patients['daily_doses'] = diabetes_patients['doses_per_24_hrs'].mul(
    diabetes_patients['dose_val_rx']
)

In [24]:
# For every patient who has at least one drug recorded at three or more
# distinct daily doses, write two CSVs — the patient's prescription rows and
# the patient's HbA1c measurements — then print how many patients were exported.
diabetes_grouped = diabetes_patients.groupby('subject_id')

# Create both output folders up front: `to_csv` does not create missing
# directories and would otherwise fail on a fresh checkout.
os.makedirs(os.path.join(diabetes_save_path, 'dosages_3'), exist_ok=True)
os.makedirs(os.path.join(diabetes_save_path, 'hba1c_3'), exist_ok=True)

count = 0
for name, group in diabetes_grouped:
    # True as soon as any single drug shows >= 3 distinct daily-dose values.
    save = any(
        len(group.loc[group['drug'] == drug, 'daily_doses'].unique()) >= 3
        for drug in group['drug'].unique()
    )
    if save:
        group.to_csv(f'{diabetes_save_path}/dosages_3/{name}.csv',index=False)
        # Select this patient's HbA1c rows. This assignment had been commented
        # out, so `hba1c` was either undefined (NameError) or a stale frame
        # left over from an earlier run, written out for every patient.
        hba1c = hba1c_measurements[hba1c_measurements['subject_id'] == name]
        hba1c.to_csv(f'{diabetes_save_path}/hba1c_3/{name}.csv',index=False)
        count += 1

print(count)

37
