In [1]:
# Enable IPython's autoreload extension. Mode 2 reloads imported modules before each
# cell runs, so edits to the project's helper modules are picked up without a kernel restart.
%load_ext autoreload
%autoreload 2

In [2]:
import os, sys
from dotenv import load_dotenv
load_dotenv("../.env")  # load variables from ../.env into the process environment
PROJECT_ROOT = os.environ.get("PROJECT_ROOT")
# Fail fast with an actionable message. A missing PROJECT_ROOT would otherwise put None on
# sys.path, which the import system silently ignores; the problem would then presumably
# surface only later as an import error on the `utils` modules.
if not PROJECT_ROOT:
    raise EnvironmentError(
        "PROJECT_ROOT is not set; define it in ../.env or in the environment."
    )
# Avoid stacking duplicate entries when this cell is re-run in the same kernel.
if PROJECT_ROOT not in sys.path:
    sys.path.append(PROJECT_ROOT)

import numpy as np
import pandas as pd
from glob import glob

from datetime import datetime
import utils.PATHS as PATHS
import utils.utils as utils
# import utils.emr_utils as emr_utils
# import utils.load_utils as load_utils

## Notes

The following were observed in the exploration below:
* Potentially useful prescription-related columns:
    - `Generic Drug Name`
* Consider using the datetime columns as a reference for how long a medication has been taken relative to the lipid-lowering treatment

In [3]:
# Collect the drug-dispensed CSV paths. glob() returns matches in a filesystem-dependent
# order, so sort them to make `dd_fp_list[0]` the same file on every run and machine.
dd_fp_list = sorted(glob(os.path.join(PATHS.DRUG_DISPENSED, "*.csv")))
# Stop here with a clear message instead of an IndexError when the first file is indexed.
if not dd_fp_list:
    raise FileNotFoundError(f"No CSV files found in {PATHS.DRUG_DISPENSED!r}")

In [4]:
# Load only the first drug-dispensed file as a sample and summarise its columns,
# non-null counts and dtypes.
first_dd_fp = dd_fp_list[0]
test = pd.read_csv(first_dd_fp)
test.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 145344 entries, 0 to 145343
Data columns (total 33 columns):
 #   Column                     Non-Null Count   Dtype  
---  ------                     --------------   -----  
 0   Institution Code           145344 non-null  object 
 1   Patient ID                 145344 non-null  object 
 2   Date of Birth              145344 non-null  object 
 3   Gender                     145344 non-null  object 
 4   Race                       145337 non-null  object 
 5   Country of Residence       2395 non-null    object 
 6   Resident Indicator         145339 non-null  object 
 7   Case No                    145344 non-null  object 
 8   Visit No                   145344 non-null  object 
 9   Drug Code                  3245 non-null    object 
 10  Drug Name                  145344 non-null  object 
 11  Drug Strength              127116 non-null  object 
 12  Drug Form                  140466 non-null  object 
 13  Generic Drug Id            14

## Questions

1. What should be done when combinations are observed within the lookback period?
2. Should the statin + ____ prescriptions be addressed within the lookback period?

In [5]:
# # SPECIAL CORRECTION FACTORS
# NOTE(review): placeholder only. Every value below is still blank, so this dict is not
# valid Python if uncommented as-is. The trailing comments look like the strengths
# observed for each generic drug name (TODO confirm).
# {
#     # "pravastatin sodium": ,  # 20MG
#     "ezetimibe,simvastatin tab" : , # 10/10, 10/20
#     "ezetimibe": , # 10mg
#     "simvastatin": , # 10mg, 20mg
#     "atorvastatin": , # 10mg, 40mg, 20mg
#     "rosuvastatin": , # 10MG, 20MG


#     "lovastatin": , # 20mg
#     "metronidazole, nystatin": , # 500mg, 100,000 unit    
#     "nystatin": , # 100,000IU
# } 

In [6]:
# Discard rows without a generic drug name; the cells below filter on this column.
required_cols = ["Generic Drug Name"]
test = test.dropna(subset=required_cols)

In [7]:
# Normalise generic names to lowercase so the lowercase prefix used in the lookup below
# matches regardless of how the source file cased the name.
generic_name_lower = test["Generic Drug Name"].str.lower()
test["Generic Drug Name"] = generic_name_lower

In [8]:
drug_name = "nystatin"
# drug_name = "statin"
# Select rows whose generic name begins with `drug_name`. This is a prefix match, so a
# value such as "statin" only hits names that start with it, not names that contain it.
name_matches = test["Generic Drug Name"].str.startswith(drug_name)
sub = test.loc[name_matches, ["Generic Drug Name", "Drug Strength"]]
# Preview the matches, then list the distinct names and strengths among them.
display(sub.head())
display(sub["Generic Drug Name"].unique())
display(sub["Drug Strength"].unique())

Unnamed: 0,Generic Drug Name,Drug Strength
11199,nystatin,"100,000IU/"
49657,nystatin,"100,000IU/"
53867,nystatin,"100,000IU/"
57686,nystatin,"100,000IU/"
57687,nystatin,"100,000IU/"


array(['nystatin'], dtype=object)

array(['100,000IU/'], dtype=object)

In [9]:
# Generic drug name next to the prescribed-quantity UOM (presumably "unit of measure").
name_uom_cols = ["Generic Drug Name", "Prescribed Quantity UOM"]
test[name_uom_cols]

Unnamed: 0,Generic Drug Name,Prescribed Quantity UOM
0,paracetamol,TAB
1,etanercept,SYR
2,calciferol,CAP
3,"calcium carbonate, vitamin d",TAB
4,metformin,TAB
...,...,...
145339,tears naturale,BOT
145340,atropine,AMPVIA
145341,glyceryl trinitrate,BOT
145342,lidocaine,AMPVIA


In [10]:
# First 20 rows of the two UOM columns alongside drug strength and form, to see how
# they relate to one another.
uom_cols = ["Prescribed Quantity UOM", "Item Primary UOM", "Drug Strength", "Drug Form"]
test.loc[:, uom_cols].head(20)

Unnamed: 0,Prescribed Quantity UOM,Item Primary UOM,Drug Strength,Drug Form
0,TAB,TAB,500mg,TAB
1,SYR,SYR,50mg/mL,SYR
2,CAP,CAP,50000 UNIT,TAB
3,TAB,TAB,450mg; 200 unit,TAB
4,TAB,TAB,500mg,TAB
5,TAB,TAB,30MG,LA
6,CAP,CAP,20MG,CAP
7,TAB,TAB,5mg,TAB
8,PKT,PC,30mg,DRS
9,PKT,PC,30mg,DRS


In [11]:
# Inspect the trailing columns of the frame, which info()/head() displays tend to elide.
N_TRAILING_COLS = 8
test.iloc[:, -N_TRAILING_COLS:]

Unnamed: 0,Start Date,Prescribed Quantity UOM,Admit Source Code,Admit Source Description,Case Status Code,Case Status Description,Case Type Code,Case Type Description
0,2015-12-18,TAB,EXPUNKNOWN,Expected Unknown,EXPUNKNOWN,Expected Unknown,O,Outpatient
1,2015-12-12,SYR,EXPUNKNOWN,Expected Unknown,EXPUNKNOWN,Expected Unknown,O,Outpatient
2,2015-12-16,CAP,EXPUNKNOWN,Expected Unknown,EXPUNKNOWN,Expected Unknown,O,Outpatient
3,2015-12-16,TAB,EXPUNKNOWN,Expected Unknown,EXPUNKNOWN,Expected Unknown,O,Outpatient
4,2015-12-16,TAB,EXPUNKNOWN,Expected Unknown,EXPUNKNOWN,Expected Unknown,O,Outpatient
...,...,...,...,...,...,...,...,...
145339,2015-12-24,BOT,EXPUNKNOWN,Expected Unknown,EXPUNKNOWN,Expected Unknown,O,Outpatient
145340,2015-12-26,AMPVIA,EXPUNKNOWN,Expected Unknown,EXPUNKNOWN,Expected Unknown,O,Outpatient
145341,2015-12-26,BOT,EXPUNKNOWN,Expected Unknown,EXPUNKNOWN,Expected Unknown,O,Outpatient
145342,2015-12-26,AMPVIA,EXPUNKNOWN,Expected Unknown,EXPUNKNOWN,Expected Unknown,O,Outpatient


In [12]:
# Side-by-side look at `Add Date` and `Start Date` for the first 200 rows.
test.head(200)[["Add Date", "Start Date"]]

Unnamed: 0,Add Date,Start Date
0,2015-12-18 12:17:07,2015-12-18
1,2015-12-12 08:41:50,2015-12-12
2,2015-12-16 12:27:59,2015-12-16
3,2015-12-16 12:27:58,2015-12-16
4,2015-12-16 12:27:59,2015-12-16
...,...,...
203,2015-12-29 12:21:31,2015-12-29
204,2015-12-29 12:21:32,2015-12-29
205,2015-12-29 13:11:37,2015-12-29
206,2015-12-29 12:21:32,2015-12-29


In [13]:
# Check how well the case start/end dates are populated in the first 200 rows.
case_date_cols = ["Case Start Date", "Case End Date"]
test.loc[:, case_date_cols].head(200)

Unnamed: 0,Case Start Date,Case End Date
0,,
1,,
2,,
3,,
4,,
...,...,...
203,,
204,,
205,,
206,,


In [14]:
# Prescription date range (from/to) for the first 200 rows.
prescribed_date_cols = ["Drug Prescribed Date From", "Drug Prescribed Date To"]
test[prescribed_date_cols].head(200)

Unnamed: 0,Drug Prescribed Date From,Drug Prescribed Date To
0,2015-12-18,2015-12-25
1,2015-12-12,2016-01-09
2,2015-12-16,2015-12-16
3,2015-12-16,2015-12-16
4,2015-12-16,2015-12-16
...,...,...
203,2015-12-29,2015-12-29
204,2015-12-29,2015-12-29
205,2015-12-29,2015-12-29
206,2015-12-29,2015-12-29


In [15]:
# All drug-identifying columns plus the two DRG code columns, viewed together.
drug_id_cols = [
    "Drug Code",
    "Drug Name",
    "Drug Strength",
    "Drug Form",
    "Generic Drug Id",
    "Generic Drug Name",
    "DRG Code",
    "DRG Code (AR-DRG V6.0)",
]
test[drug_id_cols]

Unnamed: 0,Drug Code,Drug Name,Drug Strength,Drug Form,Generic Drug Id,Generic Drug Name,DRG Code,DRG Code (AR-DRG V6.0)
0,,Paracetamol 500mg Tab,500mg,TAB,478,paracetamol,Unknown,Unknown
1,,Etanercept 50mg/mL PF Syringe,50mg/mL,SYR,854,etanercept,Unknown,Unknown
2,,"CALCIFEROL 50,000 UNITS TABLET",50000 UNIT,TAB,96,calciferol,EXPUNKNOWN,EXPUNKNOWN
3,,"Calcium Carbonate 450mg, Vitamin D 200 unit Tab",450mg; 200 unit,TAB,105,"calcium carbonate, vitamin d",EXPUNKNOWN,EXPUNKNOWN
4,,MetFORMIN HCl 500mg Tab,500mg,TAB,400,metformin,EXPUNKNOWN,EXPUNKNOWN
...,...,...,...,...,...,...,...,...
145339,,Tears Naturale II Eye Drop 15mL,,EYE,622,tears naturale,Unknown,Unknown
145340,,Atropine Sulfate 600mcg/mL Inj,600mcg/mL,INJ AMP&VIA,54,atropine,Unknown,Unknown
145341,,Glyceryl Trinitrate 0.5mg Tab,0.5mg,SL,297,glyceryl trinitrate,Unknown,Unknown
145342,,Lidocaine (P/Free) 1% (50mg/5mL) Inj,1% 5mL,INJ AMP&VIA,367,lidocaine,Unknown,Unknown


In [16]:
# DRG = diagnosis related group; applies to both "DRG Code" and "DRG Code (AR-DRG V6.0)".
# Frequency of each distinct value in "DRG Code" (missing values are not counted).
drg_code_counts = test["DRG Code"].value_counts()
drg_code_counts

DRG Code
EXPUNKNOWN    117656
Unknown        22810
Name: count, dtype: int64

In [17]:
# Frequency of each distinct value in the AR-DRG V6.0 code column.
ar_drg_code_counts = test["DRG Code (AR-DRG V6.0)"].value_counts()
ar_drg_code_counts

DRG Code (AR-DRG V6.0)
EXPUNKNOWN    117656
Unknown        22810
Name: count, dtype: int64

In [18]:
# Frequency of each distinct "Drug Status" value, most common first.
drug_status_counts = test["Drug Status"].value_counts()
drug_status_counts

Drug Status
STDTABCAP     54723
02            34087
STDL2         33869
STDCRMOINT     4242
STDMIX         3908
07             1045
NF              939
06              750
STDINSUL        708
STDEEN          671
STDINHAL        573
03              482
SVC             453
05              430
14              430
STDSOL1         415
STDITEM         356
STDINFINJ       331
STDSUPP         249
SVL             211
22              208
STDSOL4         194
08              155
STDWEEK         144
STDSOL2         137
04              132
STDL2ORG        110
13               61
CT               53
20               51
70               30
15               29
10               18
60                3
16                1
Name: count, dtype: int64

In [19]:
# Preview the first five rows across all columns.
test.head(n=5)

Unnamed: 0,Institution Code,Patient ID,Date of Birth,Gender,Race,Country of Residence,Resident Indicator,Case No,Visit No,Drug Code,...,Case End Date,Add Date,Start Date,Prescribed Quantity UOM,Admit Source Code,Admit Source Description,Case Status Code,Case Status Description,Case Type Code,Case Type Description
0,SGH,31d385b313558357cc92,1993-09-01,FEMALE,,,,e99d5ccf7a9a6cf00fa7,e99d5ccf7a9a6cf00fa7,,...,,2015-12-18 12:17:07,2015-12-18,TAB,EXPUNKNOWN,Expected Unknown,EXPUNKNOWN,Expected Unknown,O,Outpatient
1,SGH,b109f71856c858ec5aaa,1966-01-01,MALE,,,,034bc6d95a0702b8ecc8,034bc6d95a0702b8ecc8,,...,,2015-12-12 08:41:50,2015-12-12,SYR,EXPUNKNOWN,Expected Unknown,EXPUNKNOWN,Expected Unknown,O,Outpatient
2,SGH,036017bc373eaa70e386,1955-08-01,FEMALE,Chinese,,N,616c710f92d9bc4480fc,94d720b95aa70d97a8b5,,...,,2015-12-16 12:27:59,2015-12-16,CAP,EXPUNKNOWN,Expected Unknown,EXPUNKNOWN,Expected Unknown,O,Outpatient
3,SGH,036017bc373eaa70e386,1955-08-01,FEMALE,Chinese,,N,616c710f92d9bc4480fc,94d720b95aa70d97a8b5,,...,,2015-12-16 12:27:58,2015-12-16,TAB,EXPUNKNOWN,Expected Unknown,EXPUNKNOWN,Expected Unknown,O,Outpatient
4,SGH,036017bc373eaa70e386,1955-08-01,FEMALE,Chinese,,N,616c710f92d9bc4480fc,94d720b95aa70d97a8b5,,...,,2015-12-16 12:27:59,2015-12-16,TAB,EXPUNKNOWN,Expected Unknown,EXPUNKNOWN,Expected Unknown,O,Outpatient


## End.