In [137]:
import pandas as pd
import numpy as np

In [91]:
# Create the Timestamp object 
ts = pd.Timestamp(year = 2020,  month = 5, day = 31,  
                  hour = 11, second = 49, tz = 'Asia/Singapore') 

In [68]:
medcheck_drug = pd.read_csv('../../dataset/drug/raw/medcheck_legacy_drugs.csv',
                            usecols = ['id','brand_name','generic_name','preparation','active','created_at','updated_at'], 
                            dtype={'code':'object'})
medcheck_drug.count()

id              18165
brand_name      18142
generic_name    18165
preparation       297
active          18165
created_at      18165
updated_at      18165
dtype: int64

In [106]:
medcheck_drug['form'] = ''
medcheck_drug['strength'] = ''
medcheck_drug['validity'] = ''
medcheck_drug.head(2)

Unnamed: 0,id,brand_name,generic_name,preparation,active,created_at,updated_at,form,strength,validity
0,63306,NOR-NS,0.8% Sodium Chloride,,True,2017-09-26 14:13:42,2017-09-26 14:13:42,,,
1,63307,A.N.B. 0.9% Sodium Chloride Solution,0.9% Sodium Chloride,,True,2017-09-26 14:13:42,2017-09-26 14:13:42,,,


In [126]:
fda_drug = pd.read_csv('../../dataset/drug/raw/fda_drugs.csv',
                       usecols = ['cpr_number','generic','brand','strength','form','validity'], 
                       sep=',',
                       encoding = "ISO-8859-1")

fda_drug = fda_drug.rename(columns={"cpr_number": "id", 
                                 "brand": "brand_name",
                                 "generic": "generic_name", 
                                 "preparation":"preparation", 
                                 "form":"form",
                                 "validity":"validity"})
fda_drug['active'] = True
fda_drug['preparation'] = ''
fda_drug['created_at'] = pd.to_datetime('today').strftime("%Y-%m-%d %H:%m:%S")
fda_drug['updated_at'] = pd.to_datetime('today').strftime("%Y-%m-%d %H:%m:%S")
fda_drug.head(2)

Unnamed: 0,id,generic_name,brand_name,strength,form,validity,active,preparation,created_at,updated_at
0,NO-004746,Purified Water (Distilled Water),-,-,-,2020-10-10,True,,2020-05-31 11:05:29,2020-05-31 11:05:29
1,DE-000331,Budesonide,Budecort Respules,250 mg/mL,Nebulizing Suspension (Sterile),2020-08-11,True,,2020-05-31 11:05:29,2020-05-31 11:05:29


In [127]:
fda_drug.count()

id              17028
generic_name    17028
brand_name      17028
strength        17028
form            17028
validity        17028
active          17028
preparation     17028
created_at      17028
updated_at      17028
dtype: int64

In [128]:
frames = [fda_drug, medcheck_drug]
merge_drug = pd.concat(frames)
merge_drug.count()

id              35193
generic_name    35193
brand_name      35170
strength        35193
form            35193
validity        35193
active          35193
preparation     17325
created_at      35193
updated_at      35193
dtype: int64

In [149]:
merge_drug = merge_drug.drop_duplicates(subset='id')
merge_drug = merge_drug.drop_duplicates(subset=['brand_name','generic_name','strength','form'])
merge_drug = merge_drug.sort_values(by=['id','generic_name','brand_name'])
merge_drug = merge_drug.replace('',np.nan, regex=True)
merge_drug.count()

id              34145
generic_name    34145
brand_name      34122
strength        15980
form            15980
validity        15980
active          34145
preparation       297
created_at      34145
updated_at      34145
dtype: int64

In [150]:
merge_drug.head()

Unnamed: 0,id,generic_name,brand_name,strength,form,validity,active,preparation,created_at,updated_at
0,63306,0.8% Sodium Chloride,NOR-NS,,,,True,,2017-09-26 14:13:42,2017-09-26 14:13:42
1,63307,0.9% Sodium Chloride,A.N.B. 0.9% Sodium Chloride Solution,,,,True,,2017-09-26 14:13:42,2017-09-26 14:13:42
2,63308,0.9% Sodium Chloride,Infusol NS,,,,True,,2017-09-26 14:13:42,2017-09-26 14:13:42
3,63309,0.9% Sodium Chloride,INTRA NaCl,,,,True,,2017-09-26 14:13:42,2017-09-26 14:13:42
4,63310,0.9% Sodium Chloride,MINI-BAG PLUS,,,,True,,2017-09-26 14:13:42,2017-09-26 14:13:42


In [151]:
merge_drug.to_csv('../../dataset/drug/processed/medcheck_drugs.2020-05-31.csv.gz', 
                  compression='gzip')

In [153]:
merge_drug.to_json('../../dataset/drug/processed/medcheck_drugs.2020-05-31.json',
                   orient='records')