In [1]:
import pandas as pd
import ast

def parse_df(df):
    """Decode the stringified list columns of the drug table.

    The columns ``form``, ``roa`` and ``active_ingredient`` are stored in the
    CSV as Python-literal strings. Each string cell is decoded with
    ``ast.literal_eval`` (which only accepts literals, never arbitrary code);
    any non-string cell (e.g. NaN for a missing value) becomes an empty list.

    Args:
        df: DataFrame containing the three columns listed above.

    Returns:
        A new DataFrame with those columns decoded. The input frame is left
        untouched so the cell can be re-run safely on the same object.
    """
    columns_to_convert = ['form', 'roa', 'active_ingredient']

    def parse(el):
        # isinstance also accepts str subclasses (e.g. numpy.str_).
        if isinstance(el, str):
            return ast.literal_eval(el)
        return []

    parsed = df.copy()
    for column in columns_to_convert:
        parsed[column] = parsed[column].apply(parse)
    return parsed

# Load the drug table from CSV and decode its stringified list columns
# (form, roa, active_ingredient) into Python lists via parse_df.
all_dailymed_drugs = parse_df(pd.read_csv("all_drugs_ade_indications_updated.csv"))


def construct_dict(df):
    """Build a ``code_dailymed -> code_ocrx`` lookup from a match table.

    Args:
        df: DataFrame with at least the columns ``code_dailymed`` and
            ``code_ocrx``.

    Returns:
        dict mapping each ``code_dailymed`` value to its ``code_ocrx``
        rendered as an integer string (e.g. ``3000002514.0`` -> ``'3000002514'``).
        Rows missing either code are skipped.
    """
    key_columns = ['code_dailymed', 'code_ocrx']
    # Only the two code columns decide whether a row is usable; a NaN in any
    # other column must not discard an otherwise valid match.
    matched = df.dropna(subset=key_columns)
    # zip over the columns keeps each value's own dtype (iterrows would
    # upcast a mixed int/float row to float).
    return {
        dailymed_code: str(int(ocrx_code))
        for dailymed_code, ocrx_code in zip(matched['code_dailymed'], matched['code_ocrx'])
    }
    
def load_matches():
    """Load the per-term match tables into lookup dictionaries.

    Reads ``results/new_match_<term>.csv`` for each of the terms
    ``active_ingredient``, ``form`` and ``roa`` and converts each table with
    ``construct_dict``.

    Returns:
        dict keyed by term name, each value being that term's lookup dict.
    """
    lookups = {}
    for term in ('active_ingredient', 'form', 'roa'):
        match_table = pd.read_csv(f"results/new_match_{term}.csv")
        lookups[term] = construct_dict(match_table)
    return lookups

- [ ] Nombre de fichiers
- [ ] Nombre de médicaments
- [ ] Nombre de substances uniques
- [ ] Nombre de produits à substances multiples
- [ ] Try caching all OCRx/DailyMed lookup tuples

In [2]:
# Per-term lookup dictionaries (active_ingredient, form, roa) read from the
# results/new_match_<term>.csv files by load_matches.
matches = load_matches()

In [16]:
import os
import pandas as pd
EXPORT_DIR = "export/"
# exist_ok=True makes the cell re-runnable without a separate existence check.
os.makedirs(EXPORT_DIR, exist_ok=True)

terms = ['active_ingredient','form','roa']
def generate_export_df(term):
    """Export the distinct (code, name) pairs of one term column to CSV.

    Collects every entry with a non-None ``name`` from the list-valued column
    ``term`` of ``all_dailymed_drugs``, de-duplicates the pairs, and writes
    them to ``<EXPORT_DIR>/dailymed_<term>_en.csv`` with the columns
    ``dailymed_code`` and ``dailymed_label``.

    Args:
        term: name of a list-valued column of ``all_dailymed_drugs``.
    """
    unique_pairs = {
        (el['code'], el['name'])
        for value in all_dailymed_drugs[term].values
        for el in value
        if el['name'] is not None
    }
    # Naming the columns in the constructor keeps this working when no pair
    # was collected (assigning two names to a column-less frame raises).
    df = pd.DataFrame(list(unique_pairs), columns=['dailymed_code', 'dailymed_label'])
    # Set iteration order is arbitrary: sort on both columns and renumber the
    # index so the written file is identical from run to run.
    df = df.sort_values(['dailymed_label', 'dailymed_code']).reset_index(drop=True)
    df.to_csv(os.path.join(EXPORT_DIR, f"dailymed_{term}_en.csv"))

for term in terms:
    generate_export_df(term)

In [18]:
def export_drug(df):
    """Return the drug code / drug name pairs of ``df`` under export column names.

    NOTE(review): the original body was left unfinished — it assigned a single
    label (``'da'``) to a two-column frame, which always raises ValueError, and
    returned nothing. The column names below follow generate_export_df; confirm
    they are the intended ones. Writing the frame to disk is left to the caller.

    Args:
        df: DataFrame with at least the columns ``code`` and ``drug``.

    Returns:
        A new two-column DataFrame (``dailymed_code``, ``dailymed_label``);
        ``df`` itself is not modified.
    """
    new_df = df[['code','drug']].copy()
    new_df.columns = ['dailymed_code', 'dailymed_label']
    return new_df
    

Unnamed: 0,code,drug
0,0378-7308,NORETHINDRONE AND ETHINYL ESTRADIOL AND FERROU...
1,65162-446,Entecavir
2,0603-4170,K-EFFERVESCENT
3,70934-751,Labetalol Hydrochloride
4,70518-2619,Irbesartan and Hydrochlorothiazide
...,...,...
46634,44087-1225,Cetrotide
46635,71610-121,Mirtazapine
46636,0406-1219,Amphetamine Sulfate
46637,42543-005,Sildenafil Citrate


In [3]:
def extend_current_df(df):
    """Attach an ``ocrx_code`` to every entry of the list-valued term columns.

    For each of the columns ``form``, ``roa`` and ``active_ingredient``, every
    entry dict gains an ``ocrx_code`` key looked up in the global
    ``matches[<column>]`` by the entry's ``code``; entries without a match get
    ``None``.

    Args:
        df: DataFrame whose three term columns hold lists of dicts with a
            ``code`` key.

    Returns:
        A copy of ``df`` with the three columns replaced by annotated lists.
    """
    def annotate(entries, column):
        annotated = []
        for entry in entries:
            ocrx_code = matches[column].get(entry['code'], None)
            annotated.append({**entry, 'ocrx_code': ocrx_code})
        return annotated

    enriched = df.copy()
    for name in ('form', 'roa', 'active_ingredient'):
        # Bind the current column name as a default so each lambda keeps its own.
        enriched[name] = enriched[name].apply(lambda entries, name=name: annotate(entries, name))
    return enriched

In [4]:
# Copy of the drug table in which every form / roa / active_ingredient entry
# carries an 'ocrx_code' key (None when no match exists).
ocrx_df = extend_current_df(all_dailymed_drugs)

In [8]:
# Persist the annotated table; the list-valued columns are written as their string representation.
ocrx_df.to_csv("ocrx_dailymed.csv")

In [7]:
ocrx_df['active_ingredient'].iloc[0]

[{'name': 'ETHINYL ESTRADIOL',
  'code': '423D2T571U',
  'ocrx_code': '3000002514'},
 {'name': 'NORETHINDRONE', 'code': 'T18F433X4S', 'ocrx_code': '3000004496'}]

In [11]:
!mkdir ocrx-matches

In [1]:
# 1. Select rows that only have one active ingredient
# 2. Find that active ingredient in OCRx
'''Use QueryFact.queryM'''
import json
import os
import re

import requests
from tqdm import tqdm
class Pipe:
    """Left-to-right composition of callables.

    Calling the pipe passes the positional arguments to the first callable;
    the value each callable returns is unpacked (``*result``) as the
    positional arguments of the next one. The return value of the last
    callable is returned as-is; with no callables the argument tuple is
    returned unchanged.
    """

    def __init__(self, fns):
        # Sequence of callables applied in order.
        self.fns = fns

    def __call__(self, *start):
        current = start
        for stage in self.fns:
            # Every stage must return something unpackable for the next stage.
            current = stage(*current)
        return current

def select_one_active_ingredient(df):
    """Keep only the rows whose ``active_ingredient`` list has exactly one entry.

    Args:
        df: DataFrame with a list-valued ``active_ingredient`` column.

    Returns:
        The matching rows of ``df``, original index preserved.
    """
    def has_single_ingredient(ingredients):
        return len(ingredients) == 1

    single_mask = df['active_ingredient'].apply(has_single_ingredient)
    return df.loc[single_mask]

def parse_dl_response(dlresponse):
    """Flatten a DLquery response into a ``label -> RxCui`` dictionary.

    Args:
        dlresponse: decoded JSON response; when it has a ``CCD`` key, that
            value is iterated as rows with ``labels`` (iterable) and ``RxCui``.

    Returns:
        dict mapping every label of every row to that row's ``RxCui`` (a label
        seen again later overwrites the earlier value), or ``None`` when the
        response has no ``CCD`` key.
    """
    if 'CCD' in dlresponse:
        label_to_rxcui = {}
        for entry in dlresponse['CCD']:
            for label in entry['labels']:
                label_to_rxcui[label] = entry['RxCui']
        return label_to_rxcui
    return None
def convert_string(dailymed_string):
    """Return the upper-cased string form of ``dailymed_string``."""
    as_text = str(dailymed_string)
    return as_text.upper()
def convert_comb_dailymed_ocrx(dailymed_string,field):
    """Look up the OCRx code whose ``label`` equals ``dailymed_string``.

    NOTE(review): this indexes ``matches[field]`` as a DataFrame with
    ``label`` and ``code_ocrx`` columns, whereas load_matches in this notebook
    builds plain dicts — presumably this targets an older shape of
    ``matches``; confirm before use.

    Args:
        dailymed_string: label to search for (exact match).
        field: key into the global ``matches``.

    Returns:
        ``code_ocrx`` of the first matching row, or ``None`` when no row matches.
    """
    table = matches[field]
    hits = table.loc[table['label'] == dailymed_string]
    return hits.iloc[0]['code_ocrx'] if hits.shape[0] > 0 else None

def dlquery_and_dump(form_data,i,drug_name):
    """Run one DLquery lookup and dump the request and response to a JSON file.

    The record ``{"request", "drug_name", "response"}`` is written to
    ``ocrx-matches/<i, zero-padded to 9 digits>_<sanitised drug name>.txt``.

    Args:
        form_data: dict of form fields passed to ``dlquery``.
        i: integer sequence number used as the file-name prefix.
        drug_name: drug name stored in the record and used in the file name.

    Returns:
        The parsed response from ``dlquery`` (``None`` when there was nothing
        to query or the response had no ``CCD`` key).
    """
    parsed = dlquery(form_data)
    # Drug names may contain '/', spaces, commas, ... which are unsafe or
    # awkward in a file name; collapse every such run into one underscore.
    safe_name = re.sub(r'[^\w.-]+', '_', str(drug_name))
    os.makedirs("ocrx-matches", exist_ok=True)
    filename = f"ocrx-matches/{i:09}_{safe_name}.txt"
    total = {'request': form_data, 'drug_name': drug_name, 'response': parsed}
    with open(filename, "w", encoding="utf-8") as f:
        json.dump(total, f)
    return parsed
        
def dlquery(form_data, timeout=60):
    """POST one form to the local DLquery endpoint and parse the answer.

    Args:
        form_data: dict of form fields; this notebook sends ``substance``,
            ``form`` and ``route``, each a space-separated string of OCRx codes.
        timeout: seconds to wait for the server before ``requests`` raises,
            so a hung service cannot block the notebook forever.

    Returns:
        The ``label -> RxCui`` dict produced by ``parse_dl_response``, or
        ``None`` when ``form_data`` has no (or an empty) ``substance`` or the
        response has no ``CCD`` key.
    """
    # .get: a form without a 'substance' key is treated like an empty
    # substance instead of raising KeyError.
    if not form_data.get('substance'):
        return None
    # (None, value) tuples send each field as a multipart form field without a file name.
    multipart_form_data = {key: (None, val) for key, val in form_data.items()}
    response = requests.post(
        'http://localhost:8080/DLquery',
        files=multipart_form_data,
        headers={'Authorization': 'Bearer foo'},
        timeout=timeout,
    )
    return parse_dl_response(response.json())

def match_with_ocrx(df):
    """Query DLquery once per row of ``df`` and collect the results by drug name.

    For every row a form is built with the keys ``substance``, ``form`` and
    ``route``; each value is the space-joined non-None ``ocrx_code`` values of
    the row's ``active_ingredient``, ``form`` and ``roa`` entries. The form is
    sent through ``dlquery_and_dump`` together with the row's position and
    drug name.

    Args:
        df: DataFrame with a ``drug`` column and the three list-valued term
            columns whose entries carry an ``ocrx_code`` key.

    Returns:
        dict mapping drug name to the value returned by ``dlquery_and_dump``.
        NOTE: the dict is keyed by drug name, so rows sharing a name overwrite
        one another (the last row wins).
    """
    proper_names = {'active_ingredient': 'substance', 'form' : 'form','roa' : 'route'}
    fields = ['active_ingredient', 'form', 'roa']

    def make_codes_dict(row,field):
        # Space-separated OCRx codes of one term column, unmatched entries dropped.
        codes = [el['ocrx_code'] for el in row[field]]
        return ' '.join(code for code in codes if code is not None)

    form_datas = [{proper_names[field] : make_codes_dict(row,field) for field in fields} for _, row in df.iterrows()]
    drug_names = df['drug'].values
    # zip has no length, so give tqdm the total to get a percentage and an ETA.
    progress = tqdm(zip(drug_names,form_datas), total=len(form_datas))
    dl_results = {drug_name: dlquery_and_dump(comb,i,drug_name) for i, (drug_name, comb) in enumerate(progress)}
    return dl_results

In [None]:
dlquery({})

In [49]:
# Restrict the annotated table to products with exactly one active ingredient.
one_active = select_one_active_ingredient(ocrx_df)

In [50]:
# Show the single-ingredient rows whose ingredient was matched to OCRx code 3000004855.
one_active.loc[one_active['active_ingredient'].apply(lambda el: el[0]['ocrx_code'] == '3000004855')]

Unnamed: 0.1,Unnamed: 0,author,drug,active_ingredient,code,filename,form,roa,adverse_events_list,adverse_events,indications,active_ingredients,numerator_name,numerator_val,denominator_name,denominator_val
69,69,Northern Welding Supply,Oxygen,"[{'name': 'Oxygen', 'code': 'S88TT14065', 'ocr...",10825-002,prescription/20101007_97fdc5d0-b18d-40b8-a99e-...,"[{'name': 'GAS', 'code': 'C42933', 'ocrx_code'...","[{'name': 'RESPIRATORY (INHALATION)', 'code': ...",[],,,,,,,
102,102,MITCHELL WELDING SUPPLY,Oxygen,"[{'name': 'OXYGEN', 'code': 'S88TT14065', 'ocr...",72276-001,prescription/20211008_704755e8-239d-18d6-e053-...,"[{'name': 'GAS', 'code': 'C42933', 'ocrx_code'...","[{'name': 'RESPIRATORY (INHALATION)', 'code': ...",[],,,,,,,
137,137,Yosemite Medical Supply,Oxygen,"[{'name': 'Oxygen', 'code': 'S88TT14065', 'ocr...",39338-1072,prescription/20100719_8c0478bf-7ac1-467f-a2bc-...,"[{'name': 'GAS', 'code': 'C42933', 'ocrx_code'...","[{'name': 'RESPIRATORY (INHALATION)', 'code': ...",[],,,,,,,
147,147,Peachtree Medical,Oxygen,"[{'name': 'Oxygen', 'code': 'S88TT14065', 'ocr...",59225-0001,prescription/20130725_281e8cb5-27f3-4df2-9bc5-...,"[{'name': 'GAS', 'code': 'C42933', 'ocrx_code'...","[{'name': 'RESPIRATORY (INHALATION)', 'code': ...",[],,,,,,,
182,182,Linde Gas & Equipment Inc.,"Air, Compressed","[{'name': 'Oxygen', 'code': 'S88TT14065', 'ocr...",25373-006,prescription/20211225_b8b12e84-9e49-77de-700f-...,"[{'name': 'GAS', 'code': 'C42933', 'ocrx_code'...","[{'name': 'RESPIRATORY (INHALATION)', 'code': ...",[],,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
46506,46506,Empresas De Soldaduras,Oxygen,"[{'name': 'OXYGEN', 'code': 'S88TT14065', 'ocr...",80421-100,prescription/20211014_6f57cec9-9132-4ba4-a033-...,"[{'name': 'GAS', 'code': 'C42933', 'ocrx_code'...","[{'name': 'RESPIRATORY (INHALATION)', 'code': ...",[],,,,,,,
46516,46516,Advacare Home Services,Oxygen,"[{'name': 'OXYGEN', 'code': 'S88TT14065', 'ocr...",65386-0001,prescription/20211005_985fb6bc-3f1e-41ac-a3ae-...,"[{'name': 'GAS', 'code': 'C42933', 'ocrx_code'...","[{'name': 'RESPIRATORY (INHALATION)', 'code': ...",[],,,,,,,
46517,46517,"Manuel Enterprises, Inc. dba Nepenthe Home Med...",Oxygen,"[{'name': 'OXYGEN', 'code': 'S88TT14065', 'ocr...",58089-001,prescription/20211005_97542e76-4122-4754-e053-...,"[{'name': 'GAS', 'code': 'C42933', 'ocrx_code'...","[{'name': 'RESPIRATORY (INHALATION)', 'code': ...",[],,,,,,,
46581,46581,RXO2 Oxygen & Medical Equipment Supply Co. Inc.,Oxygen,"[{'name': 'OXYGEN', 'code': 'S88TT14065', 'ocr...",68639-0001,prescription/20211007_9db62731-bcb1-4793-ae12-...,"[{'name': 'GAS', 'code': 'C42933', 'ocrx_code'...","[{'name': 'RESPIRATORY (INHALATION)', 'code': ...",[],,,,,,,


In [51]:
import pickle
# Query the service for every single-ingredient product, then pickle the
# collected results. If match_with_ocrx raises (the recorded output ends in a
# ConnectionError), the pickle below is never written.
result = match_with_ocrx(one_active)
# result = "hi"
with open("ocrx_dailymed_match.pickle","wb") as f:
    pickle.dump(result,f)






0it [00:00, ?it/s][A[A[A[A[A




3it [00:01,  2.64it/s][A[A[A[A[A




4it [00:02,  1.62it/s][A[A[A[A[A




5it [00:03,  1.28it/s][A[A[A[A[A




6it [00:04,  1.11it/s][A[A[A[A[A




7it [00:05,  1.02it/s][A[A[A[A[A




9it [00:06,  1.33it/s][A[A[A[A[A




10it [00:07,  1.27it/s][A[A[A[A[A




11it [00:08,  1.23it/s][A[A[A[A[A




12it [00:09,  1.20it/s][A[A[A[A[A




13it [00:10,  1.17it/s][A[A[A[A[A




14it [00:11,  1.16it/s][A[A[A[A[A




16it [00:11,  1.86it/s][A[A[A[A[A




18it [00:12,  1.98it/s][A[A[A[A[A




19it [00:13,  1.70it/s][A[A[A[A[A




20it [00:14,  1.49it/s][A[A[A[A[A




21it [00:15,  1.37it/s][A[A[A[A[A




22it [00:15,  1.30it/s][A[A[A[A[A




23it [00:16,  1.24it/s][A[A[A[A[A




24it [00:17,  1.19it/s][A[A[A[A[A




25it [00:17,  1.57it/s][A[A[A[A[A




27it [00:18,  1.79it/s][A[A[A[A[A




28it [00:19,  1.55it/s][A[A[A[A[A




30it [00:20,  1.7

ConnectionError: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response'))

In [None]:
result

In [6]:
# Ad-hoc query against the local DLquery service with one form / substance / route code triple.
result = dlquery({'form' : '4000000043','substance' : '3000001547', 'route' : '4100000056'})

In [67]:
result

{'ATORVASTATIN (ATORVASTATIN CALCIUM PROPYLENE GLYCOL SOLVATE) 80 MG AS TABLET IN ORAL': '1000009689',
 'AMLODIPINE (AMLODIPINE BESYLATE) 10 MG | ATORVASTATIN (ATORVASTATIN CALCIUM) 80 MG AS TABLET IN ORAL': '1000008169',
 'ATORVASTATIN (ATORVASTATIN CALCIUM) 10 MG AS TABLET IN ORAL': '1000019068',
 'AMLODIPINE (AMLODIPINE BESYLATE) 5 MG | ATORVASTATIN (ATORVASTATIN CALCIUM) 80 MG AS TABLET IN ORAL': '1000010934',
 'AMLODIPINE (AMLODIPINE BESYLATE) 10 MG | ATORVASTATIN (ATORVASTATIN CALCIUM) 10 MG AS TABLET IN ORAL': '1000007010',
 'AMLODIPINE (AMLODIPINE BESYLATE) 5 MG | ATORVASTATIN (ATORVASTATIN CALCIUM PROPYLENE GLYCOL SOLVATE) 40 MG AS TABLET IN ORAL': '1000011683',
 'ATORVASTATIN (ATORVASTATIN CALCIUM PROPYLENE GLYCOL SOLVATE) 40 MG AS TABLET IN ORAL': '1000023084',
 'AMLODIPINE (AMLODIPINE BESYLATE) 5 MG | ATORVASTATIN (ATORVASTATIN CALCIUM PROPYLENE GLYCOL SOLVATE) 20 MG AS TABLET IN ORAL': '1000004813',
 'AMLODIPINE (AMLODIPINE BESYLATE) 10 MG | ATORVASTATIN (ATORVASTATIN CALC

In [37]:
[row['labels'] for row in result['CCD']]

[['ATORVASTATIN (ATORVASTATIN CALCIUM PROPYLENE GLYCOL SOLVATE) 80 MG AS TABLET IN ORAL'],
 ['AMLODIPINE (AMLODIPINE BESYLATE) 10 MG | ATORVASTATIN (ATORVASTATIN CALCIUM) 80 MG AS TABLET IN ORAL'],
 ['ATORVASTATIN (ATORVASTATIN CALCIUM) 10 MG AS TABLET IN ORAL'],
 ['AMLODIPINE (AMLODIPINE BESYLATE) 5 MG | ATORVASTATIN (ATORVASTATIN CALCIUM) 80 MG AS TABLET IN ORAL'],
 ['AMLODIPINE (AMLODIPINE BESYLATE) 10 MG | ATORVASTATIN (ATORVASTATIN CALCIUM) 10 MG AS TABLET IN ORAL'],
 ['AMLODIPINE (AMLODIPINE BESYLATE) 5 MG | ATORVASTATIN (ATORVASTATIN CALCIUM PROPYLENE GLYCOL SOLVATE) 40 MG AS TABLET IN ORAL'],
 ['ATORVASTATIN (ATORVASTATIN CALCIUM PROPYLENE GLYCOL SOLVATE) 40 MG AS TABLET IN ORAL'],
 ['AMLODIPINE (AMLODIPINE BESYLATE) 5 MG | ATORVASTATIN (ATORVASTATIN CALCIUM PROPYLENE GLYCOL SOLVATE) 20 MG AS TABLET IN ORAL'],
 ['AMLODIPINE (AMLODIPINE BESYLATE) 10 MG | ATORVASTATIN (ATORVASTATIN CALCIUM PROPYLENE GLYCOL SOLVATE) 40 MG AS TABLET IN ORAL'],
 ['AMLODIPINE (AMLODIPINE BESYLATE) 10

In [24]:
matches

{'drug': {'LUTETIUM OXODOTREOTIDE LU-177': '3000001032',
  'RAMUCIRUMAB': '3000003980',
  'THROMBIN ALFA': '3000002952',
  'AMLODIPINE': '3000002513',
  'NITROFURANTOIN': '3000000127',
  'ALFENTANIL': '3000000911',
  'ACACIA': '3000000820',
  'FLUTAMIDE': '3000000764',
  'ANTI-INHIBITOR COAGULANT COMPLEX': '3000004771',
  'TRIAZOLAM': '3000003468',
  'NILOTINIB': '3000001622',
  'FENTANYL CITRATE': '3000001599',
  'DAPTOMYCIN': '3000002748',
  'CINACALCET': '3000002939',
  'DISOPYRAMIDE PHOSPHATE': '3000002318',
  'DROSPIRENONE': '3000002109',
  'IRINOTECAN': '3000003522',
  'TOCOPHEROL': '3000002449',
  'FLUDROCORTISONE': '3100005314',
  'CEFOTETAN DISODIUM': '3000004281',
  'COPPER': '3000003164',
  'DIFLORASONE DIACETATE': '3000002382',
  'FEXOFENADINE': '3100005340',
  'METHENAMINE': '3000002610',
  'PEGASPARGASE': '3000005086',
  'FLIBANSERIN': '3000002603',
  'VALSARTAN': '3000003776',
  'NITISINONE': '3000000128',
  'FLUOROMETHOLONE': '3000000760',
  'NABUMETONE': '3000002617',
