In [None]:
import sys
sys.path.append("./")
from utils import pkl_load, pkl_dump, text_load

In [None]:
import csv
from collections import defaultdict
from copy import deepcopy

import pandas as pd
from tqdm import tqdm

In [None]:
from concurrent.futures import ProcessPoolExecutor, as_completed

In [None]:
"""
Readme

1. we will create a mapping between ndc and rxcui
2. we will create a mapping between rxcui and its ingredient-level rxcui
3. we will create a mapping between unused rxcui and its current version
"""

## map concepts to name

In [None]:
# Load the concept table as raw strings.
# RRF files are pipe-delimited and have no quoting convention, so quote
# characters inside a field are literal text. With pandas' default quoting a
# field that starts with '"' is parsed as a quoted field, which can strip the
# quotes from a name or merge several rows into one; QUOTE_NONE disables that.
df2 = pd.read_csv(
    "../raw_data/RxNorm/rrf/RXNCONSO.RRF",
    dtype=str,
    header=None,
    sep="|",
    quoting=csv.QUOTE_NONE,
)
df2.head()

In [None]:
# Every concept id that appears in column 0 of the concept table.
valid_copts = {concept_id for concept_id in df2[0]}

In [None]:
# concept id (column 0) -> set of every name (column 14) listed for it.
rxcui2name = defaultdict(set)

# Walk the two columns in lockstep instead of calling a function per row via
# `DataFrame.apply(axis=1)`: the row-wise form builds a Series for every row
# only to read two cells from it. The collected values are the same,
# including missing names, which are kept as they were before.
for rxcui, name in zip(df2[0], df2[14]):
    rxcui2name[rxcui].add(name)

In [None]:
# Persist the concept-id -> names mapping built above.
# presumably `pkl_dump` (imported from utils) pickles the object to the given path — verify against utils
pkl_dump(rxcui2name, "../resources/rxcui2name.pkl")

## remap outdated concepts to current concepts

In [None]:
# outdated concepts
# RRF is pipe-delimited with no quoting convention; QUOTE_NONE keeps pandas
# from treating a '"' at the start of a field as the start of a quoted field.
df1 = pd.read_csv(
    "../raw_data/RxNorm/rrf/RXNCUI.RRF",
    dtype=str,
    header=None,
    sep="|",
    quoting=csv.QUOTE_NONE,
)
df1.head()

In [None]:
# old concept id (column 0) -> set of concept ids it points to (column 4).
temp = defaultdict(set)

# Iterate the two columns directly; a row-wise `DataFrame.apply(axis=1)`
# would build a Series per row just to read these two cells.
for old, cur in zip(df1[0], df1[4]):
    # A concept that only points at itself and is not present in the current
    # concept table has no usable target, so it is left out of the mapping.
    if old == cur and cur not in valid_copts:
        continue
    temp[old].add(cur)

In [None]:
# Every id that appears as an "old" concept in the remap table.
s = set(df1[0])


def _terminal_targets(start):
    """Follow remap chains from `start` and return the ids where they end.

    An id is an intermediate hop when it is both listed as an old concept
    (`s`) and has its own entry in `temp`; anything else is an end point and
    is collected. The `visited` set guarantees termination on cyclic chains;
    the previous pass-until-stable loop could run forever on a cycle of three
    or more concepts and deep-copied the whole mapping on every pass.

    NOTE(review): a concept that maps to itself counts as an intermediate
    hop, so it (and anything that maps only to it) ends up with no target.
    That mirrors the behaviour of the loop this replaces, but the previous
    cell deliberately keeps self-mappings whose concept is still in
    `valid_copts` — confirm dropping them here is intended.
    """
    found = set()
    visited = set()
    pending = list(temp[start])
    while pending:
        node = pending.pop()
        if node in visited:
            continue
        visited.add(node)
        if node in s and node in temp:
            # Still an outdated concept with its own remap: keep following.
            pending.extend(temp[node])
        else:
            found.add(node)
    return found


# Same keys as `temp`; each value is the fully resolved target set (possibly empty).
ntemp = defaultdict(set)
for old_id in list(temp):
    ntemp[old_id] = _terminal_targets(old_id)

# Leave `temp` in the resolved state, as the original loop did on exit.
temp = deepcopy(ntemp)

In [None]:
# Keep only the outdated concepts that resolved to at least one target.
old2cur = {old_id: targets for old_id, targets in ntemp.items() if len(targets) > 0}

pkl_dump(old2cur, "../resources/rxcui_remap.pkl")

## rxnorm to ingredients

In [None]:
# Load the relationship table as raw strings.
# RRF is pipe-delimited with no quoting convention; QUOTE_NONE keeps pandas
# from treating a '"' at the start of a field as the start of a quoted field.
df3 = pd.read_csv(
    "../raw_data/RxNorm/rrf/RXNREL.RRF",
    dtype=str,
    header=None,
    sep="|",
    quoting=csv.QUOTE_NONE,
)
df3.head()

In [3]:
# Show the relationship diagram stored next to the raw data, for reference
# while reading the traversal cells below.
from IPython.display import Image
from IPython.core.display import HTML 
Image(url= "../raw_data/RxNorm_Drug_Relationships.png")

In [None]:
# Concept ids whose row has 'IN' in column 12 and 'RXNORM' in column 11.
# presumably column 12 is the term type and column 11 the source vocabulary — confirm against the RxNorm docs
is_in_row = df2[12] == 'IN'
is_rxnorm_row = df2[11] == 'RXNORM'
ingredients = set(df2.loc[is_in_row & is_rxnorm_row, 0])

In [None]:
# ingredient id -> set of every concept id reached from it (serial version).
# NOTE(review): no later cell shown here reads `ingrad_map`; the pickled
# result is built from `final_ingra_map` instead.
ingrad_map = dict()

# Index the relationship table once. The previous version filtered the whole
# frame with a boolean mask for every visited concept, i.e. one full scan of
# df3 per lookup.
followed_relas = {'isa', 'tradename_of', 'constitutes', 'has_ingredient'}
linked_any = defaultdict(set)       # column-0 id -> column-4 ids, any relationship
linked_followed = defaultdict(set)  # column-0 id -> column-4 ids, followed relationships only
rel_rows = df3[df3[0].notna()]      # rows without a column-0 id can never match a lookup
for src, dst, rela in zip(rel_rows[0], rel_rows[4], rel_rows[7]):
    linked_any[src].add(dst)
    if rela in followed_relas:
        linked_followed[src].add(dst)

for ingrd in ingredients:
    # Seed with every concept directly linked to the ingredient, whatever the
    # relationship; `exist` tracks what has already been queued.
    s = list(linked_any.get(ingrd, ()))
    exist = set(s)
    reached = set()

    while len(s) > 0:
        sub_ingrad = s.pop()
        reached.add(sub_ingrad)
        # Expand only along the followed relationships, and never into another
        # ingredient or a concept that is already queued.
        for e in linked_followed.get(sub_ingrad, ()):
            if e not in ingredients and e not in exist:
                exist.add(e)
                s.append(e)

    ingrad_map[ingrd] = reached

In [None]:
# Relationship names (column 7 of df3) that the expansion is allowed to follow.
_FOLLOWED_RELAS = frozenset(('isa', 'tradename_of', 'constitutes', 'has_ingredient'))

# Adjacency index over df3, tagged with the frame it was built from so that
# reloading df3 rebuilds it.
_rel_index_cache = {"frame": None, "any": None, "followed": None}


def _relation_index():
    """Return `(any_rel, followed_rel)` adjacency dicts for the current `df3`.

    `any_rel` maps a column-0 id to every column-4 id it is linked to;
    `followed_rel` does the same but only for rows whose column 7 is one of
    `_FOLLOWED_RELAS`. Built on first use and reused afterwards.
    """
    if _rel_index_cache["frame"] is not df3:
        any_rel = defaultdict(set)
        followed_rel = defaultdict(set)
        # Rows without a column-0 id can never match a lookup.
        rel_rows = df3[df3[0].notna()]
        for src, dst, rela in zip(rel_rows[0], rel_rows[4], rel_rows[7]):
            any_rel[src].add(dst)
            if rela in _FOLLOWED_RELAS:
                followed_rel[src].add(dst)
        _rel_index_cache.update(frame=df3, any=any_rel, followed=followed_rel)
    return _rel_index_cache["any"], _rel_index_cache["followed"]


def helper(ingrd):
    """Collect every concept reachable from one ingredient.

    The search is seeded with all concepts directly linked to `ingrd` in
    `df3` (any relationship). From each visited concept it follows only the
    relationships in `_FOLLOWED_RELAS`, skipping concepts that are themselves
    in `ingredients` or were already queued.

    Lookups go through a prebuilt adjacency index instead of filtering the
    whole of `df3` with a boolean mask for every visited concept.

    Args:
        ingrd: concept id of the ingredient to expand.

    Returns:
        A single-entry dict `{ingrd: set_of_reached_concept_ids}`; the set is
        empty when `ingrd` has no rows in `df3`.
    """
    any_rel, followed_rel = _relation_index()

    pending = list(any_rel.get(ingrd, ()))
    exist = set(pending)
    reached = set()

    while pending:
        sub_ingrad = pending.pop()
        reached.add(sub_ingrad)
        for e in followed_rel.get(sub_ingrad, ()):
            if e not in ingredients and e not in exist:
                exist.add(e)
                pending.append(e)

    return {ingrd: reached}

In [None]:
# may need more than 24 hours to finish
final_ingra_map = dict()

# One task per ingredient; each single-entry result is merged as soon as its
# worker finishes (completion order, not submission order).
with ProcessPoolExecutor(max_workers=4) as exe:
    submitted = [exe.submit(helper, ing) for ing in ingredients]
    for finished in as_completed(submitted):
        final_ingra_map.update(finished.result())

In [None]:
# Sanity check: number of ingredient entries collected from the workers.
len(final_ingra_map)

In [None]:
# Invert ingredient -> reached concepts into concept -> ingredients that reach it.
rxcui2ingre = defaultdict(set)
for ingredient_id, reached_ids in final_ingra_map.items():
    for reached_id in reached_ids:
        rxcui2ingre[reached_id].add(ingredient_id)

pkl_dump(rxcui2ingre, "../resources/rxcui2ingredient.pkl")

## ndc to rxcui

In [None]:
# Load the attribute table as raw strings and keep only the rows whose
# column 8 is 'NDC'.
# RRF is pipe-delimited with no quoting convention. With pandas' default
# quoting, a value that starts with '"' is parsed as a quoted field and can
# swallow the following rows (NDC rows included); QUOTE_NONE disables that.
df4 = pd.read_csv(
    "../raw_data/RxNorm/rrf/RXNSAT.RRF",
    dtype=str,
    header=None,
    sep="|",
    quoting=csv.QUOTE_NONE,
)
df4 = df4[df4[8]=='NDC']
df4.head()

In [None]:
# NDC code (column 10) -> concept id (column 0).
# Built straight from the two columns instead of a row-wise
# `DataFrame.apply(axis=1)`, which creates a Series per row just to read two
# cells. As before, when an NDC appears on several rows the last row wins.
# NOTE(review): if one NDC can legitimately carry more than one concept id,
# the earlier ones are silently dropped — confirm that is acceptable.
ndc2rxcui = dict(zip(df4[10], df4[0]))

In [None]:
# Sanity check: number of distinct NDC keys in the mapping.
len(ndc2rxcui)

In [None]:
# Persist the NDC -> concept-id mapping built above.
# presumably `pkl_dump` (imported from utils) pickles the object to the given path — verify against utils
pkl_dump(ndc2rxcui, "../resources/ndc2rxcui.pkl")