In [1]:
from os.path import expanduser
filepath = expanduser("~/Documents/RxNorm_full_01032022/rxnorm.db")

In [2]:
import sqlite3
con = sqlite3.connect(filepath)

In [3]:
rxconso_query = 'select * from RXNCONSO'
rxrel_query = 'select * from RXNREL'


In [4]:
import pandas as pd
rxcui_df = pd.read_sql_query(rxconso_query,con)
rxrel_df = pd.read_sql_query(rxrel_query,con)

In [5]:
rxcui_str_to_id = {row['STR']:row['RXCUI'] for _,row in rxcui_df.iterrows()}
rxcui_id_to_str = {row['RXCUI']:row['STR'] for _,row in rxcui_df.iterrows()}

In [6]:
rxrel_df['RELA'].unique()

array(['permuted_term_of', 'mapped_to', 'sort_version_of',
       'entry_version_of', 'has_permuted_term', 'has_ingredient', '',
       'has_basis_of_strength_substance', 'has_precise_active_ingredient',
       'has_active_ingredient', 'contains', 'has_modification',
       'is_modification_of', 'mapped_from', 'ingredient_of',
       'inverse_isa', 'print_name_of', 'isa', 'active_ingredient_of',
       'contained_in', 'included_in', 'product_monograph_title_of',
       'has_dose_form', 'has_active_moiety', 'has_inactive_ingredient',
       'active_moiety_of', 'inactive_ingredient_of', 'has_sort_version',
       'has_entry_version', 'includes', 'has_print_name', 'dose_form_of',
       'has_member', 'member_of', 'basis_of_strength_substance_of',
       'precise_active_ingredient_of', 'has_product_monograph_title',
       'has_tradename', 'tradename_of', 'has_part', 'form_of',
       'has_precise_ingredient', 'has_form', 'precise_ingredient_of',
       'reformulation_of', 'part_of', 'has_

In [7]:
resolving_terms = [
    'has_ingredient',
    'has_tradename'
]

In [8]:
resolve_mapping_df = rxrel_df[rxrel_df['RELA'].apply(lambda x : x in resolving_terms)]

In [9]:
def rxcui_values(df):
    return [(rxcui, rxcui_id_to_str[rxcui]) for rxcui in df['RXCUI2'] if rxcui in rxcui_id_to_str]
rxcui_deep_dict = {rxcui_1 : rxcui_values(df) for rxcui_1, df in resolve_mapping_df.groupby(by='RXCUI1')}

In [10]:
import pandas as pd
sider_df = pd.read_csv("data/meddra.tsv",sep='\t',header=None,names=['cui_id','tty','id','name'])

In [11]:
from Levenshtein import ratio

def search(term,targets=rxcui_df):
    best_match =  max([(target,ratio(term,target)) for target in targets],key=lambda x:x[1])[0]
    print(f"Best match is {best_match}")
    rxcui_id = rxcui_str_to_id[best_match]
    if rxcui_id in rxcui_deep_dict:
        possible_matches = rxcui_deep_dict[rxcui_id]
        print(possible_matches)
        return possible_matches
    return []
    

In [12]:
ratio("tylenol","tylonol k")

0.75

In [15]:
rxcuis = [el[0] for el in search("tylenol",rxcui_str_to_id.keys())]

Best match is Tylenol
[('161', 'Product containing paracetamol (medicinal product)'), ('209387', 'Acetaminophen 325 mg ORAL TABLET, FILM COATED [TYLENOL Regular Strength]'), ('209459', 'ACETAMINOPHEN 500 mg ORAL TABLET [Tylenol Extra Strength Caplet]'), ('364772', 'acetaminophen Oral Solution [Tylenol]'), ('369070', 'acetaminophen Oral Tablet [Tylenol]'), ('569998', 'acetaminophen 325 MG [Tylenol]'), ('570070', 'acetaminophen 500 MG [Tylenol]'), ('570122', 'acetaminophen 100 MG/ML [Tylenol]'), ('608663', 'acetaminophen 80 MG [Tylenol]'), ('608664', 'acetaminophen Disintegrating Oral Tablet [Tylenol]'), ('608680', 'acetaminophen 160 MG [Tylenol]'), ('608681', 'Tylenol 160 MG Disintegrating Oral Tablet'), ('646457', 'acetaminophen 650 MG [Tylenol]'), ('646458', 'acetaminophen Extended Release Oral Tablet [Tylenol]'), ('646459', 'Tylenol 650 MG Extended Release Oral Tablet'), ('692667', 'acetaminophen Chewable Tablet [Tylenol]'), ('692668', 'APAP 500 MG Chewable Tablet [Tylenol]'), ('7073

In [18]:
rxcui_df[rxcui_df['RXCUI'].isin(rxcuis)]

Unnamed: 0,RXCUI,LAT,TS,LUI,STT,SUI,ISPREF,RXAUI,SAUI,SCUI,SDUI,SAB,TTY,CODE,STR,SRL,SUPPRESS,CVF
472,161,ENG,,,,,,10280795,,,,USP,IN,m150,Acetaminophen,,N,
473,161,ENG,,,,,,10326508,,,,GS,IN,12,Acetaminophen,,N,
474,161,ENG,,,,,,10333986,,DB00316,,DRUGBANK,FSY,DB00316,Acetaminofén,,N,
475,161,ENG,,,,,,10795555,,90332006,,SNOMEDCT_US,PT,90332006,Acetaminophen-containing product,,N,
476,161,ENG,,,,,,10808671,,90332006,,SNOMEDCT_US,SY,90332006,Paracetamol-containing product,,N,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1062761,2374361,ENG,,,,,,12406877,12406877,2374361,,RXNORM,SBD,2374361,acetaminophen 500 MG Oral Powder [Tylenol],,N,4096
1062762,2374361,ENG,,,,,,12406878,12406878,2374361,,RXNORM,PSN,2374361,Tylenol DISSOLVE PACKS 500 MG Oral Powder,,N,4096
1062763,2374361,ENG,,,,,,12406879,12406879,2374361,,RXNORM,SY,2374361,Tylenol 500 MG Oral Powder,,N,4096
1062764,2374361,ENG,,,,,,12407944,12407944,2374361,,RXNORM,SY,2374361,APAP 500 MG Oral Powder [Tylenol],,N,4096


In [19]:
rxcui_df['TTY'].value_counts()

DP             188573
BD             111580
SY              94617
CD              81271
IN              62094
PT              44233
FN              42127
SCD             37556
BN              34524
CDD             34327
CDC             34327
CDA             34327
PSN             34064
AB              28086
SCDC            27059
TMSY            22420
SU              22290
SBD             22026
SBDG            19185
SBDC            18215
SCDG            15856
SCDF            14557
SBDF            13759
MS              12497
CE               4807
ET               4672
FSY              4374
GN               3788
MIN              3767
NM               3293
PIN              3221
PM               2879
PEP              2674
N1               2339
MH               2253
MTH_RXN_BD       2161
MTH_RXN_CD       1819
MTH_RXN_DP       1807
PCE              1698
DF               1035
PTGB             1019
RXN_PT            927
BPCK              742
SYGB              725
GPCK              699
MTH_RXN_CD

In [None]:
# rxcui_df.set_index('RXCUI')

Unnamed: 0_level_0,LAT,TS,LUI,STT,SUI,ISPREF,RXAUI,SAUI,SCUI,SDUI,SAB,TTY,CODE,STR,SRL,SUPPRESS,CVF
RXCUI,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
3,ENG,,,,,,8717795,,58488005,,SNOMEDCT_US,PT,58488005,"1,4-alpha-Glucan branching enzyme",,N,
3,ENG,,,,,,8717796,,58488005,,SNOMEDCT_US,FN,58488005,"1,4-alpha-Glucan branching enzyme (substance)",,N,
3,ENG,,,,,,8717808,,58488005,,SNOMEDCT_US,SY,58488005,"Amylo-(1,4,6)-transglycosylase",,N,
3,ENG,,,,,,8718164,,58488005,,SNOMEDCT_US,SY,58488005,Branching enzyme,,N,
19,ENG,,,,,,10794494,,112116001,,SNOMEDCT_US,SY,112116001,17-hydrocorticosteroid,,N,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2588089,ENG,,,,,,12716544,,,,MTHSPL,DP,61133-6015,Flunixin Meglumine 50 mg in 1 mL INTRAMUSCULAR...,,N,
2588090,ENG,,,,,,10805247,,773250004,,SNOMEDCT_US,PT,773250004,Iobenguane (131-I)-containing product in paren...,,N,
2588090,ENG,,,,,,10811396,,773250004,,SNOMEDCT_US,FN,773250004,Product containing iobenguane (131-I) in paren...,,N,
2588091,ENG,,,,,,12679814,,,,GS,BD,128647,COMIRNATY COVID-19 Vaccine Suspension for Inje...,,N,


In [20]:
rxcui_df.loc[rxcuis]

KeyError: "None of [Index(['161', '209387', '209459', '364772', '369070', '569998', '570070',\n       '570122', '608663', '608664', '608680', '608681', '646457', '646458',\n       '646459', '692667', '692668', '707326', '707327', '731370', '828553',\n       '828554', '828555', '828556', '828557', '828558', '1146434', '1187310',\n       '1187311', '1187315', '1243440', '1296041', '1296526', '1738138',\n       '1738139', '2178756', '2178757', '2178758', '2374361'],\n      dtype='object')] are in the [index]"

In [None]:
rxcui_str_to_id['Tylenol']

'202433'

In [None]:
search("Acetaminophen 325 mg ORAL TABLET, FILM COATED [TYLENOL Regular Strength]",rxcui_str_to_id.keys())

Best match is Acetaminophen 325 mg ORAL TABLET, FILM COATED [TYLENOL Regular Strength]


In [25]:
all_side_effects_path = "./data/gzs/meddra_all_se.tsv"
colnames = ['drug_id','a','b','c','d','adv_str']
drug_adv_df = pd.read_csv(all_side_effects_path,sep='\t',header=None,names=colnames)[['drug_id','adv_str']]

In [26]:
drug_adv_df

Unnamed: 0,drug_id,adv_str
0,CID100000085,Abdominal cramps
1,CID100000085,Abdominal pain
2,CID100000085,Abdominal pain
3,CID100000085,Gastrointestinal pain
4,CID100000085,Abdominal pain
...,...,...
309844,CID171306834,Respiratory failure
309845,CID171306834,Abnormal vision
309846,CID171306834,Visual impairment
309847,CID171306834,Warts


In [30]:
drug_df = pd.read_csv("data/gzs/drug_names.tsv",header=None,names=["id","name"],sep='\t')
drug_df

Unnamed: 0,id,name
0,CID100000085,carnitine
1,CID100000119,gamma-aminobutyric
2,CID100000137,5-aminolevulinic
3,CID100000143,leucovorin
4,CID100000146,5-methyltetrahydrofolate
...,...,...
1425,CID156603655,pegaptanib
1426,CID156842239,n-3
1427,CID170683024,x
1428,CID170695640,colestyramine


In [31]:
sider_drugs_set = set(drug_df['name'].values)
sider_id_to_name = {row['id']:row['name'] for _, row in drug_df.iterrows()}

In [32]:
drug_adv_df['drug_name'] = [sider_id_to_name[sider_id] for sider_id in drug_adv_df['drug_id'].values]

In [33]:
drug_adv_df

Unnamed: 0,drug_id,adv_str,drug_name
0,CID100000085,Abdominal cramps,carnitine
1,CID100000085,Abdominal pain,carnitine
2,CID100000085,Abdominal pain,carnitine
3,CID100000085,Gastrointestinal pain,carnitine
4,CID100000085,Abdominal pain,carnitine
...,...,...,...
309844,CID171306834,Respiratory failure,K779
309845,CID171306834,Abnormal vision,K779
309846,CID171306834,Visual impairment,K779
309847,CID171306834,Warts,K779


In [34]:
drug_adv_df.to_csv("data/intermediate/adverse_drugs.csv")