#### ■ Sample Code for 'umls_converter.py'

##### Data handler for the Unified Medical Language System (UMLS)
[UMLS](https://www.nlm.nih.gov/research/umls/licensedcontent/umlsknowledgesources)


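--- record example (one pipe-delimited row of MRCONSO.RRF) ---
- C0001041|ENG|P|L0001041|PF|S0414245|N|A31756755|12252053|194||RXNORM|IN|194|acetylcholine|0|N|4352|
- fields: CUI|LAT|TS|LUI|STT|SUI|ISPREF|AUI|SAUI|SCUI|SDUI|SAB|TTY|CODE|STR|SRL|SUPPRESS|CVF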

***
#### ① MeSH --> UMLS CUI

In [1]:
import os
import sys

# Location of the UMLS_Handler checkout that contains umls_converter.py.
# Set the UMLS_HANDLER_DIR environment variable to point somewhere else;
# when it is unset the original hard-coded location is used.
BASE_DIR = os.environ.get(
    'UMLS_HANDLER_DIR',
    '/workspace/home/azuma/DDI/github/UMLS_Handler',
)

# Guard the append so re-running this cell does not stack duplicate entries
# on sys.path.
if BASE_DIR not in sys.path:
    sys.path.append(BASE_DIR)
import umls_converter as uc

In [2]:
dat = uc.Target2CUI(mrconso_path='/workspace/mnt/data1/Azuma/DDI_HDD1/UMLS/MRCONSO.RRF')

# "MeSH" is intentionally not a valid target name: set_target prints the list
# of accepted names and raises ValueError. The error is caught and printed so
# the demonstration stays visible without aborting a Restart & Run All.
try:
    dat.set_target(name="MeSH",candi_path=BASE_DIR+'/data/target_candi.pkl')
except ValueError as err:
    print(f"{type(err).__name__}: {err}")

['AIR', 'ALT', 'AOD', 'AOT', 'ATC', 'BI', 'CCC', 'CCPSS', 'CCS', 'CCSR_ICD10CM', 'CCSR_ICD10PCS', 'CDCREC', 'CDT', 'CHV', 'COSTAR', 'CPM', 'CPT', 'CPTSP', 'CSP', 'CST', 'CVX', 'DDB', 'DMDICD10', 'DMDUMD', 'DRUGBANK', 'DSM-5', 'DXP', 'FMA', 'GO', 'GS', 'HCDT', 'HCPCS', 'HCPT', 'HGNC', 'HL7V2.5', 'HL7V3.0', 'HPO', 'ICD10', 'ICD10AE', 'ICD10AM', 'ICD10AMAE', 'ICD10CM', 'ICD10DUT', 'ICD10PCS', 'ICD9CM', 'ICF', 'ICF-CY', 'ICNP', 'ICPC', 'ICPC2EDUT', 'ICPC2EENG', 'ICPC2ICD10DUT', 'ICPC2ICD10ENG', 'ICPC2P', 'ICPCBAQ', 'ICPCDAN', 'ICPCDUT', 'ICPCFIN', 'ICPCFRE', 'ICPCGER', 'ICPCHEB', 'ICPCHUN', 'ICPCITA', 'ICPCNOR', 'ICPCPOR', 'ICPCSPA', 'ICPCSWE', 'JABL', 'KCD5', 'LCH', 'LCH_NW', 'LNC', 'LNC-DE-AT', 'LNC-DE-DE', 'LNC-EL-GR', 'LNC-ES-AR', 'LNC-ES-ES', 'LNC-ET-EE', 'LNC-FR-BE', 'LNC-FR-CA', 'LNC-FR-FR', 'LNC-IT-IT', 'LNC-KO-KR', 'LNC-NL-NL', 'LNC-PT-BR', 'LNC-RU-RU', 'LNC-TR-TR', 'LNC-ZH-CN', 'MCM', 'MDR', 'MDRBPO', 'MDRCZE', 'MDRDUT', 'MDRFRE', 'MDRGER', 'MDRHUN', 'MDRITA', 'MDRJPN', 'MDRKOR',

ValueError: !! Inappropriate target SAB ; Choose from above list !!

#### ✖ `MeSH` --> 〇 `MSH` (the target name must be the UMLS source abbreviation, SAB)

In [3]:
# Start again from a fresh converter, this time with the accepted name "MSH".
dat = uc.Target2CUI(
    mrconso_path='/workspace/mnt/data1/Azuma/DDI_HDD1/UMLS/MRCONSO.RRF',
)
dat.set_target(
    name="MSH",
    candi_path=f'{BASE_DIR}/data/target_candi.pkl',
)

# Presumably: narrow_lines() keeps only the records of the chosen target and
# create_dic() builds the lookup dicts from them -- confirm in umls_converter.py.
dat.narrow_lines()
dat.create_dic()

Nice ! ^^
Target name: MSH


16543671it [02:11, 125949.91it/s]


--- extract target records ---
1004434 / 16543671 records were extracted


100%|██████████| 1004434/1004434 [00:04<00:00, 204068.59it/s]


#### You can obtain the lookup dictionaries as follows

In [4]:
# Grab both lookup dictionaries from the converter in a single statement.
name2cui, code2cui = dat.name2cui, dat.code2cui

# Print the CUIs and names linked to one target code of interest.
dat.get_codeinfo(codeID='D008103')

('C0023890', {'D008103'})
('C0239946', {'D008103'})
--- Result ---
query = D008103
UMLS CUI : ['C0023890', 'C0239946']
name : [{'Liver Cirrhosis', 'Cirrhosis, Liver', 'Cirrhosis, Hepatic', 'Hepatic Cirrhosis'}, {'Liver Fibrosis', 'Fibrosis, Liver'}]


***
#### ② DrugBankID --> UMLS CUI

In [8]:
# Same steps as the MSH example, with DRUGBANK as the target name.
dat = uc.Target2CUI(
    mrconso_path='/workspace/mnt/data1/Azuma/DDI_HDD1/UMLS/MRCONSO.RRF',
)
dat.set_target(
    name="DRUGBANK",
    candi_path=f'{BASE_DIR}/data/target_candi.pkl',
)
dat.narrow_lines()
dat.create_dic()

# Grab both lookup dictionaries from the converter in a single statement.
name2cui, code2cui = dat.name2cui, dat.code2cui

# Show the normalized information about the target code of your interest
dat.get_codeinfo(codeID='DB00316')

Nice ! ^^
Target name: DRUGBANK


16543671it [02:14, 123225.20it/s]


--- extract target records ---
28211 / 16543671 records were extracted


100%|██████████| 28211/28211 [00:00<00:00, 55186.10it/s]

('C0000970', {'DB00316'})
('C1258986', {'DB00316'})
('C1950939', {'DB00316'})
--- Result ---
query = DB00316
UMLS CUI : ['C0000970', 'C1258986', 'C1950939']
name : [{'Paracetamol', 'Paracetamolum', 'p-acetaminophenol', '4-acetamidophenol', 'p-acetamidophenol', 'p-Acetylaminophenol', "4'-hydroxyacetanilide", 'N-acetyl-p-aminophenol', 'APAP', 'Acetaminophen', '4-(Acetylamino)phenol', 'Acenol', 'p-hydroxy-acetanilid', 'p-hydroxyacetanilide', 'Paracétamol', 'Acetaminofén'}, {'p-hydroxyphenolacetamide'}, {'Acétaminophène'}]



