# Mapping between ICD-9 and the SNOMED CT Ontology

In [41]:
from typing import List, Any, Dict
import pandas as pd
from pymedtermino.snomedct import *
import pickle
sys.path.append('..')
from lib.icd_9_snomed_mapping import ICD9SnomedMapping

In [42]:
# Load the tab-separated ICD-9-CM -> SNOMED map files.
# Judging by the file names, "1TO1"/"1T1" are one-to-one maps and
# "1TOM"/"1TM" are one-to-many maps; the "ICD9CMV3" files are bound to the
# *_procedures_* variables.
# NOTE(review): the two procedure frames are loaded here but are not
# referenced again in the visible part of this notebook.
ICD9_snomed_11 = pd.read_csv(
    '../data/icd9_mapping/ICD9CM_SNOMED_MAP_1TO1_202012.txt', sep='\t'
)
ICD9_snomed_1M = pd.read_csv(
    '../data/icd9_mapping/ICD9CM_SNOMED_MAP_1TOM_202012.txt', sep='\t'
)
ICD9_snomed_procedures_11 = pd.read_csv(
    '../data/icd9_mapping/ICD9CMV3_SNOMED_MAP_1T1_202012.txt', sep='\t'
)
ICD9_snomed_procedures_1M = pd.read_csv(
    '../data/icd9_mapping/ICD9CMV3_SNOMED_MAP_1TM_202012.txt', sep='\t'
)

In [43]:
# Stack the two diagnosis map frames (1-to-1 first, then 1-to-many) into a
# single frame. Row labels are kept from the source frames, so index values
# may repeat across the two halves.
ICD_9_mapping = pd.concat(
    [
        ICD9_snomed_11,
        ICD9_snomed_1M,
    ]
)

In [44]:
# Remove every "." from the ICD-9 codes (a code written "250.80" becomes
# "25080"). The vectorized string accessor replaces the per-element lambda;
# regex=False makes "." a literal dot rather than the regex wildcard, and
# missing codes stay missing instead of raising AttributeError.
ICD_9_mapping['ICD_CODE'] = ICD_9_mapping['ICD_CODE'].str.replace(".", "", regex=False)
# Displayed as the cell output: (rows, columns) of the combined map.
ICD_9_mapping.shape

(49217, 12)

In [45]:
# Preview the first five rows of the combined ICD-9 -> SNOMED map.
ICD_9_mapping.head(n=5)

Unnamed: 0,ICD_CODE,ICD_NAME,IS_CURRENT_ICD,IP_USAGE,OP_USAGE,AVG_USAGE,IS_NEC,SNOMED_CID,SNOMED_FSN,IS_1-1MAP,CORE_USAGE,IN_CORE
0,42731,Atrial fibrillation,1,1.89778,3.20644,2.55211,0.0,49436004.0,Atrial fibrillation (disorder),1,0.499,1
1,5990,"Urinary tract infection, site not specified",1,2.13933,1.6533,1.896315,0.0,68566005.0,Urinary tract infectious disease (disorder),1,0.5362,1
2,486,"Pneumonia, organism unspecified",1,3.4176,0.30077,1.859185,0.0,233604007.0,Pneumonia (disorder),1,0.4722,1
3,5856,End stage renal disease,1,0.04875,3.23327,1.64101,0.0,46177005.0,End-stage renal disease (disorder),1,0.0057,1
4,41401,Coronary atherosclerosis of native coronary ar...,1,2.58624,0.68026,1.63325,0.0,53741008.0,Coronary arteriosclerosis (disorder),1,0.8621,1


In [46]:
# Map each (dot-less) ICD-9 code to its description (ICD_NAME).
# Pairs are consumed in row order, so when a code occurs on several rows the
# last row's description wins - the same result as assigning row by row,
# without building a Series per row via iterrows().
dictionary = dict(zip(ICD_9_mapping['ICD_CODE'], ICD_9_mapping['ICD_NAME']))

In [47]:
# Persist the ICD-9 code -> description dictionary.
# The context manager guarantees the file is flushed and closed even if
# pickling fails, instead of leaving the handle to the garbage collector.
with open("../data/icd9_mapping/mapping_icd9_description.pkl", "wb") as pkl_file:
    pickle.dump(dictionary, pkl_file)

## Extract the mapping between ICD-9 codes and SNOMED IDs

In [48]:
# Build `mapping`: ICD-9 code -> list of SNOMED concept IDs (as ints), with
# the IDs of each code kept in row order. Codes without any usable SNOMED ID
# are left out of the dictionary.
# NOTE(review): SNOMED_CID is displayed as a float column above; very long
# IDs could lose precision before int() is applied - confirm against the data.

# Kept for any later cell that refers to the list of distinct codes.
ICD_9_list = ICD_9_mapping['ICD_CODE'].unique()

mapping: Dict[Any, List[int]] = {}
# A single grouped pass replaces re-filtering the whole frame once per code.
# sort=False yields the codes in order of first appearance, i.e. the same
# order as ICD_9_list.
for code, cid_series in ICD_9_mapping.groupby('ICD_CODE', sort=False)['SNOMED_CID']:
    snomed_ids = []
    for cid in cid_series.values:
        try:
            snomed_ids.append(int(cid))
        except (TypeError, ValueError, OverflowError):
            # Missing (NaN), non-numeric or infinite IDs cannot be converted;
            # skip them on purpose, but let unrelated errors (and Ctrl-C)
            # propagate instead of being swallowed by a bare except.
            continue
    if snomed_ids:
        mapping[code] = snomed_ids

In [49]:
# Persist the ICD-9 code -> SNOMED IDs mapping.
# The context manager guarantees the file is flushed and closed even if
# pickling fails, instead of leaving the handle to the garbage collector.
with open("../data/icd9_mapping/mapping_icd9_snomed.pkl", "wb") as pkl_file:
    pickle.dump(mapping, pkl_file)

In [50]:
# Reload the ICD-9 code -> SNOMED IDs mapping from disk, closing the file
# handle as soon as unpickling finishes.
# NOTE: pickle.load can execute arbitrary code; only load this file when it
# was produced by this notebook or comes from another trusted source.
with open("../data/icd9_mapping/mapping_icd9_snomed.pkl", "rb") as pkl_file:
    mapping = pickle.load(pkl_file)

# Extract relations

In this section we extract all the relations associated with the SNOMED IDs and we explain how we create the dictionary of relations.

## Example of due_to dictionary

For each ICD-9 code we consider all the corresponding SNOMED codes and then all the corresponding relations.

Example:
- The ICD-9 code 250.80 (stored as `25080` once the dot is removed) corresponds to 72 SNOMED IDs.
- The entry of the due_to dictionary for the ICD-9 code 250.80 is the following set:

 {'Cystic fibrosis',
 'Diabetes mellitus',
 'Diabetes mellitus type 2',
 'Disease of endocrine pancreas',
 'Immune hypersensitivity reaction',
 'Insulin resistance - type B',
 'Ischemia',
 'Peripheral nerve disease',
 'Vascular disorder of lower extremity',
 'Venous stasis syndrome'}

- In this dictionary we merged the due_to relations corresponding to all the SNOMED IDs, for example:
    - Diabetes mellitus type 2 -> SNOMED ID 422014003
    - Diabetes mellitus -> SNOMED ID 371087003


### Extract finding sites

In [51]:
# Collect the 'finding_site' relation for the SNOMED IDs of every ICD-9 code
# in `mapping`.
# Presumably returns one merged entry per ICD-9 code - confirm against
# lib/icd_9_snomed_mapping.
finding_site_dict = ICD9SnomedMapping().extract_relations(
    mapping,
    'finding_site',
)

In [52]:
# Persist the finding-site relations.
# The context manager guarantees the file is flushed and closed even if
# pickling fails, instead of leaving the handle to the garbage collector.
with open("../data/mapping_relations/finding_site.pkl", "wb") as pkl_file:
    pickle.dump(finding_site_dict, pkl_file)

### Extract causative agents

In [53]:
# Collect the 'causative_agent' relation for the SNOMED IDs of every ICD-9
# code in `mapping`.
# Presumably returns one merged entry per ICD-9 code - confirm against
# lib/icd_9_snomed_mapping.
causative_agent_dict = ICD9SnomedMapping().extract_relations(
    mapping,
    'causative_agent',
)

In [54]:
# Persist the causative-agent relations.
# The context manager guarantees the file is flushed and closed even if
# pickling fails, instead of leaving the handle to the garbage collector.
with open("../data/mapping_relations/causative_agent.pkl", "wb") as pkl_file:
    pickle.dump(causative_agent_dict, pkl_file)

### Extract associated morphology

In [55]:
# Collect the 'associated_morphology' relation for the SNOMED IDs of every
# ICD-9 code in `mapping`.
# Presumably returns one merged entry per ICD-9 code - confirm against
# lib/icd_9_snomed_mapping.
associated_morphology_dict = ICD9SnomedMapping().extract_relations(
    mapping,
    'associated_morphology',
)

In [56]:
# Persist the associated-morphology relations.
# The context manager guarantees the file is flushed and closed even if
# pickling fails, instead of leaving the handle to the garbage collector.
with open("../data/mapping_relations/associated_morphology.pkl", "wb") as pkl_file:
    pickle.dump(associated_morphology_dict, pkl_file)

### Extract due to

In [57]:
# Collect the 'due_to' relation for the SNOMED IDs of every ICD-9 code in
# `mapping`.
# Presumably returns one merged entry per ICD-9 code - confirm against
# lib/icd_9_snomed_mapping.
due_to_dict = ICD9SnomedMapping().extract_relations(
    mapping,
    'due_to',
)

In [58]:
# Persist the due-to relations.
# The context manager guarantees the file is flushed and closed even if
# pickling fails, instead of leaving the handle to the garbage collector.
with open("../data/mapping_relations/due_to.pkl", "wb") as pkl_file:
    pickle.dump(due_to_dict, pkl_file)

## Extract Description

In [59]:
# Collect the 'description' entry for the SNOMED IDs of every ICD-9 code in
# `mapping`, using the same extraction call as the relation cells.
# Presumably returns one merged entry per ICD-9 code - confirm against
# lib/icd_9_snomed_mapping.
description_dict = ICD9SnomedMapping().extract_relations(
    mapping,
    'description',
)

In [60]:
# Persist the SNOMED descriptions.
# The context manager guarantees the file is flushed and closed even if
# pickling fails, instead of leaving the handle to the garbage collector.
with open("../data/mapping_relations/description.pkl", "wb") as pkl_file:
    pickle.dump(description_dict, pkl_file)