In [6]:
import pandas as pd
import os
import off_label
import concept_mapping

# Directory that holds the MIMIC data files (the ./data folder read by later cells).
# It defaults to the original author's checkout; set the OFF_LABEL_PROJECT_DIR
# environment variable to run the notebook elsewhere without editing this cell.
PROJECT_DIR = os.environ.get(
    'OFF_LABEL_PROJECT_DIR',
    '/Users/yc3972/Desktop/DBMI/Courses/GD1_Fall/G4003 Symbolic Methods/Project/off_label',
)
# Later cells open their files with relative paths, so they rely on this chdir.
os.chdir(PROJECT_DIR)

In [2]:
def _read_vocab_table(csv_path):
    """Load one tab-delimited vocabulary file, taking column names from its first row."""
    return pd.read_csv(csv_path, sep='\t', header=0)


# Drug vocabulary: MIMIC stores drug data as NDC codes, so load the concept table ...
drug_concept = _read_vocab_table('./data/DRUG_VOCAB/CONCEPT.csv')
# ... and the relationship table that answers "what is the RxNorm standard code for that drug?"
drug_mapping = _read_vocab_table('./data/DRUG_VOCAB/CONCEPT_RELATIONSHIP.csv')
# Diagnosis vocabulary (ICD / SNOMED mapping): concept table and relationship table
diag_concept = _read_vocab_table('./data/SNOMED_ICD/CONCEPT.csv')
diag_mapping = _read_vocab_table('./data/SNOMED_ICD/CONCEPT_RELATIONSHIP.csv')

  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_obj, self.user_global_ns, self.user_ns)


In [3]:
# Ask the project-local off_label module for the two tables used by the rest of the notebook:
# `diagnoses` (read below via 'icd_version' / 'icd_code') and `drugs` (read via 'drug' / 'ndc').
# NOTE(review): the printed message suggests one patient visit involving Gabapentin is picked — confirm in off_label.
diagnoses, drugs = off_label.returning_lists('Gabapentin') 

Searching for Patient 17868682 who has been given Gabapentin , for visit number 25218370.


### MIMIC NDC codes to RxNorm Ingredient

In [15]:
# Finding standard RxNorm codes for the drugs list from the OMOP vocabulary.
# std_drug_dict maps each drug name -> the ingredient result found for it
# (the displayed output shows a one-element list of concept ids per drug).
# The standard code could also be used to traverse across drugs:
# NDC codes --> RxNorm standard --> all other related ndc codes that map to that RxNorm code
std_drug_dict = {}
# Pair the two columns positionally so the loop does not depend on `drugs`
# having a default 0..n-1 index (the old `.loc[i, ...]` lookup did).
for drug, ndc in zip(drugs['drug'], drugs['ndc']):
    # Only the first resolvable row per drug name is kept, so skip repeats
    # before doing any vocabulary lookups for them.
    if drug in std_drug_dict:
        continue

    # ndc = 0 means there is no ndc code
    if ndc == '0':
        continue
    ndc_id = drug_concept.loc[drug_concept.concept_code == ndc, 'concept_id']

    if len(ndc_id) == 0: # there is no matching standard
        continue
    # Series.item() raises ValueError if the code matched more than one concept,
    # so an ambiguous NDC code is surfaced instead of being resolved arbitrarily.
    ndc_id = ndc_id.item()
    standard_id = concept_mapping.maps_to(drug_mapping, ndc_id)
    ingredient = concept_mapping.find_ingredient(drug)
    # if the rxnorm ingredient cannot be found directly (no hit, or more than one),
    # manually find it via relationships from the standard concept
    if len(ingredient) != 1:
        ingredient = [concept_mapping.find_ingredient2(standard_id)]

    std_drug_dict[drug] = ingredient

In [16]:
# Display the drug name -> ingredient mapping.
std_drug_dict

{'Acetaminophen': [1125315],
 'Docusate Sodium': [19126912],
 'Senna': [938268],
 'OxyCODONE (Immediate Release)': [1124957],
 'Gabapentin': [797399],
 'Tamsulosin': [924566],
 'Heparin': [1367571],
 'Ondansetron': [1000560],
 'BuPROPion XL (Once Daily)': [40221871],
 'Warfarin': [1310149],
 'Ketorolac': [1136980]}

In [18]:
# Take the first ingredient entry stored for Gabapentin ...
gaba_rxnorm = std_drug_dict['Gabapentin'][0]
# ... and pass it to the project-local find_NDFRT helper.
# NOTE(review): presumably this returns the NDF-RT concepts linked to the ingredient — confirm in concept_mapping.
gaba_ndfrt = concept_mapping.find_NDFRT(gaba_rxnorm)
gaba_ndfrt

[4272456, 4351183]

### Diagnosis ICD to OMOP codes

In [19]:
# Standard SNOMED mappings for all the ICD diagnoses given to this patient
# ICD codes --> OMOP standard codes --> SNOMED codes
def _format_icd_code(raw_code, version):
    """Return the ICD code with its decimal point inserted.

    The incoming code has no dot. The dot belongs after the third character,
    except for ICD-9 external-cause codes ('E' followed by digits), where it
    belongs after the fourth. A code too short to have a decimal part is
    returned unchanged. Exactly one dot is inserted, so 7-character ICD-10
    codes such as 'S72001A' become 'S72.001A' (not 'S72.001.A').
    """
    raw_code = raw_code.strip()
    split_at = 4 if (version == 'ICD9' and raw_code.startswith('E')) else 3
    if len(raw_code) <= split_at:
        return raw_code
    return raw_code[:split_at] + '.' + raw_code[split_at:]


# Maps "<ICD version> <dotted code>" -> list of 'Maps to' target concept ids.
std_diag_dict = {}

# Pair the columns positionally so the loop does not depend on `diagnoses`
# having a default 0..n-1 index.
for version_number, code_str in zip(diagnoses['icd_version'], diagnoses['icd_code']):
    version = 'ICD' + str(version_number)
    code = _format_icd_code(code_str, version)

    icd_match = diag_concept.loc[(diag_concept.concept_code == code)]
    if icd_match.shape[0] == 0:
        print("No match of that diagnosis on OMOP")
        continue
    # if both ICD and ICD CM match, then look for exact vocabulary source
    if icd_match.shape[0] > 1:
        candidates = icd_match.loc[icd_match.vocabulary_id == version, 'concept_id']
        if len(candidates) == 0:
            # The bare version name may not exist as a vocabulary id
            # (e.g. 'ICD9' vs 'ICD9CM'); fall back to the clinical modification.
            candidates = icd_match.loc[icd_match.vocabulary_id == version + 'CM', 'concept_id']
        if len(candidates) == 0:
            # Every hit belongs to some other vocabulary: treat as unmatched
            # instead of crashing on an empty selection.
            print("No match of that diagnosis on OMOP")
            continue
    # ICD may not match, but ICD CM might match. Use whatever we can
    else:
        candidates = icd_match['concept_id']
    # Series.item() raises ValueError if the choice is still ambiguous.
    icd_concept = candidates.item()

    std_diag = diag_mapping.loc[(diag_mapping.concept_id_1 == icd_concept) &
                                (diag_mapping.relationship_id == 'Maps to'), 'concept_id_2'].values.tolist()

    # Keep only the first mapping found for each distinct diagnosis.
    given_diag = str(version) + " " + str(code)
    if given_diag not in std_diag_dict:
        std_diag_dict[given_diag] = std_diag

In [20]:
# Display the "<ICD version> <code>" -> 'Maps to' concept id mapping.
std_diag_dict

{'ICD10 C49.5': [201231, 376647],
 'ICD10 G62.9': [4174262],
 'ICD10 I48.2': [4141360],
 'ICD10 I34.0': [443962],
 'ICD10 F32.9': [440383],
 'ICD10 E66.9': [433736],
 'ICD10 G47.33': [442588],
 'ICD10 K40.90': [4288544],
 'ICD10 N50.3': [4181782],
 'ICD10 R59.0': [4168700],
 'ICD10 Z79.01': [46273937],
 'ICD10 M48.00': [77079],
 'ICD10 Z68.33': [4060985],
 'ICD10 R91.1': [4142875],
 'ICD10 Z85.828': [4179242]}

In [52]:
# Translate the OMOP concept ids stored per diagnosis into SNOMED concept codes.
# snomed_diag: diagnosis key -> its SNOMED codes; snomed_list: all codes, flattened.
snomed_diag = {}
snomed_list = []
# The vocabulary filter is the same for every lookup, so build it once.
is_snomed_row = diag_concept.vocabulary_id == 'SNOMED'
for diag_key, omop_ids in std_diag_dict.items():
    for omop_id in omop_ids:
        id_matches = diag_concept.concept_id == omop_id
        codes = diag_concept.loc[id_matches & is_snomed_row, 'concept_code'].values.tolist()
        snomed_list.extend(codes)
        # A key is created only once a concept id has been looked up for it,
        # so diagnoses with no concept ids never get an entry.
        snomed_diag.setdefault(diag_key, []).extend(codes)

In [53]:
# Display the flattened list of SNOMED concept codes.
snomed_list

[188019007,
 372010005,
 42345000,
 426749004,
 48724000,
 35489007,
 414916001,
 78275009,
 396232000,
 43077002,
 274744005,
 711150003,
 76107001,
 162864005,
 427359005,
 429050006]

### SNOMED codes to all other related codes (standard to related non-standard)

In [60]:
# Gather all other related diagnosis OMOP concept codes for each diagnosis.
# With the entire OMOP set loaded this would look different, since it would
# contain more non-standard codes.
related_diag = {}
related_list = []
for diag_key, standard_ids in std_diag_dict.items():
    # An ICD code can have more than one standard concept; collect the results for each.
    for standard_id in standard_ids:
        relatives = concept_mapping.mapped_from(diag_mapping, standard_id)
        related_list.extend(relatives)
        collected = related_diag.get(diag_key)
        if collected is None:
            related_diag[diag_key] = relatives
        else:
            collected.extend(relatives)

In [61]:
# Display the flattened list of related concept ids (duplicates are kept).
related_list

[201231,
 35206233,
 45571515,
 376647,
 35206234,
 35206236,
 44830975,
 44830976,
 44836833,
 45537817,
 45542611,
 45552258,
 45552261,
 45556989,
 45571515,
 45576320,
 1568397,
 1568398,
 4144933,
 4174262,
 35207458,
 40391593,
 40392003,
 44794718,
 44799712,
 45547755,
 45552543,
 45566898,
 45581510,
 45595957,
 3519154,
 3547903,
 1553753,
 1569171,
 4141360,
 45591467,
 443962,
 40399349,
 45601031,
 3528972,
 440383,
 4103573,
 4141235,
 35207154,
 37200320,
 40322275,
 44782706,
 44794775,
 44796370,
 44796936,
 44797200,
 44797412,
 44798442,
 44800512,
 44833421,
 45571759,
 45571760,
 45595904,
 433736,
 1568022,
 35207023,
 35207024,
 40272782,
 40321247,
 40321249,
 40351293,
 44813376,
 44833387,
 45533036,
 45547648,
 45576460,
 45591051,
 442588,
 40396500,
 44825323,
 45552539,
 3538308,
 3538310,
 3538311,
 3538312,
 3538691,
 3538694,
 1569582,
 1569588,
 4025008,
 4025009,
 4025665,
 4057085,
 4057086,
 4057530,
 4057532,
 4222781,
 4288544,
 40346380,
 4043849

In [22]:
# Re-import the project-local helper module so that edits to concept_mapping.py
# take effect without restarting the kernel. importlib.reload replaces imp.reload:
# the imp module is deprecated and was removed in Python 3.12.
import importlib
importlib.reload(concept_mapping)

<module 'concept_mapping' from '/Users/yc3972/Desktop/DBMI/Courses/GD1_Fall/G4003 Symbolic Methods/Project/off_label/concept_mapping.py'>

In [33]:
### Patient's drug list --> Indication database --> matching diagnosis codes
# Read the Gabapentin indication file (comma-separated, column names in the first row)
# and store the SNOMED concept ids as int64, with 0 standing in for missing ids.
gaba_indication = pd.read_csv("Gabapentin_indications_drugcentral.csv", sep = ',', header =0).assign(
    snomed_conceptid=lambda frame: frame['snomed_conceptid'].fillna(0).astype('int64')
)

In [63]:
# All the snomed concepts from gabapentin indication conditions, as a plain Python list.
# Entries equal to 0 are the fillna(0) placeholders for rows that had no SNOMED id.
test = gaba_indication['snomed_conceptid'].values.tolist()
test

[35489007,
 6471006,
 90708001,
 2177002,
 32914008,
 0,
 198436008,
 82423001,
 418363000,
 197480006,
 203082005,
 193462001,
 418363000,
 7200002,
 68154008,
 422587007,
 247398009,
 107401000119105,
 609558009,
 716771000,
 42984000]

In [62]:
# are any of the gabapentin indications in the related list?
# NOTE(review): `test` holds values from the 'snomed_conceptid' column, while related_list
# is built by concept_mapping.mapped_from from 'concept_id_2' values. These may be two
# different identifier systems, which would make every check False — confirm both
# sides use the same kind of code before reading anything into the result.
for concept in test:
    print(concept in related_list)

False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False


In [65]:
# Same membership check against snomed_list, which holds the 'concept_code' values
# of SNOMED-vocabulary rows. Prints one True/False line per entry of `test`,
# including the 0 placeholder used for missing ids.
for concept in test:
    print(concept in snomed_list)

True
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
