**Question: Can we identify novel off-label use for early-onset and late-onset Asthma respectively?**
* Find age of diagosis for Asthma patients
* Find medications (and RxNorm codes for meds) taken by Asthma patients
* Find all diagnoses of Asthma patients
* Classify medications taken by each patient as on- or off- label drug use
* Find medication classes for medications taken by Asthma patients
* Compare medication classes taken by patients diagosed with Asthma during childhood vs during adulthood

In [1]:
## All the imports we need
from urllib2 import Request, urlopen, URLError
from urllib import quote_plus

import mysql.connector

import pprint, json, requests

from datetime import datetime, timedelta
from dateutil.parser import parse as parse_date
from greentranslator.api import GreenTranslator

import dateutil

**Step 1: Find patients diagnosed with Asthma as a child (Group 1)**

In [2]:
## Find HUSH+ patients matching a list of ICD codes as children

query = GreenTranslator ().get_query ()

HUSHplusChildren = query.clinical_get_patients (age='8', sex='male', 
                                        race='white', location='OUTPATIENT')
##pprint.pprint (HUSHpluspatients)

Initializing thread provenance @thread: MainThread


ValueError: No JSON object could be decoded

**Step 2: Find patients diagnosed with Asthma as an adult (Group 2)**

In [3]:
## Find HUSH+ patients matching a list of ICD codes as adults

query = GreenTranslator ().get_query ()

HUSHplusAdults = query.clinical_get_patients (age='8', sex='male', 
                                        race='white', location='OUTPATIENT')
##pprint.pprint (HUSHpluspatients)

ValueError: No JSON object could be decoded

** Step 3: Find medications for patients in Group 1 and Group 2 **

In [4]:
## Pull meds for HUSH+ patients
meds = {}
for x in HUSHplusChildren:
    medList = x['medList']
    # Collect the unique meds
    for m in medList.keys():
        found = False
        try:
            meds[medList[m]] = meds[medList[m]]+1
        except KeyError:
            meds[medList[m]] = 1
pprint.pprint(meds)

NameError: name 'HUSHplusChildren' is not defined

** Step 4: Find RxNorm codes for meds prescribed in patients from Group 1 and Group 2 **

In [5]:
## For a given medication string get NCBO annotations
## We let NCBO match any ontology since just using RxNORM doesn't
## always give us just the drug name (e.g., "CLINDAMYCIN 15 MG/ML ORAL SOLUTION" is
## a valid RxNORM term)
def med2rxnorm(txt):
    url = 'http://data.bioontology.org/annotator?text=%s&apikey=b792dd1b-cdc2-4cc8-aaf2-4fa4fbf47e4e'
    txt = urlopen(url % quote_plus(txt)).read()
    resp = json.loads(txt)
    if len(resp) == 0: return([])
    annos = []
    for aresp in resp:
        annos.extend([ x['text'] for x in aresp['annotations'] ])
    ##annos = filter(lambda x: not any(d in x for d in'0123456789'), annos)
    return(annos)
print med2rxnorm("CLINDAMYCIN 15 MG/ML ORAL SOLUTION")        

[u'CLINDAMYCIN 15 MG/ML ORAL SOLUTION', u'CLINDAMYCIN 15 MG/ML', u'CLINDAMYCIN', u'ORAL SOLUTION']


In [6]:
## Get RxNORM codes for medication strings
medrxnorm = {}
for med in meds.keys():
    if med is None: continue    
    annos = med2rxnorm(med)
    print 'Processing %s and found %d annotations' % (med, len(annos))
    medrxnorm[med] = {'count':meds[med], 'annos':annos}    

** Step 5: Find all indications (as ICD-10 codes) for any of the drugs prescribed in patients from Group 1 and Group 2 **

In [7]:
## Given a drug (identified by RXNORM), get conditions that the drug has a 
## status (approved, phase 3) for, via NDK API
def drug2conditions(drug):
    import urllib
    url = "https://tripod.nih.gov/ndk/treatment/%s/conditions" % (drug)
    page = urllib.urlopen(url).read().strip()
    if page == "":
        return None
    #print page
    try:
        resp = json.loads(page)
    except ValueError, HTTPError:
        return None
    conds = []
    for aresp in resp:
        condname = aresp['name']
        if 'ICD10' in aresp.keys():
            condicd10 = aresp['ICD10']
        else: condicd10 = []
        conds.append( (aresp['status'], condname, condicd10) )
    return(conds)

# Given a medication try all annotations to find conditions
def med2conditions(m):
    annos = medrxnorm[m]['annos']
    for a in annos:
        r = drug2conditions(a)
        if (r != None):
            return r
    return None

In [10]:
keys = list(medrxnorm.keys())
n = 10
annos = medrxnorm[keys[n]]['annos']
conds = []
for anno in annos:
    conds.extend(drug2conditions(anno))
print "##", keys[n], "##", conds

IndexError: list index out of range

In [8]:
## Get approval status for each medication in a list--Generate a dictionary that contains a dictionary for each
## medication with the approval status for each condition (by ICD10 code)
def approvalStatus(meds):
    approved = {}
    phase4 = {}
    phase3 = {}
    phase2 = {}
    phase1 = {}
    unknownMeds = []
    for m in meds:
        if not(m==None):
            x = med2conditions(m)
            #print "drug conditions"
            #print x
            if x==None:
                print "Unknown mediction conditions for "+m
                print medrxnorm[m]['annos']
                unknownMeds.append(m)
            else:
                for d in x:
                    #print "Med info"
                    #print d
                    if d[0]=="Approved":
                        for c in d[2]:
                            try:
                                approved[m].append(c)
                            except KeyError:
                                approved[m] = [c]
                    if d[0]=="Phase 4":
                        for c in d[2]:
                            try:
                                phase4[m].append(c)
                            except KeyError:
                                phase4[m] = [c]
                    if d[0]=="Phase 3":
                        for c in d[2]:
                            try:
                                phase3[m].append(c)
                            except KeyError:
                                phase3[m] = [c]
                    if d[0]=="Phase 2":
                        for c in d[2]:
                            try:
                                phase2[m].append(c)
                            except KeyError:
                                phase2[m] = [c]
                    if d[0]=="Phase 1":
                        for c in d[2]:
                            try:
                                phase1[m].append(c)
                            except KeyError:
                                phase1[m] = [c]

    return (approved,phase4,phase3,phase2,phase1,unknownMeds)


In [9]:
## For a given patient record (HUSH+ format), return the the approval status for each of the patient's medications
def patientMedicationStatus(p):
    diags = []
    #print "Diag codes"
    for d in p['diag']:
        if d[0:6]=="ICD10:":
            #print d
            diags.append(d[6:])
    med = []
    for m in p['medList'].keys():
        med.append(p['medList'][m])
    #print "Meds"
    #print med
    (approved,phase4,phase3,phase2,phase1,unknownMeds) = approvalStatus(med)

    drugStatus = {}
    for m in p['medList'].keys():
        if m in unknownMeds:
            status = "Unknown"
        else:
            x = p['medList'][m]
            #print x
            try:
                a = approved[x]
            except KeyError:
                a = []
            try:
                p4 = phase4[x]
            except KeyError:
                p4 = []
            try:
                p3 = phase3[x]
            except KeyError:
                p3 = []
            try:
                p2 = phase2[x]
            except KeyError:
                p2 = []
            try:
                p1 = phase1[x]
            except KeyError:
                p1 = []

            if len(annos)==0:
                status = "Unknown"
            else:
                status = "offLabel"
            for d in diags:
                if d in p1:
                    status = "Phase1"
                if d in p2:
                    status = "Phase2"
                if d in p3:
                    status = "Phase3"
                if d in p4:
                    status = "Phase4"
                if d in a:
                    status = "Approved"
        drugStatus[x] = status
    return drugStatus


In [10]:
#compute drug status info for all child patients
patientDrugInfo = {}
for p in HUSHplusChildren:
    print "Processing "+p['patient_id']
    patientDruginfo[p['patient_id']] = patientMedicationStatus(p)

pprint.pprint(patientDrugInfo)

NameError: name 'HUSHplusChildren' is not defined

** Step 6: Find all diagnoses of Asthma patients in Group 1 and Group 2**

In [None]:
## Find all diagnoses of HUSH+ patients in Group 1
## Find all diagnoses of HUSH+ patients in Group 2

**Step 7: For both groups, classify medications taken by each patient as on- or off- label drug use (patient by drug matrix)**

In [None]:
## Create patient by drug matrix

** Step 8: Create patient by drug class matrix **

In [None]:
## Create patient by drug class matrix

**Step 9: Assess off-label drug use in childhood-onset vs adult-onset Asthma patients **

In [8]:
## Compare patient x drug class for Group 1 vs Group 2