**Required**
* Connect to FHIR Synthetic Patient Data
* Connect to OHDSI Synthetic Patient Data
* Connect to HUSH+ Synthetic Patient Data

**Question: What are potentially diagnostic cluster groups to inform drug selection?**
* Find patients diagnosed with Asthma
* Find symptoms for Asthma
* Find occurences of symptoms in Asthma patients
* Find symptom clusters among Asthma patients

**Question: Are there differences in exposures among patients diagnosed with Asthma during childhood vs during adulthood?**
* Find age of diagosis for Asthma patients
* Find zipcode for Asthma patients
* Find exposures by zipcode for Asthma patients
* Compare exposures of patients diagnosed with Asthma during childhood vs during adulthood

**Question: Are there differences in medications prescribed off-label by patients diagnosed with Asthma during childhood vs during adulthood?**
* Find age of diagosis for Asthma patients
* Find medications taken by Asthma patients
* Find medication classes for medications taken by Asthma patients
* Compare medication classes taken by patients diagosed with Asthma during childhood vs during adulthood

In [54]:
## All the imports we need
from urllib2 import Request, urlopen, URLError
from urllib import quote_plus

import mysql.connector

import pprint, json, requests

from datetime import datetime, timedelta
from dateutil.parser import parse as parse_date
from greentranslator.api import GreenTranslator

import dateutil

DISEASE2SYMPTOMS = [x.split("\t") for x in urlopen("https://www.nature.com/article-assets/npg/ncomms/2014/140626/ncomms5212/extref/ncomms5212-s4.txt").read().split("\n")]
DISEASE2SYMPTOMS = filter(lambda x: len(x) == 4, DISEASE2SYMPTOMS)
#print DISEASE2SYMPTOMS

In [28]:
#connect to UMLS
try:
    cnx = mysql.connector.connect(user='tadmin',
                                password='ncats_translator!',
                                database='umls',
                                host='translator.ceyknq0yekb3.us-east-1.rds.amazonaws.com')
except mysql.connector.Error as err:
    if err.errno == errorcode.ER_ACCESS_DENIED_ERROR:
        print("Something is wrong with your user name or password")
    elif err.errno == errorcode.ER_BAD_DB_ERROR:
        print("Database does not exist")
    else:
        print(err)
else:
    print
    #cnx.close()




In [56]:
#Define functions
def findICD10(name):
    cursor = cnx.cursor()
    query = ("SELECT CUI FROM umls.MRCONSO WHERE STR='"+name+"'")
    cursor.execute(query, ())
    res = "Undef"
    for code in cursor:
        if res=="Undef":
            res = code
    if res != "Undef":
        query = ("SELECT CODE FROM umls.MRCONSO WHERE SAB='ICD10' AND CUI='"+res[0]+"'")
        cursor.execute(query, ())
        icd10 = "Undef"
        for code in cursor:
            icd10 = code
        return (icd10[0])
    return ("Undef")

def findICD9(name):
    cursor = cnx.cursor()
    query = ("SELECT CUI FROM umls.MRCONSO WHERE STR='"+name+"'")
    cursor.execute(query, ())
    res = "Undef"
    for code in cursor:
        if res=="Undef":
            res = code
    if res != "Undef":
        query = ("SELECT CODE FROM umls.MRCONSO WHERE SAB='ICD9CM' AND CUI='"+res[0]+"'")
        cursor.execute(query, ())
        icd9 = "Undef"
        for code in cursor:
            icd9 = code
        return (icd9[0])
    return ("Undef")

In [57]:
# Find Asthma ICD10 codes
asthmaCodes = findICD10('Asthma')
#findICD10('Asthma')
findICD9('Fever')

u'780.60'

In [39]:
## FHIR patient data
import requests
import urllib2
import json

#base_url = 'http://ictrweb.johnshopkins.edu/rest/synthetic'
#patients = base_url+"/Patient"+"?_count=50"

#req = requests.get(patients)

def findPatients(code):
    try:
        response = urllib2.urlopen("http://ictrweb.johnshopkins.edu/rest/synthetic/Condition?icd_10="+code+"")
    except:
        print "I can't find patients"
        exit()
    html = response.read()
    return (html)

In [32]:
def findAgeZipcode(stuff):
    res = []
    for p in stuff['entry']:
        d1 = p['assertedDate']
        id = p['subject']['reference'][8:]
        url = 'http://ictrweb.johnshopkins.edu/rest/synthetic/Patient/'+id
        txt = urlopen(url).read()
        st = json.loads(txt)
        #pprint.pprint(st)
        d2 = st['birthDate']
        zip = st['address'][0]['postalCode']
        d1 = dateutil.parser.parse(d1)
        d2 = dateutil.parser.parse(d2)
        #print d1
        #print d2
        diff = d1 - d2
        #print diff
        url = 'https://www.zipcodeapi.com/rest/lPMf5jmnBdclCZWkQwlFPJO6HkolG4N1TzgZSDnuRAPtzLOqi957STdzeBVVFIWz/multi-info.json/'+zip+'/degrees'
        txt = urlopen(url).read()
        st = json.loads(txt)
        #pprint.pprint(st)
        lat = st[zip]['lat']
        lng = st[zip]['lng']
        #print lat
        #print lng
        #print diff.days
        t = (id,d2,d1,diff.days,zip,lat,lng)
        res.append(t)
    return(res)

In [37]:
## Find FHIR patients with asthma codes
for c in asthmaCodes:
    
    ## Get asthma patients
    url = "http://ictrweb.johnshopkins.edu/rest/synthetic/Condition?icd_10="+c+"&_count=1000"
    txt = urlopen(url).read()
    stuff = json.loads(txt)
    pprint.pprint(findAgeZipcode(stuff)) 
    

[(u'649564183',
  datetime.datetime(1962, 8, 25, 0, 0),
  datetime.datetime(1987, 5, 8, 0, 0),
  9022,
  u'20011',
  58.4331,
  -24.310601),
 (u'654674639',
  datetime.datetime(1959, 11, 19, 0, 0),
  datetime.datetime(1993, 12, 12, 0, 0),
  12442,
  u'20002',
  44.634616,
  -54.06544),
 (u'769015582',
  datetime.datetime(1943, 3, 17, 0, 0),
  datetime.datetime(1949, 8, 26, 0, 0),
  2354,
  u'20002',
  49.791236,
  -34.584875),
 (u'734758032',
  datetime.datetime(1946, 8, 3, 0, 0),
  datetime.datetime(2006, 6, 23, 0, 0),
  21874,
  u'20017',
  76.180468,
  -37.458263),
 (u'586683517',
  datetime.datetime(1942, 9, 11, 0, 0),
  datetime.datetime(1951, 2, 6, 0, 0),
  3070,
  u'20020',
  46.881815,
  -40.30987),
 (u'363706743',
  datetime.datetime(2003, 1, 3, 0, 0),
  datetime.datetime(2006, 12, 9, 0, 0),
  1436,
  u'20016',
  53.260896,
  -43.858198),
 (u'820157583',
  datetime.datetime(1973, 11, 22, 0, 0),
  datetime.datetime(1991, 6, 20, 0, 0),
  6419,
  u'20020',
  77.248578,
  -74.6873

  -43.174659),
 (u'423997517',
  datetime.datetime(1953, 3, 26, 0, 0),
  datetime.datetime(1973, 7, 21, 0, 0),
  7422,
  u'20816',
  65.885963,
  -19.824337),
 (u'685075073',
  datetime.datetime(1964, 1, 12, 0, 0),
  datetime.datetime(1990, 3, 8, 0, 0),
  9552,
  u'20746',
  57.171047,
  -55.718996),
 (u'769881309',
  datetime.datetime(1971, 3, 15, 0, 0),
  datetime.datetime(1992, 5, 4, 0, 0),
  7721,
  u'20001',
  93.914313,
  -63.266574),
 (u'841173906',
  datetime.datetime(1973, 6, 17, 0, 0),
  datetime.datetime(1976, 1, 22, 0, 0),
  949,
  u'20005',
  71.563287,
  -50.675252),
 (u'725040308',
  datetime.datetime(1977, 11, 2, 0, 0),
  datetime.datetime(2016, 8, 21, 0, 0),
  14172,
  u'20910',
  77.391015,
  -48.961535),
 (u'425588910',
  datetime.datetime(1996, 6, 4, 0, 0),
  datetime.datetime(2015, 6, 19, 0, 0),
  6954,
  u'20006',
  85.88116,
  -24.329507),
 (u'710534916',
  datetime.datetime(1946, 3, 26, 0, 0),
  datetime.datetime(2011, 9, 22, 0, 0),
  23921,
  u'20010',
  93.364

  -30.574171),
 (u'725155754',
  datetime.datetime(1980, 11, 9, 0, 0),
  datetime.datetime(1999, 3, 13, 0, 0),
  6698,
  u'20910',
  89.423128,
  -38.648295),
 (u'125343213',
  datetime.datetime(1972, 2, 27, 0, 0),
  datetime.datetime(1989, 12, 1, 0, 0),
  6487,
  u'20910',
  94.006791,
  -40.940126),
 (u'193904770',
  datetime.datetime(1925, 3, 27, 0, 0),
  datetime.datetime(2005, 12, 31, 0, 0),
  29499,
  u'22101',
  43.523879,
  -34.192972),
 (u'481779456',
  datetime.datetime(1988, 1, 9, 0, 0),
  datetime.datetime(1991, 10, 7, 0, 0),
  1367,
  u'20743',
  70.396918,
  -66.007027),
 (u'894149507',
  datetime.datetime(1944, 4, 8, 0, 0),
  datetime.datetime(2000, 6, 2, 0, 0),
  20509,
  u'22202',
  53.180795,
  -31.787698),
 (u'334009312',
  datetime.datetime(1987, 3, 1, 0, 0),
  datetime.datetime(1997, 7, 21, 0, 0),
  3795,
  u'20011',
  53.27648,
  -45.510039),
 (u'739178532',
  datetime.datetime(1978, 8, 23, 0, 0),
  datetime.datetime(1997, 10, 1, 0, 0),
  6979,
  u'20010',
  84.19

TypeError: list indices must be integers, not str

In [10]:
## Given disease/condition term, get back ICD codes
def txt2icd(txt):
    url_con = "http://api.ohdsi.org/WebAPI/vocabulary/search"
    headers = {'content-type': 'application/json'}
    params = {"QUERY": "Asthma",
              "VOCABULARY_ID": ["ICD9CM"]
    }
    response = requests.post(url_con, data=json.dumps(params), headers=headers)
    data= json.loads(response.text.decode('utf-8'))
    icd9arr=[]
    for d in data:
        icd9arr.append(d["CONCEPT_CODE"])
    return icd9arr
print txt2icd('asthma')

[u'E945.7', u'493', u'493.9', u'493.90', u'493.92', u'493.91', u'493.2', u'493.20', u'493.22', u'493.21', u'493.82', u'493.0', u'493.00', u'493.02', u'493.01', u'V17.5', u'493.1', u'493.10', u'493.12', u'493.11', u'493.8', u'975.7']


In [59]:
## Given disease name, get back symptoms (defined using MeSH terms) along with TFIDF scores
## Taken from https://www.nature.com/articles/ncomms5212
def disease2symptom(txt):
    s = filter(lambda x: txt.lower() in x[1].lower(), DISEASE2SYMPTOMS)
    return([(x[0], x[3]) for x in s])
disease = "Asthma"
symps = disease2symptom(disease)
print 'Found %s symptom MeSH terms for %s' % (len(symps), disease)

for s in symps:
    findICD9(s)

Found 206 symptom MeSH terms for Asthma


TypeError: cannot concatenate 'str' and 'tuple' objects

In [48]:
## Pull patients matching a list of ICD codes
query = GreenTranslator ().get_query ()

patients = query.clinical_get_patients (age='8', sex='male', 
                                        race='white', location='OUTPATIENT')
##pprint.pprint (patients)

In [6]:
meds = {}
for x in patients:
    medList = x['medList']
    # Collect the unique meds
    for m in medList.keys():
        found = False
        try:
            meds[medList[m]] = meds[medList[m]]+1
        except KeyError:
            meds[medList[m]] = 1
pprint.pprint(meds)

{None: 9,
 u'0.3 ML Epinephrine 1 MG/ML Auto-Injector [Epipen]': 1,
 u'120 ACTUAT Fluticasone propionate 0.05 MG/ACTUAT Nasal Inhaler': 2,
 u'24 HR Methylphenidate Hydrochloride 10 MG Extended Release Oral Capsule [Ritalin]': 1,
 u'ACYCLOVIR 5 % TOPICAL OINTMENT': 1,
 u'ADVAIR HFA 230-21 MCG INHALER': 1,
 u'ALBUTEROL 90 MCG INHALER': 1,
 u'ALBUTEROL SULF HFA 90 MCG INH': 7,
 u'ALBUTEROL SULFATE 2.5 MG/3 ML (0.083 %) SOLUTION FOR NEBULIZATION': 9,
 u'ALBUTEROL SULFATE HFA 90 MCG/ACTUATION AEROSOL INHALER': 19,
 u'AMOXICILLIN 250 MG CHEWABLE TABLET': 1,
 u'AMOXICILLIN 875 MG TABLET': 1,
 u'AUGMENTIN 400-57 MG/5 ML': 1,
 u'AUGMENTIN ES-600 SUSPENSION': 3,
 u'AZITHROMYCIN 250 MG TABLET': 11,
 u'Acetaminophen 21.7 MG/ML / Hydrocodone Bitartrate 0.5 MG/ML Oral Solution': 1,
 u'Acetaminophen 32 MG/ML Oral Suspension': 1,
 u'Albuterol 0.83 MG/ML Inhalant Solution': 3,
 u'Amitriptyline Hydrochloride 10 MG Oral Tablet': 1,
 u'Amoxicillin 120 MG/ML / Clavulanate 8.58 MG/ML Oral Suspension': 4,
 u

In [7]:
## For a given medication string get NCBO annotations
## We let NCBO match any ontology since just using RxNORM doesn't
## always give us just the drug name (e.g., "CLINDAMYCIN 15 MG/ML ORAL SOLUTION" is
## a valid RxNORM term)
def med2rxnorm(txt):
    url = 'http://data.bioontology.org/annotator?text=%s&apikey=b792dd1b-cdc2-4cc8-aaf2-4fa4fbf47e4e'
    txt = urlopen(url % quote_plus(txt)).read()
    resp = json.loads(txt)
    if len(resp) == 0: return([])
    annos = []
    for aresp in resp:
        annos.extend([ x['text'] for x in aresp['annotations'] ])
    ##annos = filter(lambda x: not any(d in x for d in'0123456789'), annos)
    return(annos)
print med2rxnorm("CLINDAMYCIN 15 MG/ML ORAL SOLUTION")        


[u'CLINDAMYCIN 15 MG/ML ORAL SOLUTION', u'CLINDAMYCIN 15 MG/ML', u'CLINDAMYCIN', u'ORAL SOLUTION']


In [None]:
## Get RxNORM codes for medication strings
medrxnorm = {}
for med in meds.keys():
    if med is None: continue    
    annos = med2rxnorm(med)
    print 'Processing %s and found %d annotations' % (med, len(annos))
    medrxnorm[med] = {'count':meds[med], 'annos':annos}    