**Question: Are there differences in exposures among patients diagnosed with Asthma during childhood vs during adulthood?**
* Find age of diagnosis for Asthma patients
* Find location for Asthma patients
* Find exposures by location for Asthma patients
* Compare exposures of patients diagnosed with Asthma during childhood vs during adulthood

In [1]:
## All the imports we need
from urllib2 import Request, urlopen, URLError
from urllib import quote_plus

import mysql.connector

import pprint, json, requests

from datetime import datetime, timedelta
from dateutil.parser import parse as parse_date
from greentranslator.api import GreenTranslator

import dateutil

In [2]:
## Given disease/condition term, get back ICD codes
def txt2icd(txt):
    """Return the ICD-9-CM concept codes the OHDSI vocabulary search finds for *txt*.

    txt -- disease/condition term to search for (for example 'asthma').

    Returns a list with the "CONCEPT_CODE" value of every record in the
    JSON array that the service sends back.
    """
    url_con = "http://api.ohdsi.org/WebAPI/vocabulary/search"
    headers = {'content-type': 'application/json'}
    # Search for the caller's term rather than a fixed "Asthma" string.
    params = {"QUERY": txt,
              "VOCABULARY_ID": ["ICD9CM"]}
    response = requests.post(url_con, data=json.dumps(params), headers=headers)
    # response.text is already decoded text, so it is parsed as-is; decoding
    # it a second time breaks on non-ASCII content.
    data = json.loads(response.text)
    return [d["CONCEPT_CODE"] for d in data]
# Quick check of the lookup with the term 'asthma'; prints the returned code list.
print txt2icd('asthma')

[u'E945.7', u'493', u'493.9', u'493.90', u'493.92', u'493.91', u'493.2', u'493.20', u'493.22', u'493.21', u'493.82', u'493.0', u'493.00', u'493.02', u'493.01', u'V17.5', u'493.1', u'493.10', u'493.12', u'493.11', u'493.8', u'975.7']


In [3]:
# Download the tab-separated disease/symptom table published on nature.com
# (ncomms5212, supplementary file s4) and keep only the rows that split into
# exactly four fields.
DISEASE2SYMPTOMS = [
    fields
    for fields in (
        line.split("\t")
        for line in urlopen("https://www.nature.com/article-assets/npg/ncomms/2014/140626/ncomms5212/extref/ncomms5212-s4.txt").read().split("\n")
    )
    if len(fields) == 4
]
#print DISEASE2SYMPTOMS

In [4]:
## Given disease name, get back symptoms (defined using MeSH terms) along with TFIDF scores
## Taken from https://www.nature.com/articles/ncomms5212
def disease2symptom(txt):
    """Return (symptom, score) pairs for the table rows that match *txt*.

    A row of DISEASE2SYMPTOMS matches when *txt* occurs, ignoring case, inside
    the row's second field (presumably the disease name -- confirm against the
    data file). Each result pairs the row's first field with its fourth field.
    """
    return [
        (row[0], row[3])
        for row in DISEASE2SYMPTOMS
        if txt.lower() in row[1].lower()
    ]
# Look up the symptom rows for one disease and report how many were found.
disease = "Asthma"
symps = disease2symptom(disease)
print 'Found %s symptom MeSH terms for %s' % (len(symps), disease)

#for s in symps:
 #   findICD9(s)

Found 206 symptom MeSH terms for Asthma


In [5]:
## Pull JHU FHIR patients matching an ICD code
import requests
import urllib2
import json

#base_url = 'http://ictrweb.johnshopkins.edu/rest/synthetic'
#patients = base_url+"/Patient"+"?_count=50"

#req = requests.get(patients)

def findPatients(code):
    """Return the raw response body of the FHIR Condition search for *code*.

    code -- ICD-10 code, appended unchanged to the ``icd_10`` query parameter.

    Returns the body exactly as read from the server (not parsed).

    Raises urllib2.URLError (which includes urllib2.HTTPError) when the
    request fails; a short notice is printed first.
    """
    url = "http://ictrweb.johnshopkins.edu/rest/synthetic/Condition?icd_10=" + code
    try:
        response = urllib2.urlopen(url)
    except urllib2.URLError:
        # Report and re-raise: calling exit() here would stop the whole
        # interpreter/kernel and discard the reason for the failure.
        print("I can't find patients")
        raise
    try:
        return response.read()
    finally:
        # Release the connection even if reading fails.
        response.close()

In [6]:
#Define functions needed to connect patients with exposures
def _fetchJson(url):
    """Download *url* and return its body parsed as JSON, closing the response."""
    response = urlopen(url)
    try:
        return json.loads(response.read())
    finally:
        response.close()


def findAgeZipcode(stuff):
    """Collect diagnosis age and location for every condition entry in *stuff*.

    stuff -- parsed JSON of a Condition search result. Its 'entry' list is
             walked; a result without an 'entry' key (or with an empty one)
             yields an empty list instead of a KeyError.

    For each entry the referenced patient record is downloaded, then the
    patient's postal code is sent to zipcodeapi.com to obtain coordinates.

    Returns a list of tuples
        (patient id, birth date, asserted date, days between the two,
         postal code, latitude, longitude)
    where both dates are the values produced by dateutil.parser.parse.
    """
    res = []
    for p in stuff.get('entry') or []:
        assertedDate = dateutil.parser.parse(p['assertedDate'])
        # Drop the first 8 characters of the reference (presumably the
        # "Patient/" prefix -- confirm against the server's output).
        patientId = p['subject']['reference'][8:]
        patient = _fetchJson('http://ictrweb.johnshopkins.edu/rest/synthetic/Patient/' + patientId)
        birthDate = dateutil.parser.parse(patient['birthDate'])
        postalCode = patient['address'][0]['postalCode']
        # NOTE(review): the zipcodeapi.com key is embedded in this URL;
        # consider loading it from configuration instead.
        geo = _fetchJson('https://www.zipcodeapi.com/rest/lPMf5jmnBdclCZWkQwlFPJO6HkolG4N1TzgZSDnuRAPtzLOqi957STdzeBVVFIWz/multi-info.json/' + postalCode + '/degrees')
        # The response is indexed by the postal code that was asked for.
        lat = geo[postalCode]['lat']
        lng = geo[postalCode]['lng']
        ageInDays = (assertedDate - birthDate).days
        res.append((patientId, birthDate, assertedDate, ageInDays, postalCode, lat, lng))
    return res

In [7]:
#connect to UMLS
# Imported here because the except branch below compares against the ER_*
# constants that live in mysql.connector.errorcode.
from mysql.connector import errorcode

# Open the connection to the UMLS database. On success `cnx` stays open for
# the lookup functions defined in the following cell.
# NOTE(review): user name and password are hard-coded; consider reading them
# from the environment or a config file.
try:
    cnx = mysql.connector.connect(user='tadmin',
                                password='ncats_translator!',
                                database='umls',
                                host='translator.ceyknq0yekb3.us-east-1.rds.amazonaws.com')
except mysql.connector.Error as err:
    if err.errno == errorcode.ER_ACCESS_DENIED_ERROR:
        print("Something is wrong with your user name or password")
    elif err.errno == errorcode.ER_BAD_DB_ERROR:
        print("Database does not exist")
    else:
        print(err)
else:
    # Emit one blank line on success, as the bare `print` statement did.
    print("")
    #cnx.close()




In [8]:
#Define functions to use UMLS
def _findCode(name, sab):
    """Return one code from source vocabulary *sab* for the term *name*.

    The CUI of the first MRCONSO row whose STR equals *name* is looked up,
    then the CODE rows for that CUI within *sab* are read and the last one
    returned. Returns the string "Undef" when either query yields no rows.
    """
    cursor = cnx.cursor()
    try:
        # Values are passed as parameters so quotes inside *name* can neither
        # break nor alter the statement.
        cursor.execute("SELECT CUI FROM umls.MRCONSO WHERE STR=%s", (name,))
        # Read every row before the cursor is reused; only the first CUI is kept.
        cuiRows = cursor.fetchall()
        if not cuiRows:
            return "Undef"
        cursor.execute(
            "SELECT CODE FROM umls.MRCONSO WHERE SAB=%s AND CUI=%s",
            (sab, cuiRows[0][0]))
        codeRows = cursor.fetchall()
    finally:
        cursor.close()
    if not codeRows:
        # Return the whole sentinel; indexing "Undef"[0] would give "U".
        return "Undef"
    return codeRows[-1][0]


def findICD10(name):
    """Return one ICD10 code for the UMLS term *name*, or "Undef" if none is found."""
    return _findCode(name, 'ICD10')


def findICD9(name):
    """Return one ICD9CM code for the UMLS term *name*, or "Undef" if none is found."""
    return _findCode(name, 'ICD9CM')

In [9]:
# Find the Asthma ICD10 code.
# NOTE: findICD10 returns a single value (one code, or "Undef" when the term
# is not found), not a list of codes.
asthmaCodes = findICD10('Asthma')

In [10]:
## Find age and zipcode for FHIR patients with asthma diagnoses
# findICD10 hands back one code as a plain string, so looping over it directly
# would issue one request per character. Wrap a lone value in a list first.
if isinstance(asthmaCodes, (list, tuple)):
    codes = asthmaCodes
else:
    codes = [asthmaCodes]

for c in codes:
    # "Undef" is the sentinel findICD10 returns when nothing was found.
    if c == "Undef":
        continue

    ## Get asthma patients
    url = "http://ictrweb.johnshopkins.edu/rest/synthetic/Condition?icd_10="+c+"&_count=1000"
    txt = urlopen(url).read()
    stuff = json.loads(txt)
    pprint.pprint(findAgeZipcode(stuff))

HTTPError: HTTP Error 401: Unauthorized