**Question: Are there differences in environmental exposure among patients with early-onset vs late-onset Asthma?**
* Find patients diagnosed with Asthma
* Find age of diagnosis for Asthma patients
* Find location for Asthma patients
* Find exposures by location for Asthma patients
* Compare exposures of patients diagnosed with Asthma during childhood vs during adulthood

### Function and dataset definitions

In [1]:
## All the imports we need
import urllib2
from urllib2 import Request, urlopen, URLError
from urllib import quote_plus

import mysql.connector

import pprint, json, requests

from datetime import datetime, timedelta
from dateutil.parser import parse as parse_date
from greentranslator.api import GreenTranslator
query = GreenTranslator ().get_query ()

import dateutil

import zipcode

In [2]:
# Connect to the UMLS MySQL database.
# On success `cnx` holds the open connection that findICD_umls() queries;
# on failure a diagnostic is printed and `cnx` is left undefined.
# NOTE(review): the credentials are hard-coded here; consider loading them
# from the environment or a config file kept out of version control.
from mysql.connector import errorcode  # errno constants used in the handler below

try:
    cnx = mysql.connector.connect(user='tadmin',
                                password='ncats_translator!',
                                database='umls',
                                host='translator.ceyknq0yekb3.us-east-1.rds.amazonaws.com')
except mysql.connector.Error as err:
    # Without the errorcode import above, reaching this handler raised a
    # NameError and hid the real connection problem.
    if err.errno == errorcode.ER_ACCESS_DENIED_ERROR:
        print("Something is wrong with your user name or password")
    elif err.errno == errorcode.ER_BAD_DB_ERROR:
        print("Database does not exist")
    else:
        print(err)
else:
    # Emit a blank line on success; the connection is deliberately left open
    # because later cells call cnx.cursor().
    print("")




#### UMLS and OHDSI functions

In [3]:
## Given disease/condition term, get back ICD codes from OHDSI
def findICD_ohdsi(txt, icd_version = 9):
    """Search the OHDSI WebAPI vocabulary for `txt` and return the ICD codes found.

    Args:
        txt: disease/condition search term, sent as the "QUERY" field.
        icd_version: 9 (vocabulary 'ICD9CM', the default) or 10 ('ICD10').

    Returns:
        A list holding the "CONCEPT_CODE" value of every item in the response.

    Raises:
        Exception: if `icd_version` is neither 9 nor 10.
        requests.exceptions.HTTPError: if the service answers with an HTTP
            error status.
    """
    if icd_version == 9:
        icd_type = 'ICD9CM'
    elif icd_version == 10:
        icd_type = 'ICD10'
    else:
        raise Exception("Invalid ICD version specified")

    url_con = "http://api.ohdsi.org/WebAPI/vocabulary/search"
    headers = {'content-type': 'application/json'}
    params = {"QUERY": txt,
              "VOCABULARY_ID": [icd_type]}
    response = requests.post(url_con, data=json.dumps(params), headers=headers)
    # Fail loudly on 4xx/5xx instead of trying to parse an error page as JSON.
    response.raise_for_status()
    # response.text is already decoded text. Calling .decode('utf-8') on it
    # forces an implicit ASCII encode on Python 2 (which breaks on non-ASCII
    # concept names) and does not exist at all on Python 3.
    data = json.loads(response.text)
    return [d["CONCEPT_CODE"] for d in data]
# Smoke test: show the codes OHDSI returns for 'asthma' (default icd_version=9).
print(findICD_ohdsi('asthma'))

# Get ICD10/ICD9 code for a given string from UMLS. By default we get back ICD10.
def findICD_umls(name, icd_version = 10):
    """Look up `name` in umls.MRCONSO and return one ICD code for its concept.

    The first CUI whose STR equals `name` is used; among the rows of that CUI
    whose SAB is the requested ICD vocabulary, the CODE of the last row
    returned by the database is reported.

    Args:
        name: term to match against the STR column.
        icd_version: 10 (SAB 'ICD10', the default) or 9 (SAB 'ICD9CM').

    Returns:
        The code as a single string, or "Undef" when either the term or a
        code in the requested vocabulary cannot be found.

    Raises:
        Exception: if `icd_version` is neither 9 nor 10.
    """
    if icd_version == 9:
        icd_type = 'ICD9CM'
    elif icd_version == 10:
        icd_type = 'ICD10'
    else:
        raise Exception("Invalid ICD version specified")

    cursor = cnx.cursor()
    try:
        # Values are bound as parameters so a term containing a quote can
        # neither break the statement nor inject SQL.
        cursor.execute("SELECT CUI FROM umls.MRCONSO WHERE STR=%s", (name,))
        # Fetch everything: the result set must be drained before the cursor
        # is reused for the second query.
        cui_rows = cursor.fetchall()
        if not cui_rows:
            return "Undef"
        cui = cui_rows[0][0]

        cursor.execute("SELECT CODE FROM umls.MRCONSO WHERE SAB=%s AND CUI=%s",
                       (icd_type, cui))
        code_rows = cursor.fetchall()
        if not code_rows:
            # Previously this path returned "U" (the first character of the
            # "Undef" sentinel) instead of the sentinel itself.
            return "Undef"
        return code_rows[-1][0]
    finally:
        cursor.close()

[u'E945.7', u'493', u'493.9', u'493.90', u'493.92', u'493.91', u'493.2', u'493.20', u'493.22', u'493.21', u'493.82', u'493.0', u'493.00', u'493.02', u'493.01', u'V17.5', u'493.1', u'493.10', u'493.12', u'493.11', u'493.8', u'975.7']


#### Exposure data source functions

In [4]:
#Function takes date of diagnosis, and location as input and returns degree of exposure 

def findExposureAtDateOfDx(dxdate,lon,lat):
    """Return the PM2.5 exposure scores for one point on the day of diagnosis.

    Calls query.expo_get_scores with exposure_type 'pm25', the same day as
    both start_date and end_date, and exposure_point set to "<lon>,<lat>"
    (first coordinate argument, a comma, second coordinate argument).

    Args:
        dxdate: date of diagnosis, passed through unchanged.
        lon: first coordinate; a string or a number.
        lat: second coordinate; a string or a number.
            NOTE(review): the example call elsewhere in this notebook passes
            what looks like latitude first -- confirm which order the
            exposure API expects before relying on the parameter names.

    Returns:
        Whatever query.expo_get_scores returns.

    Raises:
        TypeError: if either coordinate is None (e.g. an unresolved zip code).
    """
    if lon is None or lat is None:
        raise TypeError("lon and lat must both be provided, got %r and %r" % (lon, lat))
    # %-formatting accepts the float coordinates produced by the zip-code
    # lookup as well as strings; plain '+' concatenation only worked for strings.
    exposure_point = '%s,%s' % (lon, lat)
    pm25score = query.expo_get_scores (exposure_type = 'pm25',
                            start_date = dxdate,
                            end_date = dxdate,
                            exposure_point = exposure_point)
    return pm25score

#### HUSH+ and FHIR synthetic data source functions

In [5]:
## Functions to retrieve patients from different sources - HUSH+ & FHIR synthetic
def findPatients_fhir(code, count=None):
    """Fetch the Condition resources matching an ICD-10 code from the synthetic FHIR server.

    Args:
        code: ICD-10 code, sent as the `icd_10` query parameter (URL-encoded).
        count: optional page size; when given it is sent as `_count`.
            When omitted no `_count` parameter is sent.

    Returns:
        The parsed JSON response.

    Raises:
        Exception: if the request fails for any reason.
    """
    url = "http://ictrweb.johnshopkins.edu/rest/synthetic/Condition?icd_10=" + quote_plus(code)
    # The original URL ended in a literal, never-formatted "&_count=%d";
    # only send _count when the caller actually supplies a value.
    if count is not None:
        url += "&_count=%d" % int(count)
    try:
        txt = urlopen(url).read()
    except Exception as e:
        raise Exception("FHIR request failed for %s: %s" % (url, e))
    return json.loads(txt)

def findPatients_unc(age='8', sex='male', race='white', location='OUTPATIENT'):
    """Return patients from the GreenTranslator clinical API for the given filters.

    A fresh GreenTranslator query object is created on every call, and the
    four filters are forwarded positionally to clinical_get_patients.
    """
    translator = GreenTranslator ()
    clinical_query = translator.get_query()
    patients = clinical_query.clinical_get_patients(age, sex, race, location)
    return patients

In [6]:
#Function to retrieve age at diagnosis & location for patients from FHIR synthetic
def findDxAgeLocation_fhir(stuff):
    """Compute age at diagnosis and location for each condition entry.

    For every item of stuff['entry'] the referenced Patient resource is
    fetched, the difference between the entry's 'assertedDate' and the
    patient's 'birthDate' is taken, and the first address's postal code is
    resolved to coordinates with the zipcode module.

    Args:
        stuff: parsed response of findPatients_fhir (a dict that may hold an
            'entry' list).

    Returns:
        A list of tuples (patient_id, age_at_dx_in_days, date_of_dx, lat, lon).
        lat and lon are None when the patient has no postal code or the
        postal code cannot be resolved. Entries whose patient record cannot
        be fetched are skipped.
    """
    res = []
    # A response without matches may carry no 'entry' key at all.
    for p in stuff.get('entry', []):
        dx_date_txt = p['assertedDate']
        # Drop the first 8 characters of the reference (presumably the
        # "Patient/" prefix -- TODO confirm) to get the bare id.
        patient_id = p['subject']['reference'][8:]
        try:
            url = 'http://ictrweb.johnshopkins.edu/rest/synthetic/Patient/'+patient_id
            txt = urlopen(url).read()
        except Exception:
            print("ictr api doesn't work")
            # Skip this patient: falling through would parse an undefined
            # `txt`, or worse, the previous patient's record.
            continue
        st = json.loads(txt)
        birth_txt = st['birthDate']
        try:
            z = st['address'][0]['postalCode']
        except (KeyError, IndexError):
            z = None

        diff = dateutil.parser.parse(dx_date_txt) - dateutil.parser.parse(birth_txt)

        lat = None
        lon = None
        if z is not None:
            obj = zipcode.isequal(z.encode("ascii", "ignore"))
            if obj is not None:
                d = obj.to_dict()
                lat = d['lat']
                lon = d['lon']

        # Third field is the diagnosis date (assertedDate); it previously
        # carried the birth date by mistake.
        res.append((patient_id, diff.days, dx_date_txt, lat, lon))
    return res
#Add function to retrieve age at diagnosis & location for patients from HUSH+

### "What are the age at diagnosis and the location of Asthma patients?"

In [None]:
asthmaCodes = findICD_umls("asthma") # We go with ICD10 codes
# findICD_umls returns a single code string (or the "Undef" sentinel).
# Normalise to a list so the loop below visits whole codes rather than the
# individual characters of that string.
if isinstance(asthmaCodes, (list, tuple)):
    asthmaCodes = list(asthmaCodes)
elif asthmaCodes == "Undef":
    asthmaCodes = []
else:
    asthmaCodes = [asthmaCodes]

## Get age of diagnosis & locations for FHIR synthetic patients.
# Column labels, in the same order as the tuples built by
# findDxAgeLocation_fhir (latitude comes before longitude).
dxlist = ["id","ageOfDxInDays", "dateAtDx", "lat","long"]
for icd in asthmaCodes:
    try:
        p = findPatients_fhir(icd)
        # patients with icd exist
    except Exception:
        print("no patients with icd")
        # Move on to the next code; exit() here would shut down the whole
        # notebook kernel.
        continue
    p_ageloc = findDxAgeLocation_fhir(p)
    print(dxlist)
    for dx in p_ageloc:
        print(dx)
        
## Add code to get age of diagnosis & locations for HUSH+ patients.

### "What are exposures at date of diagnosis for each patient?"

In [None]:
# Example: PM2.5 exposure scores for a single date and point.
# NOTE(review): findExposureAtDateOfDx's positional parameters are
# (dxdate, lon, lat), but the values below look like latitude followed by
# longitude -- confirm which order the exposure API expects.
findExposureAtDateOfDx('2010-01-07','35.9131996','-79.0558445')

#for p in p_ageloc:
    #findExposureAtDateOfDx(p[2],p[3],p[4])
