In [1]:
import csv
import gzip
import tqdm
import json
import heapq
import random
import urllib2

import requests
from json import JSONEncoder

# Get data from the OHDSI WebAPI

## Get the list of existing cohorts

In [42]:
def get_cohorts():
    '''
    Fetch the cohort definitions exposed by the OHDSI WebAPI.
    Sends a GET to the 'cohortdefinition' endpoint and decodes the body as JSON.
    return: the decoded JSON payload
    '''
    endpoint = 'http://api.ohdsi.org/WebAPI/cohortdefinition'

    response = requests.get(endpoint, params='')
    # Decode the body as UTF-8 regardless of the charset guessed by requests.
    response.encoding = 'utf-8'

    return json.loads(response.text)

def get_cohorts_id():
    '''
    Returns the list of the ids of all existing cohorts,
    in the order in which get_cohorts() yields them.
    '''
    return [cohort['id'] for cohort in get_cohorts()]

def get_cohorts_name():
    '''
    Returns the list of the names of all existing cohorts,
    in the order in which get_cohorts() yields them.
    '''
    return [cohort['name'] for cohort in get_cohorts()]

In [36]:
def mapping():
    '''
    Build the lookup dictionaries between cohort ids and cohort names.
    The cohorts are read from get_cohorts(); when several cohorts share a key,
    the last one seen wins.
    return: (id -> name dictionary, name -> id dictionary)
    '''
    id_to_name = {}
    name_to_id = {}

    for cohort in get_cohorts():
        cohort_name, cohort_id = cohort['name'], cohort['id']
        id_to_name[cohort_id] = cohort_name
        name_to_id[cohort_name] = cohort_id

    return id_to_name, name_to_id

# Module-level lookup tables (cohort id -> name and name -> id), built once when
# this cell runs. mapping() queries the WebAPI, so re-run the cell to refresh them.
map_id_name, map_name_id = mapping()

http://api.ohdsi.org/WebAPI/cohortdefinition


In [43]:
# Generate a cohort
def generate_cohort(id):
    '''
    Ask the WebAPI to generate a cohort on the '1PCT' source, then fetch its generation info.
    id: id of the cohort (formatted into the URLs with '%s')
    return: decoded JSON returned by the cohort's '/info' endpoint
    '''
    base = 'http://api.ohdsi.org/WebAPI/cohortdefinition/'
    sourcekey = '1PCT'
    cohort_ref = '%s' % id

    # Trigger the generation; the response to this call is not inspected.
    requests.get(base + cohort_ref + '/generate/' + sourcekey, params='')

    # Get info about the progression of the generation process.
    info = requests.get(base + cohort_ref + '/info', params='')
    info.encoding = 'utf-8'

    return json.loads(info.text)

## Get existing concepts 

In [44]:
def get_concept_info(id):
    """
    Look up a single concept in the WebAPI vocabulary.
    id: concept id (formatted into the URL with '%s')
    return: decoded JSON describing the concept
    """
    endpoint = "http://api.ohdsi.org/WebAPI/vocabulary/concept/" + "%s" % id

    response = requests.get(endpoint, params='')
    # Decode the body as UTF-8 regardless of the charset guessed by requests.
    response.encoding = 'utf-8'

    return json.loads(response.text)

In [6]:
# Quick check of get_concept_info: fetch concept 3037744 and dump the raw payload.
data = get_concept_info(3037744)
print data

{u'VOCABULARY_ID': u'LOINC', u'CONCEPT_CLASS_ID': u'Clinical Observation', u'STANDARD_CONCEPT_CAPTION': u'Standard', u'CONCEPT_NAME': u'Descending thoracic aorta Diastolic blood pressure', u'INVALID_REASON': u'V', u'STANDARD_CONCEPT': u'S', u'INVALID_REASON_CAPTION': u'Valid', u'CONCEPT_CODE': u'8369-1', u'DOMAIN_ID': u'Measurement', u'CONCEPT_ID': 3037744}


## Concept set info

In [45]:
def get_conceptsets():
    '''
    Returns the list of all existing conceptsets ids and names,
    as decoded from the JSON body of the WebAPI 'conceptset' endpoint.
    '''
    url = 'http://api.ohdsi.org/WebAPI/conceptset/'

    # The object returned by urlopen holds an open connection: close it
    # explicitly, even when reading fails, instead of waiting for the
    # garbage collector.
    connection = urllib2.urlopen(url)
    try:
        payload = connection.read()
    finally:
        connection.close()

    return json.loads(payload)

def get_conceptset_id(name):
    '''
    Find the id of the first concept set whose name equals 'name'.
    name: string
    return: the matching concept set id, or 0 when no concept set has that name
    '''
    # Lazy scan: stops at the first concept set whose name matches.
    matching_ids = (entry['id'] for entry in get_conceptsets()
                    if entry['name'] == name)
    return next(matching_ids, 0)

def get_conceptset_detail(id):
    '''
    Fetch the items of a concept set.
    id: id of the concept set (formatted into the URL with '%s')
    return: decoded JSON returned by the concept set's '/items' endpoint
    '''
    endpoint = 'http://api.ohdsi.org/WebAPI/conceptset/' + '%s' % id + '/items'

    response = requests.get(endpoint, params='')
    # Decode the body as UTF-8 regardless of the charset guessed by requests.
    response.encoding = 'utf-8'

    return json.loads(response.text)

In [18]:
# Quick check of get_conceptset_detail: fetch the items of concept set 158060
# and dump the raw payload. Note that this rebinds the module-level name `data`.
data = get_conceptset_detail(158060)
print data

http://api.ohdsi.org/WebAPI/conceptset/158060/items
[{u'conceptId': 1177480, u'includeMapped': 0, u'isExcluded': 0, u'includeDescendants': 0, u'id': 158061, u'conceptSetId': 158060}]


## Cohort creation

In [46]:
# Build the cohort for exposure to one drug alone
def cohort_definition_1D(concept):
    """
    Build the definition data for the creation of a new cohort (exposure to a single drug).
    concept: json, typically obtained with the 'get_concept_info' function;
             only its CONCEPT_NAME, CONCEPT_CODE and CONCEPT_ID entries are read
    return: dict with the cohort parameters; its 'expression' entry holds the
            cohort expression serialised as a JSON string
    """
    # Read the three concept fields once, in the order name / code / id.
    drug_name = "%s" % concept["CONCEPT_NAME"]
    drug_code = "%s" % concept["CONCEPT_CODE"]
    drug_id = "%d" % concept["CONCEPT_ID"]  # the id is sent as a string

    # Every other attribute of the concept is hard-coded to an RxNorm ingredient.
    ingredient = {
        "CONCEPT_CLASS_ID": "Ingredient",
        "CONCEPT_CODE": drug_code,
        "CONCEPT_ID": drug_id,
        "CONCEPT_NAME": drug_name,
        "DOMAIN_ID": "Drug",
        "INVALID_REASON": "V",
        "INVALID_REASON_CAPTION": "Valid",
        "STANDARD_CONCEPT": "S",
        "STANDARD_CONCEPT_CAPTION": "Standard",
        "VOCABULARY_ID": "RxNorm",
    }

    # Concept set 0: the drug, with its descendants and mapped concepts.
    drug_conceptset = {
        "id": 0,
        "name": drug_name,
        "expression": {
            "items": [{
                "concept": ingredient,
                "includeDescendants": True,
                "includeMapped": True,
            }],
        },
    }

    # Primary criteria: a drug exposure drawn from concept set 0.
    primary_criteria = {
        "CriteriaList": [{"DrugExposure": {"CodesetId": 0}}],
        "ObservationWindow": {"PriorDays": "30", "PostDays": "30"},
        "PrimaryCriteriaLimit": {"Type": "First"},
    }

    expression = {
        "ConceptSets": [drug_conceptset],
        "PrimaryCriteria": primary_criteria,
        "QualifiedLimit": {"Type": "All"},
        "ExpressionLimit": {"Type": "First"},
        "InclusionRules": [],
        "EndStrategy": {
            "DateOffset": {"DateField": "StartDate", "Offset": "30"},
        },
        "CensoringCriteria": [],
    }

    # The expression travels as a JSON string nested inside the definition.
    return {
        "name": drug_name,
        "description": "null",
        "expressionType": "SIMPLE_EXPRESSION",
        "createdBy": "anonymous",
        "createdDate": "2017-02-14, 22:00",
        "modifiedBy": "anonymous",
        "modifiedDate": "2017-03-09, 19:55",
        "expression": json.dumps(expression),
    }

In [47]:
# Build the cohort for exposure to 2 different drugs within 7 days
def cohort_definition_2D(concept1, concept2):
    """
    Build the definition data for the creation of a new cohort
    (exposure to a first drug, plus exposure to a second drug within a
    window of 7 days before to 7 days after).
    concept1, concept2: json, typically obtained with the 'get_concept_info' function;
                        only their CONCEPT_NAME, CONCEPT_CODE and CONCEPT_ID entries are read
    return: dict with the cohort parameters; its 'expression' entry holds the
            cohort expression serialised as a JSON string
    """
    # Fields of the first drug, read in the order name / code / id.
    first_name = "%s" % concept1["CONCEPT_NAME"]
    first_code = "%s" % concept1["CONCEPT_CODE"]
    first_id = "%d" % concept1["CONCEPT_ID"]  # ids are sent as strings

    # Fields of the second drug, read in the order name / id / code.
    second_name = "%s" % concept2["CONCEPT_NAME"]
    second_id = "%d" % concept2["CONCEPT_ID"]
    second_code = "%s" % concept2["CONCEPT_CODE"]

    # Concept set 0: first drug, with its descendants and mapped concepts.
    first_conceptset = {
        "id": 0,
        "name": first_name,
        "expression": {
            "items": [{
                "concept": {
                    "CONCEPT_CLASS_ID": "Ingredient",
                    "CONCEPT_CODE": first_code,
                    "CONCEPT_ID": first_id,
                    "CONCEPT_NAME": first_name,
                    "DOMAIN_ID": "Drug",
                    "INVALID_REASON": "V",
                    "INVALID_REASON_CAPTION": "Valid",
                    "STANDARD_CONCEPT": "S",
                    "STANDARD_CONCEPT_CAPTION": "Standard",
                    "VOCABULARY_ID": "RxNorm",
                },
                "includeDescendants": True,
                "includeMapped": True,
            }],
        },
    }

    # Concept set 1: second drug, with its descendants and mapped concepts.
    second_conceptset = {
        "id": 1,
        "name": second_name,
        "expression": {
            "items": [{
                "concept": {
                    "CONCEPT_ID": second_id,
                    "CONCEPT_NAME": second_name,
                    "STANDARD_CONCEPT": "S",
                    "INVALID_REASON": "V",
                    "CONCEPT_CODE": second_code,
                    "DOMAIN_ID": "Drug",
                    "VOCABULARY_ID": "RxNorm",
                    "CONCEPT_CLASS_ID": "Ingredient",
                    "INVALID_REASON_CAPTION": "Valid",
                    "STANDARD_CONCEPT_CAPTION": "Standard",
                },
                "includeDescendants": True,
                "includeMapped": True,
            }],
        },
    }

    # Primary criteria: a drug exposure drawn from concept set 0.
    primary_criteria = {
        "CriteriaList": [{"DrugExposure": {"CodesetId": 0}}],
        "ObservationWindow": {"PriorDays": "30", "PostDays": "30"},
        "PrimaryCriteriaLimit": {"Type": "First"},
    }

    # Additional criteria: a drug exposure drawn from concept set 1 whose start
    # falls between 7 days before (Coeff -1) and 7 days after (Coeff 1).
    second_exposure = {
        "Criteria": {"DrugExposure": {"CodesetId": 1}},
        "StartWindow": {
            "Start": {"Days": "7", "Coeff": -1},
            "End": {"Days": "7", "Coeff": 1},
        },
        "Occurrence": {"Type": 2, "Count": 1},
    }
    additional_criteria = {
        "Type": "ALL",
        "CriteriaList": [second_exposure],
        "DemographicCriteriaList": [],
        "Groups": [],
    }

    expression = {
        "ConceptSets": [first_conceptset, second_conceptset],
        "PrimaryCriteria": primary_criteria,
        "AdditionalCriteria": additional_criteria,
        "QualifiedLimit": {"Type": "First"},
        "ExpressionLimit": {"Type": "First"},
        "InclusionRules": [],
        "EndStrategy": {
            "CustomEra": {"DrugCodesetId": 0, "GapDays": 37, "Offset": 0},
        },
        "CensoringCriteria": [],
    }

    # The expression travels as a JSON string nested inside the definition.
    return {
        "name": "%s_%s" % (first_name, second_name),
        "description": "null",
        "expressionType": "SIMPLE_EXPRESSION",
        "createdBy": "anonymous",
        "createdDate": "2017-02-14, 22:00",
        "modifiedBy": "anonymous",
        "modifiedDate": "2017-03-09, 19:55",
        "expression": json.dumps(expression),
    }

In [48]:
# Creation of a new cohort using the OHDSI WebAPI
def create_cohort(data):
    '''
    Create a new cohort through the WebAPI.
    data: json, with the parameters of the new cohort (its "name" entry is checked first)
    return: 0 when a cohort with the same name already exists (nothing is posted),
            otherwise the HTTP status code of the POST request
    '''
    # Fetch the existing names first, then bail out if the requested one is taken.
    existing_names = get_cohorts_name()
    if data["name"] in existing_names:
        print("A cohort with the same name already exists, please choose another name")
        return 0

    response = requests.post('http://api.ohdsi.org/WebAPI/cohortdefinition/', json=data)

    return response.status_code

def delete_cohort(cohort_id):
    """
    Send a DELETE request for a cohort definition and report the outcome.
    cohort_id: the cohort's id (formatted into the URL with '%s')
    return: HTTP status code of the DELETE request
    """
    target = 'http://api.ohdsi.org/WebAPI/cohortdefinition/' + '%s' % cohort_id
    response = requests.delete(target)

    # The message is printed whatever the status code is.
    print("Cohort was deleted with exit status %i " % response.status_code)
    return response.status_code

## Retrieve data from a cohort

In [49]:
def getAlldistinctPatients(cohort_id):
    '''
    Get the person ids of the members of a cohort on the '1PCT' source.
    The request asks the WebAPI for the member range '1-30000'.
    cohort_id: the cohort's id (formatted into the URL with '%s')
    return: list of the 'personId' of every member returned, in response order
    '''
    url = 'http://api.ohdsi.org/WebAPI/cohortresults/1PCT/'+'%s'%cohort_id+'/members/1-30000'

    # The object returned by urlopen holds an open connection: close it
    # explicitly, even when reading fails, instead of waiting for the
    # garbage collector.
    connection = urllib2.urlopen(url)
    try:
        payload = connection.read()
    finally:
        connection.close()

    members = json.loads(payload)

    # NOTE(review): no de-duplication is done here; presumably the endpoint
    # already returns each person once -- confirm against the WebAPI.
    return [member['personId'] for member in members]

In [None]:
#demo_cohort_id = '6839'
#patientset = getAlldistinctPatients(demo_cohort_id)
#print len(patientset)
#print patientset[0]

In [50]:
def getPatientRecord(patient_id):
    '''
    Get all records of a specific patient on the '1PCT' source.
    patient_id: the patient's id (formatted into the URL with '%s')
    return: list of (domain, conceptId, conceptName, startDate, endDate) tuples,
            one per entry of the 'records' field of the response
    '''
    endpoint = 'http://api.ohdsi.org/WebAPI/1PCT/person/' + '%s' % patient_id

    response = requests.get(endpoint, params='')
    response.encoding = 'utf-8'
    profile = json.loads(response.text)

    return [
        (event['domain'], event['conceptId'], event['conceptName'],
         event['startDate'], event['endDate'])
        for event in profile['records']
    ]

def getPatientDetail(patient_id):
    '''
    Get information about a specific patient on the '1PCT' source.
    patient_id: the patient's id (formatted into the URL with '%s')
    return: tuple (gender, yearOfBirth, recordCount) read from the response
    '''
    endpoint = 'http://api.ohdsi.org/WebAPI/1PCT/person/' + '%s' % patient_id

    response = requests.get(endpoint, params='')
    response.encoding = 'utf-8'
    profile = json.loads(response.text)

    wanted_fields = ('gender', 'yearOfBirth', 'recordCount')
    return tuple([profile[field] for field in wanted_fields])

def getPatientObservation(patient_id):
    '''
    Get all the observation periods of a specific patient on the '1PCT' source.
    patient_id: the patient's id (formatted into the URL with '%s')
    return: list of (startDate, endDate, type) tuples, one per entry of the
            'observationPeriods' field of the response
    '''
    endpoint = 'http://api.ohdsi.org/WebAPI/1PCT/person/' + '%s' % patient_id

    response = requests.get(endpoint, params='')
    response.encoding = 'utf-8'
    profile = json.loads(response.text)

    return [
        (period['startDate'], period['endDate'], period['type'])
        for period in profile['observationPeriods']
    ]

In [24]:
#record = getPatientDetail(patientset[0])

In [25]:
#print len(record)
#print record[0]

In [None]:
#obs_2 = getPatientObservation(patientset[1])
#print len(obs_2)
#print obs_2[0]

In [51]:
def getPatientMeasurement(patient_id):
    '''
    Get all the measurements of a patient.
    Keeps the tuples returned by getPatientRecord whose first element
    (the domain) is exactly 'measurement'.
    return: list of those record tuples, in their original order
    '''
    return [entry for entry in getPatientRecord(patient_id)
            if entry[0] == 'measurement']

In [None]:
#measurements = getPatientMeasurement(patientset[5])
#print measurements

## Example for the top DDI

In [13]:
# Get the concepts corresponding to blood pressure measurements (LOINC code)
# Systolic blood pressure: LOINC code (evaluates to "8480-6") and the concept
# id used to query the WebAPI.
sys_BP_LOINC = "8480"+"-"+"6"
sys_BP_OMOP = 3004249
info_sys_BP = get_concept_info(sys_BP_OMOP)

# Diastolic blood pressure: LOINC code (evaluates to "8462-4") and the concept
# id used to query the WebAPI.
# NOTE(review): the *_LOINC strings are only defined here; no use of them is
# visible in this part of the notebook.
dias_BP_LOINC = "8462"+"-"+"4"
dias_BP_OMOP = 3012888
info_dias_BP = get_concept_info(dias_BP_OMOP)

#print info_sys_BP
#print info_sys_BP["VOCABULARY_ID"]

{u'VOCABULARY_ID': u'LOINC', u'CONCEPT_CLASS_ID': u'Clinical Observation', u'STANDARD_CONCEPT_CAPTION': u'Standard', u'CONCEPT_NAME': u'BP systolic', u'INVALID_REASON': u'V', u'STANDARD_CONCEPT': u'S', u'INVALID_REASON_CAPTION': u'Valid', u'CONCEPT_CODE': u'8480-6', u'DOMAIN_ID': u'Measurement', u'CONCEPT_ID': 3004249}
LOINC


In [14]:
# Get the concepts in OHDSI corresponding to the drugpair
# Pair of concept ids for the two drugs of the interaction; each one is looked
# up separately through get_concept_info (one WebAPI call per drug).
top_ddi = (1125315, 800878)
info_drug1 = get_concept_info(top_ddi[0])
info_drug2 = get_concept_info(top_ddi[1])

#print info_drug1
#print len(info_drug1)
#print info_drug2

{u'VOCABULARY_ID': u'RxNorm', u'CONCEPT_CLASS_ID': u'Ingredient', u'STANDARD_CONCEPT_CAPTION': u'Standard', u'CONCEPT_NAME': u'Acetaminophen', u'INVALID_REASON': u'V', u'STANDARD_CONCEPT': u'S', u'INVALID_REASON_CAPTION': u'Valid', u'CONCEPT_CODE': u'161', u'DOMAIN_ID': u'Drug', u'CONCEPT_ID': 1125315}
10


In [19]:
# Concept id used for the ibuprofen example below.
ibuprofen = 1177480
# Disabled end-to-end example: look up the concept, create its cohort, resolve
# the cohort id by name, then trigger the generation.
# NOTE(review): `cohort_definition` is not defined in the visible notebook;
# presumably `cohort_definition_1D` is meant -- confirm before re-enabling.
# NOTE(review): `map_name_id` is built when its own cell runs, so it would
# presumably need to be rebuilt with mapping() after create_cohort -- confirm.
#info_ibuprofen = get_concept_info(ibuprofen)
#print info_ibuprofen
#data = cohort_definition(info_ibuprofen)
#status = create_cohort(data)
#print status
#cohort_id = map_name_id[info_ibuprofen['CONCEPT_NAME']]
#print cohort_id
#info = generate_cohort(cohort_id)
#print info

{u'VOCABULARY_ID': u'RxNorm', u'CONCEPT_CLASS_ID': u'Ingredient', u'STANDARD_CONCEPT_CAPTION': u'Standard', u'CONCEPT_NAME': u'Ibuprofen', u'INVALID_REASON': u'V', u'STANDARD_CONCEPT': u'S', u'INVALID_REASON_CAPTION': u'Valid', u'CONCEPT_CODE': u'5640', u'DOMAIN_ID': u'Drug', u'CONCEPT_ID': 1177480}
226233
http://api.ohdsi.org/WebAPI/cohortdefinition/226233/generate/1PCT
[{u'status': u'PENDING', u'isValid': True, u'id': {u'sourceId': 3, u'cohortDefinitionId': 226233}, u'startTime': 1497884022502, u'executionDuration': 51517}]


In [22]:
#patientset = getAlldistinctPatients(cohort_id)
#print len(patientset)
#print patientset[0]
#info = getPatientObservation(patientset[0])
#print info

http://api.ohdsi.org/WebAPI/cohortresults/1PCT/226233/members/1-30000
22156
10
[(1200009600000, 1267833600000, u'Period while enrolled in insurance')]


In [21]:
#details = getPatientDetail(patientset[0])
#print details

(u'FEMALE', 1938, 170)
