## load JSON FHIR Questionnaires and Responses to SMART FHIR Server

In [390]:
#import relevant packages
from fhirclient import client
from fhirclient import server
import json
import pandas as pd
import numpy as np

import fhirclient.models.questionnaire as q
import fhirclient.models.patient as p
import fhirclient.models.humanname as hn
import fhirclient.models.questionnaireresponse as qr
import fhirclient.models.coding as c

#set up smart server
smart = server.FHIRServer(None, 'https://r4.smarthealthit.org')

In [391]:
#set up useful functions:

# pretty print
def pp(json_object):
    print(json.dumps(json_object, indent = 3))
    
def savetofile(json_object, filename):
    with open(filename, 'w') as outfile:
        json.dump(json_object, outfile, indent = 3)

In [392]:
#load mh_questionnaire json
with open('questionnaires/MHQuestionnaire_withValueSet.json', 'r') as h1:
    qjs1 = json.load(h1)
mh_questionnaire = q.Questionnaire(qjs1)

In [393]:
#load pa_questionnaire json
with open('questionnaires/PAQuestionnaire_withValueSet.json', 'r') as h2:
    qjs2 = json.load(h2)
pa_questionnaire = q.Questionnaire(qjs2)

In [394]:
#take loaded mental health json questionnaire and upload to smart server
mh_quest_load = q.Questionnaire.create(mh_questionnaire, smart)
mh_quest_id = mh_quest_load["id"]
print(mh_quest_id)

661149


In [395]:
#take loaded physical activity json questionnaire and upload to smart server
pa_quest_load = q.Questionnaire.create(pa_questionnaire, smart)
pa_quest_id = pa_quest_load["id"]
print(pa_quest_id)

661150


In [396]:
#pull mental health questionnaire from server and read to test:
mh_quest_download = q.Questionnaire.read(mh_quest_id, smart)
mh_quest_download.as_json()

{'id': '661149',
 'meta': {'lastUpdated': '2020-11-11T21:07:01.619-05:00', 'versionId': '1'},
 'code': [{'code': 'DPQ_J',
   'display': 'NHANES Mental Health Depression Questionnaire',
   'system': 'https://wwwn.cdc.gov/Nchs/Nhanes/2017-2018/DPQ_J.htm'}],
 'date': '2020-02-01T00:00:00.000Z',
 'item': [{'answerValueSet': '#Mental-Health-Depression-valueset-1',
   'linkId': 'DPQ010',
   'text': 'Over the last 2 weeks, how often have you been bothered by the following problems: little interest or pleasure in doing things? Would you say...',
   'type': 'integer'},
  {'answerValueSet': '#Mental-Health-Depression-valueset-1',
   'linkId': 'DPQ020',
   'text': '[Over the last 2 weeks, how often have you been bothered by the following problems:] feeling down, depressed, or hopeless?',
   'type': 'integer'},
  {'answerValueSet': '#Mental-Health-Depression-valueset-1',
   'linkId': 'DPQ030',
   'text': '[Over the last 2 weeks, how often have you been bothered by the following problems:] trouble 

In [397]:
#pull physical activity questionnaire from server and read to test:
pa_quest_download = q.Questionnaire.read(pa_quest_id, smart)
pa_quest_download.as_json()

{'id': '661150',
 'meta': {'lastUpdated': '2020-11-11T21:07:01.762-05:00', 'versionId': '1'},
 'code': [{'code': 'PAQ_J',
   'display': 'NHANES Physical Activity Questionnaire',
   'system': 'https://wwwn.cdc.gov/Nchs/Nhanes/2017-2018/PAQ_J.htm'}],
 'date': '2020-02-01T00:00:00.000Z',
 'item': [{'answerValueSet': '#Physical-Activity-valueset-1',
   'linkId': 'PAQ605',
   'text': "Next I am going to ask you about the time {you spend/SP spends} doing different types of physical activity in a typical week. Think first about the time {you spend/he spends/she spends} doing work. Think of work as the things that {you have/he has/she has} to do such as paid or unpaid work, household chores, and yard work. Does {your/SP's} work involve vigorous-intensity activity that causes large increases in breathing or heart rate like carrying or lifting heavy loads, digging or construction work for at least 10 minutes continuously?",
   'type': 'integer'},
  {'extension': [{'url': 'http://hl7.org/fhir/Str

In [398]:
#Add questionnaire responses for NHANES patients:
#read in csv with questionnaire response data:
pa_data = pd.read_csv('nhanes_data/activity_questionnaire.csv')
dpq_data = pd.read_csv('nhanes_data/depression_questionnaire.csv')


In [399]:
# Verify: Questionnaire's linkId should match the linkIds in QuestionnaireResponse when created.
# Check if csv is the right one. 

# Physical Activity CSV and Questionnaire linkIds
pa_csv_questionids = sorted(list(pa_data.columns.values)[2:])
pa_questionnaire_linkids = sorted([item.linkId for item in pa_quest_download.item])
compare_df_pa = pd.DataFrame({'PA-CSV': pa_csv_questionids, 'PA-FHIR Questionnaire': pa_questionnaire_linkids})
print(compare_df_pa)

# Mental Health CSV and Questionnaire linkIds
mh_csv_questionids = sorted(list(dpq_data.columns.values)[2:])
mh_questionnaire_linkids = sorted([item.linkId for item in mh_quest_download.item])
compare_df_mh = pd.DataFrame({'PA-CSV': mh_csv_questionids, 'PA-FHIR Questionnaire': mh_questionnaire_linkids})


    PA-CSV PA-FHIR Questionnaire
0   pad615                PAD615
1   pad630                PAD630
2   pad645                PAD645
3   pad660                PAD660
4   pad675                PAD675
5   pad680                PAD680
6   paq605                PAQ605
7   paq610                PAQ610
8   paq620                PAQ620
9   paq625                PAQ625
10  paq635                PAQ635
11  paq640                PAQ640
12  paq650                PAQ650
13  paq655                PAQ655
14  paq665                PAQ665
15  paq670                PAQ670


In [400]:
#LOAD PHYSICAL ACTIVITY DATA INTO PY CLIENT MODEL:
coded_qs = ['paq605', 'paq620', 'paq635', 'paq650', 'paq665']
pa_questionnaire_responses = []
looprange = range(0, len(pa_data))

for i in looprange:    
    #assign variables to questionnaire response resource elements:
    #create py client questionnaire response
    pa_qr = qr.QuestionnaireResponse({'status': 'completed', 
                                      'questionnaire': f'Questionnaire/{pa_quest_id}',
                                      "identifier": {
                                        "system": "https://wwwn.cdc.gov/Nchs/Nhanes/2017-2018/PAQ_J.htm",
                                        "value": f'{str(int(pa_data.loc[i][1]))}'
                                    }})    

    response_items = []
    #create questionnaire response items for each question, and item answers for each corresponding answer
    for question_id in pa_csv_questionids:
        qr_item = qr.QuestionnaireResponseItem()
        qr_item.linkId = question_id.upper()
        #answerItem:
        #check if Answer for this item is available, if not Skip, we do not add answerItem
        if str(pa_data.loc[i][question_id]) != 'nan':
            if question_id in coded_qs:
                qr_item_answer = qr.QuestionnaireResponseItemAnswer()
                qr_item_answer.valueCoding = c.Coding()
                qr_item_answer.valueCoding.code = str(str(pa_data.loc[i][question_id]))
            else:    
                qr_item_answer = qr.QuestionnaireResponseItemAnswer()
                qr_item_answer.valueInteger = int(int(pa_data.loc[i][question_id]))

            qr_item.answer = [qr_item_answer]

            #add the question text to `qr_item.question`
            for j in range(0, len(pa_quest_download.item)):
                find_id = pa_quest_download.item[j].linkId
                if find_id == question_id.upper():
                    qr_item.text = pa_quest_download.item[j].text

            #add to response list:
            response_items.append(qr_item)
    
    pa_qr.item = response_items
    pa_questionnaire_responses.append(pa_qr)
    continue


In [401]:
#LOAD MENTAL HEALTH DATA INTO PY CLIENT MODEL:

mh_questionnaire_responses = []
looprange = range(0, len(dpq_data))
for i in looprange:
    
    #assign variables to questionnaire response resource elements:
    #create py client questionnaire response
    mh_qr = qr.QuestionnaireResponse({'status': 'completed', 'questionnaire': f'Questionnaire/{mh_quest_id}', 
                                      "identifier": {
                                            "system": "https://wwwn.cdc.gov/Nchs/Nhanes/2017-2018/DPQ_J.htm",
                                            "value": f'{str(int(dpq_data.loc[i][1]))}'
                                        }})

    
    response_items = []

    #create questionnaire response items for each question, and item answers for each corresponding answer
    for question_id in mh_csv_questionids:
        qr_item = qr.QuestionnaireResponseItem()
        qr_item.linkId = question_id.upper()
        #answerItem:
        #check if Answer for this item is available, if not Skip, we do not add answerItem
        if str(dpq_data.loc[i][question_id]) == 'nan':
            #skipped!
            continue 
            
        qr_item_answer = qr.QuestionnaireResponseItemAnswer()
        qr_item_answer.valueCoding = c.Coding()
        qr_item_answer.valueCoding.code = str(str(dpq_data.loc[i][question_id]))
        
        qr_item.answer = [qr_item_answer]
        
        #add the question text to `qr_item.question`
        for j in range(0, len(mh_quest_download.item)):
            find_id = mh_quest_download.item[j].linkId
            if find_id == question_id.upper():
                qr_item.text = mh_quest_download.item[j].text
        
        
        #add to response list:
        response_items.append(qr_item)
        
    
    
    mh_qr.item = response_items
    mh_questionnaire_responses.append(mh_qr)
    continue
    

In [402]:
#save each physical activity questionnaire response to a json file on local disk:
for i in range(0, len(pa_questionnaire_responses)):
    savetofile(pa_questionnaire_responses[i].as_json(), 'pa_questionnaire_responses/qr_'+str(int(pa_data.loc[i][1]))+'.json')


In [403]:
#save each mental health questionnaire response to a json file on local disk:
for i in range(0, len(mh_questionnaire_responses)):
    savetofile(mh_questionnaire_responses[i].as_json(), 'mh_questionnaire_responses/qr_'+str(int(dpq_data.loc[i][1]))+'.json')


In [404]:
# Upload physical activity questionnaire response to Server and print returned ID
pa_response_load = qr.QuestionnaireResponse.create(pa_questionnaire_responses[0], smart)
pa_response_id = pa_response_load["id"]
print(pa_response_id)


661151


In [405]:
# Upload mental health questionnaire response to Server and print returned ID
mh_response_load = qr.QuestionnaireResponse.create(mh_questionnaire_responses[0], smart)
mh_response_id = mh_response_load["id"]
print(mh_response_id)


661152


In [406]:
#print example questionnaire responses
print(pa_questionnaire_responses[0].as_json())
print("")
print(mh_questionnaire_responses[0].as_json())


{'identifier': {'system': 'https://wwwn.cdc.gov/Nchs/Nhanes/2017-2018/PAQ_J.htm', 'value': '93705'}, 'item': [{'answer': [{'valueInteger': 60}], 'linkId': 'PAD675', 'text': 'How much time {do you/does SP} spend doing moderate-intensity sports, fitness or recreational activities on a typical day?'}, {'answer': [{'valueInteger': 300}], 'linkId': 'PAD680', 'text': 'The following question is about sitting at school, at home, getting to and from places, or with friends including time spent sitting at a desk, traveling in a car or bus, reading, playing cards, watching television, or using a computer. Do not include time spent sleeping. How much time {do you/does SP} usually spend sitting on a typical day?'}, {'answer': [{'valueCoding': {'code': '2.0'}}], 'linkId': 'PAQ605', 'text': "Next I am going to ask you about the time {you spend/SP spends} doing different types of physical activity in a typical week. Think first about the time {you spend/he spends/she spends} doing work. Think of work 

In [412]:
#pa mh questionnaires as ndjson for testing:
with open('questionnaires.ndjson', 'a') as all_flat:
    json.dump(pa_questionnaire.as_json(), all_flat)
    all_flat.write("\n")
    json.dump(mh_questionnaire.as_json(), all_flat)
    all_flat.write("\n")
    

In [413]:
#pa responses, mh responses as ndjson for testing:
with open('qr_responses.ndjson', 'a') as all_flat:
    for i in mh_questionnaire_responses:
        json.dump(i.as_json(), all_flat)
        all_flat.write("\n")
    for i in pa_questionnaire_responses:
        json.dump(i.as_json(), all_flat)
        all_flat.write("\n")

In [407]:
#pa responses, mh responses, and questionnaires as ndjson for testing:
with open('all_data_flat.ndjson', 'a') as all_flat:
    json.dump(pa_questionnaire.as_json(), all_flat)
    all_flat.write("\n")
    json.dump(mh_questionnaire.as_json(), all_flat)
    all_flat.write("\n")
    for i in mh_questionnaire_responses:
        json.dump(i.as_json(), all_flat)
        all_flat.write("\n")
    for i in pa_questionnaire_responses:
        json.dump(i.as_json(), all_flat)
        all_flat.write("\n")
    

In [408]:
#pa responses, mh responses, and questionnaires as txt for testing:
with open('all_data_flat.txt', 'a') as all_flat:
    json.dump(pa_questionnaire.as_json(), all_flat)
    all_flat.write("\n")
    json.dump(mh_questionnaire.as_json(), all_flat)
    all_flat.write("\n")
    for i in mh_questionnaire_responses:
        json.dump(i.as_json(), all_flat)
        all_flat.write("\n")
    for i in pa_questionnaire_responses:
        json.dump(i.as_json(), all_flat)
        all_flat.write("\n")
    

In [409]:
#import as ndJSON for validation

import ndjson

# Streaming lines from ndjson file:
with open('all_data_flat.ndjson') as f:
    data = ndjson.load(f)
    
text = ndjson.dumps(data)
text[1:4500]

'"contained": [{"id": "Physical-Activity-valueset-1", "compose": {"include": [{"concept": [{"code": "1", "display": "Yes"}, {"code": "2", "display": "No"}, {"code": "7", "display": "Refused"}, {"code": "9", "display": "Don\'t Know"}], "system": "http://hl7.org/fhir/sid/icd-10"}]}, "description": "Physical-Activity Questionnaire Response Value Set 1", "name": "Physical-Activity ValueSet 1", "status": "active", "resourceType": "ValueSet"}], "code": [{"code": "PAQ_J", "display": "NHANES Physical Activity Questionnaire", "system": "https://wwwn.cdc.gov/Nchs/Nhanes/2017-2018/PAQ_J.htm"}], "date": "2020-02-01T00:00:00.000Z", "item": [{"answerValueSet": "#Physical-Activity-valueset-1", "linkId": "PAQ605", "text": "Next I am going to ask you about the time {you spend/SP spends} doing different types of physical activity in a typical week. Think first about the time {you spend/he spends/she spends} doing work. Think of work as the things that {you have/he has/she has} to do such as paid or unpa