# Analysis of NHANES Questionnaire Responses


In [1]:
#import relevant packages
import json
import pandas as pd
import numpy as np
import os

from fhirclient import client
from fhirclient import server
import fhirclient.models.questionnaire as q
import fhirclient.models.patient as p
import fhirclient.models.humanname as hn
import fhirclient.models.questionnaireresponse as qr


In [51]:
#read in JSON Questionnaire response files for physical activity:
#every file in the directory is loaded and turned into a fhirclient QuestionnaireResponse model
pa_dir = 'pa_questionnaire_responses'
pa_responses = []
for pa_name in os.listdir(pa_dir):
    pa_path = os.path.join(pa_dir, pa_name)
    #os.listdir also returns sub-directories (e.g. notebook checkpoint folders); open() fails on those
    if not os.path.isfile(pa_path):
        continue
    #JSON is UTF-8 by specification, so do not depend on the platform default encoding
    with open(pa_path, 'r', encoding='utf-8') as pa_file:
        pa_json = json.load(pa_file)
    #the model is built from the parsed dict, so the file can already be closed here
    pa_responses.append(qr.QuestionnaireResponse(pa_json))
              
#read in JSON Questionnaire response files for mental health:
#every file in the directory is loaded and turned into a fhirclient QuestionnaireResponse model
mh_dir = 'mh_questionnaire_responses'
mh_responses = []
for mh_name in os.listdir(mh_dir):
    mh_path = os.path.join(mh_dir, mh_name)
    #os.listdir also returns sub-directories (e.g. notebook checkpoint folders); open() fails on those
    if not os.path.isfile(mh_path):
        continue
    #JSON is UTF-8 by specification, so do not depend on the platform default encoding
    with open(mh_path, 'r', encoding='utf-8') as mh_file:
        mh_json = json.load(mh_file)
    #the model is built from the parsed dict, so the file can already be closed here
    mh_responses.append(qr.QuestionnaireResponse(mh_json))

In [80]:
#pull physical activity questionnaire response data from SMART model and structure as pandas data frame
#one row per questionnaire response, one column per question (named by its linkId)

#question linkIds in data frame column order
pa_link_ids = ['PAQ605', 'PAQ610', 'PAD615', 'PAQ620', 'PAQ625', 'PAD630', 'PAQ635', 'PAQ640',
               'PAD645', 'PAQ650', 'PAQ655', 'PAD660', 'PAQ665', 'PAQ670', 'PAD675', 'PAD680']
#linkId -> position inside a row (position 0 holds the response ID)
pa_positions = {link_id: pos for pos, link_id in enumerate(pa_link_ids, start=1)}

pa_arrays = []
for response in pa_responses:
    rowid = response.identifier.value
    #every question starts as the string 'null' and is overwritten when an answer is found
    row = [rowid] + ['null'] * len(pa_link_ids)
    #if the response has no items (item is None) the row simply stays all 'null'
    for answers in response.item or []:
        pos = pa_positions.get(answers.linkId)
        #skip items that are not one of the listed questions, and items without any answer
        if pos is None or not answers.answer:
            continue
        #only the first answer of an item is used
        row[pos] = answers.answer[0].valueInteger
    pa_arrays.append(row)

pa_df = pd.DataFrame(pa_arrays, columns=['ID'] + pa_link_ids)
print(pa_df)

          ID  PAQ605 PAQ610 PAD615  PAQ620 PAQ625 PAD630  PAQ635 PAQ640  \
0      96696       2   null    120       2   null   null       2   null   
1     102554       2   null   null       2   null   null       2   null   
2      99563       2   null   null       2   null   null       2   null   
3      99099       2   null   null       2   null   null       2   null   
4      99133       2   null   null       2   null   null       2   null   
...      ...     ...    ...    ...     ...    ...    ...     ...    ...   
5851   99632       2   null    180       2   null   null       2   null   
5852  100638       2   null   null       2   null   null       2   null   
5853  102605       2   null   null       2   null   null       2   null   
5854   98920       2   null    480       2   null   null       2   null   
5855   94000       2   null    480       2   null   null       2   null   

     PAD645  PAQ650 PAQ655 PAD660  PAQ665  PAQ670 PAD675 PAD680  
0      null       2   null   null

In [81]:
#pull mental health questionnaire response data from SMART model and structure as pandas data frame
#one row per questionnaire response, one column per question (named by its linkId)

#question linkIds in data frame column order
mh_link_ids = ['DPQ010', 'DPQ020', 'DPQ030', 'DPQ040', 'DPQ050',
               'DPQ060', 'DPQ070', 'DPQ080', 'DPQ090', 'DPQ100']
#linkId -> position inside a row (position 0 holds the response ID)
mh_positions = {link_id: pos for pos, link_id in enumerate(mh_link_ids, start=1)}

mh_arrays = []
for response in mh_responses:
    rowid = response.identifier.value
    #every question starts as the string 'null' and is overwritten when an answer is found
    row = [rowid] + ['null'] * len(mh_link_ids)
    #if the response has no items (item is None) the row simply stays all 'null'
    for answers in response.item or []:
        pos = mh_positions.get(answers.linkId)
        #skip items that are not one of the listed questions, and items without any answer
        if pos is None or not answers.answer:
            continue
        #only the first answer of an item is used
        row[pos] = answers.answer[0].valueInteger
    mh_arrays.append(row)

mh_df = pd.DataFrame(mh_arrays, columns=['ID'] + mh_link_ids)
print(mh_df)

          ID DPQ010 DPQ020 DPQ030 DPQ040 DPQ050 DPQ060 DPQ070 DPQ080 DPQ090  \
0      96696      1      0      0      1      0      0      0      1      0   
1     102554   null   null   null   null   null   null   null   null   null   
2      99563      1      0      0      1      0      0      0      1      0   
3      99099      1      0      0      1      0      0      0      1      0   
4      99133      0      0      0      1      0      0      0      1      0   
...      ...    ...    ...    ...    ...    ...    ...    ...    ...    ...   
5528   94450      0      0      0      1      0      0      0      1      0   
5529   99632      1      0      0      1      0      0      0      1      0   
5530  100638      0      0      0      1      0      0      0      1      0   
5531  102605      0      0      0      1      0      0      0      1      0   
5532   98920      0      0      0      1      0      0      0      1      0   

     DPQ100  
0         0  
1      null  
2        

In [5]:
#create dictionary first - or look up items by link id

# each row a patient's questionnaire response, and each column being a response to a question item

#first convert to python list

#array of list items with just text

#initialize pandas to create a dataframe out of it

#link id will be column header

#answer = answer - but we skipped over null entries
