# Create patients.ipynb
# Reads the patients CSV, nests each patient's readings and conditions into lists of records, and writes the result as JSON so that it can be imported in JSON format

In [2]:
import pandas as pd
import numpy as np
import csv
import itertools
import json
import pprint
# Columns selected as-is for the top-level fields of every patient record.
idVars = ['id', 'age', 'ethnicity', 'gender']

# Measurement columns: every non-missing cell is unpivoted into a
# {'readingName': <column>, 'value': <cell>} entry.
readingsVars = [
    'durationOfT2DM', 'egfr', 'haemoglobin', 'hba1c',
    'hdlCholesterol', 'heartRate', 'height', 'ldlCholesterol',
    'systolicBloodPressure', 'wbc', 'weight',
]

# Condition columns: every non-missing cell is unpivoted into a
# {'conditionName': <column>, 'startDate': <cell>} entry.
conditionsVars = [
    'af', 'albuminuria', 'currentSmoker',
    'history_Amputation', 'history_BlindnessOneEye', 'history_CHF',
    'history_IHD', 'history_MI', 'history_Stroke',
    'pvd',
]
def _nest_records(frame, value_vars, var_name, value_name, nested_name, id_var='id'):
    """Unpivot ``value_vars`` and collect them into one list of dicts per id.

    Parameters
    ----------
    frame : pandas.DataFrame
        Wide table containing ``id_var`` and every column in ``value_vars``.
    value_vars : list of str
        Columns to unpivot.
    var_name, value_name : str
        Keys used in each emitted dict for the source column name and the
        cell value respectively.
    nested_name : str
        Name of the returned column holding the per-id list of dicts.
    id_var : str, default 'id'
        Column whose value decides which list a record is appended to.

    Returns
    -------
    pandas.DataFrame
        Exactly two columns, ``id_var`` and ``nested_name``. Ids whose
        ``value_vars`` cells are all missing do not appear; if that is true
        for every id the frame is empty but still has both columns, so a
        later merge always yields a ``nested_name`` column.
    """
    tall = pd.melt(
        frame,
        id_vars=id_var,
        value_vars=value_vars,
        var_name=var_name,
        value_name=value_name,
    ).dropna()
    records = tall[[var_name, value_name]].to_dict('records')

    # melt() orders rows by variable, then by source row, so appending in row
    # order keeps each id's records in `value_vars` order.
    by_id = {}
    for key, record in zip(tall[id_var], records):
        by_id.setdefault(key, []).append(record)

    return pd.DataFrame({
        # Reuse the source dtype so the merge keys match even when empty.
        id_var: pd.Series(list(by_id), dtype=frame[id_var].dtype),
        # dtype=object keeps each list as a single cell.
        nested_name: pd.Series(list(by_id.values()), dtype=object),
    })


# No parse_dates is passed, so condition start dates stay exactly as written
# in the CSV and are serialised unchanged.
file = pd.read_csv('./patients.csv')
core = file[idVars]
cond = _nest_records(file, conditionsVars, 'conditionName', 'startDate', 'conditions')
read = _nest_records(file, readingsVars, 'readingName', 'value', 'readings')

# Left joins keep every row of `core`; ids without any condition/reading get a
# missing value, which to_json() writes as null.
output = (
    core
    .merge(cond, how='left', on='id')
    .merge(read, how='left', on='id')
    .to_json(orient='records')
)
pprint.pprint(json.loads(output)[0: 2])

[{'age': 'youg',
  'conditions': None,
  'ethnicity': 'a',
  'gender': 'm',
  'id': 1,
  'readings': [{'readingName': 'durationOfT2DM', 'value': 'short'},
               {'readingName': 'egfr', 'value': 'high'},
               {'readingName': 'haemoglobin', 'value': 'low'},
               {'readingName': 'hba1c', 'value': 'high'},
               {'readingName': 'hdlCholesterol', 'value': 'high'},
               {'readingName': 'heartRate', 'value': 'medium'},
               {'readingName': 'height', 'value': 180},
               {'readingName': 'ldlCholesterol', 'value': 'high'},
               {'readingName': 'systolicBloodPressure', 'value': 'low'},
               {'readingName': 'wbc', 'value': 'high'},
               {'readingName': 'weight', 'value': 40}]},
 {'age': 'old',
  'conditions': [{'conditionName': 'albuminuria', 'startDate': '01/04/2018'},
                 {'conditionName': 'currentSmoker', 'startDate': '01/04/2018'},
                 {'conditionName': 'history_Blindness

In [3]:
# Decode the JSON text produced above into plain Python lists and dicts.
json_output = json.loads(output)
# Preview the first two records only.
json_output[:2]


[{'id': 1,
  'age': 'youg',
  'ethnicity': 'a',
  'gender': 'm',
  'conditions': None,
  'readings': [{'readingName': 'durationOfT2DM', 'value': 'short'},
   {'readingName': 'egfr', 'value': 'high'},
   {'readingName': 'haemoglobin', 'value': 'low'},
   {'readingName': 'hba1c', 'value': 'high'},
   {'readingName': 'hdlCholesterol', 'value': 'high'},
   {'readingName': 'heartRate', 'value': 'medium'},
   {'readingName': 'height', 'value': 180},
   {'readingName': 'ldlCholesterol', 'value': 'high'},
   {'readingName': 'systolicBloodPressure', 'value': 'low'},
   {'readingName': 'wbc', 'value': 'high'},
   {'readingName': 'weight', 'value': 40}]},
 {'id': 2,
  'age': 'old',
  'ethnicity': 'a',
  'gender': 'f',
  'conditions': [{'conditionName': 'albuminuria', 'startDate': '01/04/2018'},
   {'conditionName': 'currentSmoker', 'startDate': '01/04/2018'},
   {'conditionName': 'history_BlindnessOneEye', 'startDate': '01/04/2018'},
   {'conditionName': 'history_CHF', 'startDate': '01/04/2018'},

In [4]:
# Nothing to configure on the json module here: `json.maxItemsComputed` is the
# name of a VS Code editor setting, not an option of Python's json module, so
# assigning it only attached an unused attribute to the module.

In [6]:
# Write the decoded records to disk; indent=4 pretty-prints the file so it is
# easier to read.
with open('./patient_infos.json', 'w') as out_handle:
    json.dump(json_output, out_handle, indent=4)