In [1]:
# import statements
import json
import pandas as pd

In [41]:
# Read the USDM JSON for the LZZT study.
# The encoding is pinned to UTF-8 (the standard JSON text encoding) so the
# result does not depend on the platform's default locale encoding.
in_file = "data/usdm.json"
try:
    with open(in_file, 'r', encoding='utf-8') as f:
        usdm = json.load(f)
except FileNotFoundError:
    print('The file {} was not found...'.format(in_file))
    # Re-raise instead of continuing: the following cells index into `usdm`,
    # so swallowing the error would only resurface as a NameError further
    # down, or silently reuse a stale `usdm` left in the kernel.
    raise

In [42]:
# Pull the list of biomedical concepts out of the first study version
first_version = usdm['study']['versions'][0]
bcs = first_version['biomedicalConcepts']

In [60]:
# Build the biomedical-concept dataframe, keeping only the listed fields
bc_columns = ['name','label','synonyms','code','properties']
bcsDF = pd.DataFrame(data=bcs, columns=bc_columns)
bcsDF

Unnamed: 0,name,label,synonyms,code,properties
0,Sex,Sex,[],"{'id': 'AliasCode_150', 'extensionAttributes':...","[{'id': 'BiomedicalConceptProperty_116', 'exte..."
1,Race,Race,[Racial Group],"{'id': 'AliasCode_152', 'extensionAttributes':...","[{'id': 'BiomedicalConceptProperty_117', 'exte..."
2,Temperature,Temperature,"[Temperature, Body Temperature]","{'id': 'AliasCode_157', 'extensionAttributes':...","[{'id': 'BiomedicalConceptProperty_118', 'exte..."
3,Weight,Weight,"[Weight, Body Weight]","{'id': 'AliasCode_161', 'extensionAttributes':...","[{'id': 'BiomedicalConceptProperty_122', 'exte..."
4,Height,Height,"[Height, Body Height]","{'id': 'AliasCode_165', 'extensionAttributes':...","[{'id': 'BiomedicalConceptProperty_125', 'exte..."
5,Alanine Aminotransferase Concentration in Seru...,Alanine Aminotransferase Concentration in Seru...,"[ALT, SGPT, Alanine Aminotransferase Measurement]","{'id': 'AliasCode_171', 'extensionAttributes':...","[{'id': 'BiomedicalConceptProperty_128', 'exte..."
6,Albumin Presence in Urine,Albumin Presence in Urine,"[Albumin, Albumin Measurement]","{'id': 'AliasCode_176', 'extensionAttributes':...","[{'id': 'BiomedicalConceptProperty_133', 'exte..."
7,Alkaline Phosphatase Concentration in Serum/Pl...,Alkaline Phosphatase Concentration in Serum/Pl...,"[Alkaline Phosphatase, Alkaline Phosphatase Me...","{'id': 'AliasCode_182', 'extensionAttributes':...","[{'id': 'BiomedicalConceptProperty_137', 'exte..."
8,Aspartate Aminotransferase in Serum/Plasma,Aspartate Aminotransferase in Serum/Plasma,"[AST, SGOT, Aspartate Aminotransferase Measure...","{'id': 'AliasCode_188', 'extensionAttributes':...","[{'id': 'BiomedicalConceptProperty_142', 'exte..."
9,Creatinine Concentration in Urine,Creatinine Concentration in Urine,"[Creatinine, Creatinine Measurement]","{'id': 'AliasCode_194', 'extensionAttributes':...","[{'id': 'BiomedicalConceptProperty_147', 'exte..."


In [52]:
# Wrangle the dataframe: surface each concept's standard code and decode as
# top-level columns alongside the nested `code` object.
codes = pd.json_normalize(bcsDF['code'])
codes.columns = [f'code_{col}' for col in codes.columns]

# Select the two wanted fields by name instead of concatenating every
# flattened field and dropping a hard-coded list of the rest. This keeps the
# cell idempotent (re-running it overwrites the same two columns rather than
# producing duplicate `standard_code`/`standard_decode` columns) and avoids a
# KeyError from `drop` when one of the listed fields is absent.
# `.to_numpy()` makes the assignment positional, independent of index labels.
bcsDF = bcsDF.assign(
    standard_code=codes['code_standardCode.code'].to_numpy(),
    standard_decode=codes['code_standardCode.decode'].to_numpy(),
)
bcsDF = bcsDF[['name','label','standard_code','standard_decode','code','properties','synonyms']]

In [53]:
# Preview the first five rows of the wrangled dataframe
bcsDF.head(n=5)

Unnamed: 0,name,label,standard_code,standard_decode,code,properties,synonyms
0,Sex,Sex,C28421,Sex,"{'id': 'AliasCode_150', 'extensionAttributes':...","[{'id': 'BiomedicalConceptProperty_116', 'exte...",[]
1,Race,Race,C17049,Race,"{'id': 'AliasCode_152', 'extensionAttributes':...","[{'id': 'BiomedicalConceptProperty_117', 'exte...",[Racial Group]
2,Temperature,Temperature,C174446,TEMP,"{'id': 'AliasCode_157', 'extensionAttributes':...","[{'id': 'BiomedicalConceptProperty_118', 'exte...","[Temperature, Body Temperature]"
3,Weight,Weight,C25208,WEIGHT,"{'id': 'AliasCode_161', 'extensionAttributes':...","[{'id': 'BiomedicalConceptProperty_122', 'exte...","[Weight, Body Weight]"
4,Height,Height,C25347,HEIGHT,"{'id': 'AliasCode_165', 'extensionAttributes':...","[{'id': 'BiomedicalConceptProperty_125', 'exte...","[Height, Body Height]"


In [57]:
# Flatten the nested `code` objects and prefix every flattened field with `code_`
codes = pd.json_normalize(bcsDF['code']).add_prefix('code_')

# Columns to discard: the raw `code` object plus every flattened field other
# than the standard code and its decode.
unwanted_columns = [
    'code',
    'code_id',
    'code_extensionAttributes',
    'code_standardCodeAliases',
    'code_instanceType',
    'code_standardCode.id',
    'code_standardCode.extensionAttributes',
    'code_standardCode.codeSystem',
    'code_standardCode.codeSystemVersion',
    'code_standardCode.instanceType',
]
newDF = pd.concat([bcsDF, codes], axis=1).drop(columns=unwanted_columns)
newDF.head()

Unnamed: 0,name,label,standard_code,standard_decode,properties,synonyms,code_standardCode.code,code_standardCode.decode
0,Sex,Sex,C28421,Sex,"[{'id': 'BiomedicalConceptProperty_116', 'exte...",[],C28421,Sex
1,Race,Race,C17049,Race,"[{'id': 'BiomedicalConceptProperty_117', 'exte...",[Racial Group],C17049,Race
2,Temperature,Temperature,C174446,TEMP,"[{'id': 'BiomedicalConceptProperty_118', 'exte...","[Temperature, Body Temperature]",C174446,TEMP
3,Weight,Weight,C25208,WEIGHT,"[{'id': 'BiomedicalConceptProperty_122', 'exte...","[Weight, Body Weight]",C25208,WEIGHT
4,Height,Height,C25347,HEIGHT,"[{'id': 'BiomedicalConceptProperty_125', 'exte...","[Height, Body Height]",C25347,HEIGHT


In [87]:
# Print the raw `properties` list of the first two biomedical concepts
for concept_properties in newDF['properties'][0:2]:
    print(concept_properties)

[{'id': 'BiomedicalConceptProperty_116', 'extensionAttributes': [], 'name': 'Sex', 'label': 'Sex', 'isRequired': True, 'isEnabled': True, 'datatype': 'string', 'responseCodes': [{'id': 'ResponseCode_151', 'extensionAttributes': [], 'isEnabled': True, 'code': {'id': 'Code_435', 'extensionAttributes': [], 'code': 'C20197', 'codeSystem': 'http://www.cdisc.org', 'codeSystemVersion': '2023-12-15', 'decode': 'Male', 'instanceType': 'Code'}, 'instanceType': 'ResponseCode'}, {'id': 'ResponseCode_152', 'extensionAttributes': [], 'isEnabled': True, 'code': {'id': 'Code_436', 'extensionAttributes': [], 'code': 'C16576', 'codeSystem': 'http://www.cdisc.org', 'codeSystemVersion': '2023-12-15', 'decode': 'Female', 'instanceType': 'Code'}, 'instanceType': 'ResponseCode'}], 'code': {'id': 'AliasCode_149', 'extensionAttributes': [], 'standardCode': {'id': 'Code_437', 'extensionAttributes': [], 'code': 'C28421', 'codeSystem': 'http://www.cdisc.org', 'codeSystemVersion': '2023-12-15', 'decode': 'Sex', 'i