In [1]:
import tarfile
import os
import requests
import glob
import json
import pandas as pd
import numpy as np
from functools import reduce
from IPython.display import HTML

def extract_tar_gz(tar_gz_file, extract_path):
    """Extract a gzip-compressed tar archive into ``extract_path``.

    Every member name is checked before anything is written: an entry whose
    resolved location falls outside ``extract_path`` (for example ``../x`` or
    an absolute path) aborts the whole extraction, so a crafted archive cannot
    overwrite files elsewhere on disk.

    Args:
        tar_gz_file: Path to the ``.tar.gz`` / ``.tgz`` archive.
        extract_path: Directory the archive contents are written to.

    Returns:
        None. The outcome is printed; failures are reported on stdout rather
        than raised, so a loop over several archives keeps going.
    """
    try:
        destination = os.path.realpath(extract_path)
        with tarfile.open(tar_gz_file, 'r:gz') as tar:
            members = tar.getmembers()
            for member in members:
                target = os.path.realpath(os.path.join(destination, member.name))
                # commonpath() equals the destination only when target lies inside it.
                if os.path.commonpath([destination, target]) != destination:
                    raise ValueError(f"unsafe member path in archive: {member.name!r}")
            if hasattr(tarfile, 'data_filter'):
                # Interpreters that ship extraction filters: also let the
                # stdlib reject links and special files that point outside.
                tar.extractall(path=extract_path, members=members, filter='data')
            else:
                tar.extractall(path=extract_path, members=members)
        print("Extraction successful!")
    except Exception as e:
        print(f"Extraction failed: {e}")

# Folder handed to find_tgz_packages() when looking for .tgz package archives.
directory = './packages'
# Prefix under which each archive is unpacked. The per-archive folder name is
# appended by plain string concatenation, so the trailing slash is required.
extract_package_path = './extracted_packages/'

In [2]:
def find_tgz_packages(directory):
    """Return the ``.tgz`` archives found directly inside ``directory``.

    Args:
        directory: Folder to search (not recursive).

    Returns:
        Sorted list of matching paths, each prefixed with ``directory``;
        an empty list when nothing matches.
    """
    # Build the pattern from the argument (a hard-coded './packages' would
    # ignore it); escape it so glob metacharacters in the folder name are literal.
    pattern = os.path.join(glob.escape(directory), '*.tgz')
    # glob order is filesystem-dependent; sort for reproducible runs.
    return sorted(glob.glob(pattern))

In [3]:
tgz_packages = find_tgz_packages(directory)

# Unpack every archive into its own folder under extract_package_path; the
# folder is named after the archive file with its final extension removed.
for tgz_package in tgz_packages:
    extract_path = f"{extract_package_path}{os.path.splitext(os.path.basename(tgz_package))[0]}"
    extract_tar_gz(tgz_package, extract_path)

Extraction successful!
Extraction successful!
Extraction successful!
Extraction successful!


In [4]:
# Open each file.
def openJSONFile(path, warnings):
    """Load the JSON document stored at ``path``.

    Args:
        path: Location of the JSON file.
        warnings: Caller-owned collection; it is handed back untouched.

    Returns:
        Tuple ``(contents, warnings)``. ``contents`` is the parsed JSON, or
        ``{}`` when the file cannot be read or parsed (the problem is printed
        instead of raised).
    """
    try:
        # 'utf-8-sig' reads plain UTF-8 and also drops a leading byte-order
        # mark, which the json module otherwise rejects.
        with open(path, 'r', encoding="utf-8-sig") as j:
            jsonFile = json.load(j)
    except Exception as e:
        print(f"{path} The code has an error that needs to be fixed before it can be checked:{str(e)}")
        return {}, warnings
    return jsonFile, warnings

# For each file, check that 'type' is present and is not 'Extension'.
def checkIfProfile(jsonFile):
    """Return the ``type`` of a non-Extension StructureDefinition, else ``None``.

    A resource qualifies when it has a ``type`` other than ``'Extension'`` and
    its ``resourceType`` is ``'StructureDefinition'``; its ``url`` and
    ``resourceType`` are printed as a progress line.

    NOTE(review): the original note said retired assets yield an empty
    result - confirm that is still the intent.

    Args:
        jsonFile: Parsed JSON resource (a dict; ``{}`` is accepted).

    Returns:
        The value of ``jsonFile['type']`` for a qualifying profile, ``None``
        otherwise - including when ``resourceType`` or ``url`` is missing.
    """
    try:
        if 'type' in jsonFile and jsonFile['type'] != 'Extension' and jsonFile['resourceType'] == 'StructureDefinition':
            print(jsonFile['url'], jsonFile['resourceType'])
            return jsonFile['type']
    except KeyError as e:
        # .get() rather than ['url']: the missing key may be 'url' itself, and
        # indexing it again here would raise a second, uncaught KeyError.
        print(jsonFile.get('url'), e)
    return None

# Collect every element whose 'min' is not 0.
# TODO (carried over from the original note): "also add *??" - meaning unclear, confirm.
def find_attributes(json_data, attribute_dict=None, parent_keys=""):
    """Recursively map each object's ``path`` to its non-zero ``min`` value.

    Dicts and lists are walked depth-first. Whenever a dict has a ``min`` key
    whose value is not 0, ``str(min)`` is stored under that dict's own
    ``path`` value (``''`` when the dict has no ``path``). Later occurrences
    of the same path overwrite earlier ones.

    Args:
        json_data: Parsed JSON value (dict, list or scalar).
        attribute_dict: Accumulator that is updated in place; a fresh dict is
            created when omitted.
        parent_keys: Dotted/indexed location of ``json_data`` in the document.
            It is only threaded through the recursion and does not affect the
            result.

    Returns:
        ``attribute_dict`` (the same object that was passed in, if any).
    """
    if attribute_dict is None:
        attribute_dict = {}

    if isinstance(json_data, dict):
        # Look the path up first so the result does not depend on whether
        # 'path' happens to precede 'min' in the object's key order.
        element = json_data.get('path', '')
        for key, value in json_data.items():
            if key == 'path':
                continue
            if key == 'min' and value != 0:
                attribute_dict[element] = str(value)
            elif isinstance(value, (dict, list)):
                child_keys = f"{parent_keys}.{key}" if parent_keys else key
                find_attributes(value, attribute_dict, child_keys)
    elif isinstance(json_data, list):
        for index, item in enumerate(json_data):
            find_attributes(item, attribute_dict, f"{parent_keys}[{index}]")

    return attribute_dict

def checkIfSTU3(path,jsonFile):
    """Convert an STU3 resource to R4 via the remote ``$convertR4`` service.

    Args:
        path: File the resource was loaded from; its raw bytes are posted
            when a conversion is needed.
        jsonFile: Parsed resource; its ``fhirVersion`` decides whether a
            conversion is requested.

    Returns:
        The JSON body of the service response when ``fhirVersion`` starts
        with ``'3'``; otherwise ``jsonFile`` itself, unchanged (also when
        ``fhirVersion`` is absent).

    Raises:
        OSError: ``path`` cannot be read.
        requests.RequestException: the request fails or times out.
    """
    url = 'https://3cdzg7kbj4.execute-api.eu-west-2.amazonaws.com/poc/Conformance/FHIR/STU3/$convertR4'

    headers = {
        'accept': 'application/fhir+json',
        'Content-Type': 'application/fhir+json'
    }

    # Test the major version only: a substring search for '3' would also
    # match non-STU3 versions such as "4.3.0".
    fhir_version = str(jsonFile.get('fhirVersion', ''))
    if not fhir_version.startswith('3'):
        return jsonFile

    with open(path, 'rb') as stu3_data:
        stu3_file = stu3_data.read()
    # Without a timeout a stalled connection would hang the notebook forever.
    response = requests.post(url, headers=headers, data=stu3_file, timeout=60)
    return response.json()

In [6]:
# Group the non-zero 'min' values of every profile by resource type:
#   table[type] -> [{file name: {element path: min}}, ...]
table = {}
# Sorted so the profile order (and therefore the report's column order) is reproducible.
for path in sorted(glob.glob(extract_package_path+'**/package/*.json', recursive=True)):
    # basename() handles both '/' and '\\' separators; splitting on '\\' alone
    # only produced a usable name on Windows.
    name = os.path.basename(path).split('.')[0]
    warnings = []
    if 'examples' in name or name == "package":
        continue
    jsonFile, warnings = openJSONFile(path, warnings)
    Type = checkIfProfile(jsonFile)
    if Type is not None:
        jsonFile = checkIfSTU3(path,jsonFile)
        # Record this file's {element path: min} map under its resource type.
        attribute_dict = find_attributes(jsonFile)
        table.setdefault(Type, []).append({name: attribute_dict})
    if warnings:
        # Name the folder the file was extracted into (two levels above the
        # file) instead of reusing a loop variable left over from another cell.
        print(os.path.basename(os.path.dirname(os.path.dirname(path))))
        for x in warnings:
            print(x)

https://fhir.hl7.org.uk/StructureDefinition/UKCore-AllergyIntolerance StructureDefinition
https://fhir.hl7.org.uk/StructureDefinition/UKCore-Appointment StructureDefinition
https://fhir.hl7.org.uk/StructureDefinition/UKCore-Composition StructureDefinition
https://fhir.hl7.org.uk/StructureDefinition/UKCore-Condition StructureDefinition
https://fhir.hl7.org.uk/StructureDefinition/UKCore-DiagnosticReport-Lab StructureDefinition
https://fhir.hl7.org.uk/StructureDefinition/UKCore-DiagnosticReport StructureDefinition
https://fhir.hl7.org.uk/StructureDefinition/UKCore-Encounter StructureDefinition
https://fhir.hl7.org.uk/StructureDefinition/UKCore-FamilyMemberHistory StructureDefinition
https://fhir.hl7.org.uk/StructureDefinition/UKCore-HealthcareService StructureDefinition
https://fhir.hl7.org.uk/StructureDefinition/UKCore-Immunization StructureDefinition
https://fhir.hl7.org.uk/StructureDefinition/UKCore-List StructureDefinition
https://fhir.hl7.org.uk/StructureDefinition/UKCore-Location St

In [7]:
def dict_to_dataframe(data_dict):
    """Build one DataFrame per key of ``data_dict``.

    Args:
        data_dict: Mapping of key -> list of single-entry dicts shaped like
            ``{profile name: {element path: value}}``.

    Returns:
        Dict of key -> DataFrame. For list values, each profile becomes one
        column (rows are element paths), the columns are outer-joined on the
        row index and gaps are filled with ``''``. A key whose frames cannot
        be combined (for example an empty list) is printed and left out.
    """
    dfs = {}
    for key, value in data_dict.items():
        if isinstance(value, list):
            # One single-column frame per profile: index = element paths.
            list_of_dfs = [pd.DataFrame.from_dict(profile, orient='index').T for profile in value]
            try:
                dfs[key] = reduce(
                    lambda left, right: pd.merge(left, right, left_index=True, right_index=True, how='outer'),
                    list_of_dfs,
                ).fillna('')
            except Exception:
                # Exception, not a bare except: Ctrl-C / kernel interrupt must still propagate.
                print(f"{key}, {value}")
        else:
            dfs[key] = pd.DataFrame.from_dict(value[0], orient='index').T
    return dfs

# Convert each resource type's list of profile dictionaries into one DataFrame
# (result: a dict mapping each key of `table` to its DataFrame).
dataframes = dict_to_dataframe(table)

In [18]:
# index.html does not need deleting first: mode "w" truncates an existing file.
# The `with` block closes the file even if rendering one of the tables fails.
with open("index.html", "w", encoding="utf-8") as html_file:
    html_file.write('''
<html>
<head>
<meta charset="utf-8">
<style>

    h2 {
        text-align: center;
        font-family: Helvetica, Arial, sans-serif;
    }
    table {
        margin-left: auto;
        margin-right: auto;
    }
    table, th, td {
        border: 1px solid black;
        border-collapse: collapse;
    }
    th, td {
        padding: 5px;
        text-align: center;
        font-family: Helvetica, Arial, sans-serif;
        font-size: 90%;
    }
    table tbody tr:hover {
        background-color: #dddddd;
    }
    .wide {
        width: 90%;
    }

</style>
</head>
<body>
''')
    # One heading plus one table per key of `dataframes`.
    for key, df in dataframes.items():
        html_file.write(f"<h1>{key}</h1>\n")
        html_file.write(df.to_html(classes=["table-bordered", "table-striped", "table-hover"]))
    # Closes the <body> opened at the end of the header block above.
    html_file.write("</body></html>")

In [95]:
def checkIfSTU3(jsonFile):
    """Convert an already-parsed STU3 resource to R4 via ``$convertR4``.

    NOTE(review): once this cell runs, this one-argument definition replaces
    the earlier ``checkIfSTU3(path, jsonFile)``, which the table-building
    loop calls with two arguments - confirm which version should be kept.

    Args:
        jsonFile: Parsed resource; its ``fhirVersion`` decides whether a
            conversion is requested.

    Returns:
        The JSON body of the service response when ``fhirVersion`` starts
        with ``'3'``; otherwise ``jsonFile`` itself, unchanged (also when
        ``fhirVersion`` is absent).

    Raises:
        requests.RequestException: the request fails or times out.
    """
    url = 'https://3cdzg7kbj4.execute-api.eu-west-2.amazonaws.com/poc/Conformance/FHIR/STU3/$convertR4'

    headers = {
        'accept': 'application/fhir+json',
        'Content-Type': 'application/fhir+json'
    }

    # Test the major version only: a substring search for '3' would also
    # match non-STU3 versions such as "4.3.0".
    fhir_version = str(jsonFile.get('fhirVersion', ''))
    if not fhir_version.startswith('3'):
        return jsonFile

    # Serialise explicitly: a dict passed as `data` is form-encoded by
    # requests, which is not the JSON body the Content-Type header announces.
    response = requests.post(url, headers=headers, data=json.dumps(jsonFile), timeout=60)
    return response.json()


In [11]:
# Quick check of the container type held in `dataframes` (the cell displays `dict`).
type(dataframes)

dict

In [None]:
# NOTE(review): a bare name only displays the json module object - looks like a
# leftover scratch cell; consider removing it.
json