In [1]:
import json
import pandas as pd
import ncl.sqlsnippets as snips
import os

from os import getenv
from dotenv import load_dotenv

In [2]:
#Import runtime settings from .env file
def import_settings():
    """Load the .env file and return the SQL settings this notebook uses.

    ``load_dotenv(override=True)`` is called first; ``override=True`` asks
    python-dotenv to let values from the .env file take precedence over
    variables already present in the environment.

    Returns:
        dict: The keys "SQL_ADDRESS", "SQL_DATABASE", "SQL_SCHEMA" and
        "SQL_PROC_TABLE", each mapped to the value of the environment
        variable of the same name (None when the variable is not set).
    """
    load_dotenv(override=True)

    #Each setting is stored under the name of its environment variable
    setting_names = (
        "SQL_ADDRESS",
        "SQL_DATABASE",
        "SQL_SCHEMA",
        "SQL_PROC_TABLE",
    )
    return {setting: getenv(setting) for setting in setting_names}

In [3]:
#Load a given procedure
def find_procedure(id, path_procedure="./data/procedures"):
    """Return the parsed JSON of the procedure file whose "id" equals ``id``.

    Every ``.json`` file below ``path_procedure`` is parsed in turn until one
    is found whose top-level JSON object carries a matching "id" value.
    Files that cannot be decoded are reported with ``print`` and skipped.

    Args:
        id: The procedure id to look for (compared with ``==``).
        path_procedure: Root directory that is searched recursively.
            Defaults to "./data/procedures", resolved against the current
            working directory.

    Returns:
        dict: The decoded contents of the first matching file.

    Raises:
        Exception: If no file below ``path_procedure`` has the requested id.
    """
    for root, dirs, files in os.walk(path_procedure):
        #Sorting dirs in place fixes the order os.walk descends in, so the
        #"first" match no longer depends on the filesystem's listing order
        dirs.sort()

        for filename in sorted(files):
            if not filename.endswith(".json"):
                continue

            file_path = os.path.join(root, filename)

            try:
                #Explicit UTF-8 so decoding does not depend on the platform's
                #default encoding
                with open(file_path, "r", encoding="utf-8") as json_file:
                    data = json.load(json_file)
            except (json.JSONDecodeError, UnicodeDecodeError):
                #Best effort: report the unreadable file and keep searching
                print(f"Error decoding JSON in {file_path}")
                continue

            #Only a top-level JSON object can describe a procedure
            if isinstance(data, dict) and "id" in data and data["id"] == id:
                #Found a match, no need to continue searching
                return data

    raise Exception(f"Unable to find id {id}")

#Get the ids of all procedures (for when procedure is not set in .env)
def all_procedures(path_procedure="./data/procedures"):
    """Collect the "id" of every procedure JSON file below a directory.

    Every ``.json`` file below ``path_procedure`` is parsed; when its
    top-level JSON value is an object with an "id" key, that id is collected.
    Files that cannot be decoded are reported with ``print`` and skipped.

    Args:
        path_procedure: Root directory that is searched recursively.
            Defaults to "./data/procedures", resolved against the current
            working directory.

    Returns:
        list: The collected ids. Within a directory, files are visited in
        sorted name order before its sub-directories (also in sorted order),
        so the result is reproducible between runs.
    """
    all_ids = []

    for root, dirs, files in os.walk(path_procedure):
        #Sorting dirs in place fixes the order os.walk descends in
        dirs.sort()

        for filename in sorted(files):
            if not filename.endswith(".json"):
                continue

            file_path = os.path.join(root, filename)

            try:
                #Explicit UTF-8 so decoding does not depend on the platform's
                #default encoding
                with open(file_path, "r", encoding="utf-8") as json_file:
                    data = json.load(json_file)
            except (json.JSONDecodeError, UnicodeDecodeError):
                #Best effort: report the unreadable file and carry on
                print(f"Error decoding JSON in {file_path}")
                continue

            #Get the id value of each procedure
            if isinstance(data, dict) and "id" in data:
                all_ids.append(data["id"])

    return all_ids

In [4]:
#Get all procedure ids
procedures = all_procedures()

#Load the JSON for every procedure. A comprehension keeps its loop variable
#local, so the builtin id() is not overwritten in the notebook namespace.
#NOTE(review): find_procedure() walks the procedure directory on every call,
#so this step re-reads files once per id.
procedure_records = [find_procedure(procedure_id) for procedure_id in procedures]

#Fields read with record[field]: a procedure lacking one raises KeyError
required_fields = ["id", "name", "speciality_area", "HVLC", "priority"]

#Build one list of values per field
data_dict = {
    field: [record[field] for record in procedure_records]
    for field in required_fields
}

#"benchmark" is optional: procedures without one contribute None
data_dict["benchmark"] = [record.get("benchmark") for record in procedure_records]

#Create the DataFrame from the dictionary and order it by procedure id
df = pd.DataFrame(data_dict).sort_values(by=["id"], ascending=[True])

In [5]:
#Read the SQL settings from the environment / .env file
settings = import_settings()

#Connect using the configured address and database
engine = snips.connect(
    settings["SQL_ADDRESS"],
    settings["SQL_DATABASE"],
)

#Upload the procedure DataFrame to the configured table and schema.
#NOTE(review): replace=True presumably overwrites the existing table and
#chunks=300 presumably sets the upload batch size - confirm against ncl.sqlsnippets.
snips.upload_to_sql(
    df,
    engine,
    settings["SQL_PROC_TABLE"],
    settings["SQL_SCHEMA"],
    replace=True,
    chunks=300,
)