Synthea runs on Java Development Kit (JDK) 17, so use a cluster running Databricks Runtime (DBR) 16.0 or above, where JDK 17 is the default. Run the following cell to check that JDK 17 is installed:

In [0]:
%sh
# Print the installed Java version so it can be checked against the JDK 17 requirement.
java -version

In [0]:
# Notebook parameters, declared as text widgets with a default value and a label.
dbutils.widgets.text(name="catalog_name", defaultValue="mcutini", label="Catalog Name")
dbutils.widgets.text(name="schema_name", defaultValue="andrea", label="Schema Name")
dbutils.widgets.text(name="destination", defaultValue="./output/", label="Base Directory")
# volume_path is deliberately not built in this cell: catalog_name and schema_name
# are only assigned once the widget values are read back with dbutils.widgets.get,
# so referencing them here raises NameError on a fresh run.

In [0]:
# Read the current widget values into Python variables.
catalog_name = dbutils.widgets.get("catalog_name")
schema_name = dbutils.widgets.get("schema_name")
destination = dbutils.widgets.get("destination")

# Directory of the synthetic_files_raw volume inside the chosen catalog and schema.
volume_path = f"/Volumes/{catalog_name}/{schema_name}/synthetic_files_raw/"

In [0]:
# Check whether the Synthea configuration file can be listed in the volume.
# The outcome is stored as the string "True" or "False" (not a boolean).
try:
    # Listing the file is used purely as an existence probe; the return value is ignored.
    dbutils.fs.ls(f"{volume_path}synthea_config.txt")
    result = "True"  # the listing succeeded
except Exception:
    # Catch Exception rather than using a bare except, so KeyboardInterrupt and
    # SystemExit still propagate instead of being reported as "file missing".
    result = "False"  # the listing raised

result  # Show the outcome as the cell output

In [0]:
# Store the check outcome as a job task value under the key "result".
dbutils.jobs.taskValues.set(key="result", value=result)

In [0]:
%sql
-- Create the catalog if it is missing, then make it the current catalog.
CREATE CATALOG IF NOT EXISTS ${catalog_name};
USE CATALOG ${catalog_name};

In [0]:
%sql
-- Create the schema if it is missing, then make it the current schema.
CREATE SCHEMA IF NOT EXISTS ${schema_name};
USE SCHEMA ${schema_name};

In [0]:
%sql
-- Create the volume that holds the Synthea JAR, its configuration and its output.
CREATE VOLUME IF NOT EXISTS synthetic_files_raw;

In [0]:
# Retrieve the latest Synthea release
from urllib.request import urlretrieve

# Download the "master-branch-latest" build of the JAR directly into the volume.
urlretrieve(
    url="https://github.com/synthetichealth/synthea/releases/download/master-branch-latest/synthea-with-dependencies.jar",
    filename=f"{volume_path}synthea-with-dependencies.jar",
)

In [0]:
# Execute the Synthea JAR one time to initialize
import subprocess  # imported here so this cell does not rely on a later cell

command = f"""
cd {volume_path}
java -jar synthea-with-dependencies.jar
"""

# Building the command string does not run anything by itself; hand it to the
# shell so the JAR is actually executed once from inside the volume directory.
init_result = subprocess.run(
    command,
    shell=True,
    stdout=subprocess.PIPE,
    stderr=subprocess.PIPE,
    text=True,
)

# A non-zero exit code is not raised as an exception; show it as the cell output.
init_result.returncode

In [0]:
# Create a Synthea configuration file and write it to the volume
# The exporter base directory is taken from the "destination" widget value.
config_file_text = (
f"""# synthea streaming simulation configuration file
exporter.ccda.export = false
exporter.fhir.export = false
exporter.csv.export = true
exporter.csv.folder_per_run = true

exporter.baseDirectory = {destination}
generate.append_numbers_to_person_names = false
generate.default_population = 50
exporter.clinical_note.export = true
""")

filename = f"{volume_path}synthea_config.txt"

# The with-block closes the file when it exits, so no explicit close() is needed.
with open(filename, "w") as f:
    f.write(config_file_text)

In [0]:
def data_generator(volume_path: str = volume_path, config_file_path: str = f"{volume_path}synthea_config.txt", additional_options: str = "", verbose: bool = False):
  """Run the Synthea JAR from ``volume_path`` and return the finished process.

  Args:
    volume_path: Directory the shell changes into before launching Java; the
      command expects ``synthea-with-dependencies.jar`` to be there.
    config_file_path: Path passed to the JAR's ``-c`` option. Its default is
      computed once, from the notebook-level ``volume_path`` at definition
      time, and does not follow a different ``volume_path`` argument.
    additional_options: Extra command-line text appended verbatim after the
      configuration path (the notebook passes "Utah").
    verbose: When truthy, print the shell command before running it.

  Returns:
    The ``subprocess.CompletedProcess`` with stdout and stderr captured as
    text. A non-zero exit code does not raise; inspect ``returncode``.
  """
  # Imported locally so the function works regardless of which cell imports
  # subprocess, and so the quoting helper is in scope.
  import shlex
  import subprocess

  # Quote the two paths so spaces or shell metacharacters in them cannot split
  # or alter the command; plain paths are left unchanged by shlex.quote.
  # additional_options is intentionally not quoted: it may carry several tokens.
  command = (
  f"""cd {shlex.quote(volume_path)}
  java -jar synthea-with-dependencies.jar -c {shlex.quote(config_file_path)} {additional_options}
  """)
  if verbose:
    print(command)
  # With shell=True the command is passed as a single string, as the subprocess
  # documentation recommends, rather than wrapped in a one-element list.
  result = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True, shell=True)
  return result

In [0]:
import subprocess

# Run the generator against the volume, passing "Utah" as the extra Synthea
# option and printing the shell command before it executes.
run_results = data_generator(
    volume_path=volume_path,
    config_file_path=f"{volume_path}synthea_config.txt",
    additional_options="Utah",
    verbose=True,
)