In [0]:
import urllib.request
import json

# Notebook parameters, one row per widget: (widget name, default value, label).
_WIDGET_SPECS = (
    ('catalog', 'rearcquest', 'Catalog'),
    ('schema', 'raw', 'Schema'),
    ('volume', 'population', 'Volume'),
    ('sourceUrl', 'https://honolulu-api.datausa.io/tesseract/data.jsonrecords?cube=acs_yg_total_population_1&drilldowns=Year%2CNation&locale=en&measures=Population', 'Source URL'),
)

# Register every text widget first, in declaration order.
for _name, _default, _label in _WIDGET_SPECS:
    dbutils.widgets.text(_name, _default, _label)

# Read the current widget values back, in the same order as the spec table.
Catalog, Schema, Volume, SourceUrl = (
    dbutils.widgets.get(_spec[0]) for _spec in _WIDGET_SPECS
)

# Configuration: the output file is named after the target volume.
TargetFilename = f"{Volume}_data.json"

In [0]:
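# Note: the CREATE ... IF NOT EXISTS statements in the next cell may create a schema or volume that was not intended (for example, from a mistyped widget value). This is considered acceptable given the minimal use case.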


In [0]:


# Ingest step: ensure the target schema and volume exist, download the JSON
# payload from SourceUrl, and write it to the volume under TargetFilename.
# Any failure is re-raised as a generic Exception whose message is prefixed
# with "Error: ".
try:
    # NOTE(review): Catalog/Schema/Volume are widget values interpolated unquoted
    # into the SQL below; this is only safe for trusted notebook parameters.

    # Create the Schema if it doesn't exist
    spark.sql(f"CREATE SCHEMA IF NOT EXISTS {Catalog}.{Schema}")
    print(f"✓ Schema {Catalog}.{Schema} ready")

    # Create the Volume if it doesn't exist
    spark.sql(f"CREATE VOLUME IF NOT EXISTS {Catalog}.{Schema}.{Volume}")
    print(f"✓ Volume {Catalog}.{Schema}.{Volume} ready")

    # Fetch JSON data from API
    print("Fetching data from API...")
    # Bound the request so a stalled endpoint cannot hang the notebook forever.
    with urllib.request.urlopen(SourceUrl, timeout=60) as response:
        dataJson = json.loads(response.read())

    print(f"✓ Fetched {len(dataJson.get('data', []))} records")

    # Write JSON to Volume.
    # The filename variable is `TargetFilename` (capital T); the previous
    # lowercase `targetFilename` was never defined and raised NameError here.
    volumePath = f"/Volumes/{Catalog}/{Schema}/{Volume}/{TargetFilename}"
    jsonContent = json.dumps(dataJson, indent=2)
    dbutils.fs.put(volumePath, jsonContent, overwrite=True)

    print(f"✓ Data saved to: {volumePath}")
    print(f"\nYou can access the file at: {volumePath}")

except Exception as e:
    # Todo - better error handling
    # Chain explicitly so the original exception stays attached as the cause.
    raise Exception(f"Error: {str(e)}") from e