In [0]:
# Must be re-run whenever the cluster restarts; because the notebooks run in
# order, that should only be needed in this first notebook.
tiers = ['bronze', 'silver', 'gold']

# Map each medallion tier name to the root URI of its ADLS container.
adls_path = dict(
    (tier, f"abfss://{tier}@mkrishna.dfs.core.windows.net//")
    for tier in tiers
)

# Convenience aliases, one per tier, in the same order as `tiers`.
bronze_path, silver_path, gold_path = (adls_path[name] for name in tiers)

# List every container root; presumably an access smoke test (verify intent).
# Only the final listing is the cell's displayed value.
dbutils.fs.ls(bronze_path)
dbutils.fs.ls(silver_path)
dbutils.fs.ls(gold_path)

In [0]:
import requests
import json
from datetime import date, timedelta


In [0]:
# Fetch window: from yesterday to today (one calendar day).
# Read the clock once so both bounds derive from the same "today"; calling
# date.today() twice could straddle midnight and produce a two-day window.
# NOTE(review): date.today() uses the driver's local timezone, while the USGS
# API assumes UTC for zone-less timestamps -- confirm the cluster runs in UTC.
run_date = date.today()
start_date = run_date - timedelta(days=1)
end_date = run_date

In [0]:
# Build the USGS earthquake query URL for the window between start_date and
# end_date (both computed earlier in this notebook), requesting GeoJSON output.
url = f"https://earthquake.usgs.gov/fdsnws/event/1/query?format=geojson&starttime={start_date}&endtime={end_date}"
try:
    # GET the events. The timeout stops a stalled connection from hanging the
    # job forever; requests has no default timeout.
    response = requests.get(url, timeout=60)
    response.raise_for_status()
    # Keep only the list of event features from the GeoJSON payload.
    data = response.json().get('features', [])
    if not data:
        # An empty window is not an error: nothing is written, the run continues.
        print("No data returned for the specified date range.")
    else:
        # bronze_path may already end in '/', so strip trailing slashes before
        # joining to avoid a '///' run in the resulting URI.
        file_path = f"{bronze_path.rstrip('/')}/{start_date}_earthquake_data.json"
        json_data = json.dumps(data, indent=4)
        dbutils.fs.put(file_path, json_data, overwrite=True)
        print(f"Data saved to {file_path}")
except requests.exceptions.RequestException as e:
    print(f"Error fetching data from api: {e}")
    # Re-raise so the task is marked failed instead of silently succeeding
    # with no bronze file for downstream tasks to read.
    raise

In [0]:
# Bundle the fetch window (as ISO-8601 date strings) and the three container
# roots, then publish the bundle as the "bronze_output" task value.
output_data = dict(
    start_date=start_date.isoformat(),
    end_date=end_date.isoformat(),
    bronze_path=bronze_path,
    silver_path=silver_path,
    gold_path=gold_path,
)
dbutils.jobs.taskValues.set(key="bronze_output", value=output_data)

