In [0]:
# spark

In [0]:
# spark.version

In [0]:
!pip install requests

In [0]:
# import requests
# requests.get("https://www.cnn.com").text

In [0]:
# spark


In [0]:
# import reverse_geocoder as rg
# coordinates = (51.5214588,-0.1729636),(9.936033, 76.259952),(37.38605,-122.08385)
# rg.search(coordinates)

## Project

In [0]:
# Resolve the ADLS Gen2 container roots (abfss:// URIs) for the medallion tiers.
# Per the original note, this is required each time the cluster is restarted and
# should only be needed in the first notebook, since the notebooks run in order.

tiers = ["bronze", "silver", "gold"]
adls_paths = dict(
    (tier, f"abfss://{tier}@yagamiazdbsa.dfs.core.windows.net/") for tier in tiers
)

# Per-tier aliases for the container roots
bronze_adls, silver_adls, gold_adls = (adls_paths[tier] for tier in tiers)

# List every container root; the last listing stays the cell's final expression
# so its result is what the cell displays.
dbutils.fs.ls(bronze_adls)
dbutils.fs.ls(silver_adls)
dbutils.fs.ls(gold_adls)

In [0]:
import requests
import json
from datetime import date, timedelta 

In [0]:
# Ingestion window: yesterday through today.
# Capture "today" once so both bounds derive from the same call; two separate
# date.today() calls could straddle midnight and produce a two-day window.
# NOTE(review): date.today() uses the cluster's local timezone, while the USGS API
# presumably interprets bare dates as UTC — confirm the cluster runs in UTC.
today = date.today()
start_date = today - timedelta(days=1)
end_date = today



In [0]:
# Show the query window: start date, then end date, separated by a space.
print(f"{start_date} {end_date}")

In [0]:
# Build the USGS FDSN event query for the window computed earlier in this notebook
# (start_date / end_date), requesting GeoJSON output.
url = f"https://earthquake.usgs.gov/fdsnws/event/1/query?format=geojson&starttime={start_date}&endtime={end_date}"

# Seconds to wait on the HTTP call; without a timeout requests.get can wait
# indefinitely on an unresponsive server.
REQUEST_TIMEOUT_SECONDS = 30

# Bind json_data up front so later cells can reference it even when nothing is
# fetched or saved, and so a value left over from an earlier run is never reused.
json_data = None

try:
    # Make the GET request to fetch data
    response = requests.get(url, timeout=REQUEST_TIMEOUT_SECONDS)

    # Check if the request was successful
    response.raise_for_status()  # Raise HTTPError for bad responses (4xx or 5xx)
    data = response.json().get('features', [])

    if not data:
        print("No data returned for the specified date range.")
    else:
        # Specify the ADLS path. bronze_adls may already end with "/", so strip it
        # before joining to avoid a "//" in the resulting path.
        file_path = f"{bronze_adls.rstrip('/')}/{start_date}_earthquake_data.json"

        # Save the JSON data (only the 'features' list is persisted)
        json_data = json.dumps(data, indent=4)
        dbutils.fs.put(file_path, json_data, overwrite=True)
        print(f"Data successfully saved to {file_path}")
except requests.exceptions.RequestException as e:
    # Connection errors, timeouts and HTTP error statuses all land here.
    # NOTE(review): the failure is only printed, so later cells still run and the
    # notebook reports success — confirm whether the job should fail instead.
    print(f"Error fetching data from API: {e}")

In [0]:
# Preview the JSON payload that was written to the bronze container.
# json_data may be unbound (or None) when the fetch cell saved nothing, so print a
# notice instead of failing with NameError in that case.
try:
    payload_preview = json_data
except NameError:
    payload_preview = None

if payload_preview is None:
    print("No earthquake data was saved in this run; nothing to preview.")
else:
    print(payload_preview)

In [0]:
# Parameters handed to the Silver notebook: the ingestion window as ISO-format
# date strings, followed by the root path of each storage tier.
window_params = {
    "start_date": start_date.isoformat(),
    "end_date": end_date.isoformat(),
}
tier_params = {
    "bronze_path": adls_paths["bronze"],
    "silver_path": adls_paths["silver"],
    "gold_path": adls_paths["gold"],
}
output_data = {**window_params, **tier_params}

# Publish the parameters as a task value under the key "bronze_output"
dbutils.jobs.taskValues.set(key="bronze_output", value=output_data)

# Final status lines for the run output
print(
    "Bronze notebook completed successfully!",
    f"Output parameters: {output_data}",
    sep="\n",
)