In [0]:
# Fetch the values published by the upstream "Bronze" and "Silver" tasks.
bronze_output = dbutils.jobs.taskValues.get(
    taskKey="Bronze",
    key="bronze_output",
)
silver_data = dbutils.jobs.taskValues.get(
    taskKey="Silver",
    key="silver_output",
)

# Each setting falls back to an empty string when the Bronze payload lacks it.
start_date, silver_adls, gold_adls = (
    bronze_output.get(setting, "")
    for setting in ("start_date", "silver_adls", "gold_adls")
)

print(f"Start Date: {start_date}, Gold ADLS: {gold_adls}")

In [0]:
from pyspark.sql.functions import when, col, udf
from pyspark.sql.types import StringType
import reverse_geocoder as rg
from datetime import date, timedelta

In [0]:
# Read the Silver parquet data, keeping only rows whose `time` is after `start_date`.
df = (
    spark.read
    .parquet(silver_data)
    .where(col('time') > start_date)
)

In [0]:
def get_country_code(lat , lon):
    """
    Retrieve the country code for a given latitude and longitude.

    The lookup is delegated to ``rg.search`` (reverse_geocoder); the value of
    the ``cc`` field of its first result is returned.

    Parameters:
    lat (float or str): Latitude of the location, in degrees (-90 to 90).
    lon (float or str): Longitude of the location, in degrees (-180 to 180).

    Returns:
    str or None: Country code of the location, or None when either coordinate
    is missing, cannot be converted to float, is NaN or out of range, or the
    lookup itself fails. This function never raises.

    Example:
    >> get_country_code(48.8588443, 2.2943506)
    'FR'
    """
    # Missing coordinates are ordinary null data, not an error: skip them
    # quietly instead of routing them through the exception handler.
    if lat is None or lon is None:
        return None

    try:
        coordinates = (float(lat), float(lon))
        latitude, longitude = coordinates

        # A nearest-neighbour lookup would still "succeed" for impossible
        # coordinates and return an unrelated country, so reject them here.
        # NaN fails both comparisons and is rejected as well.
        if not (-90.0 <= latitude <= 90.0 and -180.0 <= longitude <= 180.0):
            raise ValueError("coordinates out of range")

        # No per-row success logging: this runs once per row inside a Spark
        # UDF, where a print per record floods the executor logs.
        return rg.search(coordinates)[0].get("cc")
    except Exception as e:
        # Best-effort enrichment: log the failure and leave the value null
        # rather than failing the whole job.
        print(f"Error processing coordinates: {lat}, {lon} -> {str(e)}")
        return None


In [0]:
# Wrap the Python helper as a Spark UDF that yields a string column.
get_country_code_udf = udf(f=get_country_code, returnType=StringType())

In [0]:
# Add a `country_code` column computed from each row's latitude/longitude.
df_with_location = df.withColumn(
    colName='country_code',
    col=get_country_code_udf(col('latitude'), col('longitude')),
)

In [0]:
# Bucket the `sig` column into a coarse `sig_class` label:
#   sig < 100         -> "low"
#   100 <= sig < 500  -> "moderate"
#   anything else     -> "high"
# The "moderate" branch previously tested (sig < 100) & (sig < 500), which the
# first branch always caught first, so "moderate" was never assigned.
# NOTE(review): a null `sig` matches neither condition and therefore falls
# through to "high" -- confirm that is the intended treatment.
df_with_location_sig_class = df_with_location.withColumn(
    'sig_class',
    when(col('sig') < 100, "low")
    .when((col('sig') >= 100) & (col('sig') < 500), "moderate")
    .otherwise("high"),
)

In [0]:
# Build the output location for the enriched events under the Gold root.
# A "/" is inserted only when `gold_adls` is non-empty and does not already end
# with one, so the root and the folder name can never fuse together
# (".../goldearthquake_events_gold/"). Every other input, including an empty
# `gold_adls`, produces exactly the same path as plain concatenation.
_gold_root = gold_adls if (not gold_adls or str(gold_adls).endswith("/")) else f"{gold_adls}/"
gold_output_path = f"{_gold_root}earthquake_events_gold/"

In [0]:
# Persist the enriched events as parquet, appending to whatever already exists
# at the Gold output path.
# NOTE(review): append mode means re-running with the same `start_date` would
# add the same rows again -- confirm the job is never replayed for a window.
df_with_location_sig_class.write.parquet(gold_output_path, mode='append')