Once the data is loaded into the SQL database, we can create queries for specific visualizations.

In [None]:
%sql

-- Preview the loaded observations: date plus both snow columns, unfiltered.
SELECT
  obs.date,
  obs.snow,
  obs.snow_on_ground
FROM snow_data_db.observations AS obs;

In [None]:
%sql

-- The preview showed no interesting snow_on_ground data points before 1956,
-- so keep only the observations dated after 1955-12-31.

SELECT
  obs.date,
  obs.snow_on_ground
FROM snow_data_db.observations AS obs
WHERE obs.date > '1955-12-31';



In [None]:
# Derive calendar fields (year, month name, day of month, weekday name) for every
# observation from 1956-01-01 through 2023-12-31 and save the result as a
# separate table in the database.
# NOTE: nothing is aggregated here -- the output has one row per input row, so any
# monthly averaging must be done by the queries that read snow_data_db.snowground.

df = spark.sql(
    """
SELECT
  date,
  EXTRACT(year FROM date) AS year,
  date_format(date, 'MMMM') AS month,
  EXTRACT(day FROM date) AS day,
  date_format(date, 'EEEE') AS weekday,
  snow_on_ground
FROM snow_data_db.observations
WHERE date BETWEEN '1956-01-01' AND '2023-12-31'
ORDER BY date ASC
"""
)

# Replace the table if it already exists. "overwriteSchema" is a Delta Lake
# writer option that also allows the stored schema to be replaced on overwrite
# (assumes the table is Delta-backed -- TODO confirm).
df.write.mode("overwrite").option("overwriteSchema", "true").saveAsTable(
    "snow_data_db.snowground"
)