Once the data is loaded into the SQL database, we can create queries for specific visualizations. 

In [None]:
%sql

-- Pull the date plus the precipitation and snow measurements, oldest rows first
SELECT date, precipitation, snow, snow_on_ground
FROM snow_data_db.observations
ORDER BY date;

In [None]:
%sql

-- Focus on the 'snow' column only. 2024 is excluded because that year is not yet complete.
-- The cutoff is written as a full ISO date so the filter does not depend on how a
-- partial literal such as '2024' would be implicitly cast for the comparison.

SELECT
  date,
  snow AS snowfall_cm
FROM snow_data_db.observations
WHERE date < '2024-01-01'
ORDER BY date;



We see that the snow accumulation for 1872 and 1873 seems a bit off compared to the overall trend line. Let's start at 1874.
<br>Let's also split the date into separate year, month, day, and weekday columns.

In [None]:
# Switching back to Python, let's apply the changes and save the query as a separate
# table in the Hive Metastore.
#
# The query keeps observations from 1874-01-01 through 2023-12-31 (1872-1873 looked off
# and 2024 is not yet complete) and splits the date into parts:
#   year    - numeric year
#   month   - month name (date_format pattern 'MMMM')
#   day     - day of the month
#   weekday - day-of-week name (date_format pattern 'EEEE')
#
# A single SELECT is enough: the earlier CTE wrapper also computed a `month_num` column
# that was never selected, so it is dropped here. The resulting columns
# (date, year, month, day, weekday, snowfall) are unchanged. The trailing ';' is omitted
# because spark.sql() runs a single statement and does not need a terminator.
df = spark.sql(
    """
SELECT
  date,
  EXTRACT(year FROM date) AS year,
  date_format(date, 'MMMM') AS month,
  EXTRACT(day FROM date) AS day,
  date_format(date, 'EEEE') AS weekday,
  snow AS snowfall
FROM snow_data_db.observations
WHERE date BETWEEN '1874-01-01' AND '2023-12-31'
ORDER BY date ASC
"""
)

# Replace any existing table; "overwriteSchema" lets the overwrite also replace the
# stored schema if it differs from the DataFrame's.
df.write.mode("overwrite").option("overwriteSchema", "true").saveAsTable(
    "snow_data_db.snowfall_full"
)

In [None]:
# Let's create a shorter table for the last 100 years.
#
# The window is the 100 complete calendar years 1924 through 2023 inclusive.
# NOTE(review): the range previously started at 1923-01-01, which spans 101 calendar
# years; the start was moved to 1924-01-01 to match "last 100 years" - confirm this is
# the intended window.
#
# The date is split into parts:
#   year    - numeric year
#   month   - month name (date_format pattern 'MMMM')
#   day     - day of the month
#   weekday - day-of-week name (date_format pattern 'EEEE')
#
# A single SELECT is enough: the earlier CTE wrapper also computed a `month_num` column
# that was never selected, so it is dropped here. The resulting columns
# (date, year, month, day, weekday, snowfall) are unchanged. The trailing ';' is omitted
# because spark.sql() runs a single statement and does not need a terminator.
df = spark.sql(
    """
SELECT
  date,
  EXTRACT(year FROM date) AS year,
  date_format(date, 'MMMM') AS month,
  EXTRACT(day FROM date) AS day,
  date_format(date, 'EEEE') AS weekday,
  snow AS snowfall
FROM snow_data_db.observations
WHERE date BETWEEN '1924-01-01' AND '2023-12-31'
ORDER BY date ASC
"""
)

# Replace any existing table; "overwriteSchema" lets the overwrite also replace the
# stored schema if it differs from the DataFrame's.
df.write.mode("overwrite").option("overwriteSchema", "true").saveAsTable(
    "snow_data_db.snowfall"
)