In [0]:
# 00_setup_and_tables - Cell 1
#
# Chooses the target namespace, creates it when missing, and makes it the
# session's current schema so unqualified table names resolve there.

# Unity Catalog users: set CATALOG_NAME to the catalog to use (e.g. "main").
# Leave it as None to work with a plain database/schema instead.
CATALOG_NAME = None
SCHEMA_NAME = "resp_health_db"   # name of the database/schema to create and use

if not CATALOG_NAME:
    # No catalog configured: the schema name alone identifies the database.
    full_schema = SCHEMA_NAME
    spark.sql(f"CREATE DATABASE IF NOT EXISTS {full_schema}")
else:
    # Catalog configured: qualify the schema as <catalog>.<schema>.
    full_schema = f"{CATALOG_NAME}.{SCHEMA_NAME}"
    spark.sql(f"CREATE SCHEMA IF NOT EXISTS {full_schema}")

# Report the chosen namespace, then switch the session over to it.
print("Using schema/database:", full_schema)
spark.sql(f"USE {full_schema}")


In [0]:
# 00_setup_and_tables - Cell 2
#
# Creates the `respiratory_activity` Delta table, partitioned by province.
# The table name is unqualified, so it resolves against the session's
# current schema. IF NOT EXISTS keeps the cell safe to re-run.

# Plain (non-f) string: the DDL contains no interpolated values.
respiratory_activity_ddl = """
CREATE TABLE IF NOT EXISTS respiratory_activity (
    id BIGINT GENERATED ALWAYS AS IDENTITY,
    source_level STRING,          -- 'national' or 'provincial'
    data_source STRING,           -- 'PHAC_FluWatch', 'ON_Resp_Virus', etc.
    report_date DATE,
    province STRING,              -- 'Canada', 'Ontario', etc.
    virus_type STRING,            -- 'Influenza A', 'RSV', 'COVID-19'
    metric_type STRING,           -- 'positivity_rate', 'cases'
    metric_value DOUBLE,
    created_at TIMESTAMP
)
USING DELTA
PARTITIONED BY (province)
"""

spark.sql(respiratory_activity_ddl)


In [0]:
# 00_setup_and_tables - Cell 3
#
# Creates the `weather_conditions` Delta table, partitioned by province.
# The table name is unqualified, so it resolves against the session's
# current schema. IF NOT EXISTS keeps the cell safe to re-run.

# Plain (non-f) string: the DDL contains no interpolated values.
weather_conditions_ddl = """
CREATE TABLE IF NOT EXISTS weather_conditions (
    id BIGINT GENERATED ALWAYS AS IDENTITY,
    timestamp TIMESTAMP,
    province STRING,
    location STRING,
    temperature_c DOUBLE,
    wind_chill_c DOUBLE,
    humidity_percent DOUBLE,
    alert_level STRING,      -- e.g. 'Extreme Cold Warning'
    raw_source STRING,       -- JSON text for debugging
    created_at TIMESTAMP
)
USING DELTA
PARTITIONED BY (province)
"""

spark.sql(weather_conditions_ddl)


In [0]:
# 00_setup_and_tables - Cell 4
#
# Creates the `risk_scores` Delta table, partitioned by province.
# The table name is unqualified, so it resolves against the session's
# current schema. IF NOT EXISTS keeps the cell safe to re-run.

# Plain (non-f) string: the DDL contains no interpolated values.
risk_scores_ddl = """
CREATE TABLE IF NOT EXISTS risk_scores (
    id BIGINT GENERATED ALWAYS AS IDENTITY,
    date DATE,
    province STRING,
    virus_risk_score DOUBLE,
    cold_risk_score DOUBLE,
    combined_risk_score DOUBLE,
    risk_category STRING,    -- 'Low', 'Moderate', 'High', 'Very High'
    created_at TIMESTAMP
)
USING DELTA
PARTITIONED BY (province)
"""

spark.sql(risk_scores_ddl)


In [0]:
# CHECK
# Run SHOW TABLES against the session's current schema and pull the result
# into pandas so the notebook renders it as the cell output.

show_tables_df = spark.sql("SHOW TABLES")
tables = show_tables_df.toPandas()
tables
