## Defining Parameters and Functions

In [8]:
import boto3
import pandas as pd
import plotly.graph_objects as go
import io
import os
from dotenv import load_dotenv
import toml

# Initializing parameters
# Load key/value pairs from a local .env file (if one exists) into the process
# environment so the os.getenv() lookups below can find the AWS credentials.
# load_dotenv() is a no-op when no .env file is found and, by default, does
# not override variables that are already set in the environment.
load_dotenv()
app_config = toml.load('config_file.toml')  # bucket name and region live in the [aws] table
S3_BUCKET = app_config['aws']['bucket_name']
REGION = app_config['aws']['aws_region']
S3_PREFIX = ""  # empty prefix: list objects from the root of the bucket
METRICS = ["temp_celsius", "humidity", "cloud_cover", "precipitation_mm", "wind_speed_kph", "uv_index"] # Metrics to analyze
# Credentials come from the environment; os.getenv returns None when a
# variable is missing.
ACCESS_KEY = os.getenv('ACCESS_KEY')
SECRET_KEY = os.getenv('SECRET_KEY')

# Adjusting color scales of the different metrics
def _two_stop_scale(low_color, high_color):
    '''Build a two-stop colorscale: low_color at 0.0 and high_color at 1.0.'''
    return [[0.0, low_color], [1.0, high_color]]


# Blue -> Red
temperature_colorscale = _two_stop_scale("rgb(0, 0, 255)", "rgb(255, 0, 0)")

# Light blue -> Dark blue
precipitation_colorscale = _two_stop_scale("rgb(173, 216, 230)", "rgb(0, 0, 139)")

# Light purple (thistle) -> Dark purple
wind_speed_colorscale = _two_stop_scale("rgb(216, 191, 216)", "rgb(128, 0, 128)")

# Yellow -> Red
uv_index_colorscale = _two_stop_scale("rgb(255, 255, 0)", "rgb(255, 0, 0)")

# Red -> Blue
humidity_colorscale = _two_stop_scale("rgb(255, 0, 0)", "rgb(0, 0, 255)")

# Light grey -> Dark grey
cloud_cover_colorscale = _two_stop_scale("rgb(211, 211, 211)", "rgb(64, 64, 64)")

# Mapping metric to its colorscale
custom_colorscale_dict = dict(
    temp_celsius=temperature_colorscale,
    precipitation_mm=precipitation_colorscale,
    wind_speed_kph=wind_speed_colorscale,
    uv_index=uv_index_colorscale,
    humidity=humidity_colorscale,
    cloud_cover=cloud_cover_colorscale,
)

# Initialize S3
# Module-level client shared by the helper functions defined below; it is
# configured with the region and credentials read in the parameters section.
s3 = boto3.client(
    "s3",
    region_name=REGION,
    aws_access_key_id=ACCESS_KEY,
    aws_secret_access_key=SECRET_KEY,
)


def list_parquet_files(bucket, prefix):
    '''Return the keys of all ``.parquet`` objects under ``prefix`` in ``bucket``.

    Walks every page produced by the ``list_objects_v2`` paginator of the
    module-level ``s3`` client and keeps only keys ending in ``.parquet``.
    Pages without a ``Contents`` entry contribute nothing.
    '''
    pages = s3.get_paginator('list_objects_v2').paginate(Bucket=bucket, Prefix=prefix)
    return [
        entry["Key"]
        for page in pages
        for entry in page.get("Contents", [])
        if entry["Key"].endswith(".parquet")
    ]


def load_all_parquet(bucket, keys):
    '''Download the given parquet objects and combine them into one DataFrame.

    Each key is fetched with the module-level ``s3`` client, read fully into
    memory, and parsed with ``pd.read_parquet``. The per-file frames are
    concatenated with a fresh integer index.

    Args:
        bucket: Name of the S3 bucket holding the objects.
        keys: Iterable of object keys to download.

    Returns:
        A single DataFrame with the rows of every file, or an empty DataFrame
        when ``keys`` yields nothing.
    '''
    frames = []
    for key in keys:
        obj = s3.get_object(Bucket=bucket, Key=key)
        # Read the whole body into memory so the parquet reader gets a seekable buffer.
        buffer = io.BytesIO(obj["Body"].read())
        frames.append(pd.read_parquet(buffer))

    # pd.concat raises ValueError on an empty list, so handle "no files" explicitly.
    if not frames:
        return pd.DataFrame()
    return pd.concat(frames, ignore_index=True)


def plot_heatmap(df, metrics):
    '''Show a map of the readings with a dropdown to switch between metrics.

    One marker layer is created per entry in ``metrics``, coloured by that
    column of ``df``; only the layer for the first metric starts visible.
    The figure is displayed with ``fig.show()`` and nothing is returned.
    '''

    def build_layer(column):
        # One mapbox marker layer coloured by the values of `column`.
        latitudes = df["latitude"]
        longitudes = df["longitude"]
        values = df[column]
        marker_style = {
            "size": 30,  # Make dots bigger
            "color": values,
            "colorscale": custom_colorscale_dict.get(column, "Viridis"),
            "cmin": values.min(),
            "cmax": values.max(),
            "colorbar": {"title": column.capitalize()},
        }
        return go.Scattermapbox(
            lat=latitudes,
            lon=longitudes,
            mode="markers",
            marker=marker_style,
            name=column,
            visible=(column == metrics[0]),
            hovertext=values.round(2).astype(str),  # Show value
            hoverinfo="text",
        )

    layers = [build_layer(column) for column in metrics]

    # Dropdown menu: each entry shows exactly one layer and retitles the figure
    menu_entries = [
        {
            "label": column.capitalize(),
            "method": "update",
            "args": [
                {"visible": [slot == position for slot in range(len(metrics))]},
                {"title": f"Weather Heatmap - {column.capitalize()}"},
            ],
        }
        for position, column in enumerate(metrics)
    ]

    fig = go.Figure(data=layers)
    # Adding mapbox to figure
    fig.update_layout(
        title=f"Weather Heatmap - {metrics[0].capitalize()}",
        mapbox={
            "center": {"lat": 49.5, "lon": -122.5},
            "zoom": 8,
            "style": "open-street-map",
        },
        updatemenus=[{
            "buttons": menu_entries,
            "direction": "down",
            "x": 0.05,
            "y": 1.15,
            "showactive": True,
        }],
        margin={"r": 0, "t": 40, "l": 0, "b": 0},
    )

    fig.show()

## Building Graph

In [9]:
print("Listing parquet files from S3...")
parquet_keys = list_parquet_files(S3_BUCKET, S3_PREFIX)
print(f"Found {len(parquet_keys)} files")

# Defining how to aggregate the different metrics when several rows share the
# same (latitude, longitude, updated_time) key.
# NOTE(review): precipitation is summed across rows with the same key; if those
# rows are repeated copies of one reading this would over-count — confirm.
agg_funcs = {
    "temp_celsius": "mean",
    "humidity": "mean",
    "precipitation_mm": "sum",     # Precipitation is additive
    "cloud_cover": "mean",
    "wind_speed_kph": "mean",
    "uv_index": "mean"
}

# Skip loading and plotting entirely when the listing returned no parquet files.
if parquet_keys:
    print("Loading data...")
    df = load_all_parquet(S3_BUCKET, parquet_keys)

    print(f"Loaded {len(df)} records")
    # Collapse rows that share a location and timestamp, combining each metric
    # as specified in agg_funcs. as_index=False already returns the group keys
    # as ordinary columns with a fresh integer index, so no reset_index() is
    # needed (it would only add a stray 'index' column).
    df_grouped = df.groupby(['latitude', 'longitude', 'updated_time'], as_index=False).agg(agg_funcs)
    print(f"After grouping, there exists {len(df_grouped)} unique points.")
    plot_heatmap(df_grouped, METRICS)

Listing parquet files from S3...
Found 25 files
Loading data...
Loaded 2749 records
After grouping, there exists 985 unique points.


## Insights

### Temperature

The further inland you go (for example, Abbotsford), the hotter it gets, due to the lack of moderating bodies of water. This is fairly consistent with the colors getting more red (hot) the further east you go.

### Humidity

Similarly, the further inland (east) you go, the lower the humidity, because the ocean adds to the relative humidity near the coast. In the west, the humidity is nearly 75%. The drop is especially pronounced in the far north in the Garibaldi range, where the humidity starts at 55% in the western portion and falls to 25% in the eastern section. This is because British Columbia's coast juts out to the northwest, so by the time you reach the 50th degree of latitude, you are already quite far inland. Think of the section from Squamish to Whistler, where it's all mountain, while just 50-80 km south you're at the beach.

### Cloud Cover

This one is inconclusive because the data was recorded on a clear, sunny day. There are pockets of cloud cover, but they are just sporadic clouds. However, I could imagine the mountainous areas having thicker cloud cover on a more overcast day due to the orographic rainfall effect you typically see on the North Shore.

### Precipitation

Again, this one is inconclusive: it was a clear, sunny day in the summer. On a wetter day in the winter or spring, there would likely be more precipitation where the mountains are, due to orographic rainfall. So the southern sections would be drier, while the northern sections, where the mountains are, would be wetter. However, confirming this would take a much longer period of data collection, over which the noise gets flattened out.

### Wind Speed

The northwest sections are the least windy, while the rest have a consistent level of wind. This could just be noise, but the northwest sections being the least windy could be because mountains break the wind, while the southeast (e.g., Langley, Abbotsford) is windy because it's relatively flat in those areas, with nothing to break the wind. However, seeing real trends would require collecting data for days rather than the 2 hours I did, which was due to free-tier API limitations and having to leave an EC2 instance running, which could get expensive real fast.

### UV Index

UV index is consistent for the most part. This isn't too surprising. It was a clear day with little variance in cloud cover. The far north section near Garibaldi has the highest UV index. This could likely be because the elevation is close to 2000m in those areas, which means there is less air to filter out UV rays. Thus the UV index would be the highest.