# Data preparation
This notebook merges the [Canadian Wildfire Dataset](https://www.kaggle.com/datasets/ulasozdemir/wildfires-in-canada-19502021) from Kaggle and [Canadian Wildland Fire Information System](https://cwfis.cfs.nrcan.gc.ca/downloads/activefires/) with weather data from Environment and Climate Change Canada (ECCC) and Meteorological Service of Canada (MSC) by using the [MSC GeoMet API](https://eccc-msc.github.io/open-data/msc-geomet/readme_en/)

In [2]:
from owslib.ogcapi.features import Features
import numpy
import pandas as pd
import json
from datetime import date

# Middle point
Getting average latitude and longitude from which to query a box of weather stations

In [3]:
wildfire_df = pd.read_csv('2000-2021+2023-2024.csv')
wildfire_df

Unnamed: 0,agency,lat,lon,date,hectares,cause,response_type
0,BC,50.9050,-126.9292,2000-03-10,6.00,H,
1,BC,49.2971,-122.2321,2000-08-21,0.20,H,
2,BC,55.8000,-124.8167,2005-06-20,0.01,L,
3,BC,49.8770,-121.5730,2000-08-06,0.10,L,
4,BC,49.0667,-121.8333,2000-07-28,0.10,H,
...,...,...,...,...,...,...,...
148756,AK,65.0282,-141.2460,2024-05-15,0.00,,
148757,BC,55.9370,-121.9170,2024-05-12,295.00,,FUL
148758,PC,60.2150,-112.8720,2024-04-26,0.10,,FUL
148759,PC,60.1950,-112.7960,2024-04-26,0.10,,FUL


In [13]:
avg_lat = wildfire_df['lat'].mean()
avg_lon = wildfire_df['lon'].mean()
print('Average latitude: ' + str(avg_lat))
print('Average longitude: ' + str(avg_lon))

Average latitude: 52.032306654095855
Average longitude: -102.52012118542373


# Querying MSC by using OGC API - Features Request
Parameters to query

In [28]:
# Coordinate
lat = 52.032306654095855
lon = -102.52012118542373

# Buffer size (km)
buffer = 100

# Timeframe to query data
start_date = date(2000, 3, 10)
end_date = date(2024, 5, 5)
time_ = f"{start_date}/{end_date}"

# Bounding box
# The buffer needs to be transformed in degrees to get
# the coordinates of the corners of the bounding box:
# Latitude: 1 km ≈ 0.009°
# Longitude (at the 49th parallel): 1 km ≈ 0.014°
bbox = [
    lon - buffer * 0.02,
    lat - buffer * 0.01,
    lon + buffer * 0.02,
    lat + buffer * 0.01,
]

Retrieve [climate station data](https://api.weather.gc.ca/collections/climate-stations?lang=en) using [OWSLib](https://owslib.readthedocs.io/en/latest/)

In [29]:
features = Features('https://api.weather.gc.ca/')
station_data = features.collection_items(
    'climate-stations', bbox=bbox
)
station_json = json.dumps(station_data, indent=4)
print(station_json)

{
    "type": "FeatureCollection",
    "features": [
        {
            "type": "Feature",
            "properties": {
                "STN_ID": 3336,
                "STATION_NAME": "STAR CITY",
                "PROV_STATE_TERR_CODE": "SK",
                "ENG_PROV_NAME": "SASKATCHEWAN",
                "FRE_PROV_NAME": "SASKATCHEWAN",
                "COUNTRY": "CAN",
                "LATITUDE": 525200000,
                "LONGITUDE": -1042000000,
                "TIMEZONE": "CST",
                "ELEVATION": "465.00",
                "CLIMATE_IDENTIFIER": "4057757",
                "TC_IDENTIFIER": null,
                "WMO_IDENTIFIER": null,
                "STATION_TYPE": "N/A",
                "NORMAL_CODE": "F",
                "PUBLICATION_CODE": 1,
                "DISPLAY_CODE": 7,
                "ENG_STN_OPERATOR_ACRONYM": null,
                "FRE_STN_OPERATOR_ACRONYM": null,
                "ENG_STN_OPERATOR_NAME": null,
                "FRE_STN_OPERATOR_NAME": nul

In [30]:
# Number of stations within 50km of point
len(station_data['features'])

104

In [34]:
# Create list of station IDs
stations = [station['properties']['STATION_NAME'] for station in station_data['features']]
stations

['STAR CITY',
 'STAR CITY CDA EPF',
 'ROBLIN',
 'DURBAN CDA EPF',
 'KENVILLE EXP ST',
 'MOUNTAIN GAP',
 'FENWOOD',
 'LEROSS',
 'PARKERVIEW 2',
 'PARKERVIEW',
 'PASWEGIN',
 'PREECEVILLE',
 'SPALDING',
 'DANBURY',
 'HUDSON BAY A',
 'KELVINGTON EXP ST',
 'PORCUPINE PLAIN 2',
 'PORCUPINE PLAIN 3',
 'ROBLIN',
 'YORKTON',
 'COWAN',
 'HUDSON BAY',
 'YORKTON',
 'YORKTON A',
 'YORKTON CDA EPF',
 'TONKIN',
 'TISDALE',
 'VALPARAISO',
 'BERTWELL',
 'HUDSON BAY 2',
 'PLEASANTDALE',
 'MAFEKING',
 'SWAN RIVER',
 'DYSART',
 'LESTOCK',
 'MARGO',
 'PREECEVILLE 2',
 'PINE RIVER',
 'WYNYARD',
 'ARMIT',
 'LAC VERT',
 'YORKTON',
 'GRANDVIEW CDA EPF',
 'LENSWOOD EXP ST',
 'SWAN RIVER 2',
 'HUBBARD',
 'PUNNICHY',
 'TISDALE SOUTH',
 'VEREGIN',
 'ROBLIN',
 'LINTLAW',
 'WEEKES',
 'ROBLIN NORTH',
 'HUDSON BAY(AUT)',
 'YORKTON',
 'BUCHANAN',
 'KELLIHER',
 'LIPTON 2',
 'HUDSON BAY 13W',
 'STORNOWAY',
 'WYNYARD (AUT)',
 'HUNTERS POINT MARINE',
 'CROOKED RIVER',
 'HUDSON BAY',
 'MCKAGUE',
 'PELLY 2',
 'SOMME CDA EPF'

Retrieving [daily climate observations](https://api.weather.gc.ca/collections/climate-daily/items?lang=en) using OWSLib

In [41]:
features = Features('https://api.weather.gc.ca/')
weather_data = features.collection_items(
    'climate-daily', bbox=bbox, datetime=time_
)
weather_json = json.dumps(weather_data, indent=4)
print(weather_json)

{
    "type": "FeatureCollection",
    "features": [
        {
            "id": "40190LN.2000.3.10",
            "type": "Feature",
            "geometry": {
                "coordinates": [
                    -104.2,
                    51.766666666666666
                ],
                "type": "Point"
            },
            "properties": {
                "DIRECTION_MAX_GUST_FLAG": null,
                "LOCAL_YEAR": 2000,
                "MIN_REL_HUMIDITY": null,
                "SPEED_MAX_GUST_FLAG": null,
                "TOTAL_RAIN": 0,
                "COOLING_DEGREE_DAYS": 0,
                "COOLING_DEGREE_DAYS_FLAG": null,
                "LOCAL_DAY": 10,
                "MEAN_TEMPERATURE": -7.7,
                "TOTAL_SNOW_FLAG": null,
                "TOTAL_SNOW": 0,
                "MIN_REL_HUMIDITY_FLAG": null,
                "TOTAL_PRECIPITATION": 0,
                "SNOW_ON_GROUND_FLAG": "M",
                "CLIMATE_IDENTIFIER": "40190LN",
                "HE

In [39]:
len(weather_data['features'])

500