# Weather data
This notebook merges the [Canadian Wildfire Dataset](https://www.kaggle.com/datasets/ulasozdemir/wildfires-in-canada-19502021) from Kaggle and the [Canadian Wildland Fire Information System](https://cwfis.cfs.nrcan.gc.ca/downloads/activefires/) data with [weather data](https://api.weather.gc.ca/collections/climate-daily?lang=en) from Environment and Climate Change Canada (ECCC) and the Meteorological Service of Canada (MSC), using the [MSC GeoMet API](https://eccc-msc.github.io/open-data/msc-geomet/readme_en/). To do this, we take the previously merged dataset (fire + nearest station) and associate each wildfire point with the weather data recorded for its station and date in the [daily climate observation collection](https://api.weather.gc.ca/collections/climate-daily?lang=en).

In [1]:
import pandas as pd
from owslib.ogcapi.features import Features

In [2]:
# Root endpoint of the API that serves the weather collections queried below
geomet_endpoint = 'https://api.weather.gc.ca/'
features = Features(geomet_endpoint)

In [3]:
# Load the wildfire table; the query loop later reads its 'date' and
# 'STATION_NAME' columns to look up weather observations.
fires_path = 'fires-merged-3.csv'
wildfire_df = pd.read_csv(fires_path)

In [4]:
# The API calls crash when the whole table is processed in one run, so the
# table is split into two smaller parts that are queried one at a time
# (the query cell is repeated for each part).
split_at = 25000
small_df1 = wildfire_df.iloc[:split_at]   # first `split_at` rows
small_df2 = wildfire_df.iloc[split_at:]   # all remaining rows

## Querying MSC GeoMet API

In [5]:
# Weather property names used to build placeholder (None) values for fires
# that have no matching climate-daily observation. The order is kept as-is
# because it determines the order in which the placeholder keys are added.
columns = [
    'TOTAL_RAIN_FLAG',
    'MEAN_TEMPERATURE_FLAG',
    'MIN_REL_HUMIDITY',
    'HEATING_DEGREE_DAYS_FLAG',
    'ID',
    'MAX_TEMPERATURE',
    'MAX_TEMPERATURE_FLAG',
    'MIN_TEMPERATURE',
    'TOTAL_RAIN',
    'DIRECTION_MAX_GUST_FLAG',
    'LOCAL_DAY',
    'LOCAL_DATE',
    'SNOW_ON_GROUND',
    'PROVINCE_CODE',
    'DIRECTION_MAX_GUST',
    'COOLING_DEGREE_DAYS',
    'SPEED_MAX_GUST_FLAG',
    'MAX_REL_HUMIDITY_FLAG',
    'MIN_TEMPERATURE_FLAG',
    'TOTAL_PRECIPITATION_FLAG',
    'STATION_NAME',
    'LOCAL_YEAR',
    'LOCAL_MONTH',
    'MIN_REL_HUMIDITY_FLAG',
    'TOTAL_SNOW_FLAG',
    'MEAN_TEMPERATURE',
    'CLIMATE_IDENTIFIER',
    'SNOW_ON_GROUND_FLAG',
    'TOTAL_SNOW',
    'COOLING_DEGREE_DAYS_FLAG',
    'SPEED_MAX_GUST',
    'HEATING_DEGREE_DAYS',
    'MAX_REL_HUMIDITY',
    'TOTAL_PRECIPITATION',
]

In [6]:
# Collect one merged dictionary (fire columns + weather columns) per wildfire
# row; the list is turned into a DataFrame in a single step afterwards.
rows = []

# Weather lookups already performed, keyed by (date, station name). Fires that
# share a date and a station reuse the stored answer instead of sending another
# HTTP request. The stored value is the observation's properties dict, or None
# when the API returned no match.
weather_cache = {}

# Placeholder weather values for fires without a matching observation.
# STATION_NAME is left out so the fire's own station name is not replaced by None.
# NOTE(review): any other fire column whose name is also in `columns` is still
# overwritten with None for unmatched fires — confirm that this is intended.
empty_weather = {column: None for column in columns if column != 'STATION_NAME'}

# Loop over every row in wildfire
for _, row in small_df2.iterrows():
    lookup_key = (row['date'], row['STATION_NAME'])

    if lookup_key not in weather_cache:
        # Query the daily climate collection by using date and station name
        climate_data = features.collection_items(
            'climate-daily', LOCAL_DATE=row['date'], STATION_NAME=row['STATION_NAME']
        )
        matches = climate_data['features']
        # Only the first returned observation is used
        weather_cache[lookup_key] = matches[0]['properties'] if matches else None

    properties = weather_cache[lookup_key]
    if properties is None:
        # No observation for this date/station: fill the weather columns with None
        properties = empty_weather

    # Merge the fire data with its weather data. The weather values are unpacked
    # last, so they take precedence over fire columns that share the same name.
    rows.append({**row.to_dict(), **properties})

In [7]:
# Build the output table in one step from the collected row dictionaries and
# write it to disk without the index column.
weather_part_path = 'fires-weather-32.csv'
df = pd.DataFrame(data=rows)
df.to_csv(weather_part_path, index=False)

# Merge smaller datasets

In [1]:
import pandas as pd

In [3]:
# Read every partial fire + weather file, in the order in which the parts are
# concatenated afterwards.
part_paths = [
    'fires-weather-11.csv',
    'fires-weather-12.csv',
    'fires-weather-21.csv',
    'fires-weather-22.csv',
    'fires-weather-31.csv',
    'fires-weather-32.csv',
]
fires_11, fires_12, fires_21, fires_22, fires_31, fires_32 = map(pd.read_csv, part_paths)

In [4]:
# Stack the partial tables on top of each other, in file order. Each part keeps
# its own row labels, so labels repeat in the combined frame.
weather_parts = [fires_11, fires_12, fires_21, fires_22, fires_31, fires_32]
fires_weather_concat = pd.concat(weather_parts)

In [5]:
# Write the combined fire + weather table to a single CSV file, leaving out
# the index column.
merged_path = 'fires-weather.csv'
fires_weather_concat.to_csv(merged_path, index=False)