In [20]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

fire_data = pd.read_csv('fire_data_cleaned.csv')

# only keep entries where SRC_AGENCY = AB or NB
fire_data = fire_data[(fire_data['SRC_AGENCY'] == 'AB') | (fire_data['SRC_AGENCY'] == 'NB')]
fire_data.head()


Unnamed: 0,FID,SRC_AGENCY,FIRE_ID,LATITUDE,LONGITUDE,YEAR,MONTH,DAY,REP_DATE,OUT_DATE,SIZE_HA,CAUSE,ECOZONE,ECOZ_REF,ECOZ_NAME,ECOZ_NOM
0,187640,AB,MWF036,59.122133,-111.158783,2015,6,6,2015-06-06 00:00:00,2015-06-06 00:00:00,0.01,L,5,5a,Taiga Shield West,Taiga du Bouclier
1,187641,AB,MWF052,59.6531,-110.343233,2015,6,24,2015-06-24 00:00:00,2015-11-12 00:00:00,24458.0,L,5,5a,Taiga Shield West,Taiga du Bouclier
2,187642,AB,MWF054,59.081333,-110.849717,2015,6,24,2015-06-24 00:00:00,2015-06-24 00:00:00,0.01,L,5,5a,Taiga Shield West,Taiga du Bouclier
3,187643,AB,MWF055,59.3535,-110.506667,2015,6,24,2015-06-24 00:00:00,2015-06-24 00:00:00,0.1,L,5,5a,Taiga Shield West,Taiga du Bouclier
4,187644,AB,MWF058,59.044483,-110.816367,2015,6,24,2015-06-24 00:00:00,2015-06-24 00:00:00,0.01,L,5,5a,Taiga Shield West,Taiga du Bouclier


In [5]:
import openmeteo_requests
import requests_cache
from retry_requests import retry

cache_session = requests_cache.CachedSession('.cache', expire_after=-1)
retry_session = retry(cache_session, retries=5, backoff_factor=0.2)
openmeteo = openmeteo_requests.Client(session=retry_session)

def get_weather_data(lat, lon, date):
    url = "https://archive-api.open-meteo.com/v1/archive"
    params = {
        "latitude": lat,
        "longitude": lon,
        "start_date": date,
        "end_date": date,
        "daily": ["temperature_2m_max", "temperature_2m_min", "temperature_2m_mean",
                  "apparent_temperature_max", "apparent_temperature_min", "apparent_temperature_mean"],
        "timezone": "America/New_York"  # Adjust timezone if needed
    }
    responses = openmeteo.weather_api(url, params=params)
    response = responses[0]
    daily = response.Daily()

    return {
        "temperature_2m_max": daily.Variables(0).ValuesAsNumpy()[0],
        "temperature_2m_min": daily.Variables(1).ValuesAsNumpy()[0],
        "temperature_2m_mean": daily.Variables(2).ValuesAsNumpy()[0],
        "apparent_temperature_max": daily.Variables(3).ValuesAsNumpy()[0],
        "apparent_temperature_min": daily.Variables(4).ValuesAsNumpy()[0],
        "apparent_temperature_mean": daily.Variables(5).ValuesAsNumpy()[0]
    }

# Iterate over rows and get weather data
for index, row in fire_data.iterrows():
    date_str = f"{row['YEAR']}-{row['MONTH']:02d}-{row['DAY']:02d}"
    weather_data = get_weather_data(row['LATITUDE'], row['LONGITUDE'], date_str)
    print(weather_data)

    for key, value in weather_data.items():
        fire_data.loc[index, key] = value

{'temperature_2m_max': 20.624498, 'temperature_2m_min': 11.6745, 'temperature_2m_mean': 16.022415, 'apparent_temperature_max': 19.25192, 'apparent_temperature_min': 10.754039, 'apparent_temperature_mean': 14.882615}
{'temperature_2m_max': 20.7025, 'temperature_2m_min': 11.752501, 'temperature_2m_mean': 16.100416, 'apparent_temperature_max': 19.356401, 'apparent_temperature_min': 10.85451, 'apparent_temperature_mean': 14.987226}
{'temperature_2m_max': 24.163, 'temperature_2m_min': 13.462999, 'temperature_2m_mean': 18.752584, 'apparent_temperature_max': 23.669865, 'apparent_temperature_min': 11.984821, 'apparent_temperature_mean': 17.884575}
{'temperature_2m_max': 26.1855, 'temperature_2m_min': 9.985499, 'temperature_2m_mean': 17.491749, 'apparent_temperature_max': 25.64235, 'apparent_temperature_min': 7.1147537, 'apparent_temperature_mean': 16.66641}
{'temperature_2m_max': 23.374498, 'temperature_2m_min': 12.7245, 'temperature_2m_mean': 18.626581, 'apparent_temperature_max': 23.528059, 

In [6]:
fire_data.to_csv('./fire_data_FINAL_with_temp_AB_NB.csv', index=False)

In [7]:
#print the types of values in column CAUSE
print(fire_data['CAUSE'].unique())

['L' 'H']


In [9]:
fire_data_raw = pd.read_csv('fire_data_cleaned.csv')
print(fire_data_raw['CAUSE'].unique())

['L' 'H' 'U']


In [16]:
# print the number of rows with CAUSE == U
print(fire_data_raw[(fire_data_raw['CAUSE'] == 'U') ])


          FID SRC_AGENCY      FIRE_ID   LATITUDE   LONGITUDE  YEAR  MONTH  \
172    188006         AB       HWF331  58.646900 -115.188400  2015     10   
190    188062         AB       MWF129  58.706167 -111.186500  2015      7   
276    188377         AB       HWF285  58.478917 -116.344967  2015      7   
283    188384         AB       HWF310  58.389236 -115.935335  2015      8   
295    188396         AB       PWF012  57.840867 -117.650467  2015      4   
...       ...        ...          ...        ...         ...   ...    ...   
23029  396029         NS  06-001-2016  44.760246  -65.386772  2016      4   
23065  396364         NS  02-014-2015  44.261030  -64.804790  2015      9   
23100  396693         NS  03-006-2015  43.860530  -65.042620  2015      9   
23129  396866         NS  04-005-2016  43.839629  -66.102215  2016      4   
23159  397094         NS  04-015-2016  43.883854  -65.697838  2016      5   

       DAY             REP_DATE             OUT_DATE  SIZE_HA CAUSE  ECOZON

In [18]:
# print the province counts with cause == u
print(fire_data_raw[(fire_data_raw['CAUSE'] == 'U') ]['SRC_AGENCY'].value_counts())

AB    233
NB    221
ON     74
SK     41
NS     24
MB      1
Name: SRC_AGENCY, dtype: int64


In [19]:
# number of rows by SRC_AGENCY
print(fire_data_raw['SRC_AGENCY'].value_counts())

AB    5835
ON    5765
QC    3583
SK    2990
MB    2123
NB    1859
NT    1033
NS     516
NL     489
YT     403
Name: SRC_AGENCY, dtype: int64


In [22]:
# only keep QC, AB, NB provinces
imp_provinces = fire_data_raw[(fire_data_raw['SRC_AGENCY'] == 'AB') | (fire_data_raw['SRC_AGENCY'] == 'NB')]
imp_provinces.count()

FID           7694
SRC_AGENCY    7694
FIRE_ID       7694
LATITUDE      7694
LONGITUDE     7694
YEAR          7694
MONTH         7694
DAY           7694
REP_DATE      7694
OUT_DATE      7694
SIZE_HA       7694
CAUSE         7694
ECOZONE       7694
ECOZ_REF      7694
ECOZ_NAME     7694
ECOZ_NOM      7694
dtype: int64

In [2]:
import pandas as pd
fire_data_raw = pd.read_csv('fire_data.csv')
print(fire_data_raw[(fire_data_raw['CAUSE'] == 'U') ]['SRC_AGENCY'].value_counts())


AB    333
NB    221
BC    206
ON     74
SK     51
NS     28
MB      9
Name: SRC_AGENCY, dtype: int64
