In [1]:
# Import 3rd party libraries
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from datetime import datetime
import re


# Configure Notebook
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Plot appearance: matplotlib 'fivethirtyeight' style sheet plus seaborn's "notebook" context.
plt.style.use('fivethirtyeight')
sns.set_context("notebook")
# NOTE(review): the filter below suppresses every warning for the rest of the session
# (including deprecation warnings) - consider narrowing it to specific categories.
import warnings
warnings.filterwarnings('ignore')

In [2]:
# Load the raw wildfire CSV for each year (2019, 2020, 2021) into its own dataframe.
# NOTE(review): these are absolute paths on one machine; the notebook will not run
# elsewhere until they are replaced with a configurable data directory.
wf_data_2019, wf_data_2020, wf_data_2021 = (
    pd.read_csv(csv_path)
    for csv_path in (
        r"C:\Users\efarr\PycharmProjects\CIV1498Project2\0-Raw_data\2019_wildfires.csv",
        r"C:\Users\efarr\PycharmProjects\CIV1498Project2\0-Raw_data\2020_wildfires.csv",
        r"C:\Users\efarr\PycharmProjects\CIV1498Project2\0-Raw_data\2021_wildfires.csv",
    )
)

# Print the first 10 rows of the raw 2021 wildfire data
print(wf_data_2021.head(10))

                Fire Name Fire Number Hectares Burned Date of Discovery  \
0             Sparks Lake      K21001          95,980         6/28/2021   
1            Lytton Creek      K71086          83,671         6/30/2021   
2         White Rock Lake      K61884          83,342         7/13/2021   
3  5 km West of Flat Lake      C41602          73,862          7/8/2021   
4           Tremont Creek      K21849          63,548         7/12/2021   
5             McKay Creek      K71030          44,964         6/29/2021   
6            Cutoff Creek      G41269          33,418          7/2/2021   
7           Octopus Creek      N51800          22,049         7/11/2021   
8        Chief Louis Lake      R11562          20,750          7/7/2021   
9           July Mountain      K61882          19,661         7/13/2021   

          Cause  
0         Human  
1  Undetermined  
2  Undetermined  
3       Natural  
4  Undetermined  
5         Human  
6       Natural  
7       Natural  
8       Natu

Wildfire datasets consist of the fire name, fire number/fire centre, hectares burned, date of discovery, and cause.

Next, we'll process these datasets. We need to replace the fire number with the name of the fire centre in which the fire was discovered, convert the hectares burned to floats, and turn the date of discovery into a DatetimeIndex.

In [3]:
def clean_wf_data(df):
    """Clean one raw yearly wildfire dataframe and index it by discovery date.

    The input frame is not modified; all work happens on a copy.

    Steps performed:
      * rename the 'Fire Number' column to 'Fire Centre Name' and cast it to str;
      * strip the thousands separator from 'Hectares Burned' and cast it to float;
      * parse 'Date of Discovery' (month/day/year) and use it as a DatetimeIndex
        named 'Datetime', dropping the original date column.

    Parameters
    ----------
    df : pandas.DataFrame
        Raw wildfire data containing 'Hectares Burned', 'Date of Discovery' and
        either 'Fire Number' or an already-named 'Fire Centre Name' column.

    Returns
    -------
    pandas.DataFrame
        A new frame indexed by 'Datetime' with the remaining columns in their
        original order.

    Raises
    ------
    KeyError
        If one of the required columns is missing.
    ValueError
        If a hectares value is not numeric or a date does not match
        month/day/year.
    """
    date_format = '%m/%d/%Y'

    # rename() returns a new frame, so the assignments below never touch the caller's df
    cleaned = df.rename(columns={'Fire Number': 'Fire Centre Name'})

    # Cast to str first so columns that were read as numbers are handled too;
    # regex=False makes the comma a literal on every pandas version
    cleaned['Hectares Burned'] = (
        cleaned['Hectares Burned']
        .astype(str)
        .str.replace(',', '', regex=False)
        .astype(float)
    )
    cleaned['Fire Centre Name'] = cleaned['Fire Centre Name'].astype(str)

    # Parse the discovery date and promote it to the index
    cleaned['Datetime'] = pd.to_datetime(
        cleaned['Date of Discovery'].astype(str), format=date_format
    )
    cleaned = cleaned.set_index('Datetime').drop(columns=['Date of Discovery'])

    return cleaned

In [5]:
def clean_fire_codes(df):
    """Replace fire-number codes in 'Fire Centre Name' with fire centre names.

    The first letter of each code selects the centre (G: Prince George,
    R: Northwest, C: Cariboo, V: Coastal, K: Kamloops, N: Southeast). Values
    whose first letter is not one of these are left unchanged.

    The lookup is done in a single pass over the original codes. Relabelling
    one prefix at a time is wrong here: "R" codes become "Northwest", which
    itself starts with "N" and would then be overwritten with "Southeast".

    Only apply this to columns that still hold raw fire numbers. It is kept
    separate from clean_wf_data because it can't be used on the 2019 data:
    a centre name that is already spelled out is matched on its first letter
    too (e.g. "Coastal" would become "Cariboo").

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a string-valued 'Fire Centre Name' column.

    Returns
    -------
    pandas.DataFrame
        The same frame object, with 'Fire Centre Name' updated in place.
    """
    centre_by_prefix = {
        "G": "Prince George",
        "R": "Northwest",
        "C": "Cariboo",
        "V": "Coastal",
        "K": "Kamloops",
        "N": "Southeast",
    }

    codes = df['Fire Centre Name']
    # Map from the original code's first letter; unmapped prefixes give NaN
    # and fall back to the original value
    centre_names = codes.str[0].map(centre_by_prefix)
    df['Fire Centre Name'] = centre_names.fillna(codes)

    return df

In [6]:
# Clean each yearly wildfire dataframe. Only the 2020 and 2021 frames are also
# passed through the fire-code renaming step; the 2019 frame is not.
# NOTE(review): this cell rebinds its own inputs, so running it a second time
# feeds already-cleaned frames (without 'Date of Discovery') back into
# clean_wf_data - re-run the loading cell first.
wf_data_2019 = clean_wf_data(wf_data_2019)
wf_data_2020 = clean_fire_codes(clean_wf_data(wf_data_2020))
wf_data_2021 = clean_fire_codes(clean_wf_data(wf_data_2021))

# Preview the cleaned 2021 dataframe
wf_data_2021.head()

Unnamed: 0_level_0,Fire Name,Fire Centre Name,Hectares Burned,Cause
Datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2021-06-28,Sparks Lake,Kamloops,95980.0,Human
2021-06-30,Lytton Creek,Kamloops,83671.0,Undetermined
2021-07-13,White Rock Lake,Kamloops,83342.0,Undetermined
2021-07-08,5 km West of Flat Lake,Cariboo,73862.0,Natural
2021-07-12,Tremont Creek,Kamloops,63548.0,Undetermined


In [7]:
# Stack the cleaned 2019, 2020 and 2021 frames row-wise into a single
# dataframe covering the whole period
wf_data = pd.concat(
    objs=(wf_data_2019, wf_data_2020, wf_data_2021),
    axis="index",
)

# Preview the first 10 rows of the combined dataframe
wf_data.head(10)

Unnamed: 0_level_0,Fire Name,Fire Centre Name,Hectares Burned,Cause
Datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2019-05-20,Fontas River,Prince George,650.0,Natural
2019-05-11,Lejac,Prince George,236.0,Human
2019-05-11,Coffee Creek Subdivision,Prince George,9.0,human
2019-05-13,Richter Creek,Kamloops,507.0,unknown
2019-05-27,Black Angus Creek,Northwest,1935.0,Natural
2019-07-04,Linklater Creek,Southeast,50.0,Human
2019-07-06,Southwest Tagish Lake,Northwest,1197.0,Natural
2019-07-24,Richter Mountain,Kamloops,403.0,Natural
2019-08-04,Eagle Bluff,Kamloops,2532.0,Human
2020-04-14,Magee Road,Coastal,203.0,Human


In [8]:
# Write the combined wildfire dataframe (including its Datetime index) to a CSV
# file in the current working directory.
wf_data.to_csv("wildfire_data.csv")