# Process New Fire Data

This section decidedly does *not* use Mojo, as Mojo currently lacks useful I/O libraries.

In [1]:
import pandas as pd

In [2]:
raw_df = pd.read_csv("../data/gen_2/unprocessed/fire_archive_M-C61_28737.csv")

raw_df

Unnamed: 0,latitude,longitude,brightness,scan,track,acq_date,acq_time,satellite,instrument,confidence,version,bright_t31,frp,daynight,type
0,-18.6804,145.5470,317.3,2.9,1.6,2000-11-01,16,Terra,MODIS,54,6.03,299.4,32.8,D,0
1,-18.4005,144.9007,318.8,3.2,1.7,2000-11-01,16,Terra,MODIS,64,6.03,301.5,42.6,D,0
2,-18.4459,144.8904,324.6,3.2,1.7,2000-11-01,16,Terra,MODIS,74,6.03,303.1,77.4,D,0
3,-18.4401,144.8603,317.7,3.3,1.7,2000-11-01,16,Terra,MODIS,61,6.03,303.2,38.5,D,0
4,-17.0518,143.8872,322.1,4.1,1.9,2000-11-01,16,Terra,MODIS,31,6.03,298.9,72.3,D,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5571327,-19.4893,123.8846,306.3,1.1,1.1,2023-01-31,1726,Aqua,MODIS,67,6.03,290.5,6.1,N,0
5571328,-19.4908,123.8953,352.9,1.1,1.1,2023-01-31,1726,Aqua,MODIS,100,6.03,292.2,79.8,N,0
5571329,-19.4937,123.9167,332.9,1.1,1.1,2023-01-31,1726,Aqua,MODIS,100,6.03,292.1,36.0,N,0
5571330,-19.4965,123.9382,306.5,1.1,1.1,2023-01-31,1726,Aqua,MODIS,55,6.03,291.5,6.0,N,0


In [3]:
filtered_df = raw_df[raw_df['confidence'] > 80]

filtered_df

Unnamed: 0,latitude,longitude,brightness,scan,track,acq_date,acq_time,satellite,instrument,confidence,version,bright_t31,frp,daynight,type
29,-20.6460,148.4702,332.3,1.6,1.2,2000-11-01,17,Terra,MODIS,81,6.03,301.2,42.9,D,0
32,-20.4475,148.5904,332.7,1.6,1.2,2000-11-01,17,Terra,MODIS,82,6.03,299.9,46.4,D,0
49,-20.5602,146.0194,331.7,2.4,1.5,2000-11-01,17,Terra,MODIS,81,6.03,308.2,68.8,D,0
50,-20.5662,146.0241,332.1,2.4,1.5,2000-11-01,17,Terra,MODIS,82,6.03,306.5,66.9,D,0
52,-22.0605,145.5092,340.6,2.4,1.5,2000-11-01,17,Terra,MODIS,87,6.03,310.8,105.6,D,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5571322,-31.3319,147.8329,312.7,1.4,1.2,2023-01-31,1550,Aqua,MODIS,86,6.03,291.5,19.1,N,0
5571326,-19.4922,123.9060,380.6,1.1,1.1,2023-01-31,1726,Aqua,MODIS,100,6.03,294.7,183.9,N,0
5571328,-19.4908,123.8953,352.9,1.1,1.1,2023-01-31,1726,Aqua,MODIS,100,6.03,292.2,79.8,N,0
5571329,-19.4937,123.9167,332.9,1.1,1.1,2023-01-31,1726,Aqua,MODIS,100,6.03,292.1,36.0,N,0


In [4]:
grouped_df = filtered_df.groupby("acq_date")

with_fire_area = pd.DataFrame(grouped_df.apply(lambda x: (x['scan'] * x['track']).sum()))
with_fire_area = with_fire_area.reset_index()
with_fire_area = with_fire_area.rename(columns={0: "fire_area"})

with_fire_area

Unnamed: 0,acq_date,fire_area
0,2000-11-01,550.38
1,2000-11-02,858.08
2,2000-11-03,1054.32
3,2000-11-04,931.25
4,2000-11-05,314.20
...,...,...
8091,2023-01-27,178.20
8092,2023-01-28,33.61
8093,2023-01-29,43.97
8094,2023-01-30,25.91


In [5]:
soi_df = pd.read_csv("../data/gen_2/unprocessed/soi_bom.csv", dtype={'month': str})

def get_soi(row):
    month = "".join(row['acq_date'].split('-')[0:2])
    soi = soi_df.loc[soi_df['month'] == month]['soi'].item()
    return soi

add_soi = with_fire_area.copy(deep=True)

add_soi['soi'] = with_fire_area.apply(get_soi, axis=1)

add_soi

Unnamed: 0,acq_date,fire_area,soi
0,2000-11-01,550.38,22.4
1,2000-11-02,858.08,22.4
2,2000-11-03,1054.32,22.4
3,2000-11-04,931.25,22.4
4,2000-11-05,314.20,22.4
...,...,...,...
8091,2023-01-27,178.20,11.8
8092,2023-01-28,33.61,11.8
8093,2023-01-29,43.97,11.8
8094,2023-01-30,25.91,11.8


In [6]:
# Intermediate save to run some pre-analysis
add_soi.to_csv("../data/gen_2/processed/with_fire_area_and_soi.csv", index=False)

This pre-analysis determined that adding the temperatures of various cities around Australia was necessary.

In [7]:
brisbane_df = pd.read_csv("../data/gen_2/unprocessed/temperature/Brisbane.csv")
melbourne_df = pd.read_csv("../data/gen_2/unprocessed/temperature/Melbourne.csv")
cairns_df = pd.read_csv("../data/gen_2/unprocessed/temperature/Cairns.csv")
perth_df = pd.read_csv("../data/gen_2/unprocessed/temperature/Perth.csv")
sydney_df = pd.read_csv("../data/gen_2/unprocessed/temperature/Sydney.csv")

In [8]:
def add_date(row):
  row['date'] = f"{row['Year']}-{row['Month']}-{row['Day']}"
  return row

brisbane_df = brisbane_df.apply(add_date, axis=1)
melbourne_df = melbourne_df.apply(add_date, axis=1)
cairns_df = cairns_df.apply(add_date, axis=1)
perth_df = perth_df.apply(add_date, axis=1)
sydney_df = sydney_df.apply(add_date, axis=1)

brisbane_df

Unnamed: 0,Product code,Bureau of Meteorology station number,Year,Month,Day,Maximum temperature (Degree C),Days of accumulation of maximum temperature,Quality,date
0,IDCJAC0010,40913,1999,1,1,,,,1999-1-1
1,IDCJAC0010,40913,1999,1,2,,,,1999-1-2
2,IDCJAC0010,40913,1999,1,3,,,,1999-1-3
3,IDCJAC0010,40913,1999,1,4,,,,1999-1-4
4,IDCJAC0010,40913,1999,1,5,,,,1999-1-5
...,...,...,...,...,...,...,...,...,...
8997,IDCJAC0010,40913,2023,8,20,23.3,1.0,N,2023-8-20
8998,IDCJAC0010,40913,2023,8,21,24.4,1.0,N,2023-8-21
8999,IDCJAC0010,40913,2023,8,22,25.9,1.0,N,2023-8-22
9000,IDCJAC0010,40913,2023,8,23,28.1,1.0,N,2023-8-23


In [9]:
brisbane_df = brisbane_df[['date', 'Maximum temperature (Degree C)']]
brisbane_df = brisbane_df.rename(columns={'Maximum temperature (Degree C)': 'max_t_bne'})
melbourne_df = melbourne_df[['date', 'Maximum temperature (Degree C)']]
melbourne_df = melbourne_df.rename(columns={'Maximum temperature (Degree C)': 'max_t_mel'})
cairns_df = cairns_df[['date', 'Maximum temperature (Degree C)']]
cairns_df = cairns_df.rename(columns={'Maximum temperature (Degree C)': 'max_t_cns'})
perth_df = perth_df[['date', 'Maximum temperature (Degree C)']]
perth_df = perth_df.rename(columns={'Maximum temperature (Degree C)': 'max_t_pth'})
sydney_df = sydney_df[['date', 'Maximum temperature (Degree C)']]
sydney_df = sydney_df.rename(columns={'Maximum temperature (Degree C)': 'max_t_syd'})

brisbane_df

Unnamed: 0,date,max_t_bne
0,1999-1-1,
1,1999-1-2,
2,1999-1-3,
3,1999-1-4,
4,1999-1-5,
...,...,...
8997,2023-8-20,23.3
8998,2023-8-21,24.4
8999,2023-8-22,25.9
9000,2023-8-23,28.1


In [10]:
weather_dataframes = [brisbane_df, melbourne_df, cairns_df, perth_df, sydney_df]
weather_dataframes = [df.set_index('date') for df in weather_dataframes]

weather_dataframe = weather_dataframes[0].join(weather_dataframes[1:])

weather_dataframe

Unnamed: 0_level_0,max_t_bne,max_t_mel,max_t_cns,max_t_pth,max_t_syd
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1999-1-1,,27.8,30.9,,25.7
1999-1-2,,34.0,31.6,30.2,28.3
1999-1-3,,27.3,31.2,28.0,29.8
1999-1-4,,27.9,32.4,23.4,30.9
1999-1-5,,37.5,32.2,22.4,31.1
...,...,...,...,...,...
2023-8-20,23.3,17.6,26.4,19.9,22.8
2023-8-21,24.4,18.3,27.5,18.9,22.3
2023-8-22,25.9,14.5,27.6,17.6,25.4
2023-8-23,28.1,16.4,28.5,19.3,17.6


In [11]:
add_weather = add_soi.merge(weather_dataframe, left_on='acq_date', right_on='date')
# this seems to have nuked all the NaN values as well.

add_weather

Unnamed: 0,acq_date,fire_area,soi,max_t_bne,max_t_mel,max_t_cns,max_t_pth,max_t_syd
0,2000-11-10,515.93,22.4,26.0,19.7,28.8,23.7,24.0
1,2000-11-11,592.95,22.4,25.0,20.2,26.9,31.3,23.5
2,2000-11-12,730.74,22.4,26.0,20.6,28.6,34.1,23.9
3,2000-11-13,471.52,22.4,22.0,23.0,29.3,32.7,22.2
4,2000-11-14,498.83,22.4,25.0,24.9,28.5,32.0,18.5
...,...,...,...,...,...,...,...,...
1467,2022-12-27,107.15,20.0,28.2,36.5,29.0,25.4,28.8
1468,2022-12-28,56.83,20.0,28.0,27.9,28.4,30.8,28.9
1469,2022-12-29,109.51,20.0,28.1,19.8,31.5,34.8,21.6
1470,2022-12-30,322.97,20.0,27.9,30.1,31.4,31.7,25.5


In [12]:
add_weather.to_csv("../data/gen_2/processed/with_fire_area_and_soi_and_weather.csv", index=False)

In [13]:
import numpy as np


def add_sin_cos(row):
    row['sin_month'] = np.sin(2 * np.pi * pd.to_datetime(row['acq_date']).month / 12)
    row['cos_month'] = np.cos(2 * np.pi * pd.to_datetime(row['acq_date']).month / 12)
    return row

add_sin_cos_df = add_weather.apply(add_sin_cos, axis=1)

add_sin_cos_df

Unnamed: 0,acq_date,fire_area,soi,max_t_bne,max_t_mel,max_t_cns,max_t_pth,max_t_syd,sin_month,cos_month
0,2000-11-10,515.93,22.4,26.0,19.7,28.8,23.7,24.0,-5.000000e-01,0.866025
1,2000-11-11,592.95,22.4,25.0,20.2,26.9,31.3,23.5,-5.000000e-01,0.866025
2,2000-11-12,730.74,22.4,26.0,20.6,28.6,34.1,23.9,-5.000000e-01,0.866025
3,2000-11-13,471.52,22.4,22.0,23.0,29.3,32.7,22.2,-5.000000e-01,0.866025
4,2000-11-14,498.83,22.4,25.0,24.9,28.5,32.0,18.5,-5.000000e-01,0.866025
...,...,...,...,...,...,...,...,...,...,...
1467,2022-12-27,107.15,20.0,28.2,36.5,29.0,25.4,28.8,-2.449294e-16,1.000000
1468,2022-12-28,56.83,20.0,28.0,27.9,28.4,30.8,28.9,-2.449294e-16,1.000000
1469,2022-12-29,109.51,20.0,28.1,19.8,31.5,34.8,21.6,-2.449294e-16,1.000000
1470,2022-12-30,322.97,20.0,27.9,30.1,31.4,31.7,25.5,-2.449294e-16,1.000000


In [14]:
def add_year(row):
    row['year'] = float(row['acq_date'].split('-')[0])
    return row

add_year_df = add_sin_cos_df.apply(add_year, axis=1)
add_year_df = add_year_df.drop(columns=['acq_date'])

add_year_df

Unnamed: 0,fire_area,soi,max_t_bne,max_t_mel,max_t_cns,max_t_pth,max_t_syd,sin_month,cos_month,year
0,515.93,22.4,26.0,19.7,28.8,23.7,24.0,-5.000000e-01,0.866025,2000.0
1,592.95,22.4,25.0,20.2,26.9,31.3,23.5,-5.000000e-01,0.866025,2000.0
2,730.74,22.4,26.0,20.6,28.6,34.1,23.9,-5.000000e-01,0.866025,2000.0
3,471.52,22.4,22.0,23.0,29.3,32.7,22.2,-5.000000e-01,0.866025,2000.0
4,498.83,22.4,25.0,24.9,28.5,32.0,18.5,-5.000000e-01,0.866025,2000.0
...,...,...,...,...,...,...,...,...,...,...
1467,107.15,20.0,28.2,36.5,29.0,25.4,28.8,-2.449294e-16,1.000000,2022.0
1468,56.83,20.0,28.0,27.9,28.4,30.8,28.9,-2.449294e-16,1.000000,2022.0
1469,109.51,20.0,28.1,19.8,31.5,34.8,21.6,-2.449294e-16,1.000000,2022.0
1470,322.97,20.0,27.9,30.1,31.4,31.7,25.5,-2.449294e-16,1.000000,2022.0


In [15]:
# Drop blank
add_year_df = add_year_df.dropna()

In [16]:
add_year_df.describe()

Unnamed: 0,fire_area,soi,max_t_bne,max_t_mel,max_t_cns,max_t_pth,max_t_syd,sin_month,cos_month,year
count,1436.0,1436.0,1436.0,1436.0,1436.0,1436.0,1436.0,1436.0,1436.0,1436.0
mean,1091.910522,2.850975,28.44805,23.08078,31.250975,27.153621,25.488022,-0.4518788,0.791034,2011.103064
std,1186.622621,10.411306,2.84499,5.710582,1.791262,5.402524,4.715308,0.3546867,0.210683,6.547747
min,3.1,-20.2,19.3,11.4,23.1,16.0,14.0,-0.8660254,0.5,2000.0
25%,332.155,-5.6,26.7,19.0,30.2,22.9,22.175,-0.8660254,0.5,2005.0
50%,734.54,2.4,28.4,22.2,31.3,26.1,25.0,-0.5,0.866025,2011.0
75%,1413.2875,10.9,30.1,26.9,32.4,30.8,28.1,-2.449294e-16,1.0,2017.0
max,13079.97,27.1,41.2,41.8,42.6,44.2,43.0,-2.449294e-16,1.0,2022.0


In [17]:
add_year_df.to_csv("../data/gen_2/processed/with_fire_area_soi_weather_time.csv", index=False)