# Account for other weather events
El Niño and La Niña weather events are said to have a significant impact on Australian rainfall conditions, and the model should account for this. The Australian Bureau of Meteorology measures these events with the Southern Oscillation Index (SOI).

In [53]:
import pandas as pd

# Fire records; acq_date is converted to datetimes so it can be reformatted later.
df = pd.read_csv('../data/from-kaggle.csv').assign(
    acq_date=lambda frame: pd.to_datetime(frame['acq_date'])
)

# SOI readings. Column names are supplied explicitly, and the date column is kept
# as a string because soi_df date format is in YYYYMM.
soi_df = pd.read_csv(
    '../data/soi_bom.csv',
    names=["date", "soi"],
    dtype={"date": str, "soi": float},
)

In [54]:
# Inspect the loaded SOI table: `date` is read as a YYYYMM string, `soi` as a float.
soi_df

Unnamed: 0,date,soi
0,187601,11.3
1,187602,11.0
2,187603,0.2
3,187604,9.4
4,187605,6.8
...,...,...
1765,202302,10.5
1766,202303,-2.0
1767,202304,0.3
1768,202305,-18.5


In [55]:
# Attach an SOI reading to every row of df by matching on a YYYYMM month key.
# A single vectorised lookup replaces the per-row scan of soi_df, and the key is
# compared for exact equality rather than as a regex substring.

# Month key for each record, in the same YYYYMM form used by soi_df['date']
month_keys = df['acq_date'].dt.strftime('%Y%m')

# date -> soi lookup table. If a month appears more than once, keep the first
# entry, which is the row a positional `.iloc[0]` on the matches would return.
soi_by_month = soi_df.drop_duplicates(subset='date').set_index('date')['soi']

# Fail loudly with the offending months instead of an opaque IndexError (or a
# silently inserted NaN) when a record's month has no SOI entry.
unmatched = month_keys[~month_keys.isin(soi_by_month.index)]
if not unmatched.empty:
    raise ValueError(
        "No SOI value found for month(s): "
        + ", ".join(sorted(unmatched.astype(str).unique()))
    )

# Create a new column in df with the looked-up soi values. Assign positionally
# (as a plain array) so the result does not depend on index alignment.
df['soi'] = month_keys.map(soi_by_month).to_numpy()

In [56]:
# Inspect df to confirm the new `soi` column is present alongside the fire data.
df

Unnamed: 0.1,Unnamed: 0,acq_date,fires,frp,scan,brightness,max_t_syd,max_t_bne,soi
0,0.0,2000-11-01,104.0,17572.6,195.4,357.982692,22.8,27.0,22.4
1,1.0,2000-11-02,211.0,45603.4,307.3,351.112322,22.3,24.0,22.4
2,2.0,2000-11-03,181.0,27439.3,325.8,353.117680,23.8,26.0,22.4
3,3.0,2000-11-04,239.0,29887.1,299.7,361.599163,23.0,28.0,22.4
4,4.0,2000-11-05,43.0,7193.7,92.0,347.181395,21.4,28.0,22.4
...,...,...,...,...,...,...,...,...,...
6830,6919.0,2019-10-12,211.0,36282.4,312.8,361.617062,20.4,21.3,-5.6
6831,6920.0,2019-10-13,128.0,23654.4,225.6,357.284375,19.2,25.3,-5.6
6832,6921.0,2019-10-14,230.0,49021.2,346.0,361.449565,21.9,25.9,-5.6
6833,6922.0,2019-10-15,137.0,23865.6,232.1,357.659854,24.0,27.2,-5.6


In [57]:
# Write the SOI-augmented frame to disk.
# NOTE(review): to_csv also writes the index as an unnamed leading column by
# default. The df preview appears to already carry an "Unnamed: 0" column from an
# earlier save, so each round trip adds another. Consider index=False once it is
# confirmed that nothing reading data.csv relies on that column.
df.to_csv("../data/data.csv")