## Preprocessing ENSO and Hotspot Data

In [1]:
# Main Lib
import pandas as pd 
import numpy as np 

# Text-display settings for printed DataFrames: raise the cap on how many
# columns pandas shows and the character width it may use per line.
pd.set_option('display.max_columns', 1000)
pd.set_option('display.width', 1000)

import matplotlib.pyplot as plt 
import seaborn as sns
import plotly.express as px 

### Step 1. Read Datasets

- Read Hotspot

In [2]:
# Load the monthly hotspot table; `acq_date` is converted to datetime while
# the file is being read.
df_hotspot = pd.read_csv(
    "../Dataset/Hotspot_Monthly.csv",
    parse_dates=["acq_date"],
)
print(df_hotspot)

      acq_date  hotspot
0   2001-01-31       27
1   2001-02-28       24
2   2001-03-31       29
3   2001-04-30       30
4   2001-05-31       31
..         ...      ...
271 2023-08-31       31
272 2023-09-30       30
273 2023-10-31       31
274 2023-11-30       30
275 2023-12-31       31

[276 rows x 2 columns]


- Read ENSO

In [3]:
# Load the ENSO table; `acq_date` is converted to datetime while the file is
# being read.
df_enso = pd.read_csv(
    "../Dataset/ENSO.csv",
    parse_dates=["acq_date"],
)
print(df_enso)

      acq_date  sst_std  sst_anom  oni_std  oni_anom  soi_std  soi_anom
0   2001-01-31    25.81     -0.76    25.88     -0.68      1.0       1.6
1   2001-02-28    26.18     -0.56    26.26     -0.52      1.7       2.8
2   2001-03-31    26.86     -0.37    26.76     -0.44      0.9       1.5
3   2001-04-30    27.24     -0.56    27.25     -0.34      0.2       0.3
4   2001-05-31    27.42     -0.46    27.49     -0.25     -0.5      -0.8
..         ...      ...       ...      ...       ...      ...       ...
271 2023-08-31    28.20      1.30    28.28      1.32     -0.8      -1.4
272 2023-09-30    28.29      1.53    28.32      1.56     -1.3      -2.1
273 2023-10-31    28.36      1.59    28.49      1.78     -0.5      -0.8
274 2023-11-30    28.72      1.90    28.60      1.92     -0.8      -1.3
275 2023-12-31    28.64      1.99    28.59      1.97     -0.2      -0.4

[276 rows x 7 columns]


- Combine Hotspot & ENSO

In [4]:
# Attach the hotspot column to the ENSO table by matching on the shared
# `acq_date` key instead of pasting it on by row position. A positional concat
# silently pairs hotspot values with the wrong month whenever the two files
# differ in sort order or one of them is missing a row.
#
# how="left" keeps every ENSO row; a date with no hotspot record shows up as
# NaN in `hotspot` rather than being dropped.
# validate="one_to_one" makes pandas raise a MergeError if `acq_date` is
# duplicated in either table, so a bad input cannot quietly multiply rows.
df = df_enso.merge(
    df_hotspot[["acq_date", "hotspot"]],
    on="acq_date",
    how="left",
    validate="one_to_one",
)
print(df)

      acq_date  sst_std  sst_anom  oni_std  oni_anom  soi_std  soi_anom  hotspot
0   2001-01-31    25.81     -0.76    25.88     -0.68      1.0       1.6       27
1   2001-02-28    26.18     -0.56    26.26     -0.52      1.7       2.8       24
2   2001-03-31    26.86     -0.37    26.76     -0.44      0.9       1.5       29
3   2001-04-30    27.24     -0.56    27.25     -0.34      0.2       0.3       30
4   2001-05-31    27.42     -0.46    27.49     -0.25     -0.5      -0.8       31
..         ...      ...       ...      ...       ...      ...       ...      ...
271 2023-08-31    28.20      1.30    28.28      1.32     -0.8      -1.4       31
272 2023-09-30    28.29      1.53    28.32      1.56     -1.3      -2.1       30
273 2023-10-31    28.36      1.59    28.49      1.78     -0.5      -0.8       31
274 2023-11-30    28.72      1.90    28.60      1.92     -0.8      -1.3       30
275 2023-12-31    28.64      1.99    28.59      1.97     -0.2      -0.4       31

[276 rows x 8 columns]


In [5]:
# Write the combined table to disk; index=False leaves the DataFrame's row
# index out of the CSV.
df.to_csv(
    "../Dataset/Hotspot_Final.csv",
    index=False,
)