## Akuisisi Data

### 1. Deklarasi Pustaka

In [None]:
# pustaka manipulasi data array
import numpy as np
from numpy import concatenate

# pustaka manipulasi data frame
import pandas as pd
from pandas import concat
from pandas import DataFrame

# pustaka tanggal
from datetime import datetime
from matplotlib import pyplot as plt

### 2. Deklarasi Function

In [None]:
# define function
def plot_time_series(x, y, label, title, xlabel="", ylabel=""):
    """Draw a single-line time-series chart and display it.

    Args:
        x: Values for the horizontal axis (callers in this notebook pass a
            date column).
        y: Values for the vertical axis.
        label: Legend entry for the plotted line.
        title: Text shown above the axes; pass "" for no title.
        xlabel: Caption for the horizontal axis. Defaults to "", which
            reproduces the previously hard-coded empty caption.
        ylabel: Caption for the vertical axis. Defaults to "".

    Returns:
        None. The figure is rendered through ``plt.show()``.
    """
    # one 10x5 figure holding a single set of axes; the figure handle itself
    # is not needed afterwards
    _, ax = plt.subplots(figsize=(10, 5))

    # the series itself
    ax.plot(x, y, color="tab:blue", label=label, linewidth=2.5)

    # title, axis captions, legend and grid
    ax.set_title(title, fontsize=14)
    ax.set_xlabel(xlabel, fontsize=12)
    ax.set_ylabel(ylabel, fontsize=12)
    ax.legend(loc="best")
    ax.grid(True)

    # render the figure
    plt.show()

### 3. Hotspot Sumatera Selatan

- Load dataset

In [None]:
# Read the first hotspot CSV (file name suggests daily records, 2001-2022);
# the "acq_date" column is parsed into datetimes on load.
df_hotspot_1 = pd.read_csv(
    "dataset/hotspot_sumsel_harian_2001_2022.csv",
    parse_dates=["acq_date"],
)

In [None]:
# Print the frame summary (columns, dtypes, non-null counts) to check the load.
df_hotspot_1.info()

In [None]:
# Read the second hotspot CSV (file name suggests daily records for 2023);
# the "acq_date" column is parsed into datetimes on load.
df_hotspot_2 = pd.read_csv(
    "dataset/hotspot_sumsel_harian_2023.csv",
    parse_dates=["acq_date"],
)

In [None]:
# Print the frame summary (columns, dtypes, non-null counts) to check the load.
df_hotspot_2.info()

In [None]:
# Stack both files into one frame. ignore_index=True gives the result a fresh
# 0..n-1 index; without it the row labels of the second file repeat those of
# the first, which makes label-based lookups on the combined frame ambiguous.
df_hotspot = pd.concat([df_hotspot_1, df_hotspot_2], ignore_index=True)

In [None]:
# Print the summary of the combined frame (columns, dtypes, non-null counts).
df_hotspot.info()

In [None]:
# Bare expression: the notebook renders the combined frame as the cell output.
df_hotspot

In [None]:
# # set index 
# df_hotspot = df_hotspot.set_index("acq_date")

- Agregasi dataset

In [None]:
# Aggregate the daily records to monthly totals: rows are grouped by the
# month of "acq_date" and the number of rows per month becomes "hotspot".
# NOTE(review): pandas >= 2.2 deprecates the 'M' alias in favour of 'ME';
# switch once the minimum supported pandas version allows it.
df_hotspot_bulanan = df_hotspot.resample('M', on='acq_date').size().reset_index(name='hotspot')

In [None]:
# Print the summary of the monthly frame (columns, dtypes, non-null counts).
df_hotspot_bulanan.info()

In [None]:
# df_hotspot_bulanan.to_csv("dataset/hotspot_sumsel_bulanan_2001_2023.csv", index=False)

- Visualisasi dataset

In [None]:
# Line chart of the monthly hotspot counts; the text goes to the legend and
# the title is left empty.
plot_time_series(
    x=df_hotspot_bulanan["acq_date"],
    y=df_hotspot_bulanan["hotspot"],
    label="Hotspot Sumatera Selatan",
    title="",
)

### 4. Index SOI

In [None]:
# Read the "SOI Results" sheet of the ENSO workbook.
soi = pd.read_excel(
    "dataset/dataset_enso.xlsx",
    sheet_name="SOI Results",
)

In [None]:
# Print the summary of the SOI sheet (columns, dtypes, non-null counts).
soi.info()

In [None]:
# Line chart of the SOI "anomali" column against "date".
plot_time_series(
    x=soi["date"],
    y=soi["anomali"],
    label="Index SOI tahun 2001 - 2023",
    title="",
)

In [None]:
# Line chart of the SOI "standard" column against "date".
plot_time_series(
    x=soi["date"],
    y=soi["standard"],
    label="Index SOI tahun 2001 - 2023",
    title="",
)

### 5. SST Niño 3.4

In [None]:
# Read the "SST Results" sheet of the ENSO workbook.
sst = pd.read_excel(
    "dataset/dataset_enso.xlsx",
    sheet_name="SST Results",
)

In [None]:
# Print the summary of the SST sheet (columns, dtypes, non-null counts).
sst.info()

In [None]:
# Line chart of the SST "anomali" column against "date".
plot_time_series(
    x=sst["date"],
    y=sst["anomali"],
    label="SST Nina 3.4 tahun 2001 - 2023",
    title="",
)

In [None]:
# Line chart of the SST "standard" column against "date".
plot_time_series(
    x=sst["date"],
    y=sst["standard"],
    label="SST Nina 3.4 tahun 2001 - 2023",
    title="",
)

### 6. ONI Index

In [None]:
# Read the "ONI Results" sheet of the ENSO workbook.
oni = pd.read_excel(
    "dataset/dataset_enso.xlsx",
    sheet_name="ONI Results",
)

In [None]:
# Line chart of the ONI "anomali" column against "date".
plot_time_series(
    x=oni["date"],
    y=oni["anomali"],
    label="ONI Index tahun 2001 - 2023",
    title="",
)

In [None]:
# Line chart of the ONI "standard" column against "date".
plot_time_series(
    x=oni["date"],
    y=oni["standard"],
    label="ONI Index tahun 2001 - 2023",
    title="",
)

### Final Dataset

In [None]:
# Monthly hotspot counts
hotspot = df_hotspot_bulanan

# ENSO indicators: each one is kept as a one-column DataFrame
soi_anom, soi_std = soi[["anomali"]], soi[["standard"]]
sst_anom, sst_std = sst[["anomali"]], sst[["standard"]]
oni_anom, oni_std = oni[["anomali"]], oni[["standard"]]

# Climate variables (radiation, rainfall, air temperature, air humidity,
# wind speed): empty-string placeholders, no data is assigned here
radiasi = curah_hujan = suhu_udara = kelembaban_udara = kecepatan_angin = ""

In [None]:
# Align every series on its calendar month instead of on row position.
# A plain column-wise concat pairs rows by their 0..n-1 labels, so a sheet
# that starts in a different month than the hotspot series, or skips one,
# would be shifted against the other columns without raising any error.
# When all sources are sorted by date and already cover the same months, one
# row each, the result is identical to the positional concat.
# NOTE(review): assumes the "date" column of each ENSO sheet holds
# datetime-like values with one row per month - confirm against the workbook.
monthly_parts = []
for frame, date_col, value_cols in (
    (hotspot, "acq_date", ["acq_date", "hotspot"]),
    (soi, "date", ["anomali", "standard"]),
    (sst, "date", ["anomali", "standard"]),
    (oni, "date", ["anomali", "standard"]),
):
    part = frame[value_cols].copy()
    # month-level key: month-end and month-start stamps of the same month
    # map to the same period
    part.index = pd.PeriodIndex(pd.to_datetime(frame[date_col]), freq="M")
    monthly_parts.append(part)

# outer join on the month key, chronological order, then back to a plain
# 0..n-1 index as before
dataset = pd.concat(monthly_parts, axis=1).sort_index().reset_index(drop=True)
dataset.columns = ["acq_date", "hotspot", "soi_anom", "soi_std", "sst_anom", "sst_std", "oni_anom", "oni_std"]

In [None]:
# Bare expression: the notebook renders the final frame as the cell output.
dataset

In [None]:
# dataset.to_csv("dataset/dataset_boptn.csv", index=False)