In [1]:
import pandas as pd
import numpy as np
import os

# Extract Fire data

In [12]:
# Years and countries covered by the hotspot files loaded below.
years = [str(y) for y in range(2016, 2020)]
countries = 'Cambodia Myanmar Thailand Lao_PDR'.split()

# `fire` starts with one empty dict per year; `data` starts empty.
fire = {year: {} for year in years}
data = dict()

In [13]:
# Build one hourly FRP table per year, with one column per country.
# NOTE(review): timestamps are taken from acq_date/acq_time as-is, with no
# timezone conversion -- confirm they match the timezone of the data joined later.
for year in years:
    # Complete hourly index for the year, so hours without detections still get a row.
    timeline = pd.date_range(start=f'{year}-01-01', end=f'{year}-12-31 23:00:00', freq='H')
    left = pd.DataFrame(index=timeline)
    for country in countries:
        raw = pd.read_csv(f'./Fire hotspot/viirs-snpp_{year}_{country}.csv')

        # Dropping the last two characters of acq_time leaves the hour part
        # (presumably acq_time is HHMM parsed as an integer -- TODO confirm);
        # zfill(2) restores leading zeros, e.g. '630' -> '6' -> '06', '5' -> '' -> '00'.
        hour = raw['acq_time'].astype(str).str[:-2].str.zfill(2)

        # Assume: frp represents brightness.
        # The frame is built with a non-mutating chain rather than renaming a
        # column subset in place, which can raise SettingWithCopyWarning.
        df = (
            raw.assign(datetime=pd.to_datetime(raw['acq_date'] + 'T' + hour,
                                               format='%Y-%m-%dT%H'))
               .loc[:, ['frp', 'datetime']]
               .rename(columns={'frp': country + '_frp'})
               .groupby('datetime')
               .mean()  # average FRP of all detections that fall in the same hour
        )

        # Assume: no fire value in the records = no fire -> frp = 0
        df = df.resample('H').mean().fillna(value=0.0)
        left = left.merge(df, how='left', left_index=True, right_index=True)

    # Hours of the year outside the country's observed range are also treated as 0.
    data[year] = left.fillna(value=0.0)
                

In [14]:
# Stack the yearly frames into one table. pd.concat on a dict adds the dict keys
# (the years) as an outer index level; drop it to keep a plain timestamp index.
# droplevel keeps exactly one label per row, whereas index.levels[1] only holds
# the level's unique values and is not guaranteed to line up row by row.
fire_all = pd.concat(data).droplevel(0)

In [15]:
# Preview the combined table: one row per timestamp, one *_frp column per country.
fire_all

Unnamed: 0,Cambodia_frp,Myanmar_frp,Thailand_frp,Lao_PDR_frp
2016-01-01 00:00:00,0.0,0.000,0.000000,0.0
2016-01-01 01:00:00,0.0,0.000,0.000000,0.0
2016-01-01 02:00:00,0.0,0.000,0.000000,0.0
2016-01-01 03:00:00,0.0,0.000,0.000000,0.0
2016-01-01 04:00:00,0.0,0.000,0.000000,0.0
...,...,...,...,...
2019-12-31 19:00:00,0.0,0.612,1.628704,0.0
2019-12-31 20:00:00,0.0,0.000,0.000000,0.0
2019-12-31 21:00:00,0.0,0.000,0.000000,0.0
2019-12-31 22:00:00,0.0,0.000,0.000000,0.0


In [16]:
# Province names; every one except Khon Kaen is loaded from its imputed training CSV.
provinces = ['Bangkok', 'Chanthaburi', 'Chiang Mai', 'Kanchanaburi', 'Khon Kaen', 'Songkhla']
mega = {}

for province in provinces:
    if province != 'Khon Kaen':
        # The first CSV column becomes the date-parsed index, so the fire table
        # can be attached by index.
        df = pd.read_csv(f"./data/Train/{province}_imputed.csv", index_col=0, parse_dates=True)
        # Left join: keep every training row and add the country FRP columns.
        mega[province] = pd.merge(df, fire_all, how='left', left_index=True, right_index=True)

In [17]:
# Spot-check the merged Bangkok frame for a single day (partial-string date lookup).
mega['Bangkok'].loc['2016-3-3']

Unnamed: 0,PM2.5,Temp(C),WindDir,Wind Speed(km/h),Cambodia_frp,Myanmar_frp,Thailand_frp,Lao_PDR_frp
2016-03-03 08:00:00,62.9,26.4,65.0,15.0,0.0,0.0,0.0,0.0
2016-03-03 09:00:00,62.9,26.4,65.0,15.0,0.0,0.0,0.0,0.0
2016-03-03 10:00:00,55.5,31.4,75.0,13.0,0.0,0.0,0.0,0.0
2016-03-03 11:00:00,55.5,31.4,75.0,13.0,0.0,0.0,0.0,0.0
2016-03-03 12:00:00,47.9,31.4,75.0,13.0,0.0,0.0,0.0,0.0
2016-03-03 13:00:00,43.6,34.1,70.0,12.0,0.0,0.0,0.0,0.0
2016-03-03 14:00:00,28.6,34.1,70.0,12.0,0.0,0.0,0.0,0.0
2016-03-03 15:00:00,33.6,34.1,70.0,12.0,0.0,0.0,0.0,0.0
2016-03-03 16:00:00,34.8,34.6,60.0,12.0,0.0,0.0,0.0,0.0
2016-03-03 17:00:00,31.3,34.6,60.0,12.0,0.0,0.0,0.0,0.0


In [82]:
# NOTE(review): same expression as the previous cell, but the saved output below
# differs from it and the execution count (82) is out of sequence -- this output
# looks stale. Re-run the notebook top to bottom or remove this cell.
mega['Bangkok'].loc['2016-3-3']

Unnamed: 0,PM2.5,Temp(C),WindDir,Wind Speed(km/h),Cambodia_frp,Myanmar_frp,Thailand_frp,Lao_PDR_frp
2016-03-03 08:00:00,62.9,26.4,65.0,15.0,8.984035,8.099819,9.423972,7.991304
2016-03-03 09:00:00,62.9,26.4,65.0,15.0,8.984035,8.099819,9.423972,7.991304
2016-03-03 10:00:00,55.5,31.4,75.0,13.0,8.984035,8.099819,9.423972,7.991304
2016-03-03 11:00:00,55.5,31.4,75.0,13.0,8.984035,8.099819,9.423972,7.991304
2016-03-03 12:00:00,47.9,31.4,75.0,13.0,8.984035,8.099819,9.423972,7.991304
2016-03-03 13:00:00,43.6,34.1,70.0,12.0,8.984035,8.099819,9.423972,7.991304
2016-03-03 14:00:00,28.6,34.1,70.0,12.0,8.984035,8.099819,9.423972,7.991304
2016-03-03 15:00:00,33.6,34.1,70.0,12.0,8.984035,8.099819,9.423972,7.991304
2016-03-03 16:00:00,34.8,34.6,60.0,12.0,8.984035,8.099819,9.423972,7.991304
2016-03-03 17:00:00,31.3,34.6,60.0,12.0,8.984035,8.099819,9.423972,7.991304


## Save extracted data

In [18]:
# Write one merged CSV per province. Files that already exist are left untouched,
# so delete a file first if it needs to be regenerated.
for province in provinces:
    if province == 'Khon Kaen':
        continue  # this province was never merged into `mega`
    path = f'./data/Train/fire_integrated/{province}_fire_integrated.csv'
    if not os.path.exists(path):
        # to_csv does not create missing parent directories.
        os.makedirs(os.path.dirname(path), exist_ok=True)
        mega[province].to_csv(path)
    else:
        print(f"{province} already")

Save the complete fire dataset so it can be reused later with the test set.

In [35]:
import glob  # not used by this cell any more; kept in case other cells rely on it

# Save the combined fire table for later reuse.
# Check for this exact file rather than for any CSV in ./data, so unrelated
# CSVs in that folder cannot suppress the save.
fire_all_path = './data/fire_2016_to_2019.csv'
if not os.path.exists(fire_all_path):
    fire_all.to_csv(fire_all_path)
else:
    print('already')

already
