In [1]:
import pandas as pd
import numpy as np
import os

## Playground

In [2]:
# Load the 2016 Cambodia hotspot file (viirs-snpp_2016_Cambodia.csv).
fire_csv = 'Fire hotspot/viirs-snpp_2016_Cambodia.csv'
example_fire = pd.read_csv(fire_csv)

# Render acq_time as text and drop its last two characters; what remains is
# stored as 'hour' (presumably acq_time is HHMM, so this is the hour part -- confirm).
example_fire['hour'] = example_fire['acq_time'].astype(str).str[:-2]

# Build an hour-resolution timestamp from the date and the zero-padded hour.
stamp_text = example_fire['acq_date'] + 'T' + example_fire['hour'].str.zfill(2)
example_fire['datetime'] = pd.to_datetime(stamp_text, format='%Y-%m-%dT%H')

In [3]:
# Keep only the coordinate, brightness, frp and timestamp columns.
keep_cols = ['latitude', 'longitude', 'bright_ti4', 'bright_ti5', 'frp', 'datetime']
filtered = example_fire.loc[:, keep_cols]
filtered.head(3)

Unnamed: 0,latitude,longitude,bright_ti4,bright_ti5,frp,datetime
0,11.128095,103.598412,340.71,300.81,4.28,2016-01-01 06:00:00
1,11.331814,105.105881,342.53,298.3,6.83,2016-01-01 06:00:00
2,10.649994,103.775459,332.4,299.12,4.63,2016-01-01 06:00:00


In [4]:
# Average every numeric column over all hotspots that share the same timestamp.
hourly_groups = filtered.groupby('datetime')
fire_Cambodia_2016 = hourly_groups.mean()
fire_Cambodia_2016.head(3)

Unnamed: 0_level_0,latitude,longitude,bright_ti4,bright_ti5,frp
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2016-01-01 06:00:00,13.123613,105.508592,342.235212,299.471295,7.933461
2016-01-01 17:00:00,12.971076,106.365074,307.710966,289.430966,1.66125
2016-01-01 19:00:00,13.303865,105.259085,305.723942,288.617404,1.58875


In [17]:
# Bangkok training table; the first CSV column becomes a parsed datetime index.
bkk = pd.read_csv(
    './data/Train/Bangkok_imputed.csv',
    parse_dates=True,
    index_col=0,
)

In [19]:
# Upsample the Cambodia averages to an hourly grid, carrying the last value forward,
# then left-join them onto the Bangkok rows by timestamp index.
cambodia_hourly = fire_Cambodia_2016.resample('H').ffill()
bkk.merge(cambodia_hourly, how='left', left_index=True, right_index=True)

Unnamed: 0,PM2.5,Temp(C),WindDir,Wind Speed(km/h),latitude,longitude,bright_ti4,bright_ti5,frp
2016-03-03 08:00:00,62.9,26.4,65.0,15.0,13.120443,105.059428,342.179605,302.474164,8.984035
2016-03-03 09:00:00,62.9,26.4,65.0,15.0,13.120443,105.059428,342.179605,302.474164,8.984035
2016-03-03 10:00:00,55.5,31.4,75.0,13.0,13.120443,105.059428,342.179605,302.474164,8.984035
2016-03-03 11:00:00,55.5,31.4,75.0,13.0,13.120443,105.059428,342.179605,302.474164,8.984035
2016-03-03 12:00:00,47.9,31.4,75.0,13.0,13.120443,105.059428,342.179605,302.474164,8.984035
...,...,...,...,...,...,...,...,...,...
2019-03-17 19:00:00,42.2,31.3,70.0,17.0,,,,,
2019-03-17 20:00:00,41.2,31.3,70.0,17.0,,,,,
2019-03-17 21:00:00,37.7,31.3,70.0,17.0,,,,,
2019-03-17 22:00:00,39.0,30.2,85.0,19.0,,,,,


---
# Extract Fire data

In [68]:
# Years and countries for which a hotspot CSV is loaded.
years = ['2016', '2017', '2018', '2019']
countries = ['Cambodia', 'Myanmar', 'Thailand', 'Lao_PDR']

# fire[year][country] will hold one frame per (year, country); start with an
# independent empty dict for each year.
fire = {yr: {} for yr in years}

In [69]:
for year in years:
    for country in countries:
        # One hotspot CSV per (year, country) pair.
        hotspots = pd.read_csv(f'./Fire hotspot/viirs-snpp_{year}_{country}.csv')

        # acq_time as text with its last two characters removed is kept as 'hour'.
        hotspots['hour'] = hotspots['acq_time'].astype(str).str[:-2]

        # Date plus zero-padded hour -> hour-resolution timestamp.
        stamp_text = hotspots['acq_date'] + 'T' + hotspots['hour'].str.zfill(2)
        hotspots['datetime'] = pd.to_datetime(stamp_text, format='%Y-%m-%dT%H')

        # Keep only the columns of interest from the fire-hotspot table.
        # Author's assumption: frp represents brightness -- TODO confirm.
        frp_column = f'{country}_frp'
        fire[year][country] = hotspots[['frp', 'datetime']].rename(columns={'frp': frp_column})

In [70]:
# Timestamps noted by the author: 2016-03-03 08:00:00 and 2019-03-17 23:00:00
# (presumably the span of the PM2.5 training data -- confirm).

# Hourly grid shared by every per-year frame.
hourly_index = pd.date_range(
    start='2016-01-01 01:00:00',
    end='2019-03-17 23:00:00',
    freq='H',
)

data = {}
for year in years:
    merged = pd.DataFrame(index=hourly_index)
    for country in countries:
        # Mean frp per timestamp, upsampled to hourly with forward fill.
        country_mean = fire[year][country].groupby('datetime').mean()
        country_hourly = country_mean.resample('H').ffill()
        merged = merged.merge(country_hourly, how='left', left_index=True, right_index=True)

    # Discard hours for which no country has a value in this year.
    data[year] = merged.dropna(how='all')

In [78]:
# Sanity check: (rows, columns) of the 2016 frame after all-NaN rows were dropped.
data['2016'].shape

(8773, 4)

In [76]:
# Stack the per-year frames; concatenating a dict yields a two-level
# (year key, timestamp) MultiIndex.
fire_all = pd.concat(data)

# Flatten to a plain timestamp index using each row's own label.
# `index.levels[1]` would return only the sorted *unique* timestamps, which
# matches the rows solely when the per-year frames never overlap and are already
# in ascending order; otherwise it raises a length mismatch or mislabels rows.
fire_all.index = fire_all.index.get_level_values(1)

In [79]:
provinces = ['Bangkok', 'Chanthaburi', 'Chiang Mai', 'Kanchanaburi', 'Khon Kaen', 'Songkhla']
mega = {}

for province in provinces:
    # Khon Kaen is skipped; every other province gets the fire columns attached.
    if province != 'Khon Kaen':
        df = pd.read_csv(
            f"./data/Train/{province}_imputed.csv",
            index_col=0,
            parse_dates=True,
        )
        # Left join on the datetime index keeps every row of the province table.
        mega[province] = df.merge(fire_all, how='left', left_index=True, right_index=True)

In [82]:
# Spot-check the merged Bangkok table: select the rows labelled 2016-03-03.
mega['Bangkok'].loc['2016-3-3']

Unnamed: 0,PM2.5,Temp(C),WindDir,Wind Speed(km/h),Cambodia_frp,Myanmar_frp,Thailand_frp,Lao_PDR_frp
2016-03-03 08:00:00,62.9,26.4,65.0,15.0,8.984035,8.099819,9.423972,7.991304
2016-03-03 09:00:00,62.9,26.4,65.0,15.0,8.984035,8.099819,9.423972,7.991304
2016-03-03 10:00:00,55.5,31.4,75.0,13.0,8.984035,8.099819,9.423972,7.991304
2016-03-03 11:00:00,55.5,31.4,75.0,13.0,8.984035,8.099819,9.423972,7.991304
2016-03-03 12:00:00,47.9,31.4,75.0,13.0,8.984035,8.099819,9.423972,7.991304
2016-03-03 13:00:00,43.6,34.1,70.0,12.0,8.984035,8.099819,9.423972,7.991304
2016-03-03 14:00:00,28.6,34.1,70.0,12.0,8.984035,8.099819,9.423972,7.991304
2016-03-03 15:00:00,33.6,34.1,70.0,12.0,8.984035,8.099819,9.423972,7.991304
2016-03-03 16:00:00,34.8,34.6,60.0,12.0,8.984035,8.099819,9.423972,7.991304
2016-03-03 17:00:00,31.3,34.6,60.0,12.0,8.984035,8.099819,9.423972,7.991304


## Save extracted data

In [85]:
# Write one fire-integrated CSV per province, never overwriting an existing file.
for province in provinces:
    # Khon Kaen has no entry in `mega`, so it is skipped here as well.
    if province == 'Khon Kaen' : continue
    path = f'./data/Train/fire_integrated/{province}_fire_integrated.csv'
    if os.path.exists(path):
        print(f"{province} already")
        continue
    # Create the output folder on first use; to_csv fails if it does not exist.
    os.makedirs(os.path.dirname(path), exist_ok=True)
    mega[province].to_csv(path)

Bangkok already
Chanthaburi already
Chiang Mai already
Kanchanaburi already


# Impute nulls