In [1]:
from util_input_output_model import *
from collections import defaultdict
from datetime import timedelta
import tensorflow as tf
import numpy as np
import pandas as pd
import time
# Provinces handled by this notebook; used as the key of every per-province dict.
provinces = [
    'Bangkok',
    'Chanthaburi',
    'Chiang Mai',
    'Kanchanaburi',
    'Songkhla',
    'Khon Kaen',
]

# Full list of input columns: PM2.5, weather, and the per-country fire (frp) columns.
features = [
    'PM2.5',
    'WindDir',
    'Wind Speed(km/h)',
    'Temp(C)',
    'Cambodia_frp',
    'Myanmar_frp',
    'Thailand_frp',
    'Lao_PDR_frp',
]

# Show the TensorFlow version the notebook was run with.
tf.__version__

INFO:tensorflow:Enabling eager execution
INFO:tensorflow:Enabling v2 tensorshape
INFO:tensorflow:Enabling resource variables
INFO:tensorflow:Enabling tensor equality
INFO:tensorflow:Enabling control flow v2


'2.5.0-rc1'

# 1) Preparing data

In [2]:
# Number of past timesteps in each model input window (second axis of X below).
timesteps = 720
print(f'timesteps = {timesteps}')

timesteps = 720


In [3]:
# Slow cell: preparing the train and test sets takes roughly 5-10 minutes.
# Only these four columns are fed to the models loaded later in the notebook.
feature_used = ['PM2.5', 'WindDir', 'Wind Speed(km/h)', 'Temp(C)']

Train_data, X_train, Y_train = prepare_train_data(
    timesteps,
    feature_used=feature_used,
)
Test_data, X_test, Y_test = prepare_new_test(
    Train_data,
    timesteps,
    feature_used=feature_used,
)

# 2) Standardize the data


In [4]:
# Scale the training set. The returned scalers are indexed by province and are
# reused below to transform the test set and to invert the predictions.
(
    x_train_scalers,
    y_train_scalers,
    X_train_scaled,
    Y_train_scaled,
) = scale_data(X_train, Y_train, Train_data)

In [18]:
# Transform every test window with the scalers obtained from the training set,
# so train and test share the same scaling.
X_test_scaled = defaultdict(list)
Y_test_scaled = defaultdict(list)

for p in provinces:
    print(p)
    X_test_scaled[p].extend(
        x_train_scalers[p].transform(window) for window in X_test[p]
    )
    Y_test_scaled[p].extend(
        y_train_scalers[p].transform(window) for window in Y_test[p]
    )

Bangkok
Chanthaburi
Chiang Mai
Kanchanaburi
Songkhla
Khon Kaen


# 3) Reshaping

In [6]:
# Stack the per-window lists into arrays, stored as x_[province][split] and
# y_[province][split] with split in {"Train", "Test"}.
scaled_inputs = {"Train": X_train_scaled, "Test": X_test_scaled}
scaled_targets = {"Train": Y_train_scaled, "Test": Y_test_scaled}

x_, y_ = {}, {}

for p in provinces:
    x_[p] = {
        split: np.array(source[p])
        for split, source in scaled_inputs.items()
    }
    # Targets carry a trailing length-1 axis; squeeze it away.
    y_[p] = {
        split: np.array(source[p]).squeeze(axis=2)
        for split, source in scaled_targets.items()
    }

    print(p)
    print(f'X Train: {x_[p]["Train"].shape}')
    print(f'Y Train: {y_[p]["Train"].shape}')
    print(f'X Test: {x_[p]["Test"].shape}')
    print(f'Y Test: {y_[p]["Test"].shape}\n')

Bangkok
X Train: (4306, 720, 4)
Y Train: (4306, 72)
X Test: (1112, 720, 4)
Y Test: (1112, 72)

Chanthaburi
X Train: (4306, 720, 4)
Y Train: (4306, 72)
X Test: (1112, 720, 4)
Y Test: (1112, 72)

Chiang Mai
X Train: (4306, 720, 4)
Y Train: (4306, 72)
X Test: (1112, 720, 4)
Y Test: (1112, 72)

Kanchanaburi
X Train: (4306, 720, 4)
Y Train: (4306, 72)
X Test: (1112, 720, 4)
Y Test: (1112, 72)

Songkhla
X Train: (4306, 720, 4)
Y Train: (4306, 72)
X Test: (1127, 720, 4)
Y Test: (1127, 72)

Khon Kaen
X Train: (1876, 720, 4)
Y Train: (1876, 72)
X Test: (1110, 720, 4)
Y Test: (1110, 72)



# 4) Load the model

The models below take *720* timesteps as input; each timestep contains 4 features (PM2.5, wind direction, wind speed, temperature). They give RMSE = **10.56**.

    - Bangkok : Models\Bangkok_run_2021_05_07-20_31_15_chckpoint.h5
    - Chanthaburi : Models\Chanthaburi_run_2021_05_08-12_58_02_chckpoint.h5
    - Songkhla : Models\Songkhla_run_2021_05_08-19_56_09_chckpoint.h5
    - Kanchanaburi : Models\Kanchanaburi_run_2021_05_08-15_58_45_chckpoint.h5
    - Khon Kaen : Models\Khon Kaen_run_2021_05_07-23_38_36_chckpoint.h5
    - Chiang Mai : Models\Chiang Mai_run_2021_05_08-07_41_04_chckpoint.h5
        
The models below take *360* timesteps as input; each timestep contains all features (PM2.5, wind direction, wind speed, temperature, and fire data from 4 countries). They give RMSE = **10.96**.

    - Bangkok : Models\Bangkok_run_2021_04_30-14_43_39.h5
    - Chanthaburi : Models\Chanthaburi_run_2021_04_26-10_42_58_final_26Apr1155.h5
    - Songkhla : Models\Songkhla_run_2021_04_25-22_41_22.h5
    - Kanchanaburi : Models\Kanchanaburi_run_2021_04_30-16_22_11.h5
    - Khon Kaen : Models\Khon Kaen_run_2021_04_27-16_20_46_kind_of_final.h5
    - Chiang Mai : Models\Chiang Mai_run_2021_04_30-15_37_01.h5

# 5) Submission

In [7]:
# Checkpoint of the trained model for each province (the 720-timestep, 4-feature set).
# Forward slashes are used instead of backslashes: sequences such as "\B" or "\C"
# are invalid string escapes in Python, and backslash paths only resolve on Windows.
models = {
    'Bangkok': 'Models/Bangkok_run_2021_05_07-20_31_15_chckpoint.h5',
    'Chanthaburi': 'Models/Chanthaburi_run_2021_05_08-12_58_02_chckpoint.h5',
    'Songkhla': 'Models/Songkhla_run_2021_05_08-19_56_09_chckpoint.h5',
    'Kanchanaburi': 'Models/Kanchanaburi_run_2021_05_08-15_58_45_chckpoint.h5',
    'Khon Kaen': 'Models/Khon Kaen_run_2021_05_07-23_38_36_chckpoint.h5',
    'Chiang Mai': 'Models/Chiang Mai_run_2021_05_08-07_41_04_chckpoint.h5',
}

# One single-column ('Predicted') frame per province, appended in loop order.
grand = []

# NOTE(review): this order presumably has to match the row order of
# ./predictions/output_index.csv, which is joined by position later - confirm
# before reordering.
for province in ["Chanthaburi", "Chiang Mai", "Kanchanaburi", "Bangkok", "Khon Kaen", "Songkhla"]:

    # Load the trained model
    model = tf.keras.models.load_model(models[province])

    # Load Test data
    x_eval = x_[province]['Test']

    # Predict Test data (still in the scaled target space)
    pred = model(x_eval)

    # Flatten all predictions to one column and undo the target scaling in a
    # single call instead of once per window.
    # Assumes the scaler works element-wise on a single column, as the original
    # per-row reshape((-1, 1)) calls already implied - TODO confirm.
    prediction_1D = y_train_scalers[province].inverse_transform(
        pred.numpy().reshape((-1, 1))
    )
    province_prediction = pd.DataFrame({'Predicted': prediction_1D.ravel()})

    grand.append(province_prediction)
    print(f"{province} done")

Chanthaburi done
Chiang Mai done
Kanchanaburi done
Bangkok done
Khon Kaen done
Songkhla done


In [8]:
# Preview of the combined predictions: the per-province frames stacked in loop
# order into one 'Predicted' column with a fresh 0..n-1 index.
pd.concat(grand, ignore_index=True)

Unnamed: 0,Predicted
0,29.006260
1,29.083462
2,28.830957
3,29.209991
4,28.931713
...,...
481315,12.511642
481316,12.568989
481317,12.041894
481318,11.637535


In [9]:
# Build the submission frame with its index named 'Id'.
# Nothing is written to disk here; the CSV is saved in the last cell, after the
# rows listed in missing_index.csv have been removed.
submit = pd.concat(grand, ignore_index=True).rename_axis('Id')

## 5.1) make_submission
ในส่วนนี้คัดลอกมาจากไฟล์ `make_submission.ipynb` ที่พี่ TA ให้ใหม่หลังจากแก้ submission ใน Kaggle

In [10]:
# Timestamps to exclude from the submission (its 'Missing date' column is used below).
miss_index = pd.read_csv('./predictions/missing_index.csv')
# Label of every predicted row (its 'predicted timestamp' column is used below).
prediction_index = pd.read_csv('./predictions/output_index.csv')

In [11]:
# Label each prediction with its "<province> <timestamp>" string.
# The assignment aligns on the row index, so a length mismatch would silently
# leave NaN labels or drop timestamps; fail loudly instead.
assert len(submit) == len(prediction_index), (
    f"{len(submit)} predictions but {len(prediction_index)} timestamps"
)
submit['datetime index'] = prediction_index['predicted timestamp']
submit[['Predicted','datetime index']]

Unnamed: 0_level_0,Predicted,datetime index
Id,Unnamed: 1_level_1,Unnamed: 2_level_1
0,29.006260,Chanthaburi 2019-03-18 13:00:00
1,29.083462,Chanthaburi 2019-03-18 14:00:00
2,28.830957,Chanthaburi 2019-03-18 15:00:00
3,29.209991,Chanthaburi 2019-03-18 16:00:00
4,28.931713,Chanthaburi 2019-03-18 17:00:00
...,...,...
481315,12.511642,Songkhla 2020-03-18 14:00:00
481316,12.568989,Songkhla 2020-03-18 15:00:00
481317,12.041894,Songkhla 2020-03-18 16:00:00
481318,11.637535,Songkhla 2020-03-18 17:00:00


In [12]:
# Remove, in place, every prediction whose label appears in the missing-date list.
cond = submit['datetime index'].isin(miss_index['Missing date'])
submit.drop(index=submit.index[cond], inplace=True)

In [13]:
# Renumber the remaining rows 0..n-1. `drop=True` discards the old 'Id' index
# directly, so no temporary 'Id' column has to be dropped afterwards and the
# cell gives the same result if it is run again.
submit.reset_index(drop=True, inplace=True)
submit

Unnamed: 0,Predicted,datetime index
0,29.006260,Chanthaburi 2019-03-18 13:00:00
1,29.083462,Chanthaburi 2019-03-18 14:00:00
2,28.830957,Chanthaburi 2019-03-18 15:00:00
3,29.209991,Chanthaburi 2019-03-18 16:00:00
4,28.931713,Chanthaburi 2019-03-18 17:00:00
...,...,...
448549,12.511642,Songkhla 2020-03-18 14:00:00
448550,12.568989,Songkhla 2020-03-18 15:00:00
448551,12.041894,Songkhla 2020-03-18 16:00:00
448552,11.637535,Songkhla 2020-03-18 17:00:00


In [14]:
# Expose the row number as an 'id' column and keep only the two submission columns.
submit = submit.assign(id=submit.index)[['Predicted', 'id']]

In [15]:
# Make 'id' the index so the saved CSV has exactly the columns id,Predicted.
submit = submit.set_index('id')
submit

Unnamed: 0_level_0,Predicted
id,Unnamed: 1_level_1
0,29.006260
1,29.083462
2,28.830957
3,29.209991
4,28.931713
...,...
448549,12.511642
448550,12.568989
448551,12.041894
448552,11.637535


In [16]:
# Timestamp the file name so repeated runs do not overwrite an earlier submission.
# `time` is already imported in the first cell, so it is not re-imported here.
time_now = time.strftime("%Y_%b_%d-%H_%M_%S")
submit.to_csv(f'./predictions/submission_{time_now}.csv')