## Local models for pm2.5 prediction
This notebook creates and trains one single station for each location and saves it at the given path.

In [6]:
import os
import json
import numpy as np
from tensorflow import keras
from sklearn.metrics import accuracy_score, precision_recall_fscore_support, confusion_matrix

import pm25_beijing

# database path:
DATA_PATH = "data/pollution-data/"
FEATURES_TO_USE = ["TEMP", "PRES", "DEWP", "RAIN", "WSPM", "wd", "month", "day", "hour"]
TIMESTEPS = 48 # How many steps the LSTM should take into account
NUM_REG_CLASSES = 3

In [4]:
# With this DataHandler the run function does not need to preprocess everything again
data = pm25_beijing.DataHandler(DATA_PATH, features_to_use=FEATURES_TO_USE, col_to_predict=["PM2.5"])
data.preprocess_data(minmax_features=FEATURES_TO_USE)
data.interpolate()
# As each Model is trained separately, the model input should not be created
# over the full data.
# data.create_model_input(TIMESTEPS, save_data=True)
_, data_labels = data.create_classes(NUM_REG_CLASSES, features=["PM2.5"])

Recognized wd (wind direction) as feature. Create columns north, east, south and west automatically.
Creating multiple classes from wd (wind direction):


100%|██████████| 12/12 [00:30<00:00,  2.51s/it]




In [8]:
 # build models for ALL stations
save_prefix = 'models/station_models/'
all_stations = [file[file.index("_", file.index("_")+1)+1:file.rindex("_")] for file in os.listdir(DATA_PATH)]
all_stations_loss = []
all_stations_models = []

for station_name in all_stations:
    print(f'\n##########--------STATION {station_name}-----------------#########\n')
    lstm = pm25_beijing.create_lstm(TIMESTEPS, len(FEATURES_TO_USE)+3, num_output_classes=NUM_REG_CLASSES)
    trained_model, loss = pm25_beijing.run_model(data, lstm, station=station_name, features=FEATURES_TO_USE, shuffle=False, num_classes=NUM_REG_CLASSES, epochs=20)
    all_stations_loss.append(loss)
    all_stations_models.append(trained_model)
    trained_model.save(f'{save_prefix}{station_name}/')
    with open(f'{save_prefix}{station_name}/loss.txt', 'w') as f:
        json.dump(loss.history, f)
    pm25_beijing.save_loss_plot(loss, f'{save_prefix}{station_name}/loss.png')


##########--------STATION Aotizhongxin-----------------#########



2023-01-17 21:57:33.166999: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory
2023-01-17 21:57:33.167026: W tensorflow/compiler/xla/stream_executor/cuda/cuda_driver.cc:265] failed call to cuInit: UNKNOWN ERROR (303)
2023-01-17 21:57:33.167044: I tensorflow/compiler/xla/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (svsram): /proc/driver/nvidia/version does not exist


---------------------Preprocessing data--------------------------
-------------------Creating training data------------------------
Aotizhongxin (1/1)
Creating model input from ['TEMP', 'PRES', 'DEWP', 'RAIN', 'WSPM', 'month', 'day', 'hour', 'wd_N', 'wd_E', 'wd_S', 'wd_W']


  8%|▊         | 2756/35016 [00:02<00:27, 1192.27it/s]


KeyboardInterrupt: 

In [12]:
# Load models to avoid training above
all_stations_models = []
all_stations = data.station
save_prefix = "models/station_models/"

for s in all_stations:
    model = keras.models.load_model(f'{save_prefix}{s}/')
    all_stations_models.append(model)


In [13]:
# Predict values for all local models
f1_all_models = []
predictions_am = []
true_labels_am = []
for s in range(len(all_stations_models)):
    station = all_stations[s]
    model_input = data.create_model_input(TIMESTEPS, station=[station],
                                          save_data=False)
    _, labels = data.create_classes(NUM_REG_CLASSES, features=["PM2.5"], station=station)
    x, y, x_l, y_l = data.train_test_split(model_input, labels, TIMESTEPS,
                                        num_stations=1,
                                        test_split=0, shuffle_data=False)
    f1, true_y, pred_y = pm25_beijing.get_metrics(all_stations_models[s],
                                                  x, x_l, return_labels=True)
    f1_all_models.append(f1)
    predictions_am.append(pred_y)
    true_labels_am.append(true_y)


Aotizhongxin (1/1)
Creating model input from ['TEMP', 'PRES', 'DEWP', 'RAIN', 'WSPM', 'month', 'day', 'hour', 'wd_N', 'wd_E', 'wd_S', 'wd_W']


100%|██████████| 35016/35016 [00:29<00:00, 1199.78it/s]


Changping (1/1)
Creating model input from ['TEMP', 'PRES', 'DEWP', 'RAIN', 'WSPM', 'month', 'day', 'hour', 'wd_N', 'wd_E', 'wd_S', 'wd_W']


100%|██████████| 35016/35016 [00:29<00:00, 1201.01it/s]


Dingling (1/1)
Creating model input from ['TEMP', 'PRES', 'DEWP', 'RAIN', 'WSPM', 'month', 'day', 'hour', 'wd_N', 'wd_E', 'wd_S', 'wd_W']


100%|██████████| 35016/35016 [00:29<00:00, 1198.56it/s]


Dongsi (1/1)
Creating model input from ['TEMP', 'PRES', 'DEWP', 'RAIN', 'WSPM', 'month', 'day', 'hour', 'wd_N', 'wd_E', 'wd_S', 'wd_W']


100%|██████████| 35016/35016 [00:29<00:00, 1204.95it/s]


Guanyuan (1/1)
Creating model input from ['TEMP', 'PRES', 'DEWP', 'RAIN', 'WSPM', 'month', 'day', 'hour', 'wd_N', 'wd_E', 'wd_S', 'wd_W']


100%|██████████| 35016/35016 [00:28<00:00, 1209.27it/s]


Gucheng (1/1)
Creating model input from ['TEMP', 'PRES', 'DEWP', 'RAIN', 'WSPM', 'month', 'day', 'hour', 'wd_N', 'wd_E', 'wd_S', 'wd_W']


100%|██████████| 35016/35016 [00:29<00:00, 1206.20it/s]


Huairou (1/1)
Creating model input from ['TEMP', 'PRES', 'DEWP', 'RAIN', 'WSPM', 'month', 'day', 'hour', 'wd_N', 'wd_E', 'wd_S', 'wd_W']


100%|██████████| 35016/35016 [00:29<00:00, 1207.31it/s]


Nongzhanguan (1/1)
Creating model input from ['TEMP', 'PRES', 'DEWP', 'RAIN', 'WSPM', 'month', 'day', 'hour', 'wd_N', 'wd_E', 'wd_S', 'wd_W']


100%|██████████| 35016/35016 [00:29<00:00, 1197.59it/s]


Shunyi (1/1)
Creating model input from ['TEMP', 'PRES', 'DEWP', 'RAIN', 'WSPM', 'month', 'day', 'hour', 'wd_N', 'wd_E', 'wd_S', 'wd_W']


100%|██████████| 35016/35016 [00:28<00:00, 1215.78it/s]


Tiantan (1/1)
Creating model input from ['TEMP', 'PRES', 'DEWP', 'RAIN', 'WSPM', 'month', 'day', 'hour', 'wd_N', 'wd_E', 'wd_S', 'wd_W']


100%|██████████| 35016/35016 [00:28<00:00, 1213.63it/s]


Wanliu (1/1)
Creating model input from ['TEMP', 'PRES', 'DEWP', 'RAIN', 'WSPM', 'month', 'day', 'hour', 'wd_N', 'wd_E', 'wd_S', 'wd_W']


100%|██████████| 35016/35016 [00:28<00:00, 1208.13it/s]


Wanshouxigong (1/1)
Creating model input from ['TEMP', 'PRES', 'DEWP', 'RAIN', 'WSPM', 'month', 'day', 'hour', 'wd_N', 'wd_E', 'wd_S', 'wd_W']


100%|██████████| 35016/35016 [00:29<00:00, 1202.41it/s]




In [16]:
# Mean, precision, recall, f1 from all local models
for s in range(len(all_stations)):
    print("------------------" + all_stations[s] + "-----------------------")
    precision, recall, f1, _ = precision_recall_fscore_support(predictions_am[s], true_labels_am[s], average="macro")
    accuracy = accuracy_score(predictions_am[s], true_labels_am[s])

    print("Precision:   " + str(round(precision, 4)))
    print("Recall:      " + str(round(recall, 4)))
    print("F1 Score:    " + str(round(f1, 4)))
    print("Accuracy:    " + str(round(accuracy, 4)))
    print("Confusion Matrix:")
    print(confusion_matrix(predictions_am[s], true_labels_am[s]))

------------------Aotizhongxin-----------------------
Precision:   0.6815
Recall:      0.6881
F1 Score:    0.6838
Accuracy:    0.6823
Confusion Matrix:
[[8520 1700  190]
 [2283 6635 3063]
 [ 742 3147 8736]]
------------------Changping-----------------------
Precision:   0.705
Recall:      0.7005
F1 Score:    0.7006
Accuracy:    0.705
Confusion Matrix:
[[10186  1962   158]
 [ 2534  6483  1946]
 [  933  2797  8017]]
------------------Dingling-----------------------
Precision:   0.717
Recall:      0.7184
F1 Score:    0.7176
Accuracy:    0.7316
Confusion Matrix:
[[12654  2705   210]
 [ 1840  6250  2274]
 [  693  1675  6715]]
------------------Dongsi-----------------------
Precision:   0.7026
Recall:      0.6994
F1 Score:    0.7003
Accuracy:    0.703
Confusion Matrix:
[[9166 2569  431]
 [1586 6370 2997]
 [ 433 2385 9079]]
------------------Guanyuan-----------------------
Precision:   0.6948
Recall:      0.7013
F1 Score:    0.6929
Accuracy:    0.6953
Confusion Matrix:
[[ 7981  1529   129]
 [