## A federated model for PM2.5 prediction
This notebook trains federated models for PM2.5 prediction in Beijing, using the helpers defined in `pm25_beijing.py`.

In [9]:
import tqdm
import numpy as np
import pandas as pd
import tensorflow as tf
import tensorflow_federated as tff
from datetime import datetime

from pm25_beijing import DataHandler, fed_model_fn, create_fed_lstm

# --- Configuration: everything a reader might want to tune lives here. ---

# Location of the pollution data; handed to DataHandler in the data-preparation cell.
DATA_PATH = "data/pollution-data/"
# Feature names handed to DataHandler (both as features_to_use and as minmax_features).
FEATURES_TO_USE = ["TEMP", "PRES", "DEWP", "RAIN", "WSPM", "wd", "month", "day", "hour"]
TIMESTEPS = 48 # How many steps the LSTM should take into account
# Number of classes requested from DataHandler.create_classes.
NUM_REG_CLASSES = 3
# Fraction of each station's rows used to compute the split index.
# NOTE: in the assembly cell the rows BEFORE that index become the test set and
# the rows after it the training set, so this is effectively the test fraction.
TRAIN_SPLIT = 0.25
# NOTE(review): not referenced by any cell visible in this notebook.
BUFFER_SIZE = 256
# Batch size applied to every client dataset and to the pooled test dataset.
BATCH_SIZE = 192
# Base learning rate of the client SGD optimizer (reduced by the schedule in the training cell).
lr_client = 0.1
# Learning rate of the server SGD optimizer.
lr_server = 1
# Controls how many federated rounds the training loop runs.
FEDERATED_TRAINING_ROUNDS = 300
# The client learning rate is divided by 10 every REDUCE_LR_EVERY rounds.
REDUCE_LR_EVERY = 64
# Directory prefix under which the trained model is saved.
MODEL_PATH_PREFIX = "models/federated/"
# Directory prefix for TensorBoard logs.
LOGFILE_PREFIX = "tensorboard-logs/"
# Run descriptor (hyperparameters) embedded in the log directory name.
infos = f"reduce-ev{REDUCE_LR_EVERY}-b-{BATCH_SIZE}-lstm-SGD{lr_client}-SGD{lr_server}"
logfile = f'{LOGFILE_PREFIX}{datetime.now()} {infos}'  # for TensorBoard log files

In [3]:
# Preparing the data
# Build the project's DataHandler on DATA_PATH, restricted to the configured features.
data = DataHandler(DATA_PATH, features_to_use=FEATURES_TO_USE)
# Presumably min-max scales the listed features in place — TODO confirm in pm25_beijing.py.
data.preprocess_data(minmax_features=FEATURES_TO_USE)
# Presumably fills missing values by interpolation — TODO confirm in pm25_beijing.py.
data.interpolate()

Recognized wd (wind direction) as feature. Create columns north, east, south and west automatically.
Creating multiple classes from wd (wind direction):


100%|██████████| 12/12 [00:46<00:00,  3.84s/it]




In [4]:
# Build one training dataset per station (each station acts as a federated
# client) and a single pooled test dataset across all stations.
federated_train_data = []
test_data_per_station = []
federated_test_labels = []
for i, station in enumerate(data.station, start=1):
    pm_data, labels = data.create_classes(NUM_REG_CLASSES, station=station)
    # Split index: the rows BEFORE it form the test set, the rows from it
    # onwards form the training set (so TRAIN_SPLIT is effectively the
    # held-out fraction).
    split_idx = int(TRAIN_SPLIT*len(data.data[station]))
    print(f"Assembling data {i} of {len(data.station)}...")
    train_data_orig = data.create_model_input(TIMESTEPS, station=[station])

    test_data = train_data_orig[:split_idx]
    train_data = train_data_orig[split_idx:]
    # The last TIMESTEPS labels are dropped; presumably create_model_input
    # yields TIMESTEPS fewer rows than there are labels, so this keeps both
    # the same length — TODO confirm against pm25_beijing.py.
    train_labels = labels[split_idx:-TIMESTEPS]
    test_labels = labels[:split_idx]

    ds = tf.data.Dataset.from_tensor_slices((tf.convert_to_tensor(train_data), tf.convert_to_tensor(train_labels)))
    federated_train_data.append(ds)
    test_data_per_station.append(test_data)
    federated_test_labels.append(test_labels)

# Concatenate once after the loop instead of re-copying the growing array on
# every iteration.
federated_test_data = np.concatenate(test_data_per_station)
test_ds = tf.data.Dataset.from_tensor_slices((tf.convert_to_tensor(federated_test_data),
                                              tf.convert_to_tensor(pd.concat(federated_test_labels))))


Assembling data 1 of 12...
Aotizhongxin (1/1)
Creating model input from ['TEMP', 'PRES', 'DEWP', 'RAIN', 'WSPM', 'month', 'day', 'hour', 'wd_N', 'wd_E', 'wd_S', 'wd_W']


100%|██████████| 35016/35016 [00:38<00:00, 915.79it/s] 
2023-01-19 20:08:20.018190: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory
2023-01-19 20:08:20.018249: W tensorflow/stream_executor/cuda/cuda_driver.cc:263] failed call to cuInit: UNKNOWN ERROR (303)
2023-01-19 20:08:20.018284: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (svsram): /proc/driver/nvidia/version does not exist


Assembling data 2 of 12...
Changping (1/1)
Creating model input from ['TEMP', 'PRES', 'DEWP', 'RAIN', 'WSPM', 'month', 'day', 'hour', 'wd_N', 'wd_E', 'wd_S', 'wd_W']


100%|██████████| 35016/35016 [00:37<00:00, 931.81it/s] 


Assembling data 3 of 12...
Dingling (1/1)
Creating model input from ['TEMP', 'PRES', 'DEWP', 'RAIN', 'WSPM', 'month', 'day', 'hour', 'wd_N', 'wd_E', 'wd_S', 'wd_W']


100%|██████████| 35016/35016 [00:38<00:00, 912.61it/s] 


Assembling data 4 of 12...
Dongsi (1/1)
Creating model input from ['TEMP', 'PRES', 'DEWP', 'RAIN', 'WSPM', 'month', 'day', 'hour', 'wd_N', 'wd_E', 'wd_S', 'wd_W']


100%|██████████| 35016/35016 [00:38<00:00, 917.54it/s] 


Assembling data 5 of 12...
Guanyuan (1/1)
Creating model input from ['TEMP', 'PRES', 'DEWP', 'RAIN', 'WSPM', 'month', 'day', 'hour', 'wd_N', 'wd_E', 'wd_S', 'wd_W']


100%|██████████| 35016/35016 [00:37<00:00, 926.33it/s] 


Assembling data 6 of 12...
Gucheng (1/1)
Creating model input from ['TEMP', 'PRES', 'DEWP', 'RAIN', 'WSPM', 'month', 'day', 'hour', 'wd_N', 'wd_E', 'wd_S', 'wd_W']


100%|██████████| 35016/35016 [00:38<00:00, 913.28it/s] 


Assembling data 7 of 12...
Huairou (1/1)
Creating model input from ['TEMP', 'PRES', 'DEWP', 'RAIN', 'WSPM', 'month', 'day', 'hour', 'wd_N', 'wd_E', 'wd_S', 'wd_W']


100%|██████████| 35016/35016 [00:37<00:00, 925.97it/s] 


Assembling data 8 of 12...
Nongzhanguan (1/1)
Creating model input from ['TEMP', 'PRES', 'DEWP', 'RAIN', 'WSPM', 'month', 'day', 'hour', 'wd_N', 'wd_E', 'wd_S', 'wd_W']


100%|██████████| 35016/35016 [00:38<00:00, 916.30it/s] 


Assembling data 9 of 12...
Shunyi (1/1)
Creating model input from ['TEMP', 'PRES', 'DEWP', 'RAIN', 'WSPM', 'month', 'day', 'hour', 'wd_N', 'wd_E', 'wd_S', 'wd_W']


100%|██████████| 35016/35016 [00:37<00:00, 930.90it/s] 


Assembling data 10 of 12...
Tiantan (1/1)
Creating model input from ['TEMP', 'PRES', 'DEWP', 'RAIN', 'WSPM', 'month', 'day', 'hour', 'wd_N', 'wd_E', 'wd_S', 'wd_W']


100%|██████████| 35016/35016 [00:37<00:00, 921.64it/s] 


Assembling data 11 of 12...
Wanliu (1/1)
Creating model input from ['TEMP', 'PRES', 'DEWP', 'RAIN', 'WSPM', 'month', 'day', 'hour', 'wd_N', 'wd_E', 'wd_S', 'wd_W']


100%|██████████| 35016/35016 [00:38<00:00, 913.00it/s] 


Assembling data 12 of 12...
Wanshouxigong (1/1)
Creating model input from ['TEMP', 'PRES', 'DEWP', 'RAIN', 'WSPM', 'month', 'day', 'hour', 'wd_N', 'wd_E', 'wd_S', 'wd_W']


100%|██████████| 35016/35016 [00:37<00:00, 926.10it/s] 


In [5]:
# Apply the same batch size to the pooled test set and to every client's
# training dataset.
batched_test_ds = test_ds.batch(BATCH_SIZE)
batched_federated_train_data = [
    client_ds.batch(BATCH_SIZE)
    for client_ds in federated_train_data
]

In [6]:
# For Tensorboard use
# NOTE(review): this import sits outside the notebook's top import cell;
# consider moving it there so dependencies are visible in one place.
import nest_asyncio
# Presumably required so TFF can run its event loop inside Jupyter's already
# running loop — TODO confirm.
nest_asyncio.apply()
# (Re)load the TensorBoard notebook extension and start it on port 6006,
# reading from the "tensorboard-logs" directory (same directory as LOGFILE_PREFIX).
%reload_ext tensorboard
%tensorboard --logdir tensorboard-logs --port=6006

Launching TensorBoard...

In [15]:
# The federated learning
# Build and invoke a trivial federated computation first; presumably a smoke
# test that the TFF runtime is usable before the real process is built.
tff.federated_computation(lambda: 'Initialized!')()

# Weighted FedAvg with a step-wise client learning-rate schedule: the client
# rate is divided by 10 every REDUCE_LR_EVERY rounds. TFF supplies a 0-based
# round number to client_learning_rate_fn and increments it on each `.next`.
iterative_process = tff.learning.algorithms.build_weighted_fed_avg_with_optimizer_schedule(
    fed_model_fn,
    client_learning_rate_fn=lambda round_n: lr_client/10**tf.math.floor(tf.divide(round_n, REDUCE_LR_EVERY)),
    client_optimizer_fn=lambda x: tf.keras.optimizers.SGD(learning_rate=x),
    server_optimizer_fn=lambda: tf.keras.optimizers.SGD(learning_rate=lr_server)
)

print(iterative_process.initialize.type_signature.formatted_representation())
state = iterative_process.initialize()

# The evaluation model is loop-invariant: build and compile it once, then only
# refresh its weights from the server state after every round.
# NOTE(review): the loss uses from_logits=True while Precision/Recall threshold
# predictions at 0.5 by default — confirm what create_fed_lstm outputs.
model = create_fed_lstm()
model.compile(
    loss=tf.keras.losses.CategoricalCrossentropy(from_logits=True),
    optimizer='sgd',
    metrics=['accuracy', tf.keras.metrics.Precision(), tf.keras.metrics.Recall()]
)

summary_writer = tf.summary.create_file_writer(logfile)
with summary_writer.as_default():
    # round_num is 1-based and inclusive of FEDERATED_TRAINING_ROUNDS, so
    # exactly FEDERATED_TRAINING_ROUNDS rounds are executed.
    for round_num in tqdm.tqdm(range(1, FEDERATED_TRAINING_ROUNDS + 1)):
        result = iterative_process.next(state, batched_federated_train_data)
        state = result.state
        metrics = result.metrics
        for name, value in metrics['client_work']['train'].items():
            tf.summary.scalar(name, value, step=round_num)
        # Test resulting model
        model_weights = iterative_process.get_model_weights(state)
        model_weights.assign_weights_to(model)
        scores = model.evaluate(batched_test_ds, verbose=0)
        tf.summary.scalar('test_accuracy', scores[1], step=round_num)
        tf.summary.scalar('test_precision', scores[2], step=round_num)
        tf.summary.scalar('test_recall', scores[3], step=round_num)
        # Log the client learning rate actually used in this round: TFF's
        # internal round counter is 0-based, i.e. round_num - 1.
        lr = lr_client/(10**((round_num - 1)//REDUCE_LR_EVERY))
        tf.summary.scalar('z_learning_rate', lr, step=round_num)

print(f'\nFINISHED federated training, logfile: {logfile}, NOW: {datetime.now()}')

[libprotobuf INFO google/protobuf/util/message_differencer.cc:1419] Proto type 'tensorflow.GraphDef' not found


( -> <
  global_model_weights=<
    trainable=<
      float32[12,24],
      float32[6,24],
      float32[24],
      float32[6,16],
      float32[4,16],
      float32[16],
      float32[4,3],
      float32[3]
    >,
    non_trainable=<>
  >,
  distributor=<>,
  client_work=int32,
  aggregator=<
    value_sum_process=<>,
    weight_sum_process=<>
  >,
  finalizer=<
    int64
  >
>@SERVER)


  0%|          | 0/299 [00:05<?, ?it/s]


KeyboardInterrupt: 

In [14]:
# Write the evaluation model from the training cell to a timestamped
# directory under MODEL_PATH_PREFIX.
export_dir = f"{MODEL_PATH_PREFIX}federated_model_{datetime.now()}"
model.save(export_dir)



INFO:tensorflow:Assets written to: models/federated/federated_model_2023-01-19 21:06:18.691249/assets


INFO:tensorflow:Assets written to: models/federated/federated_model_2023-01-19 21:06:18.691249/assets
