In [1]:
import collections
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler, MinMaxScaler
import sys
import warnings
import argparse
import math
import numpy as np
import pandas as pd
import dp_accounting
import tensorflow as tf
import tensorflow_federated as tff
from tensorflow.keras.models import Model
from tensorflow.keras.callbacks import EarlyStopping, TensorBoard
import nest_asyncio
nest_asyncio.apply()

In [2]:
# Input CSV locations, resolved relative to the notebook's working directory.
# NOTE: `trian_path` is a misspelling of "train path"; the name is left as-is
# because it is a notebook-level variable.
trian_path, test_path = "train.csv", "test.csv"

# Column that supplies the series, and the number of past readings used as
# model input for each sample.
attr = 'Lane 1 Flow (Veh/5 Minutes)'
lags = 12

# Read both splits the same way, replacing missing values with 0.
df_train, df_test = (
    pd.read_csv(csv_path, encoding='utf-8').fillna(0)
    for csv_path in (trian_path, test_path)
)


In [3]:
def _sliding_windows(series, window):
    """Return every run of `window + 1` consecutive values of `series`.

    Each run holds `window` input readings followed by the reading to predict.
    """
    return [series[i - window: i + 1] for i in range(window, len(series))]


# The scaler is fitted on the training column only and then reused for the
# test column, so both splits share the same scaling.
scaler = MinMaxScaler(feature_range=(0, 1)).fit(df_train[attr].values.reshape(-1, 1))
flow1 = scaler.transform(df_train[attr].values.reshape(-1, 1)).reshape(1, -1)[0]
flow2 = scaler.transform(df_test[attr].values.reshape(-1, 1)).reshape(1, -1)[0]

# Windows become float32 arrays with a trailing axis of size 1.
train = np.expand_dims(
    np.array(_sliding_windows(flow1, lags)).astype(np.float32), axis=-1)
test = np.expand_dims(
    np.array(_sliding_windows(flow2, lags)).astype(np.float32), axis=-1)

# Shuffle the training windows (rows only) with a fixed seed so that the row
# order, and therefore the per-client slices cut from it later, is the same on
# every Restart & Run All.
SHUFFLE_SEED = 42
np.random.default_rng(SHUFFLE_SEED).shuffle(train)

# The first `lags` steps of each window are the inputs; the last is the target.
X_train = train[:, :-1]
y_train = train[:, -1]
X_test = test[:, :-1]
y_test = test[:, -1]

In [4]:
# Shape check: (samples, lags, 1) -- the trailing axis comes from expand_dims.
X_train.shape

(7764, 12, 1)

In [5]:
# Shape check: (samples, 1) -- the last step of each window is the target.
y_train.shape

(7764, 1)

In [6]:
# Number of edge nodes (clients) the training set is split across.
EDGE_NUM = 50
# How many times each client's dataset is repeated before batching.
LOCAL_EPOCHS = 10
# Batch size used for the client, validation and test datasets.
BATCH_SIZE = 32
# Not referenced by any of the cells visible in this notebook.
GLOBAL_EPOCHS = 5

In [7]:
# Distribute the data: every edge node receives an equal, contiguous slice of
# the training windows.
train_data, test_data, val_data = [], [], []

# Samples per edge node. Rows beyond data_length * EDGE_NUM are not assigned
# to any node.
data_length = X_train.shape[0] // EDGE_NUM
for edge_ids in range(EDGE_NUM):
    start = data_length * edge_ids
    stop = start + data_length
    # X_train already carries its trailing feature axis, so the slices are
    # used as they are (the earlier expand_dims call here discarded its result).
    client_dataset = (
        tf.data.Dataset.from_tensor_slices((X_train[start:stop], y_train[start:stop]))
        .repeat(LOCAL_EPOCHS)
        .batch(BATCH_SIZE)
    )
    train_data.append(client_dataset)

# Validation and test each hold a single dataset built from the full test split.
val_data.append(
    tf.data.Dataset.from_tensor_slices((X_test, y_test)).batch(BATCH_SIZE))
test_data.append(
    tf.data.Dataset.from_tensor_slices((X_test, y_test)).batch(BATCH_SIZE))

In [8]:
# Inspect the first client's dataset to confirm its element shapes and dtypes.
train_data[0]

<BatchDataset shapes: ((None, 12, 1), (None, 1)), types: (tf.float32, tf.float32)>

In [9]:
def input_spec():
    """Describe one batch fed to the model as a `(features, labels)` pair.

    Returns:
        A tuple of two `tf.TensorSpec` objects: float32 features of shape
        `[None, lags, 1]` and float32 labels of shape `[None, 1]`, where
        `None` is the batch dimension.
    """
    # The window length comes from `lags` instead of a literal 12, so the spec
    # keeps matching the client datasets if the look-back window is changed.
    features = tf.TensorSpec([None, lags, 1], tf.float32)
    labels = tf.TensorSpec([None, 1], tf.float32)
    return (features, labels)

def model_fn():
    """Build a new two-layer LSTM regressor and wrap it for TFF.

    A fresh Keras model is constructed on every call; nothing is shared
    between calls.

    Returns:
        The result of `tff.learning.from_keras_model` for the new model, using
        mean squared error as the loss and RMSE (named 'rmse') as the metric.
    """
    keras_model = tf.keras.models.Sequential([
        # Input length follows `lags` so the model accepts the windows built
        # from it, instead of repeating a literal 12.
        tf.keras.layers.LSTM(64, input_shape=(lags, 1), return_sequences=True),
        tf.keras.layers.LSTM(64),
        tf.keras.layers.Dropout(0.2),
        tf.keras.layers.Dense(1, activation='sigmoid'),
    ])

    return tff.learning.from_keras_model(
        keras_model,
        input_spec=input_spec(),
        loss=tf.keras.losses.MeanSquaredError(),
        metrics=[tf.keras.metrics.RootMeanSquaredError(name='rmse')])

In [10]:
# Federated evaluation computation for `model_fn`. `train` builds its own
# evaluation process, so this object is not used by the cells visible here.
evaluator = tff.learning.build_federated_evaluation(model_fn)

#### DP

In [12]:
# Size of the client pool that `train` samples from in each round.
total_clients = EDGE_NUM
# Only referenced by the disabled execution-context setup below.
clients_per_thread = 5
# tff.backends.native.set_sync_local_cpp_execution_context(
#     max_concurrent_computation_calls=total_clients / clients_per_thread)

def train(rounds, noise_multiplier, clients_per_round, data_frame):
  """Run differentially private federated averaging and log evaluations.

  The model is evaluated on `test_data` every 5 rounds and once more after the
  final round. Each evaluation becomes one result row.

  Args:
    rounds: Number of federated training rounds to run.
    noise_multiplier: Noise multiplier handed to the DP aggregator; also
      stored in the 'NoiseMultiplier' column of every result row.
    clients_per_round: Expected number of clients per round. It is passed to
      the DP aggregator and sets the per-client sampling probability.
    data_frame: Results gathered so far. It is not modified in place.

  Returns:
    A new DataFrame holding the rows of `data_frame` followed by one row per
    evaluation, with columns 'Round', 'NoiseMultiplier' and the evaluation
    metrics.
  """
  # Using the `dp_aggregator` here turns on differential privacy with adaptive
  # clipping.
  # NOTE(review): this requires a TFF release that exposes both
  # `tff.learning.model_update_aggregator` and `tff.learning.algorithms`; on
  # other releases the next line raises AttributeError -- confirm the pinned
  # TFF version.
  aggregation_factory = tff.learning.model_update_aggregator.dp_aggregator(
      noise_multiplier, clients_per_round)

  # We use Poisson subsampling which gives slightly tighter privacy guarantees
  # compared to having a fixed number of clients per round. The actual number of
  # clients per round is stochastic with mean clients_per_round.
  sampling_prob = clients_per_round / total_clients

  # Build a federated averaging process.
  # Typically a non-adaptive server optimizer is used because the noise in the
  # updates can cause the second moment accumulators to become very large
  # prematurely.
  learning_process = tff.learning.algorithms.build_unweighted_fed_avg(
        model_fn,
        client_optimizer_fn=lambda: tf.keras.optimizers.Adam(),
        server_optimizer_fn=lambda: tf.keras.optimizers.SGD(1.0, momentum=0.9),
        model_aggregator=aggregation_factory)

  eval_process = tff.learning.build_federated_evaluation(model_fn)

  # Rows are collected here and turned into a DataFrame once at the end;
  # `DataFrame.append` no longer exists in pandas 2.x.
  records = []

  def evaluate(round_num, current_state):
    """Evaluate the current global model and record one result row."""
    model_weights = learning_process.get_model_weights(current_state)
    # `test_data` is already a list of client datasets, so it is passed
    # directly; wrapping it in another list would nest it one level too deep.
    eval_metrics = eval_process(model_weights, test_data)['eval']
    records.append({'Round': round_num,
                    'NoiseMultiplier': noise_multiplier,
                    **eval_metrics})
    return eval_metrics

  # Training loop.
  state = learning_process.initialize()
  for round_num in range(rounds):
    if round_num % 5 == 0:
      metrics = evaluate(round_num, state)
      if round_num < 25 or round_num % 25 == 0:
        print(f'Round {round_num:3d}: {metrics}')

    # Sample clients for a round. Note that if your dataset is large and
    # sampling_prob is small, it would be faster to use gap sampling.
    x = np.random.uniform(size=total_clients)
    sampled_train_data = [
        train_data[i] for i in range(total_clients) if x[i] < sampling_prob]

    # Use selected clients for update.
    result = learning_process.next(state, sampled_train_data)
    state = result.state

  metrics = evaluate(rounds, state)
  print(f'Round {rounds:3d}: {metrics}')

  return pd.concat([data_frame, pd.DataFrame(records)], ignore_index=True)

In [13]:
# Experiment settings: every noise level is trained for the same number of
# rounds with the same expected number of clients per round.
rounds, clients_per_round = 100, 50
noise_levels = (0.0, 0.5, 0.75, 1.0)

# Results from all noise levels accumulate in one frame.
data_frame = pd.DataFrame()
for noise_multiplier in noise_levels:
  print(f'Starting training with noise multiplier: {noise_multiplier}')
  data_frame = train(
      rounds=rounds,
      noise_multiplier=noise_multiplier,
      clients_per_round=clients_per_round,
      data_frame=data_frame)
  print()

Starting training with noise multiplier: 0.0


AttributeError: module 'tensorflow_federated.python.learning' has no attribute 'model_update_aggregator'