## Step 1: Import Libraries
We begin by importing the necessary libraries: PyTorch for building and training the neural network, and NumPy for handling the dataset.

In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import matplotlib.pyplot as plt
import networkx as nx
import csv
import os
import pandas as pd
import numpy as np
import json
import random
import tensorflow as tf
import tensorflow_federated as tff
import collections

from sklearn import preprocessing
from sklearn.model_selection import train_test_split

2024-11-17 23:33:51.842519: I tensorflow/core/util/port.cc:111] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-11-17 23:33:53.122727: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2024-11-17 23:33:57.108043: E tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:9342] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-11-17 23:33:57.108072: E tensorflow/compiler/xla/stream_executor/cuda/cuda_fft.cc:609] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-11-17 23:33:57.119588: E tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:1518] Unable to register cuBLAS factory: Attempting to regi

## Step 2.1: Define Hyperparameters

In [5]:
# Number of models in the federated learning setup
NUM_CLIENTS = 100
# Number of previous time points used for forecasting the next point
SEQUENCE_LENGTH = 20
# Number of epochs for training
EPOCHS = 100
# Interval at which to save weights
SAVE_INTERVAL = 1
# Initial learning rate
LEARNING_RATE = 0.01
# Client matrix dir
CLIENT_MATRIX_DIR = "client_status_data"
# Client matrix file
CLIENT_MATRIX_FILE = "client_status_random_on_off.csv"

## Step 2.2: Create client status matrix

In [15]:
def create_csv_client_matrix(filename, rows=160, columns=100, percentage_Y=0.1):
    # Initialize the previous row as empty to enforce the no-adjacency rule
    prev_row = [''] * columns
    
    with open(filename, mode='w', newline='') as file:
        writer = csv.writer(file)
        
        for row_idx in range(rows):
            row = ['Y'] * columns

            if row_idx >= 3:
                N_count = int(columns * percentage_Y)
                
                # Ensure that 'N' is placed in positions not occupied by 'N' in the previous row
                possible_positions = [i for i in range(columns) if prev_row[i] != 'N']
                N_positions = random.sample(possible_positions, min(N_count, len(possible_positions)))
                
                for pos in N_positions:
                    row[pos] = 'N'
            
            # Write the current row to the CSV file
            writer.writerow(row)
            
            # Update prev_row for the next iteration
            prev_row = row

create_csv_client_matrix(f'{CLIENT_MATRIX_DIR}/{CLIENT_MATRIX_FILE}')

## Step 2.3: Extract time series subset for each client based on columns of the whole dataset

In [10]:
def extract_csv_subset(input_filename, output_filename, max_rows=1000, max_columns=100):
    with open(input_filename, mode='r', newline='') as infile:
        reader = csv.reader(infile)
        with open(output_filename, mode='w', newline='') as outfile:
            writer = csv.writer(outfile)
            
            for i, row in enumerate(reader):
                if i >= max_rows:  # Stop after reaching the specified row limit
                    break
                # Write only the specified number of columns for each row
                writer.writerow(row[:max_columns])

# Extract train dataset
extract_csv_subset("data/electricity.csv", "data/electricity_26000x100.csv", max_rows=26000, max_columns=101)

# Extract test dataset
# extract_csv_subset("data/hourly_test.csv", "data/hourly_test_415x48.csv", max_rows = 415, max_columns = 49)

## Step 3: Create Dataset Directories and CSV Files
We create separate directories for each model, and each directory contains a unique univariate time series dataset.

In [11]:
# Create sequences for supervised learning
def create_sequences(data, sequence_length):
    X, y = [], []
    for i in range(len(data) - sequence_length):
        X.append(data[i:i + sequence_length])
        y.append(data[i + sequence_length])
    return np.array(X), np.array(y)

data = pd.read_csv("data/electricity_26000x100.csv")
# print(data)

# Create directories for each model dataset
for i in range(NUM_CLIENTS):
    directory = f'models_data/model_{i}_data'
    if not os.path.exists(directory):
        os.makedirs(directory)

for i in range(NUM_CLIENTS):
    X, y = create_sequences(data.loc[:, str(i)], SEQUENCE_LENGTH)

    # Split into train and test sets
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, shuffle=True)

    # Scale the data (normalization)
    scaler = preprocessing.MinMaxScaler()
    X_train = scaler.fit_transform(X_train.reshape(-1, 1)).reshape(X_train.shape)
    X_test = scaler.transform(X_test.reshape(-1, 1)).reshape(X_test.shape)
    y_train = scaler.fit_transform(y_train.reshape(-1, 1)).flatten()
    y_test = scaler.transform(y_test.reshape(-1, 1)).flatten()
    print(y_train)

    # Save datasets
    np.save(f'models_data/model_{i}_data/x_train.npy', X_train)
    np.save(f'models_data/model_{i}_data/y_train.npy', y_train)
    np.save(f'models_data/model_{i}_data/x_test.npy', X_test)
    np.save(f'models_data/model_{i}_data/y_test.npy', y_test)

[0.42335766 0.09489051 0.05839416 ... 0.17518248 0.08029197 0.45255474]
[0.44401544 0.50579151 0.42471042 ... 0.38223938 0.3976834  0.5019305 ]
[0.01331115 0.0249584  0.03327787 ... 0.01996672 0.01331115 0.01497504]
[0.26495726 0.31623932 0.57008547 ... 0.33846154 0.44188034 0.44444444]
[0.3071298  0.30347349 0.488117   ... 0.43875686 0.36745887 0.28884826]
[0.41985887 0.42741935 0.56955645 ... 0.40322581 0.38356855 0.33064516]
[0.07386364 0.07386364 0.09090909 ... 0.09090909 0.10227273 0.06818182]
[0.48267202 0.48116524 0.63636364 ... 0.5183325  0.49171271 0.51682572]
[0.46       0.72333333 0.52166667 ... 0.58833333 0.40833333 0.39166667]
[0.50862069 0.33477011 0.49712644 ... 0.22988506 0.21695402 0.16091954]
[0.37142857 0.37402597 0.54285714 ... 0.49090909 0.36103896 0.37142857]
[0.80142687 0.50891795 0.20927467 ... 0.56718193 0.1783591  0.38049941]
[0.56360078 0.49510763 0.46379648 ... 0.78277886 0.24657534 0.32876712]
[0.30446927 0.33240223 0.53631285 ... 0.31005587 0.43575419 0.33

## Step 5: Define the Neural Network
We define a neural network with two hidden layers.

In [None]:
# Define the neural network
class ComplexNet(nn.Module):
    def __init__(self, input_size):
        super(ComplexNet, self).__init__()
        self.hidden1 = nn.Linear(input_size, 10)
        self.bn1 = nn.BatchNorm1d(10)
        self.hidden2 = nn.Linear(10, 5)
        self.bn2 = nn.BatchNorm1d(5)
        self.output = nn.Linear(5, 1)
        
        # Initialize weights
        nn.init.kaiming_uniform_(self.hidden1.weight, nonlinearity='relu')
        nn.init.kaiming_uniform_(self.hidden2.weight, nonlinearity='relu')
        nn.init.xavier_uniform_(self.output.weight)
    
    def forward(self, x):
        x = torch.relu(self.bn1(self.hidden1(x)))
        x = torch.relu(self.bn2(self.hidden2(x)))
        x = self.output(x)
        return x

In [None]:
def alexnet_regression(input_shape):
    model = tf.keras.Sequential([
        # Input Layer
        tf.keras.layers.InputLayer(input_shape=input_shape[1:]),

        # Fully connected layers for smaller inputs
        tf.keras.layers.Dense(4096, activation='relu'),
        tf.keras.layers.Dropout(0.5),
        tf.keras.layers.Dense(2048, activation='relu'),
        tf.keras.layers.Dropout(0.5),
        tf.keras.layers.Dense(512, activation='relu'),

        # Regression Output
        tf.keras.layers.Dense(1)  # Single unit for regression output
    ])
    
    # Compile the model
    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
                  loss='mean_squared_error',
                  metrics=['mae'])  # Regression loss and metric
    return model

## Step 6: Train Multiple Models Independently and Save Weights
We instantiate and train `NUM_MODELS` separate models using the corresponding datasets. During training, we save the weights of each model at every training round to a separate CSV file.

In [None]:
# Function to train a single model and save weights
def train_model(model_id):
    # Load dataset for the model
    directory = f'model_{model_id}_data'
    x_train = np.load(os.path.join('models_data', directory, 'x_train.npy'))
    y_train = np.load(os.path.join('models_data', directory, 'y_train.npy'))
    x_train_tensor = torch.tensor(x_train, dtype=torch.float32)
    y_train_tensor = torch.tensor(y_train, dtype=torch.float32).view(-1, 1)

    # model = alexnet_regression(input_shape=(SEQUENCE_LENGTH, 1))
    # model.compile(optimizer='adam', loss='mse', metrics=['mae'])

    model = ComplexNet(input_size=SEQUENCE_LENGTH)
    criterion = lambda output, target: torch.sqrt(nn.MSELoss()(output, target))
    optimizer = optim.AdamW(model.parameters(), lr=LEARNING_RATE)
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.5)

    # model = alexnet_regression(input_shape)
    # # Compile the model
    # model.compile(optimizer='adam', loss='mse', metrics=['mae'])

    # Train the model
    # history = model.fit(X_train, y_train, epochs=EPOCHS, batch_size=32, validation_split=0.2, verbose=1)

    # Evaluate the model
    # loss, mae = model.evaluate(X_test, y_test, verbose=0)
    # print(f"Test Loss: {loss:.4f}, Test MAE: {mae:.4f}")

    # Predict on the test set
    # y_pred = model.predict(X_test)

    # Rescale predictions and true values
    # y_pred_rescaled = scaler.inverse_transform(y_pred)
    # y_test_rescaled = scaler.inverse_transform(y_test.reshape(-1, 1))

    # Plot predictions vs true values
    # plt.figure(figsize=(10, 6))
    # plt.plot(y_test_rescaled, label="True Values", color="blue")
    # plt.plot(y_pred_rescaled, label="Predictions", color="orange")
    # plt.title("True vs Predicted Values (AlexNet for Time Series Regression)")
    # plt.xlabel("Time Step")
    # plt.ylabel("Value")
    # plt.legend()
    # plt.grid(True)
    # plt.show()

    csv_filename = f'weights_tracking_models/weights_tracking_model_{model_id}.csv'
    training_losses = []
    for epoch in range(1, EPOCHS + 1):
        # Forward pass
        outputs = model(x_train_tensor)
        loss = criterion(outputs, y_train_tensor)
        training_losses.append(loss.item())

        # Backward pass and optimization
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        scheduler.step()

        # Save weights at intervals
        if epoch % SAVE_INTERVAL == 0:
            # Extract weights and flatten them
            weights_list = []
            for name, param in model.named_parameters():
                if param.requires_grad:
                    weights_list.extend(param.data.view(-1).tolist())

            # Check if CSV file exists
            file_exists = os.path.isfile(csv_filename)

            # Open CSV file to save weights
            with open(csv_filename, mode='a', newline='') as file:
                writer = csv.writer(file)

                # If file doesn't exist, write the header
                if not file_exists:
                    header = []
                    for name, param in model.named_parameters():
                        if param.requires_grad:
                            num_weights = param.data.numel()
                            header.extend([f'{name}_weight_{i}' for i in range(num_weights)])
                    writer.writerow(header)

                # Write weights to CSV
                writer.writerow(weights_list)

    # Plot training loss
    plt.figure(figsize=(10, 5))
    plt.plot(range(1, EPOCHS + 1), training_losses, label=f'Model {model_id} Training Loss')
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.legend()
    plt.title(f'Training Loss for Model {model_id}')
    plt.show()

    return model


# Train NUM_MODELS models independently and save their weights
models = [train_model(i) for i in range(NUM_CLIENTS)]

## Federated learning

In [None]:
clients_data = []
clients_test_data = []
for cid in range(NUM_CLIENTS):
    directory = f'model_{cid}_data'
    x_train = np.load(os.path.join('models_data', directory, 'x_train.npy'))
    y_train = np.load(os.path.join('models_data', directory, 'y_train.npy')).reshape(-1, 1)

    x_test = np.load(os.path.join('models_data', directory, 'x_test.npy'))
    y_test = np.load(os.path.join('models_data', directory, 'y_test.npy')).reshape(-1, 1)

    clients_data.append((x_train, y_train))
    clients_test_data.append((x_test, y_test))

def get_client_train_data(cid):
  directory = f'model_{cid}_data'
  x_train = np.load(os.path.join('models_data', directory, 'x_train.npy'))
  y_train = np.load(os.path.join('models_data', directory, 'y_train.npy')).reshape(-1, 1)
  return (x_train, y_train)

def get_client_test_data(cid):
  directory = f'model_{cid}_data'
  x_test = np.load(os.path.join('models_data', directory, 'x_test.npy'))
  y_test = np.load(os.path.join('models_data', directory, 'y_test.npy')).reshape(-1, 1)
  return (x_test, y_test)

# federated_data = tf.data.Dataset.from_tensor_slices(clients_data).batch(20)
# federated_test_data = tf.data.Dataset.from_tensor_slices(clients_test_data).batch(20)

# Preprocess data for TFF
def preprocess_client_data(client_data):
    def create_tf_dataset(X, y):
        dataset = tf.data.Dataset.from_tensor_slices((X, y))
        return dataset.batch(10)  # Batch size of 10
    return [create_tf_dataset(X, y) for X, y in client_data]

def preprocess(data):
  #  print(data)
   dataset = tf.data.Dataset.from_tensor_slices(data)
   return dataset.batch(100)

federated_data = [preprocess(get_client_train_data(i)) for i in range(NUM_CLIENTS)] #preprocess_client_data(clients_data)
federated_test_data = [preprocess(get_client_test_data(i)) for i in range(NUM_CLIENTS)] #preprocess_client_data(clients_test_data)

print("Client data: ", len(federated_data))
# print("Client data shape: ", federated_data.shape)

# def preprocess(dataset):

#   def batch_format_fn(element):
#     """Flatten a batch of EMNIST data and return a (features, label) tuple."""
#     return (tf.reshape(element['pixels'], [-1, 784]),
#             tf.reshape(element['label'], [-1, 1]))

#   return dataset.batch(20).map(batch_format_fn)

# class NumpyClientData(tff.simulation.datasets.ClientData):
#     def __init__(self, data):
#         self._data = data
#         self._client_ids = list(data.keys())

#     @property
#     def client_ids(self):
#         return self._client_ids

#     def create_tf_dataset_for_client(self, client_id):
#         # Extract numpy arrays
#         X, y = self._data[client_id]
#         # Wrap in tf.data.Dataset
#         return tf.data.Dataset.from_tensor_slices((X, y)).batch(20)

# clients_data = {}
# clients_test_data = {}
# for cid in range(NUM_CLIENTS):
#   X_train = np.load(os.path.join('models_data', directory, 'x_train.npy'))
#   y_train = np.load(os.path.join('models_data', directory, 'y_train.npy')).reshape(-1, 1)
#   clients_data[str(cid)] = (X_train, y_train)

#   X_test = np.load(os.path.join('models_data', directory, 'x_test.npy'))
#   y_train = np.load(os.path.join('models_data', directory, 'y_test.npy')).reshape(-1, 1)
#   clients_test_data[str(cid)] = (X_test, y_test)

# federated_data = NumpyClientData(clients_data)
# federated_data_test = NumpyClientData(clients_test_data)

# federated_train_data = [preprocess(federated_data.create_tf_dataset_for_client(str(x)))
#   for x in range(NUM_CLIENTS)
# ]

# federated_test_data = [preprocess(federated_data_test.create_tf_dataset_for_client(str(x)))
#   for x in range(NUM_CLIENTS)
# ]

def convert_spec_to_tff_type(tensor_spec):
    # print("--------- Tensor spec: {}".format(tensor_spec))
    # print("--------- Dummy tensor spec: {} and {}".format(repr(tensor_spec.dtype), tensor_spec.shape))

    return tff.TensorType(dtype=tensor_spec.dtype.as_numpy_dtype, shape=tensor_spec.shape)

# Define the model
def create_regression_model(input_dim):
    model = tf.keras.models.Sequential([
        tf.keras.layers.Dense(64, activation='relu', input_shape=(input_dim,)),
        tf.keras.layers.Dense(32, activation='relu'),
        tf.keras.layers.Dense(1)  # Regression output

        # tf.keras.layers.Input(shape=(784,)),
        # tf.keras.layers.Dense(10, kernel_initializer=initializer),
        # tf.keras.layers.Softmax(),
    ])
    return model

# Wrap the Keras model for TFF
def model_fn():
    keras_model = create_regression_model(20)
    return tff.learning.models.from_keras_model(
        keras_model=keras_model,
        input_spec=federated_data[0].element_spec,
        loss_fn=tf.keras.losses.MeanSquaredError(),
        metrics=[tf.keras.metrics.MeanSquaredError()]
    )

tff_model = tff.learning.models.functional_model_from_keras(
        keras_model=create_regression_model(20),
        input_spec=federated_data[0].element_spec,
        loss_fn=tf.keras.losses.MeanSquaredError(),
        metrics_constructor=collections.OrderedDict(
           accuracy=tf.keras.metrics.MeanSquaredError
        )
    )

@tff.tensorflow.computation
def server_init():
  return tff.learning.models.ModelWeights(*tff_model.initial_weights)

tf_dataset_type = tff.SequenceType(
    # tff.types.to_type(tff_model.input_spec)
    [convert_spec_to_tff_type(spec) for spec in tff_model.input_spec]
)
model_weights_type = server_init.type_signature.result

federated_server_type = tff.FederatedType(model_weights_type, tff.SERVER)
federated_dataset_type = tff.FederatedType(tf_dataset_type, tff.CLIENTS)

@tff.federated_computation
def initialize_fn():
  return tff.federated_eval(server_init, tff.SERVER)

@tf.function
def client_update(model, dataset, initial_weights, client_optimizer):
  """Performs training (using the server model weights) on the client's dataset."""
  # Initialize the client model with the current server weights and the optimizer
  # state.
  client_weights = initial_weights.trainable
  optimizer_state = client_optimizer.initialize(
      tf.nest.map_structure(tf.TensorSpec.from_tensor, client_weights)
  )

  # Use the client_optimizer to update the local model.
  for batch in dataset:
    # tf.print("Training local epoch: ", i)
    x, y = batch
    # print("x, y: ", x, y)
    with tf.GradientTape() as tape:
      tape.watch(client_weights)
      # Compute a forward pass on the batch of data
      outputs = model.predict_on_batch(
          model_weights=(client_weights, ()), x=x, training=True
      )
      loss = model.loss(output=outputs, label=y)

    # Compute the corresponding gradient
    grads = tape.gradient(loss, client_weights)

    # Apply the gradient using a client optimizer.
    optimizer_state, client_weights = client_optimizer.next(
        optimizer_state, weights=client_weights, gradients=grads
    )

    tf.print("Weights of client: ", len(client_weights[0][1]))
    

  return tff.learning.models.ModelWeights(client_weights, non_trainable=())

@tf.function
def server_update(model, mean_client_weights):
  """Updates the server model weights as the average of the client model weights."""
  del model  # Unused, just take the mean_client_weights.
  print("Server update, mean_client_weights: ", mean_client_weights)
  return mean_client_weights

@tff.tensorflow.computation(model_weights_type)
def server_update_fn(mean_client_weights):
  return server_update(tff_model, mean_client_weights)

@tff.tensorflow.computation(tf_dataset_type, model_weights_type)
def client_update_fn(tf_dataset, server_weights):
  client_optimizer = tff.learning.optimizers.build_adamw(learning_rate=0.01)
  return client_update(tff_model, tf_dataset, server_weights, client_optimizer)

@tff.federated_computation(federated_server_type, federated_dataset_type)
def next_fn(server_weights, federated_dataset):
  # Broadcast the server weights to the clients.
  server_weights_at_client = tff.federated_broadcast(server_weights)

  # Each client computes their updated weights.
  client_weights = tff.federated_map(
      client_update_fn, (federated_dataset, server_weights_at_client)
  )

  weight_denom = client_weights.trainable

  tf.print("Client weights: ", weight_denom)

  # The server averages these updates.
  mean_client_weights = tff.federated_mean(client_weights)

  tf.print("Mean client weights: ", mean_client_weights)

  # The server updates its model.
  server_weights = tff.federated_map(server_update_fn, mean_client_weights)

  tf.print("Server weights: ", server_weights)

  return server_weights

federated_algorithm = tff.templates.IterativeProcess(
    initialize_fn=initialize_fn,
    next_fn=next_fn
)

def evaluate(model_weights):
  keras_model = create_regression_model(20)
  keras_model.compile(
      loss=tf.keras.losses.MeanSquaredError(),
      metrics=[tf.keras.metrics.MeanAbsoluteError()],
  )
  model_weights.assign_weights_to(keras_model)
  return keras_model.evaluate(federated_test_data[1])

server_state = federated_algorithm.initialize()

# print(federated_test_data)
# server_state
evaluate(server_state)

loss_history = []
for _ in range(5):
  server_state = federated_algorithm.next(server_state, federated_data)
  tf.print("Server state: ", len(server_state.trainable[0][1]))
  loss, mea = evaluate(server_state)
  loss_history.append(loss)

# Plot loss history
plt.figure(figsize=(10, 6))
plt.plot(range(1, 6), loss_history, label='Training Loss')
plt.title("Federated Learning: Training Loss Across Rounds")
plt.xlabel("Round")
plt.ylabel("Loss")
plt.legend()
plt.grid(True)
plt.show()

Client data:  100
Server update, mean_client_weights:  ModelWeights(trainable=(<tf.Tensor 'mean_client_weights:0' shape=(20, 64) dtype=float32>, <tf.Tensor 'mean_client_weights_1:0' shape=(64,) dtype=float32>, <tf.Tensor 'mean_client_weights_2:0' shape=(64, 32) dtype=float32>, <tf.Tensor 'mean_client_weights_3:0' shape=(32,) dtype=float32>, <tf.Tensor 'mean_client_weights_4:0' shape=(32, 1) dtype=float32>, <tf.Tensor 'mean_client_weights_5:0' shape=(1,) dtype=float32>), non_trainable=())


AttributeError: There is no such attribute 'trainable' in this federated tuple. Valid attributes: ()

In [77]:
def create_keras_model():
  initializer = tf.keras.initializers.GlorotNormal(seed=0)
  return tf.keras.models.Sequential([
      tf.keras.layers.Input(shape=(784,)),
      tf.keras.layers.Dense(10, kernel_initializer=initializer),
      tf.keras.layers.Softmax(),
  ])

keras_model = create_keras_model()
tff_model = tff.learning.models.functional_model_from_keras(
    keras_model,
    loss_fn=tf.keras.losses.SparseCategoricalCrossentropy(),
    input_spec=clients_data[0].element_spec,
    metrics_constructor=collections.OrderedDict(
        accuracy=tf.keras.metrics.SparseCategoricalAccuracy
    ),
)
tff_model = tff.learning.models.functional_model_from_keras(
   keras_model,
   loss_fn=tf.keras.losses.MeanSquaredError(),
   input_spec=clients_data[0].element_spec,
   metrics_constructor=collections.OrderedDict(
      accuracy=tf.keras.metrics.MeanAbsoluteError
   ),
)

In [None]:
clients_data = []
for cid in range(NUM_CLIENTS):
    directory = f'model_{cid}_data'
    x_train = np.load(os.path.join('models_data', directory, 'x_train.npy'))
    y_train = np.load(os.path.join('models_data', directory, 'y_train.npy')).reshape(-1, 1)

    clients_data.append((x_train, y_train))
    # x_train_tensor = torch.tensor(x_train, dtype=torch.float32)
    # y_train_tensor = torch.tensor(y_train, dtype=torch.float32).view(-1, 1)

# Generate synthetic data for federated clients
# def create_synthetic_data(num_clients, num_samples, input_dim):
#     client_data = []
#     for i in range(num_clients):
#         np.random.seed(i)
#         X = np.random.rand(num_samples, input_dim)
#         y = X.sum(axis=1) + np.random.normal(scale=0.1, size=num_samples)  # Simple regression target
#         client_data.append((X, y))
#     return client_data

# Preprocess data for TFF
def preprocess_client_data(client_data):
    def create_tf_dataset(X, y):
        dataset = tf.data.Dataset.from_tensor_slices((X, y))
        return dataset.batch(100)  # Batch size of 10
    return [create_tf_dataset(X, y) for X, y in client_data]

# Define the model
def create_regression_model(input_dim):
    model = tf.keras.models.Sequential([
        tf.keras.layers.Dense(64, activation='relu', input_shape=(input_dim,)),
        tf.keras.layers.Dense(32, activation='relu'),
        tf.keras.layers.Dense(1)  # Regression output
    ])
    return model

# Wrap the Keras model for TFF
def model_fn():
    keras_model = create_regression_model(input_dim)
    return tff.learning.models.from_keras_model(
        keras_model=keras_model,
        input_spec=clients_data[0].element_spec,
        loss=tf.keras.losses.MeanSquaredError(),
        metrics=[tf.keras.metrics.MeanSquaredError()]
    )

# Configuration
num_clients = 5
num_samples = 1000
input_dim = 20
# client_data = preprocess_client_data(create_synthetic_data(num_clients, num_samples, input_dim))
clients_data = preprocess_client_data(clients_data)

# Federated averaging process
training_process = tff.learning.algorithms.build_weighted_fed_avg(
    model_fn=model_fn,
    client_optimizer_fn=tff.learning.optimizers.build_adamw(learning_rate=0.01),#lambda: tf.keras.optimizers.Adam(learning_rate=0.001),
    server_optimizer_fn=tff.learning.optimizers.build_sgdm(learning_rate=1.0) #lambda: tf.keras.optimizers.SGD(learning_rate=1.0)
)

# Initialize the federated learning process
state = training_process.initialize()

# Training configuration
NUM_ROUNDS = 100
loss_history = []

# client_metrics = {i: {'accuracy': [], 'loss': []} for i in range(num_clients)}

# for round_num in range(1, NUM_ROUNDS + 1):
#     print(f"Round {round_num}...")
#     # Simulate client computation
#     round_metrics = []
#     for client_idx, client_ds in enumerate(client_data):
#         keras_model = create_regression_model(input_dim=input_dim)
#         keras_model.compile(optimizer='adam', loss='mse', metrics=['mae'])
    
#         keras_model.set_weights([np.array(w.numpy()) for w in state.model.trainable])
#         history = keras_model.fit(client_ds, epochs=1, verbose=0)
#         client_metrics[client_idx]['accuracy'].append(history.history['binary_accuracy'][-1])
#         client_metrics[client_idx]['loss'].append(history.history['loss'][-1])
#         round_metrics.append((client_idx, history.history))

#     # Aggregate client updates
#     state, metrics = training_process.next(state, client_data)

# # Plot metrics
# for client_idx in range(num_clients):
#     plt.figure(figsize=(12, 5))
    
#     # Accuracy plot
#     plt.subplot(1, 2, 1)
#     plt.plot(client_metrics[client_idx]['accuracy'], label=f'Client {client_idx} Accuracy')
#     plt.title(f"Client {client_idx} Accuracy")
#     plt.xlabel("Round")
#     plt.ylabel("Accuracy")
#     plt.legend()
    
#     # Loss plot
#     plt.subplot(1, 2, 2)
#     plt.plot(client_metrics[client_idx]['loss'], label=f'Client {client_idx} Loss')
#     plt.title(f"Client {client_idx} Loss")
#     plt.xlabel("Round")
#     plt.ylabel("Loss")
#     plt.legend()
    
#     plt.tight_layout()
#     plt.show()

# Simulate federated learning rounds
for round_num in range(1, NUM_ROUNDS + 1):
    state, metrics = training_process.next(state, clients_data)
    loss = metrics['client_work']['train']['loss']
    loss_history.append(loss)
    print(f"Round {round_num}: Loss = {loss:.4f}")

# Retrieve final model weights
final_model_weights = state.global_model_weights
print("Final model weights: ", final_model_weights)

# Save the final model
# final_model = create_regression_model(input_dim)
# final_model.set_weights(final_model_weights)
# final_model.save("federated_regression_model.h5")

# print("Federated learning completed. Final model saved.")

# Plot loss history
plt.figure(figsize=(10, 6))
plt.plot(range(1, NUM_ROUNDS + 1), loss_history, marker='o', label='Training Loss')
plt.title("Federated Learning: Training Loss Across Rounds")
plt.xlabel("Round")
plt.ylabel("Loss")
plt.legend()
plt.grid(True)
plt.show()

In [None]:
def convert_YN_to_binary(array):
    # Use numpy where to convert 'Y' to 1 and 'N' to 0
    binary_array = np.where(array == 'Y', 1, 0)
    return binary_array

def average_previous_rows(data, mask):
    result = data.copy()  # Copy the data array to store results
    
    for i in range(2, len(data)):  # Start from the third row (index 2)
        if np.all(mask[i] == 0):  # Check if the entire row in the mask is 0
            result[i] = (data[i-1] + data[i-2]) / 2  # Average the previous two rows
    
    return result
