In [1]:
import collections
import numpy as np
import tensorflow as tf
import tensorflow_federated as tff
from scipy.stats import ks_2samp, chi2_contingency
import concurrent.futures


In [2]:
# Load the MNIST dataset
mnist = tf.keras.datasets.mnist
(X_train, Y_train), (X_test, Y_test) = mnist.load_data()

In [3]:
# Normalize the data
X_train, X_test = X_train / 255.0, X_test / 255.0

In [4]:
# Define a model creation function
def create_model():
    model = tf.keras.Sequential([
        tf.keras.layers.Flatten(input_shape=(28, 28)),
        tf.keras.layers.Dense(128, activation='relu'),
        tf.keras.layers.Dropout(0.2),
        tf.keras.layers.Dense(10, activation='softmax')
    ])
    return model

In [5]:
# Convert Keras model to TFF model
def model_fn():
    keras_model = create_model()
    return tff.learning.from_keras_model(
        keras_model,
        input_spec=client_datasets[0].element_spec,
        loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
        metrics=[tf.keras.metrics.SparseCategoricalAccuracy()]
    )

In [6]:
# Create dummy client datasets (reduced size)
num_clients = 10
client_datasets = [tf.data.Dataset.from_tensor_slices((np.random.rand(50, 28, 28), np.random.randint(0, 10, 50))).batch(10) for _ in range(num_clients)]

In [7]:
# Create federated learning algorithms for each client
federated_algorithms = [tff.learning.build_federated_averaging_process(
    model_fn,
    client_optimizer_fn=lambda: tf.keras.optimizers.SGD(learning_rate=0.02),
    server_optimizer_fn=lambda: tf.keras.optimizers.SGD(learning_rate=1.0)
) for _ in range(num_clients)]

In [8]:
# Function to run the training in a separate event loop
def run_training():
    tff.backends.native.set_local_execution_context()

    states = [algorithm.initialize() for algorithm in federated_algorithms]

    # Train the models on the clients' data (reduced to 2 rounds)
    for round_num in range(1, 3):
        print(f"Round {round_num}")
        for client_id in range(num_clients):
            states[client_id], _ = federated_algorithms[client_id].next(states[client_id], [client_datasets[client_id]])

        # Perform differential testing for each pair of clients
        for i in range(num_clients):
            for j in range(i+1, num_clients):
                # Get the model weights after training
                weights_i = states[i].model.trainable
                weights_j = states[j].model.trainable

                # Evaluate both models on train and test data
                train_predictions_i, train_labels = evaluate_model_on_data(weights_i, create_model, X_train[:1000], Y_train[:1000])
                test_predictions_i, test_labels = evaluate_model_on_data(weights_i, create_model, X_test[:1000], Y_test[:1000])
                train_predictions_j, _ = evaluate_model_on_data(weights_j, create_model, X_train[:1000], Y_train[:1000])
                test_predictions_j, _ = evaluate_model_on_data(weights_j, create_model, X_test[:1000], Y_test[:1000])

                # Perform differential testing
                print(f"Comparison between Client {i} and Client {j}:")
                perform_differential_testing(train_predictions_i, train_predictions_j, train_labels, "Train")
                perform_differential_testing(test_predictions_i, test_predictions_j, test_labels, "Test")
                print()

        # Average the predictions after differential testing
        average_predictions_train = np.mean([evaluate_model_on_data(state.model.trainable, create_model, X_train[:1000], Y_train[:1000])[0] for state in states], axis=0)
        average_predictions_test = np.mean([evaluate_model_on_data(state.model.trainable, create_model, X_test[:1000], Y_test[:1000])[0] for state in states], axis=0)

        print("Average predictions on Train Data:")
        print(average_predictions_train)
        print("Average predictions on Test Data:")
        print(average_predictions_test)

In [9]:
# Function to evaluate the model on data
def evaluate_model_on_data(weights, create_model_fn, X, Y):
    model = create_model_fn()
    model.set_weights(weights)
    predictions = model.predict(X)
    return predictions, Y

In [10]:
# Function to perform differential testing
def perform_differential_testing(predictions_i, predictions_j, labels, data_type):
    # Criterion 1: Absolute differences between classes
    pred_class_i = np.argmax(predictions_i, axis=1)
    pred_class_j = np.argmax(predictions_j, axis=1)
    Δ_class = np.sum(pred_class_i != pred_class_j)

    # Criterion 2: Absolute differences between scores
    Δ_score = np.sum(predictions_i != predictions_j)

    # Criterion 3: Significance of difference between scores
    P_KS = ks_2samp(predictions_i.flatten(), predictions_j.flatten()).pvalue

    # Criterion 4: Significance of difference between classifications
    contingency = np.array([[np.sum((pred_class_i == k) & (pred_class_j == l)) for l in range(10)] for k in range(10)])
    contingency += 1  # Add-one smoothing
    P_X2 = chi2_contingency(contingency)[1]

    print(f"{data_type} Data:")
    print(f"Δ_class: {Δ_class}")
    print(f"Δ_score: {Δ_score:.2f}")
    print(f"P_KS: {P_KS:.4f}")
    print(f"P_X2: {P_X2:.4f}")
    
    if P_KS < 0.05 or P_X2 < 0.05:
        print("Warning: Significant difference detected (p-value < 0.05)")

In [11]:
# Run the training
with concurrent.futures.ThreadPoolExecutor() as executor:
    future = executor.submit(run_training)
    future.result()


Round 1
Comparison between Client 0 and Client 1:
Train Data:
Δ_class: 803
Δ_score: 10000.00
P_KS: 0.0076
P_X2: 0.0000
Test Data:
Δ_class: 858
Δ_score: 10000.00
P_KS: 0.1230
P_X2: 0.0000

Comparison between Client 0 and Client 2:
Train Data:
Δ_class: 884
Δ_score: 10000.00
P_KS: 0.0000
P_X2: 0.0000
Test Data:
Δ_class: 861
Δ_score: 10000.00
P_KS: 0.0000
P_X2: 0.0000

Comparison between Client 0 and Client 3:
Train Data:
Δ_class: 883
Δ_score: 10000.00
P_KS: 0.0000
P_X2: 0.0000
Test Data:
Δ_class: 897
Δ_score: 10000.00
P_KS: 0.0116
P_X2: 0.0000

Comparison between Client 0 and Client 4:
Train Data:
Δ_class: 855
Δ_score: 10000.00
P_KS: 0.0000
P_X2: 0.0000
Test Data:
Δ_class: 870
Δ_score: 10000.00
P_KS: 0.1189
P_X2: 0.0000

Comparison between Client 0 and Client 5:
Train Data:
Δ_class: 980
Δ_score: 10000.00
P_KS: 0.0000
P_X2: 0.0000
Test Data:
Δ_class: 976
Δ_score: 10000.00
P_KS: 0.0000
P_X2: 0.0000

Comparison between Client 0 and Client 6:
Train Data:
Δ_class: 936
Δ_score: 10000.00
P_KS: 0