In [3]:
import tensorflow as tf
import numpy as np
from scipy.stats import ks_2samp, chi2_contingency

# Define the metrics function
def perform_differential_testing(predictions_i, predictions_j, labels, data_type, num_classes=10):
    """Compare the predictions of two models on the same samples.

    Args:
        predictions_i: Scores of the first model, one row per sample and one
            column per class. A 1-D input is treated as a single column.
        predictions_j: Scores of the second model, same layout.
        labels: Unused; kept so existing call sites keep working.
        data_type: Unused; kept so existing call sites keep working.
        num_classes: Side length of the class contingency table. Samples whose
            predicted class is >= ``num_classes`` are left out of the table.

    Returns:
        Tuple ``(delta_class, delta_score, p_ks, p_chi2)``:
        the number of samples whose arg-max class differs, the number of
        score entries that differ, the p-value of a two-sample
        Kolmogorov-Smirnov test on the flattened scores, and the p-value of a
        chi-squared test on the add-one-smoothed class contingency table.

    Raises:
        ValueError: If the two inputs hold a different number of samples.
    """
    predictions_i = np.asarray(predictions_i)
    predictions_j = np.asarray(predictions_j)
    if predictions_i.ndim == 1:
        predictions_i = predictions_i[:, np.newaxis]
    if predictions_j.ndim == 1:
        predictions_j = predictions_j[:, np.newaxis]
    if predictions_i.shape[0] != predictions_j.shape[0]:
        raise ValueError(
            "predictions_i and predictions_j must hold the same number of samples, "
            f"got {predictions_i.shape[0]} and {predictions_j.shape[0]}"
        )

    pred_class_i = np.argmax(predictions_i, axis=1)
    pred_class_j = np.argmax(predictions_j, axis=1)

    delta_class = np.count_nonzero(pred_class_i != pred_class_j)
    delta_score = np.count_nonzero(predictions_i != predictions_j)
    p_ks = ks_2samp(predictions_i.flatten(), predictions_j.flatten()).pvalue

    # Build the (class_i, class_j) co-occurrence table in a single pass.
    in_range = (pred_class_i < num_classes) & (pred_class_j < num_classes)
    contingency = np.zeros((num_classes, num_classes), dtype=np.int64)
    np.add.at(contingency, (pred_class_i[in_range], pred_class_j[in_range]), 1)
    contingency += 1  # Add-one smoothing keeps every expected frequency non-zero.
    p_chi2 = chi2_contingency(contingency)[1]

    return delta_class, delta_score, p_ks, p_chi2

# Custom function to determine if a model is an outlier
def is_outlier(metric_data, epsilon=0.2, min_samples=2):
    """Flag rows of ``metric_data`` that have too few close neighbours.

    Pairwise Euclidean distances are computed on the raw (unscaled) values.
    A row counts as its own neighbour because its distance to itself is 0.

    Args:
        metric_data: Array-like with one row per point. A 1-D input is
            treated as points with a single feature.
        epsilon: A row is a neighbour when its distance is strictly below
            this value.
        min_samples: Rows with fewer neighbours than this are outliers.

    Returns:
        Boolean array with one entry per row; ``True`` marks an outlier.
    """
    points = np.asarray(metric_data, dtype=float)
    if points.ndim == 1:
        points = points[:, np.newaxis]
    # Broadcasting (n, 1, d) - (n, d) yields every pairwise difference (n, n, d).
    distances = np.linalg.norm(points[:, np.newaxis] - points, axis=2)
    neighbors = np.sum(distances < epsilon, axis=1)
    return neighbors < min_samples

# is_outlier computes the pairwise Euclidean distances between the rows of metric_data and, for each row,
# counts how many rows lie closer than epsilon (the row itself is included, since its distance to itself is 0).
# A row with fewer than min_samples such neighbours is considered an outlier.
# federated_learning_process uses this custom, DBSCAN-like check to detect outlying client models.





# Create a simple model
def create_model():
    """Build an uncompiled single-layer softmax classifier.

    Returns:
        A ``tf.keras.Sequential`` holding one ``Dense`` layer with 10 softmax
        units, declared for 20-feature input vectors.
    """
    output_layer = tf.keras.layers.Dense(10, activation='softmax', input_shape=(20,))
    return tf.keras.Sequential([output_layer])

# Create a federated learning process
def model_fn():
    """Wrap a fresh model from ``create_model`` via ``tff.learning.from_keras_model``.

    NOTE(review): ``tff`` is not imported in this cell; it must already be
    bound (presumably to ``tensorflow_federated``) before this is called.

    Returns:
        The object produced by ``tff.learning.from_keras_model`` for a model
        with float32 ``[None, 20]`` features and int32 ``[None]`` labels.
    """
    features_spec = tf.TensorSpec(shape=[None, 20], dtype=tf.float32)
    labels_spec = tf.TensorSpec(shape=[None], dtype=tf.int32)
    return tff.learning.from_keras_model(
        create_model(),
        input_spec=(features_spec, labels_spec),
        loss=tf.keras.losses.SparseCategoricalCrossentropy(),
        metrics=[tf.keras.metrics.SparseCategoricalAccuracy()],
    )

# Simulated federated learning setup
def create_fake_model(bug=False):
    """Create and compile a model, optionally with a faulty optimizer.

    Args:
        bug: When true, Adam is configured with a learning rate of 10.0
            instead of 0.001 to simulate a misconfigured client.

    Returns:
        The compiled Keras model built by ``create_model``.
    """
    # 10.0 is deliberately far too large; 0.001 is the regular setting.
    learning_rate = 10.0 if bug else 0.001
    model = create_model()
    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate),
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy'],
    )
    return model

# Simulated test data on server
# Random stand-in evaluation set: 100 samples x 20 features, integer labels in 0..9.
server_test_data = np.random.random((100, 20))
server_test_labels = np.random.randint(0, 10, 100)

# Ten simulated clients; only client 0 is built with the faulty optimizer.
client_models = []
for i in range(10):
    client_models.append(create_fake_model(bug=(i == 0)))

# Show every client's learning rate so the injected fault is visible.
for i, client_model in enumerate(client_models):
    print(f"Client {i} learning rate: {client_model.optimizer.learning_rate.numpy()}")

# Fit each client for a single epoch on the server data set.
for client_model in client_models:
    client_model.fit(server_test_data, server_test_labels, epochs=1, verbose=0)

# Perform federated learning process
def federated_learning_process(server_model, client_data, epsilon=0.8, min_samples=2):
    """Compare every client model with the server model and flag outliers.

    Each model predicts on the module-level ``server_test_data``. A client's
    score matrix is compared with the server's by
    ``perform_differential_testing``; the resulting metric rows are handed to
    ``is_outlier``. A warning is printed for the first flagged client, after
    which the function returns.

    Args:
        server_model: Reference model exposing ``predict``.
        client_data: Iterable of client models exposing ``predict``.
        epsilon: Neighbourhood radius forwarded to ``is_outlier``.
        min_samples: Neighbour threshold forwarded to ``is_outlier``.

    Returns:
        None.
    """
    # Keep the full score matrices: perform_differential_testing takes the
    # arg-max itself. Passing already-reduced class ids would be expanded to a
    # single column there, so every sample would end up as class 0.
    server_predictions = server_model.predict(server_test_data)

    # One row of (delta_class, delta_score, p_ks, p_chi2) per client.
    metrics_rows = [
        perform_differential_testing(
            server_predictions,
            client_model.predict(server_test_data),
            server_test_labels,
            f"Client {client_id}",
        )
        for client_id, client_model in enumerate(client_data)
    ]
    if not metrics_rows:
        return  # No clients, nothing to check.

    metrics_data = np.array(metrics_rows)
    outliers = is_outlier(metrics_data, epsilon=epsilon, min_samples=min_samples)

    for client_id, flagged in enumerate(outliers):
        if flagged:
            print(f"Warning: Client {client_id} is an outlier. Potential bug detected.")
            return  # Stop the process if an outlier is detected

# Run the outlier check: a freshly built, untrained model (normal learning
# rate) stands in for the server and is compared with the ten client models.
federated_learning_process(server_model=create_fake_model(), client_data=client_models)


Client 0 learning rate: 10.0
Client 1 learning rate: 0.0010000000474974513
Client 2 learning rate: 0.0010000000474974513
Client 3 learning rate: 0.0010000000474974513
Client 4 learning rate: 0.0010000000474974513
Client 5 learning rate: 0.0010000000474974513
Client 6 learning rate: 0.0010000000474974513
Client 7 learning rate: 0.0010000000474974513
Client 8 learning rate: 0.0010000000474974513
Client 9 learning rate: 0.0010000000474974513




1. **Import Libraries**:
   - `tensorflow` for building and training models.
   - `numpy` for numerical operations.
   - `scipy.stats` for statistical tests.

2. **Define the Metrics Function**:
   - `perform_differential_testing`: This function compares predictions from two models (`predictions_i` and `predictions_j`) and calculates several metrics:
     - `Δ_class`: The number of times the predicted classes differ.
     - `Δ_score`: The number of prediction-score entries that differ between the two models (an element-wise inequality count, not a sum of magnitudes).
     - `P_KS`: The p-value from the Kolmogorov-Smirnov test comparing the distributions of the predictions.
     - `P_X2`: The p-value from the chi-squared test comparing the contingency table of predicted classes.

3. **Custom Outlier Detection Function**:
   - `is_outlier`: This function calculates the distances between points in the `metric_data` array and counts the number of neighbors within a specified distance (`epsilon`). If the number of neighbors is less than `min_samples`, the point is considered an outlier.

4. **Create a Simple Model**:
   - `create_model`: This function creates a simple neural network model with one dense layer using the `softmax` activation function.

5. **Create a Federated Learning Process**:
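   - `model_fn`: This function wraps the model from `create_model` for TensorFlow Federated (TFF) via `tff.learning.from_keras_model`. It is defined but never called in this cell, and `tff` is not imported here.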

6. **Simulated Federated Learning Setup**:
   - `create_fake_model`: This function creates a model with either a normal learning rate (`0.001`) or an extremely high learning rate (`10.0`) to introduce a bug.

7. **Simulated Test Data on Server**:
   - `server_test_data` and `server_test_labels`: These are simulated test data and labels used for evaluation.

8. **Simulated Client Models**:
   - `client_models`: This list contains models for 10 clients, with the first client having a bug (extremely high learning rate).

9. **Print Learning Rates of Each Client Model**:
   - This loop prints the learning rate of each client model to verify the bug introduction.

10. **Train Client Models**:
    - This loop trains each client model on the simulated test data.

11. **Perform Federated Learning Process**:
    - `federated_learning_process`: This function performs the federated learning process, including:
      - Predicting with the server model.
      - Comparing predictions from client models with the server model using `perform_differential_testing`.
      - Storing metrics for each client.
      - Detecting outliers using the custom `is_outlier` function.
      - Printing a warning if an outlier (potential bug) is detected.

In summary, this code sets up a federated learning environment, introduces a bug in one of the client models, and uses differential testing and a custom DBSCAN-like approach to detect the bug before updating the server model.

The evaluation using the `perform_differential_testing` function is a comparison between each client model and the server model. Here's a breakdown of how it works:

1. **Server Model Predictions**:
   - The server model makes predictions on the test data (`server_predictions`).

2. **Client Model Predictions**:
   - Each client model makes predictions on the same test data (`client_predictions`).

3. **Comparison**:
   - The `perform_differential_testing` function compares the predictions from each client model (`client_predictions`) with the predictions from the server model (`server_predictions`).
   - This comparison calculates several metrics:
     - `Δ_class`: The number of times the predicted classes differ between the server model and the client model.
     - `Δ_score`: The number of prediction-score entries that differ between the server model and the client model (an element-wise inequality count, not a sum of magnitudes).
     - `P_KS`: The p-value from the Kolmogorov-Smirnov test comparing the distributions of the predictions from the server model and the client model.
     - `P_X2`: The p-value from the chi-squared test comparing the contingency table of predicted classes from the server model and the client model.

4. **Outlier Detection**:
   - The metrics for each client model are stored in `metrics_data`.
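   - The `is_outlier` function is used to detect outliers in the `metrics_data`: a client is flagged when fewer than `min_samples` metric vectors (its own included) lie within distance `epsilon` of its metric vector, i.e. when its deviation from the server model is unlike that of the other clients.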

In summary, the evaluation is a comparison between each client model and the server model to detect any significant deviations that might indicate a bug in the client model.

In [4]:
import tensorflow as tf
import numpy as np
from scipy.stats import ks_2samp, chi2_contingency

# Define the metrics function
def perform_differential_testing(predictions_i, predictions_j, labels, data_type, num_classes=10):
    """Compare the predictions of two models on the same samples.

    Args:
        predictions_i: Scores of the first model, one row per sample and one
            column per class. A 1-D input is treated as a single column.
        predictions_j: Scores of the second model, same layout.
        labels: Unused; kept so existing call sites keep working.
        data_type: Unused; kept so existing call sites keep working.
        num_classes: Side length of the class contingency table. Samples whose
            predicted class is >= ``num_classes`` are left out of the table.

    Returns:
        Tuple ``(delta_class, delta_score, p_ks, p_chi2)``:
        the number of samples whose arg-max class differs, the number of
        score entries that differ, the p-value of a two-sample
        Kolmogorov-Smirnov test on the flattened scores, and the p-value of a
        chi-squared test on the add-one-smoothed class contingency table.

    Raises:
        ValueError: If the two inputs hold a different number of samples.
    """
    predictions_i = np.asarray(predictions_i)
    predictions_j = np.asarray(predictions_j)
    if predictions_i.ndim == 1:
        predictions_i = predictions_i[:, np.newaxis]
    if predictions_j.ndim == 1:
        predictions_j = predictions_j[:, np.newaxis]
    if predictions_i.shape[0] != predictions_j.shape[0]:
        raise ValueError(
            "predictions_i and predictions_j must hold the same number of samples, "
            f"got {predictions_i.shape[0]} and {predictions_j.shape[0]}"
        )

    pred_class_i = np.argmax(predictions_i, axis=1)
    pred_class_j = np.argmax(predictions_j, axis=1)

    delta_class = np.count_nonzero(pred_class_i != pred_class_j)
    delta_score = np.count_nonzero(predictions_i != predictions_j)
    p_ks = ks_2samp(predictions_i.flatten(), predictions_j.flatten()).pvalue

    # Build the (class_i, class_j) co-occurrence table in a single pass.
    in_range = (pred_class_i < num_classes) & (pred_class_j < num_classes)
    contingency = np.zeros((num_classes, num_classes), dtype=np.int64)
    np.add.at(contingency, (pred_class_i[in_range], pred_class_j[in_range]), 1)
    contingency += 1  # Add-one smoothing keeps every expected frequency non-zero.
    p_chi2 = chi2_contingency(contingency)[1]

    return delta_class, delta_score, p_ks, p_chi2

# Custom function to determine if a model is an outlier
def is_outlier(metric_data, epsilon=0.1, min_samples=3):  # Modified parameters
    """Flag rows of ``metric_data`` that have too few close neighbours.

    Pairwise Euclidean distances are computed on the raw (unscaled) values.
    A row counts as its own neighbour because its distance to itself is 0.

    Args:
        metric_data: Array-like with one row per point. A 1-D input is
            treated as points with a single feature.
        epsilon: A row is a neighbour when its distance is strictly below
            this value.
        min_samples: Rows with fewer neighbours than this are outliers.

    Returns:
        Boolean array with one entry per row; ``True`` marks an outlier.
    """
    points = np.asarray(metric_data, dtype=float)
    if points.ndim == 1:
        points = points[:, np.newaxis]
    # Broadcasting (n, 1, d) - (n, d) yields every pairwise difference (n, n, d).
    distances = np.linalg.norm(points[:, np.newaxis] - points, axis=2)
    neighbors = np.sum(distances < epsilon, axis=1)
    return neighbors < min_samples

# Create a simple model
def create_model():
    """Build an uncompiled single-layer softmax classifier.

    Returns:
        A ``tf.keras.Sequential`` holding one ``Dense`` layer with 10 softmax
        units, declared for 20-feature input vectors.
    """
    output_layer = tf.keras.layers.Dense(10, activation='softmax', input_shape=(20,))
    return tf.keras.Sequential([output_layer])

# Create a federated learning process
def model_fn():
    """Wrap a fresh model from ``create_model`` via ``tff.learning.from_keras_model``.

    NOTE(review): ``tff`` is not imported in this cell; it must already be
    bound (presumably to ``tensorflow_federated``) before this is called.

    Returns:
        The object produced by ``tff.learning.from_keras_model`` for a model
        with float32 ``[None, 20]`` features and int32 ``[None]`` labels.
    """
    features_spec = tf.TensorSpec(shape=[None, 20], dtype=tf.float32)
    labels_spec = tf.TensorSpec(shape=[None], dtype=tf.int32)
    return tff.learning.from_keras_model(
        create_model(),
        input_spec=(features_spec, labels_spec),
        loss=tf.keras.losses.SparseCategoricalCrossentropy(),
        metrics=[tf.keras.metrics.SparseCategoricalAccuracy()],
    )

# Simulated federated learning setup
def create_fake_model(bug=False):
    """Create and compile a model, optionally with a faulty optimizer.

    Args:
        bug: When true, Adam is configured with a learning rate of 10.0
            instead of 0.001 to simulate a misconfigured client.

    Returns:
        The compiled Keras model built by ``create_model``.
    """
    # 10.0 is deliberately far too large; 0.001 is the regular setting.
    learning_rate = 10.0 if bug else 0.001
    model = create_model()
    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate),
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy'],
    )
    return model

# Simulated test data on server
# Random stand-in evaluation set: 100 samples x 20 features, integer labels in 0..9.
server_test_data = np.random.random((100, 20))
server_test_labels = np.random.randint(0, 10, 100)

# Ten simulated clients; only client 0 is built with the faulty optimizer.
client_models = []
for i in range(10):
    client_models.append(create_fake_model(bug=(i == 0)))

# Show every client's learning rate so the injected fault is visible.
for i, client_model in enumerate(client_models):
    print(f"Client {i} learning rate: {client_model.optimizer.learning_rate.numpy()}")

# Fit each client for a single epoch on the server data set.
for client_model in client_models:
    client_model.fit(server_test_data, server_test_labels, epochs=1, verbose=0)

# Perform federated learning process
def federated_learning_process(server_model, client_data, epsilon=0.1, min_samples=3):  # Modified parameters
    """Compare every client model with the server model and flag outliers.

    Each model predicts on the module-level ``server_test_data``. A client's
    score matrix is compared with the server's by
    ``perform_differential_testing``; the resulting metric rows are handed to
    ``is_outlier``. A warning is printed for the first flagged client, after
    which the function returns.

    Args:
        server_model: Reference model exposing ``predict``.
        client_data: Iterable of client models exposing ``predict``.
        epsilon: Neighbourhood radius forwarded to ``is_outlier``.
        min_samples: Neighbour threshold forwarded to ``is_outlier``.

    Returns:
        None.
    """
    # Keep the full score matrices: perform_differential_testing takes the
    # arg-max itself. Passing already-reduced class ids would be expanded to a
    # single column there, so every sample would end up as class 0.
    server_predictions = server_model.predict(server_test_data)

    # One row of (delta_class, delta_score, p_ks, p_chi2) per client.
    metrics_rows = [
        perform_differential_testing(
            server_predictions,
            client_model.predict(server_test_data),
            server_test_labels,
            f"Client {client_id}",
        )
        for client_id, client_model in enumerate(client_data)
    ]
    if not metrics_rows:
        return  # No clients, nothing to check.

    metrics_data = np.array(metrics_rows)
    outliers = is_outlier(metrics_data, epsilon=epsilon, min_samples=min_samples)

    for client_id, flagged in enumerate(outliers):
        if flagged:
            print(f"Warning: Client {client_id} is an outlier. Potential bug detected.")
            return  # Stop the process if an outlier is detected

# Run the outlier check: a freshly built, untrained model (normal learning
# rate) stands in for the server and is compared with the ten client models.
federated_learning_process(server_model=create_fake_model(), client_data=client_models)


Client 0 learning rate: 10.0
Client 1 learning rate: 0.0010000000474974513
Client 2 learning rate: 0.0010000000474974513
Client 3 learning rate: 0.0010000000474974513
Client 4 learning rate: 0.0010000000474974513
Client 5 learning rate: 0.0010000000474974513
Client 6 learning rate: 0.0010000000474974513
Client 7 learning rate: 0.0010000000474974513
Client 8 learning rate: 0.0010000000474974513
Client 9 learning rate: 0.0010000000474974513


Using MNIST to implement the federated process:

In [5]:
import tensorflow as tf
import numpy as np
from scipy.stats import ks_2samp, chi2_contingency
from tensorflow.keras.datasets import mnist

# Define the metrics function
def perform_differential_testing(predictions_i, predictions_j, labels, data_type, num_classes=10):
    """Compare the predictions of two models on the same samples.

    Args:
        predictions_i: Scores of the first model, one row per sample and one
            column per class. A 1-D input is treated as a single column.
        predictions_j: Scores of the second model, same layout.
        labels: Unused; kept so existing call sites keep working.
        data_type: Unused; kept so existing call sites keep working.
        num_classes: Side length of the class contingency table. Samples whose
            predicted class is >= ``num_classes`` are left out of the table.

    Returns:
        Tuple ``(delta_class, delta_score, p_ks, p_chi2)``:
        the number of samples whose arg-max class differs, the number of
        score entries that differ, the p-value of a two-sample
        Kolmogorov-Smirnov test on the flattened scores, and the p-value of a
        chi-squared test on the add-one-smoothed class contingency table.

    Raises:
        ValueError: If the two inputs hold a different number of samples.
    """
    predictions_i = np.asarray(predictions_i)
    predictions_j = np.asarray(predictions_j)
    if predictions_i.ndim == 1:
        predictions_i = predictions_i[:, np.newaxis]
    if predictions_j.ndim == 1:
        predictions_j = predictions_j[:, np.newaxis]
    if predictions_i.shape[0] != predictions_j.shape[0]:
        raise ValueError(
            "predictions_i and predictions_j must hold the same number of samples, "
            f"got {predictions_i.shape[0]} and {predictions_j.shape[0]}"
        )

    pred_class_i = np.argmax(predictions_i, axis=1)
    pred_class_j = np.argmax(predictions_j, axis=1)

    delta_class = np.count_nonzero(pred_class_i != pred_class_j)
    delta_score = np.count_nonzero(predictions_i != predictions_j)
    p_ks = ks_2samp(predictions_i.flatten(), predictions_j.flatten()).pvalue

    # Build the (class_i, class_j) co-occurrence table in a single pass.
    in_range = (pred_class_i < num_classes) & (pred_class_j < num_classes)
    contingency = np.zeros((num_classes, num_classes), dtype=np.int64)
    np.add.at(contingency, (pred_class_i[in_range], pred_class_j[in_range]), 1)
    contingency += 1  # Add-one smoothing keeps every expected frequency non-zero.
    p_chi2 = chi2_contingency(contingency)[1]

    return delta_class, delta_score, p_ks, p_chi2

# Custom function to determine if a model is an outlier
def is_outlier(metric_data, epsilon=0.1, min_samples=3):  # Modified parameters
    """Flag rows of ``metric_data`` that have too few close neighbours.

    Pairwise Euclidean distances are computed on the raw (unscaled) values.
    A row counts as its own neighbour because its distance to itself is 0.

    Args:
        metric_data: Array-like with one row per point. A 1-D input is
            treated as points with a single feature.
        epsilon: A row is a neighbour when its distance is strictly below
            this value.
        min_samples: Rows with fewer neighbours than this are outliers.

    Returns:
        Boolean array with one entry per row; ``True`` marks an outlier.
    """
    points = np.asarray(metric_data, dtype=float)
    if points.ndim == 1:
        points = points[:, np.newaxis]
    # Broadcasting (n, 1, d) - (n, d) yields every pairwise difference (n, n, d).
    distances = np.linalg.norm(points[:, np.newaxis] - points, axis=2)
    neighbors = np.sum(distances < epsilon, axis=1)
    return neighbors < min_samples

# Create a simple model
def create_model():
    """Build an uncompiled two-layer classifier for 28x28 inputs.

    Returns:
        A ``tf.keras.Sequential`` that flattens 28x28 inputs, applies a
        128-unit ReLU ``Dense`` layer and ends in a 10-unit softmax ``Dense``
        layer.
    """
    layer_stack = [
        tf.keras.layers.Flatten(input_shape=(28, 28)),
        tf.keras.layers.Dense(128, activation='relu'),
        tf.keras.layers.Dense(10, activation='softmax'),
    ]
    return tf.keras.Sequential(layer_stack)

# Create a federated learning process
def model_fn():
    """Wrap a fresh model from ``create_model`` via ``tff.learning.from_keras_model``.

    NOTE(review): ``tff`` is not imported in this cell; it must already be
    bound (presumably to ``tensorflow_federated``) before this is called.

    Returns:
        The object produced by ``tff.learning.from_keras_model`` for a model
        with float32 ``[None, 28, 28]`` features and int32 ``[None]`` labels.
    """
    features_spec = tf.TensorSpec(shape=[None, 28, 28], dtype=tf.float32)
    labels_spec = tf.TensorSpec(shape=[None], dtype=tf.int32)
    return tff.learning.from_keras_model(
        create_model(),
        input_spec=(features_spec, labels_spec),
        loss=tf.keras.losses.SparseCategoricalCrossentropy(),
        metrics=[tf.keras.metrics.SparseCategoricalAccuracy()],
    )

# Simulated federated learning setup
def create_fake_model(bug=False):
    """Create and compile a model, optionally with a faulty optimizer.

    Args:
        bug: When true, Adam is configured with a learning rate of 10.0
            instead of 0.001 to simulate a misconfigured client.

    Returns:
        The compiled Keras model built by ``create_model``.
    """
    # 10.0 is deliberately far too large; 0.001 is the regular setting.
    learning_rate = 10.0 if bug else 0.001
    model = create_model()
    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate),
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy'],
    )
    return model

# Load and preprocess MNIST dataset
(train_images, train_labels), (test_images, test_labels) = mnist.load_data()
# Divide the pixel values by 255.0 before they are fed to the models.
train_images = train_images / 255.0
test_images = test_images / 255.0

# Ten simulated clients; only client 0 is built with the faulty optimizer.
client_models = []
for i in range(10):
    client_models.append(create_fake_model(bug=(i == 0)))

# Show every client's learning rate so the injected fault is visible.
for i, client_model in enumerate(client_models):
    print(f"Client {i} learning rate: {client_model.optimizer.learning_rate.numpy()}")

# Fit each client for a single epoch on the full MNIST training split.
for client_model in client_models:
    client_model.fit(train_images, train_labels, epochs=1, verbose=0)

# Perform federated learning process
def federated_learning_process(server_model, client_data, epsilon=0.1, min_samples=3):  # Modified parameters
    """Compare every client model with the server model and flag outliers.

    Each model predicts on the module-level ``test_images``. A client's score
    matrix is compared with the server's by ``perform_differential_testing``;
    the resulting metric rows are handed to ``is_outlier``. A warning is
    printed for the first flagged client, after which the function returns.

    Args:
        server_model: Reference model exposing ``predict``.
        client_data: Iterable of client models exposing ``predict``.
        epsilon: Neighbourhood radius forwarded to ``is_outlier``.
        min_samples: Neighbour threshold forwarded to ``is_outlier``.

    Returns:
        None.
    """
    # Keep the full score matrices: perform_differential_testing takes the
    # arg-max itself. Passing already-reduced class ids would be expanded to a
    # single column there, so every sample would end up as class 0.
    server_predictions = server_model.predict(test_images)

    # One row of (delta_class, delta_score, p_ks, p_chi2) per client.
    metrics_rows = [
        perform_differential_testing(
            server_predictions,
            client_model.predict(test_images),
            test_labels,
            f"Client {client_id}",
        )
        for client_id, client_model in enumerate(client_data)
    ]
    if not metrics_rows:
        return  # No clients, nothing to check.

    metrics_data = np.array(metrics_rows)
    outliers = is_outlier(metrics_data, epsilon=epsilon, min_samples=min_samples)

    for client_id, flagged in enumerate(outliers):
        if flagged:
            print(f"Warning: Client {client_id} is an outlier. Potential bug detected.")
            return  # Stop the process if an outlier is detected

# Run the outlier check: a freshly built, untrained model (normal learning
# rate) stands in for the server and is compared with the ten client models.
federated_learning_process(server_model=create_fake_model(), client_data=client_models)


Client 0 learning rate: 10.0
Client 1 learning rate: 0.0010000000474974513
Client 2 learning rate: 0.0010000000474974513
Client 3 learning rate: 0.0010000000474974513
Client 4 learning rate: 0.0010000000474974513
Client 5 learning rate: 0.0010000000474974513
Client 6 learning rate: 0.0010000000474974513
Client 7 learning rate: 0.0010000000474974513
Client 8 learning rate: 0.0010000000474974513
Client 9 learning rate: 0.0010000000474974513
