<a href="https://colab.research.google.com/github/shiri9/non-iid/blob/main/label_skew_statitistical-dirichlet.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Install TensorFlow and all dependencies explicitly compatible with TFF 0.87.0
%pip install tensorflow==2.15.0
%pip install tensorflow-federated==0.81.0
%pip install tensorflow-privacy==0.9.0
%pip install tensorflow-model-optimization==0.7.5
%pip install jax==0.4.14 jaxlib==0.4.14
%pip install google-vizier==0.1.11
%pip install dp-accounting==0.4.3
%pip install portpicker==1.6.0
%pip install scipy==1.9.3
%pip install numpy==1.25.2
%pip install protobuf==3.20.3
%pip install typing-extensions==4.7.1
%pip install googleapis-common-protos==1.61.0
%pip install dm-tree==0.1.8

In [None]:
!python --version

Python 3.11.12


In [2]:
!rm -rf /usr/local/lib/python3.11/dist-packages/jax_plugins

In [10]:
# Verify
import tensorflow as tf
import tensorflow_federated as tff

print("TF version:", tf.__version__)
print("TFF version:", tff.__version__)

TF version: 2.14.1
TFF version: 0.81.0


In [11]:
#cell 1
import tensorflow as tf
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from google.colab import drive

# Mount Google Drive to access data files
drive.mount('/content/drive')

# Load datasets
df_train = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/kdd_train.csv')
df_test = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/kdd_test.csv')

# Define label mapping for attack categories (including all labels from train and test sets)
attack_mapping = {
    'normal': 0, 'neptune': 1, 'land': 1, 'back': 1, 'teardrop': 1, 'pod': 1, 'smurf': 1,
    'ipsweep': 2, 'nmap': 2, 'portsweep': 2, 'satan': 2,
    'mailbomb': 1, 'apache2': 1, 'processtable': 1,  # Missing DoS labels in test set
    'phf': 3, 'multihop': 3, 'warezclient': 3, 'warezmaster': 3, 'spy': 3, 'ftp_write': 3,
    'guess_passwd': 3, 'imap': 3,
    'buffer_overflow': 4, 'loadmodule': 4, 'perl': 4, 'rootkit': 4,
    # Ensure all test labels are included
    'mscan': 2, 'saint': 2, 'snmpgetattack': 3, 'snmpguess': 3, 'xlock': 3, 'xsnoop': 3,
    'httptunnel': 3, 'ps': 4, 'xterm': 4,
    'sendmail': 3, 'named': 3  # Missing labels in test set
}

# Apply the label mapping
df_train['labels'] = df_train['labels'].replace(attack_mapping)
df_test['labels'] = df_test['labels'].replace(attack_mapping)

# Verify the unique labels after mapping
print("Unique labels in train set:", df_train['labels'].unique())
print("Unique labels in test set:", df_test['labels'].unique())

# Dropping the irrelevant column 'num_outbound_cmds'
df_train = df_train.drop('num_outbound_cmds', axis=1)
df_test = df_test.drop('num_outbound_cmds', axis=1)

# Encoding categorical columns: 'protocol_type', 'service', 'flag'
categorical_columns = ['protocol_type', 'service', 'flag']
label_encoders = {}
for col in categorical_columns:
    le = LabelEncoder()
    df_train[col] = le.fit_transform(df_train[col])
    df_test[col] = le.transform(df_test[col])  # Important: use transform for test set, not fit_transform

# Scaling numerical columns
numerical_columns = [
    'duration', 'src_bytes', 'dst_bytes', 'count', 'srv_count', 'serror_rate', 'srv_serror_rate', 'same_srv_rate',
    'dst_host_count', 'dst_host_srv_count', 'dst_host_same_srv_rate', 'dst_host_diff_srv_rate',
    'dst_host_same_src_port_rate', 'dst_host_serror_rate', 'dst_host_srv_serror_rate', 'rerror_rate', 'srv_rerror_rate',
    'diff_srv_rate', 'srv_diff_host_rate', 'dst_host_srv_diff_host_rate', 'dst_host_rerror_rate',
    'dst_host_srv_rerror_rate', 'hot', 'num_compromised', 'num_root'
]

scaler = MinMaxScaler()
df_train[numerical_columns] = scaler.fit_transform(df_train[numerical_columns])
df_test[numerical_columns] = scaler.transform(df_test[numerical_columns])

# Convert to NumPy arrays and enforce correct types for TensorFlow
X_train = np.array(df_train.drop('labels', axis=1)).astype(np.float32)
y_train = np.array(df_train['labels']).astype(np.int32)

X_test = np.array(df_test.drop('labels', axis=1)).astype(np.float32)
y_test = np.array(df_test['labels']).astype(np.int32)

# Convert to TensorFlow datasets
train_dataset = tf.data.Dataset.from_tensor_slices((X_train, y_train)).batch(32)
test_dataset = tf.data.Dataset.from_tensor_slices((X_test, y_test)).batch(32)

# Check dataset shapes
print("Train dataset shape:", X_train.shape, y_train.shape)
print("Test dataset shape:", X_test.shape, y_test.shape)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


  df_train['labels'] = df_train['labels'].replace(attack_mapping)
  df_test['labels'] = df_test['labels'].replace(attack_mapping)


Unique labels in train set: [0 1 3 2 4]
Unique labels in test set: [0 2 1 3 4]
Train dataset shape: (125973, 40) (125973,)
Test dataset shape: (22544, 40) (22544,)


In [12]:
# ## Cell2: Create Non-IID Partitions with Dirichlet Label Skew (Modified Version)

import numpy as np
import pandas as pd

# Configuration
NUM_CLIENTS = 10
NUM_CLASSES = 5  # Based on your 5 attack categories (0-4)
ALPHA = 0.5      # Dirichlet concentration parameter (α=0.5 for moderate skew)
SEED = 42        # For reproducibility
MIN_SAMPLES_PER_CLIENT = 100  # Ensure clients have sufficient data

np.random.seed(SEED)  # Fix randomness for reproducibility

# Step 1: Generate client-specific label distributions using Dirichlet
client_label_probs = np.random.dirichlet([ALPHA]*NUM_CLASSES, size=NUM_CLIENTS)

# Initialize list to store partitions
data_partitions = []

# Create partitions using Dirichlet distributions
for client_id in range(NUM_CLIENTS):
    partition = pd.DataFrame()
    client_probs = client_label_probs[client_id]  # Probability vector for this client

    # For each class, sample data proportionally to Dirichlet probabilities
    for label in range(NUM_CLASSES):
        class_data = df_train[df_train['labels'] == label]
        if len(class_data) == 0:
            continue  # Skip empty classes

        # Calculate number of samples for this class
        num_samples = max(
            MIN_SAMPLES_PER_CLIENT // NUM_CLASSES,  # Minimum guarantee
            int(len(class_data) * client_probs[label])  # Dirichlet proportion
        )

        # Sample without replacement
        sampled_data = class_data.sample(n=num_samples, replace=False, random_state=SEED+client_id)
        partition = pd.concat([partition, sampled_data])

    # Final checks
    if len(partition) == 0:
        raise ValueError(f"Client {client_id} has no data!")

    # Shuffle and store
    data_partitions.append(
        partition.sample(frac=1, random_state=SEED).reset_index(drop=True)
    )

    # Print verification
    print(f"\nClient {client_id+1} Label Distribution (Dirichlet α={ALPHA}):")
    print(partition['labels'].value_counts().sort_index())
    print(f"Target distribution: {np.round(client_probs, 2)}")

print("\nSuccessfully created Dirichlet-based non-IID partitions!")



Client 1 Label Distribution (Dirichlet α=0.5):
labels
0     7854
1    25164
2      235
3       20
4       20
Name: count, dtype: int64
Target distribution: [0.12 0.55 0.02 0.   0.31]

Client 2 Label Distribution (Dirichlet α=0.5):
labels
0       20
1    35730
2      273
3       65
4       20
Name: count, dtype: int64
Target distribution: [0.   0.78 0.02 0.07 0.13]

Client 3 Label Distribution (Dirichlet α=0.5):
labels
0    23786
1     3526
2     2181
3       35
4       20
Name: count, dtype: int64
Target distribution: [0.35 0.08 0.19 0.04 0.35]

Client 4 Label Distribution (Dirichlet α=0.5):
labels
0       70
1    38573
2      268
3      131
4       20
Name: count, dtype: int64
Target distribution: [0.   0.84 0.02 0.13 0.  ]

Client 5 Label Distribution (Dirichlet α=0.5):
labels
0     20
1    754
2    277
3     73
4     46
Name: count, dtype: int64
Target distribution: [0.   0.02 0.02 0.07 0.89]

Client 6 Label Distribution (Dirichlet α=0.5):
labels
0    55215
1     5767
2       30
3 

In [13]:

# ## Cell3: Create Label-Skew Partitions

import numpy as np
import pandas as pd

# Configuration
NUM_CLIENTS = 10
CLASS_MAPPING = {'Benign': 0, 'DoS': 1, 'Probe': 2, 'U2R': 3, 'R2L': 4}
MIN_SAMPLES_PER_CLASS = 50  # Prevent class starvation

# Label distribution per client (matches your paper's setup)
client_class_map = {
    0: ['Benign', 'DoS'],
    1: ['Benign', 'Probe'],
    2: ['Benign', 'U2R'],
    3: ['Benign', 'R2L'],
    4: ['DoS', 'Probe'],
    5: ['DoS', 'U2R'],
    6: ['DoS', 'R2L'],
    7: ['Probe', 'U2R'],
    8: ['Probe', 'R2L'],
    9: ['U2R', 'R2L']
}

data_partitions = []
for client_id in range(NUM_CLIENTS):
    client_partition = pd.DataFrame()
    classes = client_class_map[client_id]

    for class_name in classes:
        label = CLASS_MAPPING[class_name]
        class_data = df_train[df_train['labels'] == label]

        # Dynamic sampling with minimum guarantee
        proportion = np.random.uniform(0.1, 0.4)  # 10-40% of class data
        num_samples = max(MIN_SAMPLES_PER_CLASS, int(len(class_data) * proportion))

        client_partition = pd.concat([
            client_partition,
            class_data.sample(n=num_samples, random_state=42+client_id)
        ])

    # Shuffle and store
    data_partitions.append(
        client_partition.sample(frac=1, random_state=42).reset_index(drop=True)
    )

    # Verification
    print(f"\nClient {client_id+1} Distribution:")
    print(client_partition['labels'].value_counts().sort_index())


Client 1 Distribution:
labels
0    15025
1    15002
Name: count, dtype: int64

Client 2 Distribution:
labels
0    11356
2     1434
Name: count, dtype: int64

Client 3 Distribution:
labels
0    12588
3      147
Name: count, dtype: int64

Client 4 Distribution:
labels
0    25516
4       50
Name: count, dtype: int64

Client 5 Distribution:
labels
1    13319
2     4212
Name: count, dtype: int64

Client 6 Distribution:
labels
1    15665
3      155
Name: count, dtype: int64

Client 7 Distribution:
labels
1    16890
4       50
Name: count, dtype: int64

Client 8 Distribution:
labels
2    3989
3     366
Name: count, dtype: int64

Client 9 Distribution:
labels
2    2277
4      50
Name: count, dtype: int64

Client 10 Distribution:
labels
3    167
4     50
Name: count, dtype: int64


In [14]:

# ## Cell4 : Create TensorFlow Datasets (Final Corrected Version)

import numpy as np
import tensorflow as tf

# Configuration
batch_size = 32
SEED = 42  # For reproducible shuffling

train_datasets = []
val_datasets = []

for client_id, partition in enumerate(data_partitions):
    # ========== Exact 90/10 Split ==========
    total_samples = len(partition)
    train_samples = (total_samples // 10) * 9  # Exact 90%
    val_samples = total_samples - train_samples  # Exact 10%

    # Shuffle with client-specific seed
    shuffled_partition = partition.sample(frac=1, random_state=SEED+client_id).reset_index(drop=True)

    # Split into train/val
    train_part = shuffled_partition.iloc[:train_samples]
    val_part = shuffled_partition.iloc[train_samples:]

    # ========== Feature/Label Conversion ==========
    # Training data
    train_features = train_part.drop(columns=['labels']).values.astype(np.float32)
    train_labels = train_part['labels'].values.astype(np.int32)

    # Validation data
    val_features = val_part.drop(columns=['labels']).values.astype(np.float32)
    val_labels = val_part['labels'].values.astype(np.int32)

    # ========== Dataset Creation ==========
    train_dataset = tf.data.Dataset.from_tensor_slices(
        (train_features, train_labels)
    ).batch(batch_size)

    val_dataset = tf.data.Dataset.from_tensor_slices(
        (val_features, val_labels)
    ).batch(batch_size)

    # Store datasets
    train_datasets.append(train_dataset)
    val_datasets.append(val_dataset)

    # ========== Verification ==========
    print(f"Client {client_id+1}:")
    print(f"  Train: {len(train_part)} samples | Classes: {np.unique(train_labels)}")
    print(f"  Val: {len(val_part)} samples | Classes: {np.unique(val_labels)}\n")

# ========== Test Dataset ==========
test_features = df_test.drop(columns=['labels']).values.astype(np.float32)
test_labels = df_test['labels'].values.astype(np.int32)
test_dataset = tf.data.Dataset.from_tensor_slices(
    (test_features, test_labels)
).batch(batch_size)

print("=== Final Verification ===")
print(f"Total training clients: {len(train_datasets)}")
print(f"Test samples: {len(test_labels)}")
print(f"Test features shape: {test_features.shape}")

Client 1:
  Train: 27018 samples | Classes: [0 1]
  Val: 3009 samples | Classes: [0 1]

Client 2:
  Train: 11511 samples | Classes: [0 2]
  Val: 1279 samples | Classes: [0 2]

Client 3:
  Train: 11457 samples | Classes: [0 3]
  Val: 1278 samples | Classes: [0 3]

Client 4:
  Train: 23004 samples | Classes: [0 4]
  Val: 2562 samples | Classes: [0 4]

Client 5:
  Train: 15777 samples | Classes: [1 2]
  Val: 1754 samples | Classes: [1 2]

Client 6:
  Train: 14238 samples | Classes: [1 3]
  Val: 1582 samples | Classes: [1 3]

Client 7:
  Train: 15246 samples | Classes: [1 4]
  Val: 1694 samples | Classes: [1 4]

Client 8:
  Train: 3915 samples | Classes: [2 3]
  Val: 440 samples | Classes: [2 3]

Client 9:
  Train: 2088 samples | Classes: [2 4]
  Val: 239 samples | Classes: [2 4]

Client 10:
  Train: 189 samples | Classes: [3 4]
  Val: 28 samples | Classes: [3 4]

=== Final Verification ===
Total training clients: 10
Test samples: 22544
Test features shape: (22544, 40)


In [5]:

# ## Cell5: Centralized Training (Label Skew Version)

import numpy as np
from sklearn.metrics import precision_score, recall_score, f1_score

def centralized_training(seed=42):
    tf.keras.utils.set_random_seed(seed)

    # 1. Combine all client partitions (NEW FOR LABEL SKEW)
    full_train = pd.concat(data_partitions).sample(frac=1, random_state=seed)
    full_train_features = full_train.drop('labels', axis=1).values.astype(np.float32)
    full_train_labels = full_train['labels'].values.astype(np.int32)

    # 2. Create model (same architecture as FL)
    model = tf.keras.Sequential([
        tf.keras.layers.Dense(128, activation='relu', input_shape=(40,)),
        tf.keras.layers.Dense(64, activation='relu'),
        tf.keras.layers.Dense(5, activation='softmax')
    ])

    # 3. Training with metrics tracking
    model.compile(optimizer='adam',
                 loss='sparse_categorical_crossentropy',
                 metrics=['accuracy'])

    history = model.fit(
        full_train_features, full_train_labels,
        epochs=30,
        batch_size=32,
        validation_data=(test_features, test_labels),  # Use your existing test data
        verbose=0
    )

    # 4. Final evaluation
    y_pred = np.argmax(model.predict(test_features), axis=1)

    return {
        'seed': seed,
        'train_loss': history.history['loss'],
        'val_loss': history.history['val_loss'],
        'test_accuracy': history.history['val_accuracy'][-1],
        'test_precision': precision_score(y_test, y_pred, average='macro', zero_division=0),
        'test_recall': recall_score(y_test, y_pred, average='macro', zero_division=0),
        'test_f1': f1_score(y_test, y_pred, average='macro', zero_division=0)
    }

# Run with multiple seeds
results_cl = [centralized_training(seed=s) for s in [42, 123, 456]]

# Generate report
report_cl = pd.DataFrame(results_cl)
print("\nCentralized Training Results (Label Skew Scenario):")
display(report_cl[['seed', 'test_accuracy', 'test_precision', 'test_recall', 'test_f1']])


Centralized Training Results (Label Skew Scenario):


Unnamed: 0,seed,test_accuracy,test_precision,test_recall,test_f1
0,42,0.929915,0.953275,0.670655,0.728932
1,123,0.924814,0.855777,0.692183,0.71754
2,456,0.933463,0.911781,0.724816,0.769453


In [6]:
# ## Cell6: Federated Learning with Statistical Significance
import tensorflow as tf
import tensorflow_federated as tff
import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
import collections
from scipy import stats

# ======================
# 1. Data Preprocessing
# ======================

def preprocess(dataset):
    def batch_format_fn(features, labels):
        return collections.OrderedDict(
            x=tf.reshape(features, [-1, 40]),  # Flatten features
            y=tf.reshape(labels, [-1])  # Reshape labels
        )
    padded_shapes = ([None, 40], [None])
    return dataset.padded_batch(32, padded_shapes=padded_shapes).map(batch_format_fn).prefetch(tf.data.experimental.AUTOTUNE)

# ======================
# 2. Core FL Functions
# ======================

def create_keras_model():
    return tf.keras.Sequential([
        tf.keras.layers.InputLayer(input_shape=(40,)),
        tf.keras.layers.Dense(128, activation='relu'),
        tf.keras.layers.Dense(64, activation='relu'),
        tf.keras.layers.Dense(5, activation='softmax')
    ])

def make_federated_data(client_data, client_ids):
    """Global function to create federated datasets"""
    return [
        preprocess(client_data[i])
        for i in client_ids
        if len(list(client_data[i])) > 0  # Filter empty datasets
    ]

def run_fl_trial(seed=42, num_rounds=30):
    """Run complete FL pipeline with specified seed"""
    # Set all seeds
    tf.keras.utils.set_random_seed(seed)
    np.random.seed(seed)

    # Model function
    def model_fn():
        return tff.learning.models.from_keras_model(
            create_keras_model(),
            input_spec=federated_train_data[0].element_spec,
            loss=tf.keras.losses.SparseCategoricalCrossentropy(),
            metrics=[tf.keras.metrics.SparseCategoricalAccuracy()]
        )

    # Build training process
    training_process = tff.learning.algorithms.build_weighted_fed_avg(
        model_fn,
        client_optimizer_fn=lambda: tf.keras.optimizers.Adam(0.001),
        server_optimizer_fn=lambda: tf.keras.optimizers.Adam(0.01)
    )

    # Training loop
    state = training_process.initialize()
    for _ in range(num_rounds):
        state = training_process.next(state, federated_train_data).state

    # Evaluation
    eval_model = create_keras_model()
    eval_model.set_weights(list(training_process.get_model_weights(state).trainable))
    y_pred = np.argmax(eval_model.predict(test_features), axis=1)


    return {
        'accuracy': accuracy_score(y_test, y_pred),
        'precision': precision_score(y_test, y_pred, average='macro', zero_division=0),
        'recall': recall_score(y_test, y_pred, average='macro', zero_division=0),
        'f1': f1_score(y_test, y_pred, average='macro', zero_division=0)
    }

# ======================
# 3. Execution & Analysis
# ======================

# Configuration
NUM_CLIENTS = 10
SEEDS = [42, 123, 456]

# Create federated data (ensure train_datasets exists)
federated_train_data = make_federated_data(train_datasets, list(range(NUM_CLIENTS)))

# Run trials
fl_results = [run_fl_trial(seed=s) for s in SEEDS]

# Statistical comparison with centralized results (assuming results_cl exists)
def print_stat_comparison(cl_results, fl_results):
    for metric in ['accuracy', 'precision', 'recall', 'f1']:
        cl_values = [r[f'test_{metric}'] for r in cl_results]
        fl_values = [r[metric] for r in fl_results]
        t_stat, p_value = stats.ttest_ind(cl_values, fl_values)

        print(f"\n{metric.upper():<10} CL: {np.mean(cl_values):.4f} ± {np.std(cl_values):.4f}")
        print(f"{'FL:':<10} {np.mean(fl_values):.4f} ± {np.std(fl_values):.4f}")
        print(f"{'p-value:':<10} {p_value:.4e}{'*' if p_value < 0.05 else ''}")

print("\n=== Statistical Significance ===")
print_stat_comparison(results_cl, fl_results)


=== Statistical Significance ===

ACCURACY   CL: 0.9294 ± 0.0036
FL:        0.8395 ± 0.0114
p-value:   4.3965e-04*

PRECISION  CL: 0.9069 ± 0.0399
FL:        0.4738 ± 0.0906
p-value:   3.4658e-03*

RECALL     CL: 0.6959 ± 0.0223
FL:        0.4063 ± 0.0186
p-value:   1.4659e-04*

F1         CL: 0.7386 ± 0.0223
FL:        0.3991 ± 0.0293
p-value:   1.9983e-04*


In [7]:
import numpy as np
unique_labels, counts = np.unique(y_test, return_counts=True)
for label, count in zip(unique_labels, counts):
    print(f"Class {label}: {count} samples ({count/len(y_test):.1%})")

Class 0: 11245 samples (49.9%)
Class 1: 8095 samples (35.9%)
Class 2: 2157 samples (9.6%)
Class 3: 1009 samples (4.5%)
Class 4: 38 samples (0.2%)


In [22]:
# ## Cell5: Enhanced Centralized Training with Full Metrics
import tensorflow as tf
import numpy as np
import pandas as pd
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score
import matplotlib.pyplot as plt

def centralized_training(seed=42, epochs=30):
    tf.keras.utils.set_random_seed(seed)

    # 1. Combine all client data
    full_train = pd.concat(data_partitions).sample(frac=1, random_state=seed)
    X_train = full_train.drop('labels', axis=1).values.astype(np.float32)
    y_train = full_train['labels'].values.astype(np.int32)

    # 2. Model definition
    model = tf.keras.Sequential([
        tf.keras.layers.Dense(128, activation='relu', input_shape=(40,)),
        tf.keras.layers.Dense(64, activation='relu'),
        tf.keras.layers.Dense(5, activation='softmax')
    ])

    model.compile(
        optimizer=tf.keras.optimizers.Adam(0.001),
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy']
    )

    # 3. Custom callback for metrics
    class CLMetrics(tf.keras.callbacks.Callback):
        def on_epoch_end(self, epoch, logs=None):
            # Test set evaluation
            y_pred = np.argmax(model.predict(test_features), axis=1)
            logs['test_precision'] = precision_score(y_test, y_pred, average='macro', zero_division=0)
            logs['test_recall'] = recall_score(y_test, y_pred, average='macro', zero_division=0)
            logs['test_f1'] = f1_score(y_test, y_pred, average='macro', zero_division=0)

            # Interval reporting
            if (epoch+1) % 5 == 0:
                print(f"\nEpoch {epoch+1}/{epochs}:")
                print(f"Train Loss: {logs['loss']:.4f} | Test Loss: {logs['val_loss']:.4f}")
                print(f"Accuracy: {logs['val_accuracy']:.4f} | F1: {logs['test_f1']:.4f}")

    # 4. Training
    history = model.fit(
        X_train, y_train,
        epochs=epochs,
        batch_size=32,
        validation_data=(test_features, test_labels),
        callbacks=[CLMetrics()],
        verbose=0
    )

    # 5. Calculate RMSE
    train_rmse = np.sqrt(np.mean(np.square(history.history['loss'])))
    test_rmse = np.sqrt(np.mean(np.square(history.history['val_loss'])))

    return {
        'train_loss': history.history['loss'],
        'test_loss': history.history['val_loss'],
        'test_accuracy': history.history['val_accuracy'],
        'test_precision': [history.history[f'test_precision'][i] for i in range(epochs)],
        'test_recall': [history.history[f'test_recall'][i] for i in range(epochs)],
        'test_f1': [history.history[f'test_f1'][i] for i in range(epochs)],
        'train_rmse': train_rmse,
        'test_rmse': test_rmse
    }

# Run centralized training
cl_results = centralized_training()
print(f"\nFinal RMSE - Train: {cl_results['train_rmse']:.4f}, Test: {cl_results['test_rmse']:.4f}")


Epoch 5/30:
Train Loss: 0.0260 | Test Loss: 0.7114
Accuracy: 0.9142 | F1: 0.7121

Epoch 10/30:
Train Loss: 0.0190 | Test Loss: 0.7805
Accuracy: 0.9269 | F1: 0.7333

Epoch 15/30:
Train Loss: 0.0148 | Test Loss: 0.8244
Accuracy: 0.9291 | F1: 0.7439

Epoch 20/30:
Train Loss: 0.0129 | Test Loss: 1.0537
Accuracy: 0.9299 | F1: 0.7547

Epoch 25/30:
Train Loss: 0.0124 | Test Loss: 1.0186
Accuracy: 0.9316 | F1: 0.7400

Epoch 30/30:
Train Loss: 0.0118 | Test Loss: 1.2363
Accuracy: 0.9299 | F1: 0.7289

Final RMSE - Train: 0.0269, Test: 0.8993


In [23]:
# ## Cell6: Federated Learning with Client Metrics (Fixed)
import tensorflow_federated as tff
import time

def run_fl_trial(seed=42, num_rounds=30):
    tf.keras.utils.set_random_seed(seed)
    np.random.seed(seed)

    # 1. Initialize TFF process
    def model_fn():
        keras_model = create_keras_model()
        return tff.learning.models.from_keras_model(
            keras_model,
            input_spec=federated_train_data[0].element_spec,
            loss=tf.keras.losses.SparseCategoricalCrossentropy(),
            metrics=[tf.keras.metrics.SparseCategoricalAccuracy()]
        )

    training_process = tff.learning.algorithms.build_weighted_fed_avg(
        model_fn,
        client_optimizer_fn=lambda: tf.keras.optimizers.Adam(0.001),
        server_optimizer_fn=lambda: tf.keras.optimizers.Adam(0.01)
    )

    state = training_process.initialize()

    # 2. Metric containers
    metrics = {
        'train_loss': [],
        'test_loss': [],
        'test_acc': [],
        'test_prec': [],
        'test_rec': [],
        'test_f1': [],
        'client_metrics': {cid: [] for cid in range(NUM_CLIENTS)}
    }

    # 3. TFF-compatible evaluation function
    @tf.function
    def evaluate_model(model, dataset):
        metric = tf.keras.metrics.SparseCategoricalCrossentropy()
        acc_metric = tf.keras.metrics.SparseCategoricalAccuracy()
        for batch in dataset:
            preds = model(batch['x'])
            metric.update_state(batch['y'], preds)
            acc_metric.update_state(batch['y'], preds)
        return metric.result().numpy(), acc_metric.result().numpy()

    # 4. Training loop
    for round_num in range(num_rounds):
        # 4a. Train one round
        result = training_process.next(state, federated_train_data)
        state = result.state
        metrics['train_loss'].append(result.metrics['client_work']['train']['loss'])

        # 4b. Global evaluation
        global_model = training_process.get_model_weights(state)
        keras_model = create_keras_model()
        keras_model.set_weights(global_model)

        # Convert test data to TFF-compatible format
        test_dataset = tf.data.Dataset.from_tensor_slices(
            collections.OrderedDict(x=test_features, y=test_labels)
        ).batch(32)
        test_loss, test_acc = evaluate_model(keras_model, test_dataset)

        y_pred = np.argmax(keras_model.predict(test_features), axis=1)
        metrics['test_loss'].append(test_loss)
        metrics['test_acc'].append(test_acc)
        metrics['test_prec'].append(precision_score(y_test, y_pred, average='macro', zero_division=0))
        metrics['test_rec'].append(recall_score(y_test, y_pred, average='macro', zero_division=0))
        metrics['test_f1'].append(f1_score(y_test, y_pred, average='macro', zero_division=0))

        # 4c. Per-client evaluation
        for cid in range(NUM_CLIENTS):
            client_dataset = val_datasets[cid]
            client_loss, client_acc = evaluate_model(keras_model, client_dataset)

            y_pred_client = np.argmax(keras_model.predict(client_dataset), axis=1)
            y_true_client = np.concatenate([y for _, y in client_dataset])

            metrics['client_metrics'][cid].append({
                'accuracy': client_acc,
                'precision': precision_score(y_true_client, y_pred_client, average='macro', zero_division=0),
                'recall': recall_score(y_true_client, y_pred_client, average='macro', zero_division=0),
                'f1': f1_score(y_true_client, y_pred_client, average='macro', zero_division=0)
            })

        # 4d. Interval reporting
        if (round_num+1) % 5 == 0:
            print(f"\nRound {round_num+1}/{num_rounds}:")
            print(f"Global Test Loss: {metrics['test_loss'][-1]:.4f}")
            print(f"Global Accuracy: {metrics['test_acc'][-1]:.4f}")
            print(f"Client 0 F1: {metrics['client_metrics'][0][-1]['f1']:.4f}")

    # 5. Calculate RMSE
    metrics['train_rmse'] = np.sqrt(np.mean(np.square(metrics['train_loss']))
    metrics['test_rmse'] = np.sqrt(np.mean(np.square(metrics['test_loss']))

    return metrics

# Execute federated training
fl_results = run_fl_trial()
print(f"\nFinal RMSE: Train={fl_results['train_rmse']:.4f}, Test={fl_results['test_rmse']:.4f}")

RuntimeError: resource: Attempting to capture an EagerTensor without building a function.

In [None]:
# ## Cell7: Visualization of Results
def plot_results(cl_res, fl_res):
    plt.figure(figsize=(15, 6))

    # Loss curves
    plt.subplot(1, 2, 1)
    plt.plot(cl_res['train_loss'], label='CL Train')
    plt.plot(cl_res['test_loss'], label='CL Test')
    plt.plot(fl_res['train_loss'], '--', label='FL Train')
    plt.plot(fl_res['test_loss'], '--', label='FL Test')
    plt.title('Loss Curves')
    plt.xlabel('Rounds/Epochs')
    plt.ylabel('Loss')
    plt.legend()

    # Accuracy/F1 comparison
    plt.subplot(1, 2, 2)
    plt.plot(cl_res['test_accuracy'], label='CL Accuracy')
    plt.plot(cl_res['test_f1'], label='CL F1')
    plt.plot(fl_res['test_acc'], '--', label='FL Accuracy')
    plt.plot(fl_res['test_f1'], '--', label='FL F1')
    plt.title('Performance Metrics')
    plt.xlabel('Rounds/Epochs')
    plt.ylabel('Score')
    plt.legend()

    plt.tight_layout()
    plt.show()

# Generate plots
plot_results(cl_results, fl_results)