<a href="https://colab.research.google.com/github/shiri9/non-iid/blob/main/looped_labelskew.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Install TensorFlow and all dependencies explicitly compatible with TFF 0.81.0 (the version pinned below)
%pip install tensorflow==2.15.0
%pip install tensorflow-federated==0.81.0
%pip install tensorflow-privacy==0.9.0
%pip install tensorflow-model-optimization==0.7.5
%pip install jax==0.4.14 jaxlib==0.4.14
%pip install google-vizier==0.1.11
%pip install dp-accounting==0.4.3
%pip install portpicker==1.6.0
%pip install scipy==1.9.3
%pip install numpy==1.25.2
%pip install protobuf==3.20.3
%pip install typing-extensions==4.7.1
%pip install googleapis-common-protos==1.61.0
%pip install dm-tree==0.1.8

In [None]:
# Delete the `jax_plugins` directory from the runtime's Python 3.11 dist-packages.
# NOTE(review): presumably this stops the pinned jax/jaxlib 0.4.14 from loading plugins
# that were built for the newer JAX preinstalled on the runtime — confirm.
# The path is hard-coded to python3.11; because of `-f`, the command does nothing
# (and reports nothing) when the directory is absent, e.g. on a different Python version.
!rm -rf /usr/local/lib/python3.11/dist-packages/jax_plugins

In [None]:
# Verify the environment: import both libraries and report the versions that were
# actually loaded, so a mismatch with the pins in the install cell is visible at once.
import tensorflow as tf
import tensorflow_federated as tff

print("TF version:", tf.__version__)
print("TFF version:", tff.__version__)

In [None]:
# ## Cell 1: Data Loading & Preprocessing

import tensorflow as tf
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from google.colab import drive

# 1. Mount Google Drive to access data files.
#    This only works inside Google Colab (`google.colab.drive`); the paths below are
#    absolute locations inside the mounted drive.
drive.mount('/content/drive')

# 2. Load the raw KDD train/test CSVs.
#    Later steps in this cell expect a 'labels' column holding the attack name and the
#    'protocol_type', 'service' and 'flag' categorical columns.
df_train = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/kdd_train.csv')
df_test  = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/kdd_test.csv')

# 3. Map attack labels into 5 integer categories.
#    The codes are defined by the dictionary below:
#      0: normal
#      1: DoS
#      2: Probe
#      3: remote-access attacks (R2L-style: phf, guess_passwd, warezclient, ...)
#      4: privilege-escalation attacks (U2R-style: buffer_overflow, rootkit, ...)
#    Note that 3 is the R2L group and 4 is the U2R group, not the other way round.
attack_mapping = {
    # 0: normal traffic
    'normal': 0,
    # 1: DoS
    'neptune': 1, 'land': 1, 'back': 1, 'teardrop': 1, 'pod': 1, 'smurf': 1,
    'mailbomb': 1, 'apache2': 1, 'processtable': 1,
    # 2: Probe
    'ipsweep': 2, 'nmap': 2, 'portsweep': 2, 'satan': 2,
    'mscan': 2, 'saint': 2,
    # 3: remote-access (R2L-style)
    'phf': 3, 'multihop': 3, 'warezclient': 3, 'warezmaster': 3,
    'spy': 3, 'ftp_write': 3, 'guess_passwd': 3, 'imap': 3,
    'snmpgetattack': 3, 'snmpguess': 3,
    'xlock': 3, 'xsnoop': 3, 'httptunnel': 3,
    'sendmail': 3, 'named': 3,
    # 4: privilege-escalation (U2R-style)
    'buffer_overflow': 4, 'loadmodule': 4, 'perl': 4, 'rootkit': 4,
    'ps': 4, 'xterm': 4
}

# 4. Apply the mapping
df_train['labels'] = df_train['labels'].replace(attack_mapping)
df_test['labels']  = df_test['labels'].replace(attack_mapping)

# 4.1. Fail fast on attack names that are missing from attack_mapping.
#      `.replace` leaves unknown values untouched, so without this check a stray string
#      label would only surface later as an opaque error (np.unique on mixed int/str,
#      or the int32 cast of the test labels).
_valid_label_codes = set(attack_mapping.values())
for _split_name, _frame in (('train', df_train), ('test', df_test)):
    _unmapped = sorted(set(_frame['labels'].unique()) - _valid_label_codes, key=str)
    if _unmapped:
        raise ValueError(
            f"Unmapped attack labels in {_split_name} set: {_unmapped}. "
            "Add them to attack_mapping."
        )

# 4.2. Store the codes with an explicit integer dtype so later comparisons and casts do
#      not depend on how pandas infers the dtype after `.replace`.
df_train['labels'] = df_train['labels'].astype(np.int64)
df_test['labels']  = df_test['labels'].astype(np.int64)

# 5. Remove 'num_outbound_cmds' from whichever frame still carries it
#    (errors='ignore' turns the drop into a no-op when the column is absent).
df_train = df_train.drop(columns=['num_outbound_cmds'], errors='ignore')
df_test  = df_test.drop(columns=['num_outbound_cmds'], errors='ignore')

# 6. Integer-encode the categorical columns. Each encoder is fitted on the training
#    column only and then applied to both splits; the fitted encoders are kept by name.
categorical_columns = ['protocol_type', 'service', 'flag']
label_encoders = {}
for col in categorical_columns:
    le = LabelEncoder().fit(df_train[col])
    df_train[col] = le.transform(df_train[col])
    df_test[col]  = le.transform(df_test[col])
    label_encoders[col] = le

# 7. Min-max scale the listed numerical columns to [0, 1], using training-set ranges.
#    Columns that are not listed here are left as they are.
numerical_columns = [
    'duration', 'src_bytes', 'dst_bytes',
    'count', 'srv_count',
    'serror_rate', 'srv_serror_rate', 'same_srv_rate',
    'dst_host_count', 'dst_host_srv_count',
    'dst_host_same_srv_rate', 'dst_host_diff_srv_rate',
    'dst_host_same_src_port_rate', 'dst_host_serror_rate',
    'dst_host_srv_serror_rate',
    'rerror_rate', 'srv_rerror_rate',
    'diff_srv_rate', 'srv_diff_host_rate', 'dst_host_srv_diff_host_rate',
    'dst_host_rerror_rate', 'dst_host_srv_rerror_rate',
    'hot', 'num_compromised', 'num_root'
]
scaler = MinMaxScaler().fit(df_train[numerical_columns])
df_train[numerical_columns] = scaler.transform(df_train[numerical_columns])
df_test[numerical_columns]  = scaler.transform(df_test[numerical_columns])

# 8. Central test set used for the final evaluation: float32 features, int32 labels.
#    X_test / y_test are kept as aliases of the same arrays.
test_features = df_test.drop(columns=['labels']).values.astype(np.float32)
test_labels   = df_test['labels'].values.astype(np.int32)
X_test, y_test = test_features, test_labels

# 9. Print verification
print("Unique labels in train after mapping:", np.unique(df_train['labels']))
print("Unique labels in test after mapping: ", np.unique(df_test['labels']))
print("Test features shape:", test_features.shape)
print("Test labels shape:", test_labels.shape)

In [None]:
# ## Cell 2 (Updated): Label‐Skew Partitioning with Variable Class Proportions

import numpy as np
import pandas as pd

# 1) How many clients and the minimum samples per class to guarantee.
#    Note: the experiment cell (Cell 6) passes the literals 10 and 50 directly instead of
#    reading these two names, so changing them here does not change those runs.
num_partitions = 10
min_samples_per_class = 50  # each client gets at least this many samples per allowed label

# 2) Mapping of client id -> list of class labels that client is allowed to observe.
#    Here every client sees exactly two of the five classes (0..4), and the ten clients
#    cover all ten unordered label pairs. Cell 6 redefines an identical dictionary.
client_class_map = {
    0: [0, 1],   # client 0 sees only labels 0 & 1
    1: [0, 2],   # client 1 sees only labels 0 & 2
    2: [0, 3],   # client 2 sees only labels 0 & 3
    3: [0, 4],   # client 3 sees only labels 0 & 4
    4: [1, 2],   # client 4 sees only labels 1 & 2
    5: [1, 3],   # client 5 sees only labels 1 & 3
    6: [1, 4],   # client 6 sees only labels 1 & 4
    7: [2, 3],   # client 7 sees only labels 2 & 3
    8: [2, 4],   # client 8 sees only labels 2 & 4
    9: [3, 4]    # client 9 sees only labels 3 & 4
}

def create_label_skew_partitions(
    client_class_map: dict,
    seed: int,
    num_clients: int = 10,
    min_samples_per_class: int = 50,
    prop_low: float = 0.05,
    prop_high: float = 0.5,
    data: "pd.DataFrame | None" = None
):
    """
    Build one label-skewed DataFrame per client.

    Client i only receives rows whose 'labels' value is listed in client_class_map[i].
    For every allowed label a proportion p ~ Uniform(prop_low, prop_high) of that class
    is drawn, the resulting count is raised to at least `min_samples_per_class` and
    capped at the number of rows available, and that many rows are sampled without
    replacement. Each client's rows are then shuffled.

    Every client samples from the full class pool independently, so the same row can
    appear in more than one client's frame.

    Reproducibility: each (seed, client_id, label) triple gets its own random stream
    derived with numpy's SeedSequence. Summing the three numbers instead would make
    different triples share a stream (e.g. client 0 / label 1 and client 1 / label 0)
    and therefore draw identical proportions.

    Args:
        client_class_map: dict mapping client id (0..num_clients-1) to a non-empty
            list of integer labels.
        seed: non-negative integer base seed.
        num_clients: number of client frames to build.
        min_samples_per_class: lower bound on rows taken per allowed label
            (still capped by availability).
        prop_low, prop_high: bounds of the uniform proportion draw.
        data: frame with a 'labels' column to sample from. Defaults to the
            notebook-level `df_train` when omitted.

    Returns:
        List of `num_clients` pandas DataFrames. A client whose allowed labels are all
        absent from `data` gets an empty frame with the same columns as `data`.

    Raises:
        ValueError: if a client id has no (or an empty) entry in client_class_map.
    """
    if data is None:
        # Fall back to the training frame prepared by the data-loading cell.
        data = df_train

    # Kept for backward compatibility: earlier versions also reset numpy's global RNG.
    # The sampling below does not read the global RNG.
    np.random.seed(seed)
    data_partitions = []

    for client_id in range(num_clients):
        allowed_labels = client_class_map.get(client_id, [])
        if not allowed_labels:
            raise ValueError(f"Client {client_id} has no allowed labels in client_class_map.")

        sampled_frames = []
        for label in allowed_labels:
            class_data = data[data["labels"] == label]
            num_available = len(class_data)
            if num_available == 0:
                # This label does not exist in the data at all; nothing to sample.
                continue

            # Independent, collision-free stream for this (seed, client, label) triple.
            stream_seed = int(
                np.random.SeedSequence(
                    [int(seed), int(client_id), int(label)]
                ).generate_state(1)[0]
            )
            rand_gen = np.random.RandomState(stream_seed)

            # Draw the class proportion, then clamp the count to
            # [min_samples_per_class, num_available] (availability wins).
            p = rand_gen.uniform(prop_low, prop_high)
            n_prop = int(np.floor(p * num_available))
            n_take = min(max(min_samples_per_class, n_prop), num_available)

            sampled_frames.append(
                class_data.sample(n=n_take, replace=False, random_state=rand_gen)
            )

        if sampled_frames:
            # Concatenate once (instead of growing a frame inside the loop), then shuffle.
            partition = pd.concat(sampled_frames, ignore_index=True)
            partition = partition.sample(
                frac=1, random_state=(seed + client_id)
            ).reset_index(drop=True)
        else:
            # Keep the schema so downstream code can still access partition["labels"].
            partition = data.iloc[0:0].copy().reset_index(drop=True)
        data_partitions.append(partition)

        # Print the resulting counts per label for verification.
        print(f"Client {client_id} sees labels {allowed_labels} and got:")
        print(partition["labels"].value_counts().sort_index())
        print()

    return data_partitions

# Example usage to test:
# partitions_label_skew = create_label_skew_partitions(client_class_map, seed=42, num_clients=10, min_samples_per_class=50)
# for i, part in enumerate(partitions_label_skew):
#     print(f"Partition {i} size: {len(part)} samples")


In [None]:
# ## Cell 3: Build TensorFlow Datasets (90/10 split per client)

import numpy as np
import tensorflow as tf

def build_client_datasets(data_partitions: list, batch_size: int = 32, seed: int = 42):
    """
    Turn each client's DataFrame into a batched train / validation `tf.data.Dataset`.

    For every client the rows are shuffled reproducibly (random_state = seed + client
    index) and split 90/10: the first floor(0.9 * total) rows become the training set
    and the remainder the validation set. Features are all columns except 'labels'
    (cast to float32); labels are cast to int32.

    The split size is computed as (total * 9) // 10. Computing it as (total // 10) * 9
    rounds down to a multiple of nine first, which gives a 9/10 split for 19 rows and
    no training rows at all for fewer than 10 rows.

    Args:
        data_partitions: list of pandas DataFrames, one per client, each with a
            'labels' column.
        batch_size: batch size applied to both datasets.
        seed: base seed for the pandas shuffle and the tf.data shuffle.

    Returns:
        (train_datasets, val_datasets): two lists with one dataset per client. The
        training dataset is shuffled over its whole length before batching; the
        validation dataset is only batched.
    """
    train_datasets = []
    val_datasets   = []

    for client_id, partition in enumerate(data_partitions):
        # 1. 90/10 split sizes (floor of 90% goes to training).
        total = len(partition)
        n_train = (total * 9) // 10

        # 2. Shuffle with a reproducible, per-client seed, then cut.
        shuffled = partition.sample(frac=1, random_state=seed + client_id).reset_index(drop=True)
        df_train_part = shuffled.iloc[:n_train]
        df_val_part   = shuffled.iloc[n_train:]

        # 3. Extract features & labels
        X_tr = df_train_part.drop(columns=['labels']).values.astype(np.float32)
        y_tr = df_train_part['labels'].values.astype(np.int32)
        X_va = df_val_part.drop(columns=['labels']).values.astype(np.float32)
        y_va = df_val_part['labels'].values.astype(np.int32)

        # 4. Create TensorFlow datasets (batched). `shuffle` rejects buffer_size=0,
        #    so the shuffle step is skipped for a client without training rows.
        ds_train = tf.data.Dataset.from_tensor_slices((X_tr, y_tr))
        if n_train > 0:
            ds_train = ds_train.shuffle(buffer_size=n_train, seed=seed + client_id)
        ds_train = ds_train.batch(batch_size)
        ds_val   = tf.data.Dataset.from_tensor_slices((X_va, y_va)).batch(batch_size)

        train_datasets.append(ds_train)
        val_datasets.append(ds_val)

    return train_datasets, val_datasets

# Example usage/test:
# data_parts = create_label_skew_partitions(client_class_map, seed=42)
# train_ds_list, val_ds_list = build_client_datasets(data_parts, batch_size=32, seed=42)
# for i, ds in enumerate(train_ds_list):
#     print(f"Client {i} train batches:", ds)

In [None]:
# ## Cell 4: Define Two Architectures

import tensorflow as tf

def build_simple_model():
    """
    Small MLP classifier.

    Layout: 40 input features -> Dense(128, relu) -> Dense(64, relu) -> Dense(5, softmax).
    The model is returned uncompiled.
    """
    hidden_units = (128, 64)

    layers = [tf.keras.layers.InputLayer(input_shape=(40,))]
    layers += [tf.keras.layers.Dense(units, activation='relu') for units in hidden_units]
    # Output layer: one softmax probability per class.
    layers.append(tf.keras.layers.Dense(5, activation='softmax'))

    return tf.keras.Sequential(layers)

def build_complex_model():
    """
    Deeper MLP classifier.

    Layout: 40 input features -> Dense(256, relu) -> Dense(128, relu) -> Dense(64, relu)
    -> Dense(5, softmax). The model is returned uncompiled.
    """
    hidden_units = (256, 128, 64)

    layers = [tf.keras.layers.InputLayer(input_shape=(40,))]
    layers += [tf.keras.layers.Dense(units, activation='relu') for units in hidden_units]
    # Output layer: one softmax probability per class.
    layers.append(tf.keras.layers.Dense(5, activation='softmax'))

    return tf.keras.Sequential(layers)

# Quick check of parameter counts:
# m1 = build_simple_model()
# m2 = build_complex_model()
# print("Simple params:", m1.count_params())
# print("Complex params:", m2.count_params())

In [None]:
# ## Cell 5: Federated Learning (FedAvg) with Dropout and Metric Logging

import tensorflow as tf
import tensorflow_federated as tff
import numpy as np
import time
import collections
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# 5.1. Helper to convert a batched tf.data.Dataset to (X_all, y_all) NumPy arrays
def dataset_to_numpy(dataset: "tf.data.Dataset"):
    """
    Collect a batched dataset of (features, labels) into two NumPy arrays.

    The dataset is consumed batch by batch and the batches are concatenated along the
    first axis, which yields the same arrays as unbatching and stacking every example
    but needs one Python iteration per batch instead of one per example.

    Args:
        dataset: iterable of (features_batch, labels_batch) pairs, e.g. a batched
            `tf.data.Dataset` iterated eagerly. Each batch must carry a leading
            batch dimension.

    Returns:
        (X, y): features of shape (num_examples, num_features) and labels of shape
        (num_examples,). For a dataset without any batch, empty arrays of shape
        (0, 40) float32 and (0,) int32 are returned.
    """
    feature_batches = []
    label_batches = []
    for x_b, y_b in dataset:
        # np.asarray converts eager tensors (and leaves NumPy arrays untouched).
        feature_batches.append(np.asarray(x_b))
        label_batches.append(np.asarray(y_b))

    if not feature_batches:
        return np.zeros((0, 40), dtype=np.float32), np.zeros((0,), dtype=np.int32)

    X_np = np.concatenate(feature_batches, axis=0)
    y_np = np.concatenate(label_batches, axis=0)
    return X_np, y_np

# 5.2. Main function to run FedAvg for one configuration
def _macro_classification_metrics(y_true, y_pred):
    """Return (accuracy, macro precision, macro recall, macro F1) for hard predictions."""
    return (
        accuracy_score(y_true, y_pred),
        precision_score(y_true, y_pred, average='macro', zero_division=0),
        recall_score(y_true, y_pred, average='macro', zero_division=0),
        f1_score(y_true, y_pred, average='macro', zero_division=0),
    )


def run_fedavg(
    seed: int,
    dropout_rate: float,
    architecture: str,
    train_datasets: list,
    val_datasets: list,
    test_features: np.ndarray,
    test_labels: np.ndarray,
    num_clients: int = 10,
    num_rounds: int = 30,
    batch_size: int = 32
):
    """
    Run one federated-averaging experiment and evaluate the resulting global model.

    Args:
        seed: random seed applied to TensorFlow/Keras and NumPy.
        dropout_rate: fraction of clients left out of every round; the remaining
            floor(num_clients * (1 - dropout_rate)) clients are drawn at random,
            without replacement, independently for each round.
        architecture: "simple" selects build_simple_model; any other value selects
            build_complex_model.
        train_datasets: per-client batched tf.data.Dataset of (features, labels).
        val_datasets: per-client batched validation datasets (same element layout).
        test_features, test_labels: central test set as NumPy arrays.
        num_clients: number of clients to draw from (indices 0..num_clients-1 of
            train_datasets).
        num_rounds: number of federated rounds.
        batch_size: batch size used for Keras `predict` during evaluation.

    Returns:
        global_metrics: dict with the configuration, wall-clock training time, model
            size, accumulated communication cost, per-round client training loss and
            the final test loss / accuracy / macro precision / recall / F1.
        local_metrics: dict mapping client index -> accuracy and macro precision /
            recall / F1 of the final global model on that client's validation set
            (NaN for clients without validation samples).

    Raises:
        ValueError: if dropout_rate leaves no participating client.

    Notes:
        - The models end in a softmax layer, so `predict` returns class probabilities;
          the cross-entropy is therefore computed with its default from_logits=False.
        - NOTE(review): the server optimizer is Adam(0.01) rather than plain SGD with
          learning rate 1.0, so the server step is adaptive instead of a direct
          weighted average — confirm this is intended for a "FedAvg" baseline.
    """
    # 1. Set seeds
    tf.keras.utils.set_random_seed(seed)
    np.random.seed(seed)

    # 2. Choose model builder
    model_fn = build_simple_model if architecture == 'simple' else build_complex_model

    # 3. Compute model size in MB (float32: 4 bytes per weight)
    temp_model = model_fn()
    model_size_bytes = sum([tf.size(w).numpy() for w in temp_model.weights]) * 4
    model_size_mb = model_size_bytes / (1024 ** 2)

    # 4. Prepare federated_train_data for all clients
    def preprocess(dataset):
        """ Wraps each client's tf.data.Dataset into the TFF expected format. """
        def batch_format_fn(features, labels):
            return collections.OrderedDict(
                x=tf.reshape(features, [-1, 40]),
                y=tf.reshape(labels, [-1])
            )
        return dataset.map(batch_format_fn).prefetch(tf.data.AUTOTUNE)

    federated_train_data = [preprocess(ds) for ds in train_datasets]

    # 5. Define TFF model function
    def tff_model_fn():
        keras_model = model_fn()
        return tff.learning.models.from_keras_model(
            keras_model,
            input_spec=federated_train_data[0].element_spec,
            loss=tf.keras.losses.SparseCategoricalCrossentropy(),
            metrics=[tf.keras.metrics.SparseCategoricalAccuracy()]
        )

    # 6. Build the Federated Averaging process
    iterative_process = tff.learning.algorithms.build_weighted_fed_avg(
        model_fn=tff_model_fn,
        client_optimizer_fn=lambda: tf.keras.optimizers.Adam(0.001),
        server_optimizer_fn=lambda: tf.keras.optimizers.Adam(0.01)
    )

    def load_global_model(server_state):
        """Build a fresh Keras model carrying the current global weights."""
        keras_model = model_fn()
        # assign_weights_to copies trainable and non-trainable weights alike, so it
        # stays correct if a builder ever gains layers with non-trainable state.
        iterative_process.get_model_weights(server_state).assign_weights_to(keras_model)
        return keras_model

    state = iterative_process.initialize()
    train_losses = []            # store average client train loss each round
    comm_cost_mb = 0.0           # total communication cost (MB)

    # 7. Precompute client subsets for each round (dropout simulation).
    #    The small epsilon guards against float error: 10 * (1.0 - 0.9) evaluates to
    #    0.9999999999999998, which a bare int() would truncate to 0 clients.
    clients_per_round = int(np.floor(num_clients * (1.0 - dropout_rate) + 1e-9))
    if clients_per_round < 1:
        raise ValueError("dropout_rate is too large: no clients would remain.")

    client_subsets = []
    for r in range(num_rounds):
        chosen = np.random.choice(
            np.arange(num_clients),
            size=clients_per_round,
            replace=False
        )
        client_subsets.append(chosen.tolist())

    # 8. Training loop
    start_time = time.time()
    for round_idx in range(num_rounds):
        participating = client_subsets[round_idx]
        fed_data = [federated_train_data[i] for i in participating]

        result = iterative_process.next(state, fed_data)
        state = result.state
        train_loss = result.metrics['client_work']['train']['loss']
        train_losses.append(train_loss)

        # Communication cost: each participating client downloads + uploads model once per round
        comm_cost_mb += model_size_mb * len(participating) * 2

        # Progress report every 5 rounds: evaluate the current global model on the
        # central test set.
        if (round_idx + 1) % 5 == 0:
            eval_model = load_global_model(state)
            probs = eval_model.predict(test_features, batch_size=batch_size)
            y_pred = np.argmax(probs, axis=1)
            acc, prec, rec, f1 = _macro_classification_metrics(test_labels, y_pred)
            test_loss = tf.keras.losses.sparse_categorical_crossentropy(test_labels, probs).numpy().mean()

            print(f"\n[Round {round_idx+1}/{num_rounds}] "
                  f"Client-avg Train Loss: {train_loss:.4f} | Test Loss: {test_loss:.4f} | "
                  f"Acc: {acc:.4f} | F1: {f1:.4f}")

    train_time = time.time() - start_time

    # 9. Final global evaluation on central test set
    final_model = load_global_model(state)
    final_probs = final_model.predict(test_features, batch_size=batch_size)
    final_pred  = np.argmax(final_probs, axis=1)
    final_test_loss = tf.keras.losses.sparse_categorical_crossentropy(test_labels, final_probs).numpy().mean()
    final_acc, final_prec, final_rec, final_f1 = _macro_classification_metrics(test_labels, final_pred)

    global_metrics = {
        'seed': seed,
        'dropout_rate': dropout_rate,
        'architecture': architecture,
        'train_time_sec': train_time,
        'model_size_mb': model_size_mb,
        'comm_cost_mb': comm_cost_mb,
        'train_loss_curve': train_losses,        # list of length num_rounds
        'final_test_loss': final_test_loss,
        'accuracy': final_acc,
        'precision': final_prec,
        'recall': final_rec,
        'f1_score': final_f1
    }

    # 10. Local evaluation: apply the final global model on each client's validation set
    local_metrics = {}
    for client_id, val_ds in enumerate(val_datasets):
        X_val, y_val = dataset_to_numpy(val_ds)
        if X_val.shape[0] == 0:
            # If a client had zero validation samples, record NaNs
            local_metrics[client_id] = {
                'accuracy': np.nan,
                'precision': np.nan,
                'recall': np.nan,
                'f1_score': np.nan
            }
            continue

        y_pred_loc = np.argmax(final_model.predict(X_val, batch_size=batch_size), axis=1)
        acc_loc, prec_loc, rec_loc, f1_loc = _macro_classification_metrics(y_val, y_pred_loc)

        local_metrics[client_id] = {
            'accuracy': acc_loc,
            'precision': prec_loc,
            'recall': rec_loc,
            'f1_score': f1_loc
        }

    return global_metrics, local_metrics


In [None]:
# ## Cell 6 (Label‐Skew Version, fixed): Execute Experiments and Collect Results

import pandas as pd

# 6.1. Experimental grid: every seed is combined with every dropout rate and architecture
seeds         = [42, 123, 456]
dropout_list  = [0.0, 0.1, 0.2]
architectures = ['simple', 'complex']

# 6.1.1. Client -> allowed labels (same content as the map defined in Cell 2)
client_class_map = {
    0: [0, 1], 1: [0, 2], 2: [0, 3], 3: [0, 4],
    4: [1, 2], 5: [1, 3], 6: [1, 4],
    7: [2, 3], 8: [2, 4],
    9: [3, 4]
}

# 6.2. Result containers: one row per run (global) and one row per run and client (local)
all_global_results = []
all_local_results  = []

# 6.3. Run the grid. Partitions and datasets depend only on the seed, so they are
#      built once per seed and reused for every dropout rate / architecture.
for seed in seeds:
    # 6.3.1. Label-skew partitions for this seed
    partitions = create_label_skew_partitions(
        client_class_map=client_class_map,
        seed=seed,
        num_clients=10,
        min_samples_per_class=50
    )

    # 6.3.2. Per-client train/val datasets
    train_ds_list, val_ds_list = build_client_datasets(
        data_partitions=partitions, batch_size=32, seed=seed
    )

    for dropout_rate in dropout_list:
        for arch in architectures:
            print(f"\n=== Running (Label‐Skew): seed={seed} | dropout={dropout_rate} | arch={arch} ===")

            # 6.3.3. One FedAvg run for this condition
            g_metrics, l_metrics = run_fedavg(
                seed=seed,
                dropout_rate=dropout_rate,
                architecture=arch,
                train_datasets=train_ds_list,
                val_datasets=val_ds_list,
                test_features=test_features,
                test_labels=test_labels,
                num_clients=10,
                num_rounds=30,
                batch_size=32
            )

            # Columns shared by the global row and every local row of this run
            condition = {'seed': seed, 'dropout_rate': dropout_rate, 'architecture': arch}

            # 6.3.4. Global row: condition columns followed by the run-level metrics
            global_metric_keys = (
                'train_time_sec', 'model_size_mb', 'comm_cost_mb', 'train_loss_curve',
                'final_test_loss', 'accuracy', 'precision', 'recall', 'f1_score'
            )
            all_global_results.append(
                {**condition, **{key: g_metrics[key] for key in global_metric_keys}}
            )

            # 6.3.5. Local rows: one per client, metric names prefixed with 'local_'
            local_metric_keys = ('accuracy', 'precision', 'recall', 'f1_score')
            all_local_results.extend(
                {
                    **condition,
                    'client_id': client_id,
                    **{f'local_{key}': client_metrics[key] for key in local_metric_keys}
                }
                for client_id, client_metrics in l_metrics.items()
            )

# 6.4. Convert to pandas DataFrames
df_global_results_label_skew = pd.DataFrame(all_global_results)
df_local_results_label_skew  = pd.DataFrame(all_local_results)

# 6.5. Show the first rows of each table
for heading, results_frame in (
    ("\n--- Global Results (Label‐Skew) Summary ---", df_global_results_label_skew),
    ("\n--- Local Results (Label‐Skew) Summary ---", df_local_results_label_skew),
):
    print(heading)
    display(results_frame.head())

# 6.6. Persist both tables to Google Drive as CSV (without the index column)
df_global_results_label_skew.to_csv(
    '/content/drive/MyDrive/Colab Notebooks/fedavg_global_results_label_skew.csv',
    index=False
)
df_local_results_label_skew.to_csv(
    '/content/drive/MyDrive/Colab Notebooks/fedavg_local_results_label_skew.csv',
    index=False
)

print("\nSaved:")
print("  • fedavg_global_results_label_skew.csv")
print("  • fedavg_local_results_label_skew.csv")


Client 0 sees labels [0, 1] and got:
labels
0    14717
1     4674
Name: count, dtype: int64

Client 1 sees labels [0, 2] and got:
labels
0    6853
2    5770
Name: count, dtype: int64

Client 2 sees labels [0, 3] and got:
labels
0    28666
3      100
Name: count, dtype: int64

Client 3 sees labels [0, 4] and got:
labels
0    33338
4       50
Name: count, dtype: int64

Client 4 sees labels [1, 2] and got:
labels
1    4641
2     674
Name: count, dtype: int64

Client 5 sees labels [1, 3] and got:
labels
1    2657
3     271
Name: count, dtype: int64

Client 6 sees labels [1, 4] and got:
labels
1    8516
4      50
Name: count, dtype: int64

Client 7 sees labels [2, 3] and got:
labels
2    4127
3     418
Name: count, dtype: int64

Client 8 sees labels [2, 4] and got:
labels
2    4900
4      50
Name: count, dtype: int64

Client 9 sees labels [3, 4] and got:
labels
3    237
4     50
Name: count, dtype: int64


=== Running (Label‐Skew): seed=42 | dropout=0.0 | arch=simple ===

[Round 5/30] Clien

Unnamed: 0,seed,dropout_rate,architecture,train_time_sec,model_size_mb,comm_cost_mb,train_loss_curve,final_test_loss,accuracy,precision,recall,f1_score
0,42,0.0,simple,149.856721,0.052753,31.652069,"[0.09131274, 0.19697416, 0.27290508, 0.2612557...",4.856946,0.528078,0.302174,0.216306,0.165928
1,42,0.0,complex,209.915879,0.198261,118.956757,"[0.08228732, 0.15350342, 0.23729135, 0.2792402...",1.024362,0.871318,0.550093,0.49083,0.507838
2,42,0.1,simple,143.526655,0.052753,28.486862,"[0.09300439, 0.2667412, 0.25028083, 0.25861675...",4.569065,0.526393,0.301012,0.215395,0.164301
3,42,0.1,complex,194.699502,0.198261,107.061081,"[0.08250925, 0.19678475, 0.2177858, 0.37897024...",5.848894,0.498802,0.09976,0.2,0.13312
4,42,0.2,simple,119.4008,0.052753,25.321655,"[0.11925745, 0.22849369, 0.23914647, 0.2449885...",4.08668,0.528744,0.483465,0.247243,0.214813



--- Local Results (Label‐Skew) Summary ---


Unnamed: 0,seed,dropout_rate,architecture,client_id,local_accuracy,local_precision,local_recall,local_f1_score
0,42,0.0,simple,0,0.791237,0.893196,0.548998,0.529462
1,42,0.0,simple,1,0.549407,0.183281,0.333333,0.236515
2,42,0.0,simple,2,0.995836,0.332061,0.333217,0.332638
3,42,0.0,simple,3,0.998805,0.499402,0.5,0.499701
4,42,0.0,simple,4,0.095149,0.333333,0.036325,0.065511



Saved:
  • fedavg_global_results_label_skew.csv
  • fedavg_local_results_label_skew.csv
