<a href="https://colab.research.google.com/github/sankeawthong/Project-1-Lita-Chatbot/blob/main/%5B20250605%5D%20FedAvg%20MLP-LSTM_logged_extended%20on%20WSN-BFSF)_Ver.2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

***20250605 FedAvg MLP-LSTM_logged_extended on WSN-BFSF***

In [None]:
import pandas as pd
import numpy as np
import time
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report, confusion_matrix, roc_auc_score
from keras.models import Sequential
from keras.layers import Dense, Dropout, LSTM, InputLayer
from keras.utils import to_categorical
from imblearn.over_sampling import SMOTE
import matplotlib.pyplot as plt
# --- Imports ---------------------------------------------------------------
import numpy as np, time, tensorflow as tf

In [None]:
!pip install imbalanced-learn



In [None]:
# ----------------------------
# Preprocessing and Partitioning
# ----------------------------
df = pd.read_csv("dataset.csv").dropna()
for col in df.columns:
    if df[col].dtype == 'object':
        df[col] = LabelEncoder().fit_transform(df[col])

X, y = df.drop('Class', axis=1), df['Class']
X = StandardScaler().fit_transform(X)
X, y = SMOTE(random_state=42).fit_resample(X, y)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=101, stratify=y)

In [None]:
# --- Federated parameters ---------------------------------------------------
NUM_CLIENTS = 5
ROUNDS       = 30
LOCAL_EPOCHS = 1
BATCH_SIZE   = 32

# --- Data -------------------------------------------------------------------
# X_train: (N, T, 1)  | y_train: (N,)
# Make sure X_train is already 3-D!
client_X = np.array_split(X_train, NUM_CLIENTS, axis=0)
client_y = np.array_split(y_train, NUM_CLIENTS, axis=0)

X_test   = X_test.reshape(X_test.shape[0], X_test.shape[1], 1)
y_test_ohe = to_categorical(y_test)
NUM_CLASSES = y_test_ohe.shape[1]
INPUT_SHAPE = (X_test.shape[1], 1)

  return bound(*args, **kwds)


In [None]:
# --- Model builder ----------------------------------------------------------
def build_model(input_shape=INPUT_SHAPE, num_classes=NUM_CLASSES,
                lstm_units=64, dense_units=(128, 64), drop=0.30):
    model = Sequential([InputLayer(input_shape=input_shape),
                        LSTM(lstm_units, activation='tanh')])
    for u in dense_units:
        model.add(Dense(u, activation='relu'))
        model.add(Dropout(drop))
    model.add(Dense(num_classes, activation='softmax'))
    model.compile(optimizer='adam',
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    return model

# --- Resource profile (lightweight) ----------------------------------------
global_model = build_model()
tot_params   = global_model.count_params()
model_sizeMB = round((tot_params * 4) / 2**20, 2)      # 4 bytes / param
COMM_MB_UP   = model_sizeMB * NUM_CLIENTS              # uplink only

print(f"[Profile] Params={tot_params:,}  Size={model_sizeMB} MB")

# --- Federated loop ---------------------------------------------------------
global_weights = global_model.get_weights()
history = []

for r in range(1, ROUNDS + 1):
    t0 = time.time()
    client_weights, client_sizes = [], []

    # -------- local training --------
    for Xi, yi in zip(client_X, client_y):
        local = build_model()
        local.set_weights(global_weights)
        local.fit(Xi, to_categorical(yi, NUM_CLASSES),
                  epochs=LOCAL_EPOCHS,
                  batch_size=BATCH_SIZE,
                  verbose=0)
        client_weights.append(local.get_weights())
        client_sizes.append(len(yi))

    # -------- FedAvg aggregation ----
    total_samples = sum(client_sizes)
    new_weights = []
    for layer in range(len(global_weights)):
        layer_updates = np.array([
            (n / total_samples) * w[layer]
            for w, n in zip(client_weights, client_sizes)
        ])
        new_weights.append(np.sum(layer_updates, axis=0))
    global_weights = new_weights
    global_model.set_weights(global_weights)

    # -------- Evaluation ------------
    y_prob = global_model.predict(X_test, verbose=0)
    y_pred = np.argmax(y_prob, axis=1)
    y_true = np.argmax(y_test_ohe, axis=1)

    metrics = dict(
        Round=r,
        Accuracy=accuracy_score(y_true, y_pred),
        Precision=precision_score(y_true, y_pred, average='weighted'),
        Recall=recall_score(y_true, y_pred, average='weighted'),
        F1=f1_score(y_true, y_pred, average='weighted'),
        AUC=roc_auc_score(y_test_ohe, y_prob, multi_class='ovr',
                          average='weighted'),
        Time_s=round(time.time() - t0, 2),
        Comm_MB=COMM_MB_UP           # add +downlink if desired
    )
    history.append(metrics)
    print(f"Round {r:02d}: acc={metrics['Accuracy']:.8f} "
          f"F1={metrics['F1']:.8f}  comm={metrics['Comm_MB']} MB")

# history is a list of dicts ready for a Pandas DataFrame



[Profile] Params=33,732  Size=0.13 MB
Round 01: acc=0.72720312 F1=0.72069752  comm=0.65 MB




Round 02: acc=0.89882586 F1=0.89580256  comm=0.65 MB




Round 03: acc=0.90703392 F1=0.90493488  comm=0.65 MB




Round 04: acc=0.91737722 F1=0.91577562  comm=0.65 MB




Round 05: acc=0.91646892 F1=0.91499060  comm=0.65 MB




Round 06: acc=0.92108655 F1=0.92016410  comm=0.65 MB




Round 07: acc=0.93126340 F1=0.93038754  comm=0.65 MB




Round 08: acc=0.93678934 F1=0.93614757  comm=0.65 MB




Round 09: acc=0.94703754 F1=0.94662367  comm=0.65 MB




Round 10: acc=0.94976246 F1=0.94953505  comm=0.65 MB




Round 11: acc=0.94993842 F1=0.94970717  comm=0.65 MB




Round 12: acc=0.94997646 F1=0.94965608  comm=0.65 MB




Round 13: acc=0.95675786 F1=0.95656873  comm=0.65 MB




Round 14: acc=0.95326729 F1=0.95312285  comm=0.65 MB




Round 15: acc=0.95410427 F1=0.95401469  comm=0.65 MB




Round 16: acc=0.96109492 F1=0.96095958  comm=0.65 MB




Round 17: acc=0.96185105 F1=0.96174760  comm=0.65 MB




Round 18: acc=0.96328722 F1=0.96318955  comm=0.65 MB




Round 19: acc=0.96446659 F1=0.96433432  comm=0.65 MB




Round 20: acc=0.96507530 F1=0.96494682  comm=0.65 MB




Round 21: acc=0.96789534 F1=0.96783136  comm=0.65 MB




Round 22: acc=0.96806654 F1=0.96796923  comm=0.65 MB




Round 23: acc=0.96699179 F1=0.96690488  comm=0.65 MB




Round 24: acc=0.96961685 F1=0.96952121  comm=0.65 MB




Round 25: acc=0.97122422 F1=0.97113694  comm=0.65 MB




Round 26: acc=0.96815689 F1=0.96806928  comm=0.65 MB




Round 27: acc=0.97049662 F1=0.97042434  comm=0.65 MB




Round 28: acc=0.97215631 F1=0.97207677  comm=0.65 MB




Round 29: acc=0.97212302 F1=0.97203837  comm=0.65 MB




Round 30: acc=0.97189475 F1=0.97181216  comm=0.65 MB


In [None]:
# ----------------------------
# Save Log
# ----------------------------
# Use the correctly named variable 'history' to create the DataFrame
history_df = pd.DataFrame(history)
# Correct the filename to match the download call
log_filename = "fedavg_training_and_resource_log_WSN-BFSF.csv"
# Use the correctly named DataFrame 'history_df' to save to CSV
history_df.to_csv(log_filename, index=False)
print(f"\nLog saved to {log_filename}")


# ----------------------------
# Download Log
# ----------------------------
from google.colab import files
# Use the corrected filename for download
files.download(log_filename)


Log saved to fedavg_training_and_resource_log_WSN-BFSF.csv


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
# prompt: Please display all the resource that used in this training model

print("Total parameters:", tot_params)
print("Model size:", model_sizeMB, "MB")
print("Communication per round (uplink):", COMM_MB_UP, "MB")
print("\nDetailed resource usage per round:")
print(history_df[['Round', 'Time_s', 'Comm_MB']])


Total parameters: 33732
Model size: 0.13 MB
Communication per round (uplink): 0.65 MB

Detailed resource usage per round:
    Round  Time_s  Comm_MB
0       1  162.83     0.65
1       2  162.68     0.65
2       3  164.52     0.65
3       4  175.76     0.65
4       5  167.49     0.65
5       6  169.90     0.65
6       7  179.70     0.65
7       8  175.92     0.65
8       9  170.37     0.65
9      10  163.35     0.65
10     11  161.92     0.65
11     12  161.00     0.65
12     13  161.27     0.65
13     14  163.46     0.65
14     15  160.77     0.65
15     16  159.11     0.65
16     17  161.57     0.65
17     18  163.72     0.65
18     19  162.83     0.65
19     20  162.47     0.65
20     21  162.76     0.65
21     22  163.40     0.65
22     23  161.07     0.65
23     24  160.97     0.65
24     25  163.88     0.65
25     26  164.30     0.65
26     27  163.60     0.65
27     28  161.87     0.65
28     29  161.43     0.65
29     30  162.33     0.65
