<a href="https://colab.research.google.com/github/sankeawthong/Project-1-Lita-Chatbot/blob/main/%5B20250607%5D%20FedAvg%20MLP-LSTM_logged_extended%20on%20WSN-DS_Ver.2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

*20250531 FedAvg MLP-LSTM_logged_extended on WSN-DS*

In [None]:
import pandas as pd
import numpy as np
import time
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report, confusion_matrix, roc_auc_score
from keras.models import Sequential
from keras.layers import Dense, Dropout, LSTM, InputLayer
from keras.utils import to_categorical
from imblearn.over_sampling import SMOTE
import matplotlib.pyplot as plt
# --- Imports ---------------------------------------------------------------
import numpy as np, time, tensorflow as tf

In [None]:
!pip install imbalanced-learn



In [None]:
# ----------------------------
# Preprocessing and Partitioning
# ----------------------------
df = pd.read_csv("dataset_WSN-DS.csv").dropna()
for col in df.columns:
    if df[col].dtype == 'object':
        df[col] = LabelEncoder().fit_transform(df[col])

X, y = df.drop('Class', axis=1), df['Class']
X = StandardScaler().fit_transform(X)
X, y = SMOTE(random_state=42).fit_resample(X, y)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=101, stratify=y)

In [None]:
# --- Federated parameters ---------------------------------------------------
NUM_CLIENTS = 5
ROUNDS       = 30
LOCAL_EPOCHS = 1
BATCH_SIZE   = 32

# --- Data -------------------------------------------------------------------
# X_train: (N, T, 1)  | y_train: (N,)
# Make sure X_train is already 3-D!
client_X = np.array_split(X_train, NUM_CLIENTS, axis=0)
client_y = np.array_split(y_train, NUM_CLIENTS, axis=0)

X_test   = X_test.reshape(X_test.shape[0], X_test.shape[1], 1)
y_test_ohe = to_categorical(y_test)
NUM_CLASSES = y_test_ohe.shape[1]
INPUT_SHAPE = (X_test.shape[1], 1)

  return bound(*args, **kwds)


In [None]:
# --- Model builder ----------------------------------------------------------
def build_model(input_shape=INPUT_SHAPE, num_classes=NUM_CLASSES,
                lstm_units=64, dense_units=(128, 64), drop=0.30):
    model = Sequential([InputLayer(input_shape=input_shape),
                        LSTM(lstm_units, activation='tanh')])
    for u in dense_units:
        model.add(Dense(u, activation='relu'))
        model.add(Dropout(drop))
    model.add(Dense(num_classes, activation='softmax'))
    model.compile(optimizer='adam',
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    return model

# --- Resource profile (lightweight) ----------------------------------------
global_model = build_model()
tot_params   = global_model.count_params()
model_sizeMB = round((tot_params * 4) / 2**20, 2)      # 4 bytes / param
COMM_MB_UP   = model_sizeMB * NUM_CLIENTS              # uplink only

print(f"[Profile] Params={tot_params:,}  Size={model_sizeMB} MB")

# --- Federated loop ---------------------------------------------------------
global_weights = global_model.get_weights()
history = []

for r in range(1, ROUNDS + 1):
    t0 = time.time()
    client_weights, client_sizes = [], []

    # -------- local training --------
    for Xi, yi in zip(client_X, client_y):
        local = build_model()
        local.set_weights(global_weights)
        local.fit(Xi, to_categorical(yi, NUM_CLASSES),
                  epochs=LOCAL_EPOCHS,
                  batch_size=BATCH_SIZE,
                  verbose=0)
        client_weights.append(local.get_weights())
        client_sizes.append(len(yi))

    # -------- FedAvg aggregation ----
    total_samples = sum(client_sizes)
    new_weights = []
    for layer in range(len(global_weights)):
        layer_updates = np.array([
            (n / total_samples) * w[layer]
            for w, n in zip(client_weights, client_sizes)
        ])
        new_weights.append(np.sum(layer_updates, axis=0))
    global_weights = new_weights
    global_model.set_weights(global_weights)

    # -------- Evaluation ------------
    y_prob = global_model.predict(X_test, verbose=0)
    y_pred = np.argmax(y_prob, axis=1)
    y_true = np.argmax(y_test_ohe, axis=1)

    metrics = dict(
        Round=r,
        Accuracy=accuracy_score(y_true, y_pred),
        Precision=precision_score(y_true, y_pred, average='weighted'),
        Recall=recall_score(y_true, y_pred, average='weighted'),
        F1=f1_score(y_true, y_pred, average='weighted'),
        AUC=roc_auc_score(y_test_ohe, y_prob, multi_class='ovr',
                          average='weighted'),
        Time_s=round(time.time() - t0, 2),
        Comm_MB=COMM_MB_UP           # add +downlink if desired
    )
    history.append(metrics)
    print(f"Round {r:02d}: acc={metrics['Accuracy']:.8f} "
          f"F1={metrics['F1']:.8f}  comm={metrics['Comm_MB']} MB")

# history is a list of dicts ready for a Pandas DataFrame



[Profile] Params=33,797  Size=0.13 MB
Round 01: acc=0.89458223 F1=0.89039921  comm=0.65 MB




Round 02: acc=0.90641228 F1=0.90317576  comm=0.65 MB




Round 03: acc=0.96951180 F1=0.96948315  comm=0.65 MB




Round 04: acc=0.97806602 F1=0.97804490  comm=0.65 MB




Round 05: acc=0.97985391 F1=0.97983113  comm=0.65 MB




Round 06: acc=0.97990096 F1=0.97988277  comm=0.65 MB




Round 07: acc=0.98135950 F1=0.98134117  comm=0.65 MB




Round 08: acc=0.98079196 F1=0.98077418  comm=0.65 MB




Round 09: acc=0.98150653 F1=0.98149213  comm=0.65 MB




Round 10: acc=0.98244164 F1=0.98242720  comm=0.65 MB




Round 11: acc=0.98246223 F1=0.98244762  comm=0.65 MB




Round 12: acc=0.98327972 F1=0.98326675  comm=0.65 MB




Round 13: acc=0.98236519 F1=0.98235175  comm=0.65 MB




Round 14: acc=0.98329442 F1=0.98328057  comm=0.65 MB




Round 15: acc=0.98353555 F1=0.98352040  comm=0.65 MB




Round 16: acc=0.98312386 F1=0.98311103  comm=0.65 MB




Round 17: acc=0.98349144 F1=0.98347636  comm=0.65 MB




Round 18: acc=0.98347674 F1=0.98346202  comm=0.65 MB




Round 19: acc=0.98358848 F1=0.98357621  comm=0.65 MB




Round 20: acc=0.98408250 F1=0.98407034  comm=0.65 MB




Round 21: acc=0.98365611 F1=0.98364117  comm=0.65 MB




Round 22: acc=0.98379432 F1=0.98378289  comm=0.65 MB




Round 23: acc=0.98415308 F1=0.98414212  comm=0.65 MB




Round 24: acc=0.98415308 F1=0.98414190  comm=0.65 MB




Round 25: acc=0.98435598 F1=0.98434451  comm=0.65 MB




Round 26: acc=0.98415014 F1=0.98413915  comm=0.65 MB




Round 27: acc=0.98481765 F1=0.98480727  comm=0.65 MB




Round 28: acc=0.98449419 F1=0.98448194  comm=0.65 MB




Round 29: acc=0.98513230 F1=0.98512192  comm=0.65 MB




Round 30: acc=0.98520581 F1=0.98519609  comm=0.65 MB


In [None]:
# ----------------------------
# Save Log
# ----------------------------
# Use the correctly named variable 'history' to create the DataFrame
history_df = pd.DataFrame(history)
# Correct the filename to match the download call
log_filename = "fedavg_training_and_resource_log_WSN-DS.csv"
# Use the correctly named DataFrame 'history_df' to save to CSV
history_df.to_csv(log_filename, index=False)
print(f"\nLog saved to {log_filename}")


# ----------------------------
# Download Log
# ----------------------------
from google.colab import files
# Use the corrected filename for download
files.download(log_filename)


Log saved to fedavg_training_and_resource_log_WSN-DS.csv


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
# prompt: Please display all the resource that used in this training model

print("Total parameters:", tot_params)
print("Model size:", model_sizeMB, "MB")
print("Communication per round (uplink):", COMM_MB_UP, "MB")
print("\nDetailed resource usage per round:")
print(history_df[['Round', 'Time_s', 'Comm_MB']])

Total parameters: 33797
Model size: 0.13 MB
Communication per round (uplink): 0.65 MB

Detailed resource usage per round:
    Round  Time_s  Comm_MB
0       1  288.74     0.65
1       2  282.45     0.65
2       3  279.75     0.65
3       4  278.49     0.65
4       5  269.64     0.65
5       6  271.53     0.65
6       7  269.94     0.65
7       8  275.90     0.65
8       9  269.77     0.65
9      10  271.49     0.65
10     11  270.57     0.65
11     12  269.77     0.65
12     13  275.33     0.65
13     14  270.36     0.65
14     15  270.25     0.65
15     16  276.24     0.65
16     17  277.28     0.65
17     18  299.31     0.65
18     19  286.28     0.65
19     20  281.94     0.65
20     21  276.01     0.65
21     22  274.81     0.65
22     23  277.50     0.65
23     24  292.30     0.65
24     25  276.22     0.65
25     26  273.83     0.65
26     27  273.01     0.65
27     28  274.80     0.65
28     29  284.04     0.65
29     30  282.16     0.65
