<a href="https://colab.research.google.com/github/sankeawthong/Project-1-Lita-Chatbot/blob/main/%5B20250621%5D%20CM_%20FedAvg%20MLP-LSTM_logged_extended%20on%20WSN-BFSF.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

***20250605 FedAvg MLP-LSTM_logged_extended on WSN-BFSF***

In [None]:
import pandas as pd
import numpy as np
import time
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report, confusion_matrix, roc_auc_score
from keras.models import Sequential
from keras.layers import Dense, Dropout, LSTM, InputLayer
from keras.utils import to_categorical
from imblearn.over_sampling import SMOTE
import matplotlib.pyplot as plt
# --- Imports ---------------------------------------------------------------
import numpy as np, time, tensorflow as tf

In [None]:
!pip install imbalanced-learn



In [None]:
# ----------------------------
# Preprocessing and Partitioning
# ----------------------------
# Load the dataset, drop rows with missing values, and integer-encode every
# text (object-dtype) column, including 'Class' if it is stored as text.
df = pd.read_csv("dataset.csv").dropna()
for col in df.columns:
    if df[col].dtype == 'object':
        df[col] = LabelEncoder().fit_transform(df[col])

# Labels are kept as a plain NumPy array so later slicing / splitting does not
# depend on pandas index semantics.
X = df.drop('Class', axis=1)
y = df['Class'].to_numpy()

# Hold out the test set BEFORE fitting anything. Scaling or oversampling the
# full dataset first would leak test-set statistics into training and would put
# SMOTE-generated synthetic rows into the evaluation data.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=101, stratify=y)

# Fit the scaler on the training split only, then apply it to both splits.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# Oversample minority classes in the training split only; the test split keeps
# its original (real) class distribution.
X_train, y_train = SMOTE(random_state=42).fit_resample(X_train, y_train)

# Shuffle the resampled training set so that any later contiguous partitioning
# (e.g. per-client splits) does not depend on the order in which the resampler
# returned its rows.
_perm = np.random.default_rng(101).permutation(len(y_train))
X_train, y_train = X_train[_perm], y_train[_perm]

In [None]:
# --- Federated parameters ---------------------------------------------------
NUM_CLIENTS = 5     # number of simulated clients the training set is split across
ROUNDS       = 75   # number of federated communication rounds
LOCAL_EPOCHS = 1    # epochs each client trains per round
BATCH_SIZE   = 32   # mini-batch size for local training

# --- Data -------------------------------------------------------------------
# The model's input shape is (n_features, 1), so both splits are reshaped to
# (samples, n_features, 1). The training split is reshaped explicitly here
# rather than assuming an earlier cell already made it 3-D.
X_train_seq = np.asarray(X_train).reshape(len(X_train), -1, 1)
# Split NumPy arrays (not pandas objects) so np.array_split does not go through
# pandas' deprecated swapaxes path.
y_train_arr = np.asarray(y_train)
client_X = np.array_split(X_train_seq, NUM_CLIENTS, axis=0)
client_y = np.array_split(y_train_arr, NUM_CLIENTS, axis=0)

X_test   = X_test.reshape(X_test.shape[0], X_test.shape[1], 1)
# Derive the class count from BOTH splits so the one-hot width is the same for
# client labels and test labels even if the largest label is absent from one.
NUM_CLASSES = int(max(y_train_arr.max(), np.asarray(y_test).max())) + 1
y_test_ohe = to_categorical(y_test, NUM_CLASSES)
INPUT_SHAPE = (X_test.shape[1], 1)

  return bound(*args, **kwds)


In [None]:
# --- Model builder ----------------------------------------------------------
def build_model(input_shape=INPUT_SHAPE, num_classes=NUM_CLASSES,
                lstm_units=64, dense_units=(128, 64), drop=0.30):
    """Build and compile the LSTM + dense classifier.

    Architecture: input -> LSTM(tanh) -> [Dense(relu) -> Dropout] for each
    entry of ``dense_units`` -> Dense(softmax).

    Args:
        input_shape: Shape of one sample, passed to ``InputLayer``.
        num_classes: Width of the softmax output layer.
        lstm_units: Number of units in the LSTM layer.
        dense_units: Iterable of hidden dense layer widths, applied in order.
        drop: Dropout rate applied after every hidden dense layer.

    Returns:
        A ``Sequential`` model compiled with the Adam optimizer, categorical
        cross-entropy loss and the accuracy metric.
    """
    stack = [InputLayer(input_shape=input_shape),
             LSTM(lstm_units, activation='tanh')]
    for width in dense_units:
        stack.append(Dense(width, activation='relu'))
        stack.append(Dropout(drop))
    stack.append(Dense(num_classes, activation='softmax'))

    net = Sequential(stack)
    net.compile(loss='categorical_crossentropy',
                optimizer='adam',
                metrics=['accuracy'])
    return net

# --- Resource profile (lightweight) ----------------------------------------
# Seed the random number generators before the global model is initialised so
# that repeated runs start from the same weights.
tf.keras.utils.set_random_seed(42)

global_model = build_model()
tot_params   = global_model.count_params()

# Size estimate assumes 4 bytes per parameter.
_size_mb_exact = (tot_params * 4) / 2**20
model_sizeMB = round(_size_mb_exact, 2)                 # rounded for display
# Uplink traffic per round = one model upload per client. Computed from the
# UNROUNDED size so the display rounding above is not multiplied by the number
# of clients.
COMM_MB_UP   = round(_size_mb_exact * NUM_CLIENTS, 4)   # uplink only

print(f"[Profile] Params={tot_params:,}  Size={model_sizeMB} MB")

# --- Federated loop ---------------------------------------------------------
def fedavg_aggregate(weight_sets, sample_counts):
    """Return the sample-weighted average of several weight lists (FedAvg).

    Args:
        weight_sets: One entry per client; each entry is a list of arrays as
            returned by ``model.get_weights()``. All entries must have the same
            length and matching array shapes.
        sample_counts: Number of training samples of each client, in the same
            order as ``weight_sets``.

    Returns:
        A list of arrays where position ``k`` holds
        ``sum_i (n_i / sum(n)) * weight_sets[i][k]``.

    Raises:
        ValueError: If the total number of samples is zero.
    """
    total_samples = sum(sample_counts)
    if total_samples == 0:
        raise ValueError("Cannot aggregate: clients hold no samples.")
    averaged = []
    for layer in range(len(weight_sets[0])):
        layer_updates = np.array([
            (n / total_samples) * w[layer]
            for w, n in zip(weight_sets, sample_counts)
        ])
        averaged.append(np.sum(layer_updates, axis=0))
    return averaged


global_weights = global_model.get_weights()
history = []

# Loop-invariant work, done once instead of every round: client labels are
# one-hot encoded, client sizes are counted, and integer test labels are
# recovered from the one-hot test matrix.
client_y_ohe = [to_categorical(yi, NUM_CLASSES) for yi in client_y]
client_sizes = [len(yi) for yi in client_y]
y_true = np.argmax(y_test_ohe, axis=1)

for r in range(1, ROUNDS + 1):
    t0 = time.time()
    client_weights = []

    # -------- local training --------
    # Every client starts the round from the current global weights, using a
    # freshly built (and therefore freshly compiled) model.
    for Xi, yi_ohe in zip(client_X, client_y_ohe):
        local = build_model()
        local.set_weights(global_weights)
        local.fit(Xi, yi_ohe,
                  epochs=LOCAL_EPOCHS,
                  batch_size=BATCH_SIZE,
                  verbose=0)
        client_weights.append(local.get_weights())

    # -------- FedAvg aggregation ----
    global_weights = fedavg_aggregate(client_weights, client_sizes)
    global_model.set_weights(global_weights)

    # -------- Evaluation ------------
    y_prob = global_model.predict(X_test, verbose=0)
    y_pred = np.argmax(y_prob, axis=1)

    # zero_division=0 yields the same scores as the default but without a
    # warning when some class is never predicted in a round.
    metrics = dict(
        Round=r,
        Accuracy=accuracy_score(y_true, y_pred),
        Precision=precision_score(y_true, y_pred, average='weighted',
                                  zero_division=0),
        Recall=recall_score(y_true, y_pred, average='weighted',
                            zero_division=0),
        F1=f1_score(y_true, y_pred, average='weighted', zero_division=0),
        AUC=roc_auc_score(y_test_ohe, y_prob, multi_class='ovr',
                          average='weighted'),
        Time_s=round(time.time() - t0, 2),   # training + aggregation + evaluation
        Comm_MB=COMM_MB_UP           # add +downlink if desired
    )
    history.append(metrics)
    print(f"Round {r:02d}: acc={metrics['Accuracy']:.8f} "
          f"F1={metrics['F1']:.8f}  comm={metrics['Comm_MB']} MB")

# history is a list of dicts ready for a Pandas DataFrame



[Profile] Params=33,732  Size=0.13 MB
Round 01: acc=0.75864201 F1=0.75097365  comm=0.65 MB




Round 02: acc=0.89586791 F1=0.89338745  comm=0.65 MB




Round 03: acc=0.90529815 F1=0.90368917  comm=0.65 MB




Round 04: acc=0.91913202 F1=0.91779031  comm=0.65 MB




Round 05: acc=0.92675991 F1=0.92562180  comm=0.65 MB




Round 06: acc=0.93207185 F1=0.93120565  comm=0.65 MB




Round 07: acc=0.93229536 F1=0.93143344  comm=0.65 MB




Round 08: acc=0.93696054 F1=0.93626933  comm=0.65 MB




Round 09: acc=0.93376006 F1=0.93293337  comm=0.65 MB




Round 10: acc=0.94683780 F1=0.94647135  comm=0.65 MB




Round 11: acc=0.95172650 F1=0.95144875  comm=0.65 MB




Round 12: acc=0.95263005 F1=0.95241085  comm=0.65 MB




Round 13: acc=0.94578683 F1=0.94546687  comm=0.65 MB




Round 14: acc=0.95692431 F1=0.95675845  comm=0.65 MB




Round 15: acc=0.95949705 F1=0.95930581  comm=0.65 MB




Round 16: acc=0.95774226 F1=0.95760934  comm=0.65 MB




Round 17: acc=0.96044341 F1=0.96032838  comm=0.65 MB




Round 18: acc=0.96247878 F1=0.96235941  comm=0.65 MB




Round 19: acc=0.96131367 F1=0.96119778  comm=0.65 MB




Round 20: acc=0.96586948 F1=0.96580880  comm=0.65 MB




Round 21: acc=0.96938382 F1=0.96928909  comm=0.65 MB




Round 22: acc=0.96692997 F1=0.96685629  comm=0.65 MB




Round 23: acc=0.96554610 F1=0.96542563  comm=0.65 MB




Round 24: acc=0.96705836 F1=0.96694701  comm=0.65 MB




Round 25: acc=0.96856112 F1=0.96844563  comm=0.65 MB




Round 26: acc=0.96934578 F1=0.96926070  comm=0.65 MB




Round 27: acc=0.96690143 F1=0.96677968  comm=0.65 MB




Round 28: acc=0.96844223 F1=0.96833190  comm=0.65 MB




Round 29: acc=0.97233226 F1=0.97224439  comm=0.65 MB




Round 30: acc=0.97274599 F1=0.97266808  comm=0.65 MB




Round 31: acc=0.97113386 F1=0.97106965  comm=0.65 MB




Round 32: acc=0.97042053 F1=0.97031300  comm=0.65 MB




Round 33: acc=0.97180440 F1=0.97172027  comm=0.65 MB




Round 34: acc=0.97087231 F1=0.97076373  comm=0.65 MB




Round 35: acc=0.97428203 F1=0.97424891  comm=0.65 MB




Round 36: acc=0.97213253 F1=0.97206603  comm=0.65 MB




Round 37: acc=0.97451030 F1=0.97445600  comm=0.65 MB




Round 38: acc=0.97890917 F1=0.97887713  comm=0.65 MB




Round 39: acc=0.97697842 F1=0.97692018  comm=0.65 MB




Round 40: acc=0.97670260 F1=0.97664872  comm=0.65 MB




Round 41: acc=0.97734460 F1=0.97728925  comm=0.65 MB




Round 42: acc=0.97610340 F1=0.97606068  comm=0.65 MB




Round 43: acc=0.97515705 F1=0.97509680  comm=0.65 MB




Round 44: acc=0.98054032 F1=0.98051772  comm=0.65 MB




Round 45: acc=0.97611767 F1=0.97607054  comm=0.65 MB




Round 46: acc=0.97983175 F1=0.97980883  comm=0.65 MB




Round 47: acc=0.97955593 F1=0.97952120  comm=0.65 MB




Round 48: acc=0.97308839 F1=0.97304719  comm=0.65 MB




Round 49: acc=0.98250436 F1=0.98249564  comm=0.65 MB




Round 50: acc=0.97986504 F1=0.97983567  comm=0.65 MB




Round 51: acc=0.97882357 F1=0.97877683  comm=0.65 MB




Round 52: acc=0.98186712 F1=0.98184990  comm=0.65 MB




Round 53: acc=0.98271361 F1=0.98270230  comm=0.65 MB




Round 54: acc=0.97861909 F1=0.97859777  comm=0.65 MB




Round 55: acc=0.98009330 F1=0.98006364  comm=0.65 MB




Round 56: acc=0.98160557 F1=0.98158783  comm=0.65 MB




Round 57: acc=0.98171494 F1=0.98170519  comm=0.65 MB




Round 58: acc=0.98205734 F1=0.98204073  comm=0.65 MB




Round 59: acc=0.98317014 F1=0.98316238  comm=0.65 MB




Round 60: acc=0.98278970 F1=0.98277895  comm=0.65 MB




Round 61: acc=0.98448742 F1=0.98447765  comm=0.65 MB




Round 62: acc=0.98359338 F1=0.98357956  comm=0.65 MB




Round 63: acc=0.97228470 F1=0.97223775  comm=0.65 MB




Round 64: acc=0.98222854 F1=0.98219857  comm=0.65 MB




Round 65: acc=0.98493920 F1=0.98493240  comm=0.65 MB




Round 66: acc=0.98542427 F1=0.98542148  comm=0.65 MB




Round 67: acc=0.98192419 F1=0.98191325  comm=0.65 MB




Round 68: acc=0.98562875 F1=0.98562381  comm=0.65 MB




Round 69: acc=0.98631355 F1=0.98630834  comm=0.65 MB




Round 70: acc=0.98398809 F1=0.98396994  comm=0.65 MB




Round 71: acc=0.98541000 F1=0.98540593  comm=0.65 MB




Round 72: acc=0.98531489 F1=0.98530783  comm=0.65 MB




Round 73: acc=0.98659413 F1=0.98658750  comm=0.65 MB




Round 74: acc=0.98674155 F1=0.98673959  comm=0.65 MB




Round 75: acc=0.98633733 F1=0.98633727  comm=0.65 MB


In [None]:
# ----------------------------
# Save Log
# ----------------------------
# One row per federated round; columns come from the keys of the per-round
# metric dicts collected in `history`.
history_df = pd.DataFrame(history)
log_filename = "fedavg_training_and_resource_log_WSN-BFSF.csv"
history_df.to_csv(log_filename, index=False)
print(f"\nLog saved to {log_filename}")


# ----------------------------
# Download Log
# ----------------------------
# The browser download helper only exists inside Google Colab. Outside Colab
# the CSV is still on disk, so the download step is skipped instead of failing
# the whole cell.
try:
    from google.colab import files
except ImportError:
    print("google.colab is not available; skipping browser download.")
else:
    files.download(log_filename)


Log saved to fedavg_training_and_resource_log_WSN-BFSF.csv


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
# Resource summary for the run: parameter count, estimated model size, uplink
# traffic per round, and the per-round time / communication columns of the log.
print(f"Total parameters: {tot_params}")
print(f"Model size: {model_sizeMB} MB")
print(f"Communication per round (uplink): {COMM_MB_UP} MB")
print()
print("Detailed resource usage per round:")
resource_columns = ['Round', 'Time_s', 'Comm_MB']
print(history_df[resource_columns])


Total parameters: 33732
Model size: 0.13 MB
Communication per round (uplink): 0.65 MB

Detailed resource usage per round:
    Round  Time_s  Comm_MB
0       1  216.56     0.65
1       2  206.94     0.65
2       3  199.82     0.65
3       4  194.71     0.65
4       5  195.98     0.65
..    ...     ...      ...
70     71  173.14     0.65
71     72  170.69     0.65
72     73  168.35     0.65
73     74  166.21     0.65
74     75  165.63     0.65

[75 rows x 3 columns]


In [None]:
# ---------------------------------------------------------
#  Confusion matrix of the final global model: computed on
#  the test split, written to CSV and rendered as a heat-map.
# ---------------------------------------------------------
from sklearn.metrics import confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np

# --- 1.  Predict with the final global model -------------
y_prob_final = global_model.predict(X_test, verbose=0)
y_pred_final = np.argmax(y_prob_final, axis=1)   # most probable class per sample
y_true_final = y_test                            # integer labels, not one-hot

# --- 2.  Confusion matrix (raw counts) ------------------
cm = confusion_matrix(y_true_final, y_pred_final)

# Placeholder tick labels C0, C1, ...; swap in real class names if available.
class_labels = [f"C{c}" for c in range(cm.shape[0])]

# --- 3.  Save the raw matrix for archival ---------------
# The output name is derived from the dataset file name: directory part
# removed, everything from the first '.' onwards dropped.
DATA_PATH = "dataset.csv"
dataset_stem = DATA_PATH.rsplit('/', 1)[-1].split('.', 1)[0]
cm_path = f"cm_{dataset_stem}_FedAvg.csv"
np.savetxt(cm_path, cm, delimiter=",", fmt="%d")
print("Confusion-matrix CSV written to", cm_path)

# --- 4.  Make a heat-map figure -------------------------
fig, ax = plt.subplots(figsize=(4.5,4))
sns.heatmap(cm,
            annot=True, fmt='d', cmap='Blues', cbar=False,
            xticklabels=class_labels, yticklabels=class_labels,
            linewidths=.5, linecolor='grey',
            ax=ax)
ax.set_xlabel("Predicted label")
ax.set_ylabel("True label")
ax.set_title("Confusion Matrix – FedAvg-IDS")
fig.tight_layout()

# Same file name as the CSV, with a .png extension.
fig_path = cm_path.replace(".csv", ".png")
fig.savefig(fig_path, dpi=300)
plt.close(fig)   # the figure is only written to disk, not shown inline
print("Figure saved to", fig_path)
# ---------------------------------------------------------

Confusion-matrix CSV written to cm_dataset_FedAvg.csv
Figure saved to cm_dataset_FedAvg.png


In [None]:
# Re-export the confusion matrix `cm` under dataset-specific file names and
# offer both artefacts for download. These names differ from the ones written
# by the previous confusion-matrix cell, so the CSV and figure are written again.
cm_path = "confusion_matrix_FedAvg_BFSF.csv"
fig_path = "confusion_matrix_FedAvg_BFSF.png"

# Save the raw matrix for archival
np.savetxt(cm_path, cm, delimiter=",", fmt="%d")
print("Confusion-matrix CSV written to", cm_path)

# Make a heat-map figure
fig, ax = plt.subplots(figsize=(4.5,4))
sns.heatmap(cm,
            annot=True, fmt='d', cmap='Blues', cbar=False,
            xticklabels=class_labels, yticklabels=class_labels,
            linewidths=.5, linecolor='grey',
            ax=ax)
ax.set_xlabel("Predicted label")
ax.set_ylabel("True label")
ax.set_title("Confusion Matrix – FedAvg")
fig.tight_layout()

fig.savefig(fig_path, dpi=300)
plt.close(fig)
print("Figure saved to", fig_path)

# Download the Confusion-matrix CSV and Figure.
# The helper is imported here, guarded, so this cell neither depends on an
# earlier cell having imported it nor fails outside Google Colab; both files
# remain on disk either way.
try:
    from google.colab import files
except ImportError:
    print("google.colab is not available; skipping browser download.")
else:
    files.download(cm_path)
    files.download(fig_path)

Confusion-matrix CSV written to confusion_matrix_FedAvg_BFSF.csv
Figure saved to confusion_matrix_FedAvg_BFSF.png


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>