<a href="https://colab.research.google.com/github/sankeawthong/Project-1-Lita-Chatbot/blob/main/%5B20250531%5D%20Trust%20FL%20MLP-LSTM_logged_extended%20on%20WSN-DS.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

***20250531 Trust FL MLP-LSTM_logged_extended on WSN-DS***

In [1]:
# trust_federated_mlp_lstm_logged_extended.py

import pandas as pd
import numpy as np
import time
import psutil
import os
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, LSTM, InputLayer
from tensorflow.keras.regularizers import l2
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from tensorflow.keras.utils import to_categorical
from imblearn.over_sampling import SMOTE
from scipy.spatial.distance import cosine

In [2]:
# Load dataset
dataset = pd.read_csv("dataset_WSN-DS.csv")  # WSN-DS 5-class dataset
dataset = dataset.dropna()  # Remove missing values

In [3]:
dataset

Unnamed: 0,id,Time,Is_CH,who CH,Dist_To_CH,ADV_S,ADV_R,JOIN_S,JOIN_R,SCH_S,SCH_R,Rank,DATA_S,DATA_R,Data_Sent_To_BS,dist_CH_To_BS,send_code,Expaned Energy,Class
0,101000,50,1,101000,0.00000,1,0,0,25,1,0,0,0,1200,48,130.08535,0,2.46940,0
1,101001,50,0,101044,75.32345,0,4,1,0,0,1,2,38,0,0,0.00000,4,0.06957,0
2,101002,50,0,101010,46.95453,0,4,1,0,0,1,19,41,0,0,0.00000,3,0.06898,0
3,101003,50,0,101044,64.85231,0,4,1,0,0,1,16,38,0,0,0.00000,4,0.06673,0
4,101004,50,0,101010,4.83341,0,4,1,0,0,1,25,41,0,0,0.00000,3,0.06534,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
374656,201096,1003,0,201051,6.98337,0,5,1,0,0,1,7,96,0,67,170.14779,3,0.15974,0
374657,201097,1003,0,201037,29.32867,0,5,1,0,0,1,31,39,0,24,82.21043,2,0.06877,0
374658,201098,1003,0,201095,18.51963,0,5,1,0,0,1,17,55,0,31,139.26438,1,0.09437,0
374659,201099,1003,0,201051,8.55001,0,5,1,0,0,1,3,96,0,65,158.27492,3,0.16047,0


In [4]:
# Encode non-numeric columns (if any)
for column in dataset.columns:
    if dataset[column].dtype == 'object':  # Identify categorical columns
        dataset[column] = LabelEncoder().fit_transform(dataset[column])

# Separate features (X) and target (y)
X = dataset.drop(['Class'], axis=1)  # Assuming 'Class' is the target column
y = dataset['Class']

print("Original Class Distribution:", np.bincount(y))

Original Class Distribution: [340066  14596  10049   6638   3312]


In [5]:
from sklearn.preprocessing import LabelEncoder, StandardScaler # Import StandardScaler here
# Standardize the features
scaler = StandardScaler()
X = scaler.fit_transform(X)

from imblearn.over_sampling import SMOTE

# Apply SMOTE to balance the data
smote = SMOTE(random_state=42)
X, y = smote.fit_resample(X, y)

# Verify the balanced dataset distribution
print("Balanced Class Distribution:", np.bincount(y))

Balanced Class Distribution: [340066 340066 340066 340066 340066]


In [6]:
# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=101, stratify=y)

In [7]:
pip install matplotlib



In [8]:
import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix, classification_report
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dropout, Dense, TimeDistributed
from tensorflow.keras.regularizers import l2
import numpy as np

In [9]:
from sklearn.metrics import roc_curve, auc
from itertools import cycle

y_train_cat = to_categorical(y_train)
y_test_cat = to_categorical(y_test)

X_train = X_train.reshape((X_train.shape[0], X_train.shape[1], 1))
X_test = X_test.reshape((X_test.shape[0], X_test.shape[1], 1))

input_shape = (X_train.shape[1], X_train.shape[2])
num_classes = y_train_cat.shape[1]

# -------------------------
# Utility Functions
# -------------------------
def build_model(input_shape, num_classes):
    model = Sequential([
        InputLayer(input_shape=input_shape),
        LSTM(64, activation='tanh', kernel_regularizer=l2(0.01)),
        Dropout(0.3),
        Dense(128, activation='relu', kernel_regularizer=l2(0.01)),
        Dropout(0.3),
        Dense(64, activation='relu', kernel_regularizer=l2(0.01)),
        Dropout(0.3),
        Dense(num_classes, activation='softmax')
    ])
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    return model

def compute_cosine_similarity(update1, update2):
    flat1 = np.concatenate([w.flatten() for w in update1])
    flat2 = np.concatenate([w.flatten() for w in update2])
    return 1 - cosine(flat1, flat2)

def compute_stability(update, history):
    if len(history) < 2:
        return 1.0
    sims = [compute_cosine_similarity(update, past) for past in history[-3:]]
    return np.mean(sims)

def normalize(value, min_val, max_val):
    return (value - min_val) / (max_val - min_val + 1e-8)

def compute_trust_scores(client_updates, global_weights, val_losses, update_history, alpha=(0.4, 0.4, 0.2)):
    scores = {}
    losses = list(val_losses.values())
    min_loss, max_loss = min(losses), max(losses)
    for cid in client_updates:
        sim = compute_cosine_similarity(client_updates[cid], global_weights)
        loss = val_losses[cid]
        stability = compute_stability(client_updates[cid], update_history.get(cid, []))
        trust = alpha[0] * sim + alpha[1] * (1 - normalize(loss, min_loss, max_loss)) + alpha[2] * stability
        scores[cid] = trust
    return scores

def trust_weighted_aggregation(client_weights, trust_scores):
    total_trust = sum(trust_scores.values())
    return [sum(trust_scores[cid] * np.array(client_weights[cid][i]) for cid in client_weights) / total_trust
            for i in range(len(next(iter(client_weights.values()))))]

# -------------------------
# Federated Training with Profiling
# -------------------------
clients = 5
rounds = 30
client_X = np.array_split(X_train, clients)
client_y = np.array_split(y_train_cat, clients)

global_model = build_model(input_shape, num_classes)
global_weights = global_model.get_weights()
update_history = {i: [] for i in range(clients)}
trust_log, comm_log, round_log = [], [], []

# Model size estimation
model_params = global_model.count_params()
param_size_MB = sum(w.nbytes for w in global_weights) / (1024 ** 2)

for r in range(rounds):
    start_time = time.time()
    round_comm = 0
    local_weights = {}
    val_losses = {}

    for cid in range(clients):
        local_model = build_model(input_shape, num_classes)
        local_model.set_weights(global_weights)
        local_model.fit(client_X[cid], client_y[cid], epochs=1, batch_size=32, verbose=0)
        weights = local_model.get_weights()
        loss = local_model.evaluate(X_test, y_test_cat, verbose=0)[0]
        local_weights[cid] = weights
        val_losses[cid] = loss
        round_comm += sum(w.nbytes for w in weights)
        update_history[cid].append(weights)

    trust_scores = compute_trust_scores(local_weights, global_weights, val_losses, update_history)
    for cid in trust_scores:
        trust_log.append({'round': r+1, 'client_id': cid, 'trust_score': trust_scores[cid]})

    global_weights = trust_weighted_aggregation(local_weights, trust_scores)
    global_model.set_weights(global_weights)

    pred = global_model.predict(X_test)
    pred_classes = np.argmax(pred, axis=1)
    true_classes = np.argmax(y_test_cat, axis=1)
    acc = accuracy_score(true_classes, pred_classes)
    prec = precision_score(true_classes, pred_classes, average='weighted')
    rec = recall_score(true_classes, pred_classes, average='weighted')
    f1 = f1_score(true_classes, pred_classes, average='weighted')
    cm = confusion_matrix(true_classes, pred_classes)
    duration = (time.time() - start_time) * 1000  # ms

    comm_log.append({'round': r+1, 'comm_cost_MB': round_comm / (1024 ** 2)})
    round_log.append({
        'round': r+1,
        'accuracy': acc,
        'precision': prec,
        'recall': rec,
        'f1_score': f1,
        'round_time_ms': duration
    })



[1m10628/10628[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m48s[0m 4ms/step




[1m10628/10628[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m46s[0m 4ms/step




[1m10628/10628[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m46s[0m 4ms/step




[1m10628/10628[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m50s[0m 5ms/step




[1m10628/10628[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m46s[0m 4ms/step




[1m10628/10628[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m46s[0m 4ms/step




[1m10628/10628[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m43s[0m 4ms/step




[1m10628/10628[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m44s[0m 4ms/step




[1m10628/10628[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m45s[0m 4ms/step




[1m10628/10628[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m49s[0m 5ms/step




[1m10628/10628[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m50s[0m 5ms/step




[1m10628/10628[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m48s[0m 5ms/step




[1m10628/10628[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m46s[0m 4ms/step




[1m10628/10628[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m47s[0m 4ms/step




[1m10628/10628[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m45s[0m 4ms/step




[1m10628/10628[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m46s[0m 4ms/step




[1m10628/10628[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m47s[0m 4ms/step




[1m10628/10628[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m43s[0m 4ms/step




[1m10628/10628[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m45s[0m 4ms/step




[1m10628/10628[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m44s[0m 4ms/step




[1m10628/10628[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m44s[0m 4ms/step




[1m10628/10628[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m43s[0m 4ms/step




[1m10628/10628[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m45s[0m 4ms/step




[1m10628/10628[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m45s[0m 4ms/step




[1m10628/10628[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m46s[0m 4ms/step




[1m10628/10628[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m47s[0m 4ms/step




[1m10628/10628[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m47s[0m 4ms/step




[1m10628/10628[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m46s[0m 4ms/step




[1m10628/10628[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m44s[0m 4ms/step




[1m10628/10628[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m45s[0m 4ms/step


In [10]:
import os

# Create the directory if it doesn't exist
output_dir = "/mnt/data"
if not os.path.exists(output_dir):
    os.makedirs(output_dir)

# Save Logs
pd.DataFrame(trust_log).to_csv(os.path.join(output_dir, "trust_scores_log.csv"), index=False)
pd.DataFrame(comm_log).to_csv(os.path.join(output_dir, "communication_cost_log.csv"), index=False)
pd.DataFrame(round_log).to_csv(os.path.join(output_dir, "resource_and_performance_log.csv"), index=False)

# Model summary
model_profile = {
    "Parameters (MB)": param_size_MB,
    "FLOPs (estimated)": model_params * 2,
    "Peak Memory (MB)": psutil.Process(os.getpid()).memory_info().rss / (1024 ** 2),
    "Total Rounds": rounds,
    "Clients": clients
}
pd.DataFrame([model_profile]).to_csv(os.path.join(output_dir, "model_resource_profile.csv"), index=False)

In [11]:
# prompt: Download all Logs .csv files aboved

from google.colab import files

# Specify the directory where the files were saved
output_dir = "/mnt/data"

# Define the list of filenames to download
filenames_to_download = [
    "trust_scores_log.csv",
    "communication_cost_log.csv",
    "resource_and_performance_log.csv",
    "model_resource_profile.csv"
]

# Download each file
for filename in filenames_to_download:
    file_path = os.path.join(output_dir, filename)
    if os.path.exists(file_path):
        files.download(file_path)
    else:
        print(f"File not found: {file_path}")

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>