In [5]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.utils import resample
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score

# ---------------------------------------------------------------------------
# Load the dataset and balance the two IncomeClass groups.
# ---------------------------------------------------------------------------
df = pd.read_csv("synthetic_data.csv")

df_majority = df.loc[df["IncomeClass"] == 0]
df_minority = df.loc[df["IncomeClass"] == 1]

# Draw IncomeClass == 1 rows with replacement until there are as many of them
# as there are IncomeClass == 0 rows.
df_minority_upsampled = resample(
    df_minority,
    replace=True,
    n_samples=df_majority.shape[0],
    random_state=42,
)

# Recombine, shuffle the row order deterministically and renumber the index.
df = (
    pd.concat([df_majority, df_minority_upsampled])
    .sample(frac=1, random_state=42)
    .reset_index(drop=True)
)

# ---------------------------------------------------------------------------
# Split into features / target and encode the features.
# ---------------------------------------------------------------------------
X = df.drop(columns=["IncomeClass"])
y = df["IncomeClass"]

categorical_cols = ['Gender', 'Race', 'Education', 'WorkClass', 'Occupation', 'MaritalStatus', 'NativeCountry']

# One fitted LabelEncoder per categorical column; the encoders are kept so the
# same mapping can be re-applied to other frames later on.
label_encoders = {col: LabelEncoder().fit(X[col]) for col in categorical_cols}
for col, encoder in label_encoders.items():
    X[col] = encoder.transform(X[col])

numerical_cols = ['Age', 'HoursPerWeek', 'CapitalGain', 'CapitalLoss']
scaler = StandardScaler()
X[numerical_cols] = scaler.fit_transform(X[numerical_cols])

# Downstream code works on plain NumPy arrays.
X, y = X.values, y.values

# ---------------------------------------------------------------------------
# Partition the rows into contiguous, near-equal shards, one per client.
# ---------------------------------------------------------------------------
N_CLIENTS = 10
X_clients = np.array_split(X, N_CLIENTS)
y_clients = np.array_split(y, N_CLIENTS)

class ImprovedFLModel(nn.Module):
    """Feed-forward binary classifier: input_size -> 64 -> 32 -> 1.

    Both hidden stages apply Linear -> BatchNorm1d -> ReLU -> Dropout(p=0.3).
    The single output unit is passed through a sigmoid, so ``forward`` returns
    one value in [0, 1] per input row.
    """

    def __init__(self, input_size):
        """Create the layers for inputs with ``input_size`` features."""
        super().__init__()
        # Registration order fixes both the state_dict key order and the order
        # in which the layers draw their random initial weights.
        self.fc1 = nn.Linear(input_size, 64)
        self.bn1 = nn.BatchNorm1d(64)
        self.fc2 = nn.Linear(64, 32)
        self.bn2 = nn.BatchNorm1d(32)
        self.fc3 = nn.Linear(32, 1)
        # A single Dropout module is shared by both hidden stages.
        self.dropout = nn.Dropout(0.3)

    def forward(self, x):
        """Return sigmoid outputs of shape (rows, 1) for a 2-D batch ``x``."""
        hidden = self.fc1(x)
        hidden = self.bn1(hidden)
        hidden = self.dropout(torch.relu(hidden))

        hidden = self.fc2(hidden)
        hidden = self.bn2(hidden)
        hidden = self.dropout(torch.relu(hidden))

        logits = self.fc3(hidden)
        return torch.sigmoid(logits)

# Seed torch's global RNG so weight initialisation and dropout masks repeat
# from run to run (42 matches the random_state used when resampling and
# shuffling the data in this cell).
torch.manual_seed(42)

input_size = X.shape[1]
global_model = ImprovedFLModel(input_size)

# The model already applies a sigmoid, so plain binary cross-entropy is used.
criterion = nn.BCELoss()

NUM_GLOBAL_ROUNDS = 50
LOCAL_EPOCHS = 10
LEARNING_RATE = 0.001

# The client shards never change between rounds, so convert each one to
# tensors once instead of rebuilding them NUM_GLOBAL_ROUNDS times.
client_tensors = [
    (
        torch.tensor(X_clients[i], dtype=torch.float32),
        torch.tensor(y_clients[i], dtype=torch.float32).view(-1, 1),
    )
    for i in range(N_CLIENTS)
]

for round_num in range(NUM_GLOBAL_ROUNDS):
    local_models = []

    for X_train, y_train in client_tensors:
        # Every client starts the round from the current global weights, with
        # a brand-new Adam optimizer (no optimizer state carries over).
        local_model = ImprovedFLModel(input_size)
        local_model.load_state_dict(global_model.state_dict())
        local_optimizer = optim.Adam(local_model.parameters(), lr=LEARNING_RATE)

        # Each local epoch is one full-batch gradient step on the client shard.
        for _ in range(LOCAL_EPOCHS):
            local_optimizer.zero_grad()
            output = local_model(X_train)
            loss = criterion(output, y_train)
            loss.backward()
            local_optimizer.step()

        local_models.append(local_model.state_dict())

    # Unweighted element-wise mean of every state_dict entry across clients
    # (this includes the BatchNorm buffers, which are cast to float first).
    global_weights = {
        key: torch.stack([model[key].float() for model in local_models]).mean(0)
        for key in global_model.state_dict()
    }
    global_model.load_state_dict(global_weights)

    print(f"Round {round_num + 1}/{NUM_GLOBAL_ROUNDS} completed.")

torch.save(global_model.state_dict(), "fl_model_improved.pth")
print("✅ Improved Federated Learning training completed! 🚀")

# NOTE(review): the evaluation frame is read from the same CSV the training
# set was built from, so the metrics below are measured on rows the model has
# already seen. A held-out split would give a fairer estimate.
df_test = pd.read_csv("synthetic_data.csv")

X_test = df_test.drop(columns=["IncomeClass"])
y_test = df_test["IncomeClass"]

# Re-apply the encoders and scaler that were fitted on the training frame.
for col in categorical_cols:
    X_test[col] = label_encoders[col].transform(X_test[col])

X_test[numerical_cols] = scaler.transform(X_test[numerical_cols])

X_test = torch.tensor(X_test.values, dtype=torch.float32)
y_test = y_test.values

# eval() switches dropout off and makes batch norm use its running statistics.
global_model.eval()

with torch.no_grad():
    y_pred_proba = global_model(X_test).numpy().flatten()
    # Threshold the sigmoid outputs at 0.5 to obtain hard class labels.
    y_pred = (y_pred_proba >= 0.5).astype(int)

accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)
roc_auc = roc_auc_score(y_test, y_pred_proba)

# The first column is named "Method" (it was an empty string before) so the
# header lines up with the rows that the multi-method cell appends to this
# same file.
metrics_df = pd.DataFrame([{
    "Method": "FedAvg",
    "Accuracy": accuracy,
    "Precision": precision,
    "Recall": recall,
    "F1-score": f1,
    "ROC-AUC": roc_auc,
}])
metrics_df.to_csv("fl_model_metrics.csv", index=False)

print("✅ Metrics saved to fl_model_metrics.csv")

print("\n**Improved Federated Learning Model Performance**")
print(f"Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1-score: {f1:.4f}")
print(f"ROC-AUC: {roc_auc:.4f}")

Round 1/50 completed.
Round 2/50 completed.
Round 3/50 completed.
Round 4/50 completed.
Round 5/50 completed.
Round 6/50 completed.
Round 7/50 completed.
Round 8/50 completed.
Round 9/50 completed.
Round 10/50 completed.
Round 11/50 completed.
Round 12/50 completed.
Round 13/50 completed.
Round 14/50 completed.
Round 15/50 completed.
Round 16/50 completed.
Round 17/50 completed.
Round 18/50 completed.
Round 19/50 completed.
Round 20/50 completed.
Round 21/50 completed.
Round 22/50 completed.
Round 23/50 completed.
Round 24/50 completed.
Round 25/50 completed.
Round 26/50 completed.
Round 27/50 completed.
Round 28/50 completed.
Round 29/50 completed.
Round 30/50 completed.
Round 31/50 completed.
Round 32/50 completed.
Round 33/50 completed.
Round 34/50 completed.
Round 35/50 completed.
Round 36/50 completed.
Round 37/50 completed.
Round 38/50 completed.
Round 39/50 completed.
Round 40/50 completed.
Round 41/50 completed.
Round 42/50 completed.
Round 43/50 completed.
Round 44/50 complete

In [6]:
import os
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.utils import resample
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score

# Load the raw rows.
df = pd.read_csv("synthetic_data.csv")

# Separate the two IncomeClass groups.
df_majority = df[df["IncomeClass"] == 0]
df_minority = df[df["IncomeClass"] == 1]

# Resample the IncomeClass == 1 rows with replacement up to the size of the
# IncomeClass == 0 group, then recombine and shuffle deterministically.
df_minority_upsampled = resample(
    df_minority,
    replace=True,
    n_samples=len(df_majority),
    random_state=42,
)
df_balanced = pd.concat([df_majority, df_minority_upsampled])
df = df_balanced.sample(frac=1, random_state=42).reset_index(drop=True)

# Features are every column except the target.
X = df.drop(columns=["IncomeClass"])
y = df["IncomeClass"]

# Integer-encode each categorical column, keeping the fitted encoder so the
# same mapping can be applied again later.
categorical_cols = ['Gender', 'Race', 'Education', 'WorkClass', 'Occupation', 'MaritalStatus', 'NativeCountry']
label_encoders = {}
for col in categorical_cols:
    label_encoders[col] = LabelEncoder()
    X[col] = label_encoders[col].fit_transform(X[col])

# Standardise the numerical columns; the fitted scaler is kept for reuse.
numerical_cols = ['Age', 'HoursPerWeek', 'CapitalGain', 'CapitalLoss']
scaler = StandardScaler().fit(X[numerical_cols])
X[numerical_cols] = scaler.transform(X[numerical_cols])

# Switch to NumPy arrays and cut them into contiguous per-client shards.
X = X.values
y = y.values

N_CLIENTS = 5
X_clients = np.array_split(X, N_CLIENTS)
y_clients = np.array_split(y, N_CLIENTS)

class ImprovedFLModel(nn.Module):
    """Two-hidden-layer network (64 then 32 units) with a sigmoid output unit.

    Each hidden stage is Linear -> BatchNorm1d -> ReLU -> Dropout(p=0.3); the
    last Linear layer maps to one value per row, which is passed through a
    sigmoid.
    """

    def __init__(self, input_size):
        """Build the layers for inputs that have ``input_size`` features."""
        super().__init__()
        # Layers are registered in this order, which is also the order of the
        # keys in state_dict().
        self.fc1 = nn.Linear(input_size, 64)
        self.bn1 = nn.BatchNorm1d(64)
        self.fc2 = nn.Linear(64, 32)
        self.bn2 = nn.BatchNorm1d(32)
        self.fc3 = nn.Linear(32, 1)
        self.dropout = nn.Dropout(0.3)

    def forward(self, x):
        """Run the two hidden stages, then the sigmoid output layer."""
        hidden_stages = ((self.fc1, self.bn1), (self.fc2, self.bn2))
        for linear, batch_norm in hidden_stages:
            x = self.dropout(torch.relu(batch_norm(linear(x))))
        return torch.sigmoid(self.fc3(x))

# Number of feature columns in the prepared matrix.
input_size = X.shape[1]

# One independently initialised global model per strategy label, created in
# this order.
global_models = {
    method_name: ImprovedFLModel(input_size)
    for method_name in ("FedAdam", "FedProx", "FedOpt")
}

# The model outputs sigmoid probabilities, hence plain BCELoss.
criterion = nn.BCELoss()
LEARNING_RATE = 1e-3

def train_local_model(model, X_train, y_train, method, local_epochs=10):
    """Train a fresh copy of ``model`` on one client's data and return its weights.

    A new ``ImprovedFLModel(input_size)`` is created and loaded with
    ``model.state_dict()``, so ``model`` itself is never modified. The copy is
    then trained with ``local_epochs`` full-batch steps using the module-level
    ``criterion`` and ``LEARNING_RATE``.

    Within this function ``method`` only selects the local optimizer:

    * ``"FedAdam"`` -> ``optim.Adam``
    * ``"FedProx"`` -> ``optim.SGD`` with ``momentum=0.9``
    * ``"FedOpt"``  -> ``optim.AdamW``

    NOTE(review): no proximal term is added to the loss for ``"FedProx"``; the
    three methods differ here only in the optimizer. Confirm this is intended.

    Args:
        model: Module whose ``state_dict()`` provides the starting weights.
        X_train: Client features; converted to a float32 tensor.
        y_train: Client labels; converted to a float32 column tensor.
        method: One of ``"FedAdam"``, ``"FedProx"`` or ``"FedOpt"``.
        local_epochs: Number of full-batch optimisation steps (default 10).

    Returns:
        The ``state_dict()`` of the locally trained copy.

    Raises:
        ValueError: If ``method`` is not one of the supported names.
    """
    optimizer_factories = {
        "FedAdam": lambda params: optim.Adam(params, lr=LEARNING_RATE),
        "FedProx": lambda params: optim.SGD(params, lr=LEARNING_RATE, momentum=0.9),
        "FedOpt": lambda params: optim.AdamW(params, lr=LEARNING_RATE),
    }
    # Fail fast with a clear message instead of hitting an unbound
    # ``local_optimizer`` later on.
    if method not in optimizer_factories:
        raise ValueError(
            f"Unknown method {method!r}; expected one of {sorted(optimizer_factories)}"
        )

    local_model = ImprovedFLModel(input_size)
    local_model.load_state_dict(model.state_dict())
    local_optimizer = optimizer_factories[method](local_model.parameters())

    X_train = torch.tensor(X_train, dtype=torch.float32)
    # BCELoss needs targets with the same (rows, 1) shape as the model output.
    y_train = torch.tensor(y_train, dtype=torch.float32).view(-1, 1)

    for _ in range(local_epochs):
        local_optimizer.zero_grad()
        output = local_model(X_train)
        loss = criterion(output, y_train)
        loss.backward()
        local_optimizer.step()

    return local_model.state_dict()

def aggregate_models(global_model, local_models, client_weights=None):
    """Average client state dicts and load the result into ``global_model``.

    Every key of ``global_model.state_dict()`` is looked up in each entry of
    ``local_models``, cast to float, stacked, and reduced along the client
    axis. The reduced tensors are loaded back with ``load_state_dict``.

    Args:
        global_model: Module that is updated in place.
        local_models: Non-empty sequence of state dicts, each containing every
            key of ``global_model.state_dict()``.
        client_weights: Optional sequence with one non-negative number per
            client (for example the client's sample count). When given, a
            weighted average normalised by the sum of the weights is used;
            when omitted, a plain mean is taken.

    Raises:
        ValueError: If ``local_models`` is empty, or ``client_weights`` has the
            wrong length, a negative entry, or a non-positive sum.
    """
    if len(local_models) == 0:
        raise ValueError("aggregate_models needs at least one local state dict")

    coeffs = None
    if client_weights is not None:
        coeffs = torch.as_tensor(client_weights, dtype=torch.float32).flatten()
        if coeffs.numel() != len(local_models):
            raise ValueError(
                "client_weights must have one entry per local model "
                f"(got {coeffs.numel()} for {len(local_models)} models)"
            )
        if bool((coeffs < 0).any()) or float(coeffs.sum()) <= 0.0:
            raise ValueError("client_weights must be non-negative with a positive sum")
        coeffs = coeffs / coeffs.sum()

    global_weights = {}
    for key in global_model.state_dict():
        stacked = torch.stack([state[key].float() for state in local_models])
        if coeffs is None:
            global_weights[key] = stacked.mean(0)
        else:
            # Reshape to (clients, 1, 1, ...) so the weights broadcast over
            # whatever shape this entry has.
            shape = (-1,) + (1,) * (stacked.dim() - 1)
            global_weights[key] = (stacked * coeffs.view(shape)).sum(0)

    global_model.load_state_dict(global_weights)

NUM_GLOBAL_ROUNDS = 50

# ---------------------------------------------------------------------------
# Train each strategy's global model, one strategy after the other.
# ---------------------------------------------------------------------------
for method, fl_model in global_models.items():
    print(f"\n🔹 Running {method}...")
    for round_num in range(NUM_GLOBAL_ROUNDS):
        # Clients are visited in index order; each returns its trained weights.
        local_models = [
            train_local_model(fl_model, X_clients[i], y_clients[i], method)
            for i in range(N_CLIENTS)
        ]
        aggregate_models(fl_model, local_models)
        print(f"Round {round_num + 1}/{NUM_GLOBAL_ROUNDS} completed for {method}.")

# ---------------------------------------------------------------------------
# Build the evaluation tensors with the encoders and scaler fitted earlier.
# ---------------------------------------------------------------------------
df_test = pd.read_csv("synthetic_data.csv")

y_test = df_test["IncomeClass"]
features_test = df_test.drop(columns=["IncomeClass"])

for col in categorical_cols:
    features_test[col] = label_encoders[col].transform(features_test[col])
features_test[numerical_cols] = scaler.transform(features_test[numerical_cols])

X_test = torch.tensor(features_test.values, dtype=torch.float32)
y_test = y_test.values

# ---------------------------------------------------------------------------
# Score every global model and collect one result row per strategy.
# ---------------------------------------------------------------------------
metrics_file = "fl_model_metrics.csv"
# Only emit a header line when the metrics file does not exist yet.
write_header = not os.path.exists(metrics_file)

results = []

for method, model in global_models.items():
    model.eval()
    with torch.no_grad():
        y_pred_proba = model(X_test).numpy().flatten()
    # Probabilities of at least 0.5 are labelled as class 1.
    y_pred = (y_pred_proba >= 0.5).astype(int)

    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred)
    recall = recall_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred)
    roc_auc = roc_auc_score(y_test, y_pred_proba)

    results.append([method, accuracy, precision, recall, f1, roc_auc])

    print(f"\n📊 **{method} Performance**")
    print(f"✅ Accuracy: {accuracy:.4f}")
    print(f"✅ Precision: {precision:.4f}")
    print(f"✅ Recall: {recall:.4f}")
    print(f"✅ F1-score: {f1:.4f}")
    print(f"✅ ROC-AUC: {roc_auc:.4f}")

metrics_df = pd.DataFrame(
    results,
    columns=["Method", "Accuracy", "Precision", "Recall", "F1-score", "ROC-AUC"],
)
# Append so rows already present in the file are preserved.
metrics_df.to_csv(metrics_file, mode='a', header=write_header, index=False)

print("Metrics appended to fl_model_metrics.csv")


🔹 Running FedAdam...
Round 1/50 completed for FedAdam.
Round 2/50 completed for FedAdam.
Round 3/50 completed for FedAdam.
Round 4/50 completed for FedAdam.
Round 5/50 completed for FedAdam.
Round 6/50 completed for FedAdam.
Round 7/50 completed for FedAdam.
Round 8/50 completed for FedAdam.
Round 9/50 completed for FedAdam.
Round 10/50 completed for FedAdam.
Round 11/50 completed for FedAdam.
Round 12/50 completed for FedAdam.
Round 13/50 completed for FedAdam.
Round 14/50 completed for FedAdam.
Round 15/50 completed for FedAdam.
Round 16/50 completed for FedAdam.
Round 17/50 completed for FedAdam.
Round 18/50 completed for FedAdam.
Round 19/50 completed for FedAdam.
Round 20/50 completed for FedAdam.
Round 21/50 completed for FedAdam.
Round 22/50 completed for FedAdam.
Round 23/50 completed for FedAdam.
Round 24/50 completed for FedAdam.
Round 25/50 completed for FedAdam.
Round 26/50 completed for FedAdam.
Round 27/50 completed for FedAdam.
Round 28/50 completed for FedAdam.
Round 2