In [6]:
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.metrics import confusion_matrix, f1_score
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

# Load data from output.csv
data = pd.read_csv('../output/output.csv')

# Split features and labels
feature_df = data.drop(data.columns[:2], axis=1)
X = np.asarray(feature_df)
y = np.asarray(data['pii_exist'])

# Standardize features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Split data into train and test sets for global model
X_train_global, X_test_global, y_train_global, y_test_global = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

# Convert labels to one-hot encoding for global model
num_classes = len(np.unique(y))
y_train_one_hot_global = tf.one_hot(y_train_global, depth=num_classes).numpy()
y_test_one_hot_global = tf.one_hot(y_test_global, depth=num_classes).numpy()

# Define SVM model for global training
global_model = tf.keras.Sequential([
    tf.keras.layers.Dense(num_classes, activation='softmax')
])

# Compile the global model
global_model.compile(optimizer='adam',
                     loss='categorical_hinge',
                     metrics=['accuracy'])

# Define a function for training on each client's data
def train_on_client(X, y, model):
    model.fit(X, y, epochs=10, batch_size=32, verbose=0)
    return model

# Federated learning loop
for _ in range(10):  # 10 rounds of federated learning
    # Train the global model on aggregated data from all clients
    global_model = train_on_client(X_train_global, y_train_one_hot_global, global_model)
    
    # Client-specific training
    for _ in range(5):  # 5 rounds of training for each client
        # Client 1 trains on its data
        model = train_on_client(X_train, y_train_one_hot, global_model)

        # Client 2 trains on its data
        model = train_on_client(X_test, y_test_one_hot, global_model)

# Predict classes using the global model
y_pred_global = np.argmax(global_model.predict(X_test_global), axis=1)

# Calculate confusion matrix and F1 score for the global model
conf_matrix_global = confusion_matrix(y_test_global, y_pred_global)
f1_global = f1_score(y_test_global, y_pred_global, average='weighted')

print("Global Model Confusion Matrix:")
print(conf_matrix_global)
print("Global Model F1 Score:", f1_global)

# Predict classes for individual clients
y_pred_client1 = np.argmax(global_model.predict(X_test), axis=1)
y_pred_client2 = np.argmax(global_model.predict(X_test), axis=1)

# Calculate confusion matrix and F1 score for individual clients
conf_matrix_client1 = confusion_matrix(y_test, y_pred_client1)
f1_client1 = f1_score(y_test, y_pred_client1, average='weighted')

conf_matrix_client2 = confusion_matrix(y_test, y_pred_client2)
f1_client2 = f1_score(y_test, y_pred_client2, average='weighted')

print("\nClient 1 Model Confusion Matrix:")
print(conf_matrix_client1)
print("Client 1 Model F1 Score:", f1_client1)

print("\nClient 2 Model Confusion Matrix:")
print(conf_matrix_client2)
print("Client 2 Model F1 Score:", f1_client2)

[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step
Global Model Confusion Matrix:
[[254  11   2   3]
 [  0  32   0   0]
 [  0   1  56   0]
 [  0   0   0   3]]
Global Model F1 Score: 0.9563981687573293
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step 
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step

Client 1 Model Confusion Matrix:
[[254  11   2   3]
 [  0  32   0   0]
 [  0   1  56   0]
 [  0   0   0   3]]
Client 1 Model F1 Score: 0.9563981687573293

Client 2 Model Confusion Matrix:
[[254  11   2   3]
 [  0  32   0   0]
 [  0   1  56   0]
 [  0   0   0   3]]
Client 2 Model F1 Score: 0.9563981687573293
