In [None]:
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.metrics import confusion_matrix, f1_score
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

# Load data from output.csv

data = pd.read_csv('../output/output.csv')

# Split features and labels
feature_df = data.drop(data.columns[:2], axis=1)
X = np.asarray(feature_df)
y = np.asarray(data['pii_exist'])

# Standardize features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Split data into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)
Xg_train, Xg_test, yg_train, yg_test = train_test_split(X_scaled, y, test_size=0.2, random_state=101)


# Convert labels to one-hot encoding
num_classes = len(np.unique(y))
y_train_one_hot = tf.one_hot(y_train, depth=num_classes).numpy()
y_test_one_hot = tf.one_hot(y_test, depth=num_classes).numpy()

# Define privacy parameters
epsilon = 1000  # Privacy budget
delta = 1e-6  # Desired overall privacy failure probability

# Define SVM model
model = tf.keras.Sequential([
    tf.keras.layers.Dense(num_classes, activation='softmax')
])

# Compile the model
model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])

# Define a function for training on each client's data with differential privacy
def train_on_client_dp(X, y, model, epsilon, delta):
    # Compute the scale parameter for Gaussian noise
    delta_prime = delta / (2 * len(X) / 32)  # Assuming batch size of 32
    c = np.sqrt(2 * np.log(1.25 / delta_prime))
    sensitivity = 2 * c
    sigma = c * sensitivity / epsilon


    model.compile(optimizer='adam',
                  loss='binary_crossentropy',
                  metrics=['accuracy'])
    model.fit(X, y, epochs=10, batch_size=32, verbose=0)

    # Add noise to the gradients
    for layer in model.layers:
        if isinstance(layer, tf.keras.layers.Dense):
            for weight in layer.trainable_variables:
                noise = tf.random.normal(shape=weight.shape, stddev=sigma)
                weight.assign_add(noise)

    return model

# Federated learning loop with differential privacy
global_model = tf.keras.models.clone_model(model)  # Create a copy of the original model
noclient = 10
for i in range(noclient):  # 20 clients
    # Divide the training data into 10 parts
    start_index = int(i * len(X_train) / noclient)
    end_index = int((i + 1) * len(X_train) / noclient)
    X_client_train = X_train[start_index:end_index]
    y_client_train = y_train_one_hot[start_index:end_index]

    # Train client model on its data with differential privacy
    client_model = train_on_client_dp(X_client_train, y_client_train, global_model, epsilon, delta)

    # Aggregate weights of the client model onto the global model
    for global_layer, client_layer in zip(global_model.layers, client_model.layers):
        global_layer_weights = global_layer.get_weights()
        client_layer_weights = client_layer.get_weights()
        aggregated_weights = [(w1 + w2) / 2 for w1, w2 in zip(global_layer_weights, client_layer_weights)]
        global_layer.set_weights(aggregated_weights)

    # Evaluate client model on the test set
    y_pred = np.argmax(client_model.predict(X_test), axis=1)
    yg_pred = np.argmax(global_model.predict(Xg_test), axis=1)
    f1 = f1_score(y_test, y_pred, average='weighted')
    f1g = f1_score(yg_test, yg_pred, average='weighted')
    print("Client", i+1, "F1 Score:", f1,"F1 Global",f1g)


# Predict classes using the global model
y_pred = np.argmax(global_model.predict(X_test), axis=1)

# Calculate confusion matrix and F1 score
conf_matrix = confusion_matrix(y_test, y_pred)
f1 = f1_score(y_test, y_pred, average='weighted')

print("Confusion Matrix:")
print(conf_matrix)
print("F1 Score:", f1)


In [8]:
#final with epoch 1000 f1score: 0.95
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.metrics import confusion_matrix, f1_score
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

# Load data from output.csv

data = pd.read_csv('../output/output.csv')

# Split features and labels
feature_df = data.drop(data.columns[:2], axis=1)
X = np.asarray(feature_df)
y = np.asarray(data['pii_exist'])

# Standardize features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Split data into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)
Xg_train, Xg_test, yg_train, yg_test = train_test_split(X_scaled, y, test_size=0.2, random_state=101)


# Convert labels to one-hot encoding
num_classes = len(np.unique(y))
y_train_one_hot = tf.one_hot(y_train, depth=num_classes).numpy()
y_test_one_hot = tf.one_hot(y_test, depth=num_classes).numpy()

# Define privacy parameters
epsilon = 100  # Privacy budget
delta = 1e-2  # Desired overall privacy failure probability

# Create a simple MLP model for classification
model = tf.keras.Sequential([
    tf.keras.layers.Dense(num_classes, activation='softmax')
])

# Compile the model
model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])

# Define a function for training on each client's data with differential privacy
def train_on_client_dp(X, y, model, epsilon, delta):
    # Compute the scale parameter for Gaussian noise
    delta_prime = delta / (2 * len(X) / 32)  # Assuming batch size of 32
    c = np.sqrt(2 * np.log(1.25 / delta_prime))
    sensitivity = 2 * c
    sigma = c * sensitivity / epsilon


    model.compile(optimizer='adam',
                  loss='binary_crossentropy',
                  metrics=['accuracy'])
    model.fit(X, y, epochs=10, batch_size=32, verbose=0)

    # Add noise to the gradients
    for layer in model.layers:
        if isinstance(layer, tf.keras.layers.Dense):
            for weight in layer.trainable_variables:
                noise = tf.random.normal(shape=weight.shape, stddev=sigma)
                weight.assign_add(noise)

    return model

# Federated learning loop with differential privacy
global_model = tf.keras.models.clone_model(model)  # Create a copy of the original model
noclient = 10
for i in range(noclient):  # 20 clients
    # Divide the training data into 10 parts
    start_index = int(i * len(X_train) / noclient)
    end_index = int((i + 1) * len(X_train) / noclient)
    X_client_train = X_train[start_index:end_index]
    y_client_train = y_train_one_hot[start_index:end_index]

    # Train client model on its data with differential privacy
    client_model = train_on_client_dp(X_client_train, y_client_train, global_model, epsilon, delta)

    # Aggregate weights of the client model onto the global model
    for global_layer, client_layer in zip(global_model.layers, client_model.layers):
        global_layer_weights = global_layer.get_weights()
        client_layer_weights = client_layer.get_weights()
        aggregated_weights = [(w1 + w2) / 2 for w1, w2 in zip(global_layer_weights, client_layer_weights)]
        global_layer.set_weights(aggregated_weights)

    # Evaluate client model on the test set
    y_pred = np.argmax(client_model.predict(X_test), axis=1)
    yg_pred = np.argmax(global_model.predict(Xg_test), axis=1)
    f1 = f1_score(y_test, y_pred, average='weighted')
    f1g = f1_score(yg_test, yg_pred, average='weighted')
    print("Client", i+1, "F1 Score:", f1,"F1 Global",f1g)


# Predict classes using the global model
y_pred = np.argmax(global_model.predict(X_test), axis=1)

# Calculate confusion matrix and F1 score
conf_matrix = confusion_matrix(y_test, y_pred)
f1 = f1_score(y_test, y_pred, average='weighted')

print("Confusion Matrix:")
print(conf_matrix)
print("F1 Score:", f1)


Client 1 F1 Score: 0.6956970621783001 F1 Global 0.7146790608953445
Client 2 F1 Score: 0.7858105034723117 F1 Global 0.8018835229129663
Client 3 F1 Score: 0.7504240916480105 F1 Global 0.7534323367427223
Client 4 F1 Score: 0.774207126125151 F1 Global 0.787744786822341
Client 5 F1 Score: 0.8792817367440144 F1 Global 0.8934167607519131
Client 6 F1 Score: 0.7885060877450318 F1 Global 0.8035898040139821
Client 7 F1 Score: 0.9041882414300652 F1 Global 0.9291423985691085
Client 8 F1 Score: 0.8980957809299891 F1 Global 0.9149100893221532
Client 9 F1 Score: 0.8962039282159039 F1 Global 0.9283524685363949
Client 10 F1 Score: 0.9177294824134973 F1 Global 0.926618422804901
Confusion Matrix:
[[337  26]
 [ 15 114]]
F1 Score: 0.9177294824134973


In [None]:
# https://colab.research.google.com/drive/#create=1&folderId=10t9mdGTqNlaVHNGIIp281bcfh30cdPOZ

In [None]:
#bina differential privacy ke : f1score: 0.95
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.metrics import confusion_matrix, f1_score
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

# Load data from output.csv

data = pd.read_csv('../output/output.csv')

# Split features and labels
feature_df = data.drop(data.columns[:2], axis=1)
X = np.asarray(feature_df)
y = np.asarray(data['pii_exist'])

# Standardize features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Split data into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)
Xg_train, Xg_test, yg_train, yg_test = train_test_split(X_scaled, y, test_size=0.1, random_state=101)


# Convert labels to one-hot encoding
num_classes = len(np.unique(y))
y_train_one_hot = tf.one_hot(y_train, depth=num_classes).numpy()
y_test_one_hot = tf.one_hot(y_test, depth=num_classes).numpy()

# Define privacy parameters

# Define SVM model
model = tf.keras.Sequential([
    tf.keras.layers.Dense(num_classes, activation='softmax')
])

# Compile the model
model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])

# Define a function for training on each client's data with differential privacy
def train_on_client_dp(X, y, model):



    model.compile(optimizer='adam',
                  loss='binary_crossentropy',
                  metrics=['accuracy'])
    model.fit(X, y, epochs=10, batch_size=32, verbose=0)


    return model

# Federated learning loop with differential privacy
global_model = tf.keras.models.clone_model(model)  # Create a copy of the original model
noclient = 10
for i in range(noclient):  # 20 clients
    # Divide the training data into 10 parts
    start_index = int(i * len(X_train) / noclient)
    end_index = int((i + 1) * len(X_train) / noclient)
    X_client_train = X_train[start_index:end_index]
    y_client_train = y_train_one_hot[start_index:end_index]

    # Train client model on its data with differential privacy
    client_model = train_on_client_dp(X_client_train, y_client_train, global_model)

    # Aggregate weights of the client model onto the global model
    for global_layer, client_layer in zip(global_model.layers, client_model.layers):
        global_layer_weights = global_layer.get_weights()
        client_layer_weights = client_layer.get_weights()
        aggregated_weights = [(w1 + w2) / 2 for w1, w2 in zip(global_layer_weights, client_layer_weights)]
        global_layer.set_weights(aggregated_weights)

    # Evaluate client model on the test set
    y_pred = np.argmax(client_model.predict(X_test), axis=1)
    yg_pred = np.argmax(global_model.predict(Xg_test), axis=1)
    f1 = f1_score(y_test, y_pred, average='weighted')
    f1g = f1_score(yg_test, yg_pred, average='weighted')
    print("Client", i+1, "F1 Score:", f1,"F1 Global",f1g)


# Predict classes using the global model
y_pred = np.argmax(global_model.predict(X_test), axis=1)

# Calculate confusion matrix and F1 score
conf_matrix = confusion_matrix(y_test, y_pred)
f1 = f1_score(y_test, y_pred, average='weighted')
print("Confusion Matrix:")

print(conf_matrix)

print("F1 Score:", f1)

In [7]:
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.metrics import confusion_matrix, f1_score
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

# Load data from output.csv
data = pd.read_csv('/media/jay/Windows/Users/jay/Downloads/nit_research/output/output_1.csv')

X = np.asarray(data.drop(data.columns[:2], axis=1))
y = np.asarray(data['pii_exist'])

scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Split data into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)
Xg_train, Xg_test, yg_train, yg_test = train_test_split(X_scaled, y, test_size=0.2, random_state=101)

# Convert labels to one-hot encoding
num_classes = len(np.unique(y))
y_train_one_hot = tf.one_hot(y_train, depth=num_classes).numpy()
y_test_one_hot = tf.one_hot(y_test, depth=num_classes).numpy()

# Define privacy parameters
epsilon = 100  # Privacy budget
delta = 1e-2   # Desired overall privacy failure probability

# Create a simple MLP model for classification
model = tf.keras.Sequential([
    tf.keras.layers.Dense(num_classes, activation='softmax')
])

# Define a function for training on each client's data with differential privacy
def train_on_client_dp(X, y, epsilon, delta):
    # Compute the scale parameter for Gaussian noise
    delta_prime = delta / (2 * len(X) / 32)  # Assuming batch size of 32
    c = np.sqrt(2 * np.log(1.25 / delta_prime))
    sensitivity = 2 * c
    sigma = c * sensitivity / epsilon

    model.compile(optimizer='adam',
                  loss='binary_crossentropy',
                  metrics=['accuracy'])
    model.fit(X, y, epochs=10, batch_size=32, verbose=0)

    # Add noise to the gradients
    for layer in model.layers:
        if isinstance(layer, tf.keras.layers.Dense):
            for weight in layer.trainable_variables:
                noise = tf.random.normal(shape=weight.shape, stddev=sigma)
                weight.assign_add(noise)

    return model

# Define the global aggregator function
def global_aggregator(global_model, client_model):
    """
    Aggregate weights of the client model onto the global model.

    Parameters:
        global_model (tf.keras.Model): Global model to be updated.
        client_model (tf.keras.Model): Client model whose weights are to be aggregated onto the global model.

    Returns:
        tf.keras.Model: Updated global model with aggregated weights.
    """
    for global_layer, client_layer in zip(global_model.layers, client_model.layers):
        global_layer_weights = global_layer.get_weights()
        client_layer_weights = client_layer.get_weights()
        aggregated_weights = [(w1 + w2) / 2 for w1, w2 in zip(global_layer_weights, client_layer_weights)]
        global_layer.set_weights(aggregated_weights)

    return global_model

# Federated learning loop with differential privacy
global_model = tf.keras.models.clone_model(model)  # Create a copy of the original model
noclient = 10
for i in range(noclient):  # 10 clients
    # Divide the training data into parts for each client
    start_index = int(i * len(X_train) / noclient)
    end_index = int((i + 1) * len(X_train) / noclient)
    X_client_train = X_train[start_index:end_index]
    y_client_train = y_train_one_hot[start_index:end_index]

    # Train client model on its data with differential privacy
    client_model = train_on_client_dp(X_client_train, y_client_train, epsilon, delta)

    # Aggregate weights of the client model onto the global model
    global_model = global_aggregator(global_model, client_model)

    # Evaluate client model on the test set
    y_pred = np.argmax(client_model.predict(X_test), axis=1)
    yg_pred = np.argmax(global_model.predict(Xg_test), axis=1)
    f1 = f1_score(y_test, y_pred, average='weighted')
    f1g = f1_score(yg_test, yg_pred, average='weighted')
    print("Client", i+1, "F1 Score:", f1, "F1 Global", f1g)

# Predict classes using the global model
y_pred = np.argmax(global_model.predict(X_test), axis=1)

# Calculate confusion matrix and F1 score
conf_matrix = confusion_matrix(y_test, y_pred)
f1 = f1_score(y_test, y_pred, average='weighted')

print("Confusion Matrix:")
print(conf_matrix)
print("F1 Score:", f1)


Client 1 F1 Score: 0.6711565714859437 F1 Global 0.7115745284947513
Client 2 F1 Score: 0.792791104273026 F1 Global 0.8061065740623753
Client 3 F1 Score: 0.8533691652049389 F1 Global 0.8586992927201155
Client 4 F1 Score: 0.8629762502690679 F1 Global 0.8710046857957257
Client 5 F1 Score: 0.8863817043354489 F1 Global 0.8813063044885525
Client 6 F1 Score: 0.8867901562898303 F1 Global 0.8971321305567453
Client 7 F1 Score: 0.7577639608121605 F1 Global 0.7773007731873723
Client 8 F1 Score: 0.7693367956861369 F1 Global 0.7937781319846768
Client 9 F1 Score: 0.9020461955897532 F1 Global 0.9305744443752815
Client 10 F1 Score: 0.9044005068800167 F1 Global 0.9096473735212135
Confusion Matrix:
[[238  32]
 [  4  88]]
F1 Score: 0.9044005068800167
