In [None]:
from google.colab import drive
drive.mount('/content/drive')
filepath = '/content/drive/MyDrive/Colab Notebooks/creditcard.csv'

import os
import torch
import numpy as np
import pandas as pd
from torch.utils.data import Dataset, DataLoader, random_split
import torch.nn as nn
import torch.optim as optim
from torch.cuda.amp import autocast, GradScaler
from sklearn.preprocessing import StandardScaler  # For normalization

# FraudDetectionDataset Class
class FraudDetectionDataset(Dataset):
    def __init__(self, csv_file, transform=None):
        self.data = pd.read_csv(csv_file)

        # Check for NaN values in the dataset
        if self.data.isna().sum().sum() > 0:
            print("Warning: NaN values found in the dataset. Filling with column mean.")
            self.data.fillna(self.data.mean(), inplace=True)  # Handle NaN values by filling with mean

        # Normalize the input features
        self.scaler = StandardScaler()
        self.data.iloc[:, :-1] = self.scaler.fit_transform(self.data.iloc[:, :-1])

        self.transform = transform

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        sample = self.data.iloc[idx, :-1].values.astype(np.float32)
        label = self.data.iloc[idx, -1].astype(np.int64)

        if self.transform:
            sample = self.transform(sample)

        return sample, label


# Simple Feedforward Neural Network for Tabular Data
class FraudDetectionModel(nn.Module):
    def __init__(self, input_size, hidden_size, num_classes):
        super(FraudDetectionModel, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        out = self.fc1(x)
        out = self.relu(out)
        out = self.fc2(out)  # No softmax applied here, CrossEntropyLoss expects raw logits
        return out


# Server for Federated Learning
class Server:
    def __init__(self, model, clients, num_rounds, epochs, device):
        self.global_model = model.to(device)
        self.clients = clients
        self.num_rounds = num_rounds
        self.epochs = epochs  # Global number of epochs
        self.device = device

    def aggregate_weights(self, client_weights):
        global_weights = self.global_model.state_dict()
        for key in global_weights.keys():
            global_weights[key] = torch.stack([client_weights[i][key].float() for i in range(len(client_weights))]).mean(0)
        self.global_model.load_state_dict(global_weights)

    def distribute_and_train(self):
        for round_num in range(self.num_rounds):
            print(f"\nRound {round_num + 1}/{self.num_rounds}")

            global_weights = self.global_model.state_dict()
            client_weights = []

            for client in self.clients:
                client.set_weights(global_weights)
                client.train(self.epochs)  # Pass the global epochs here
                client_weights.append(client.get_weights())

            self.aggregate_weights(client_weights)
            accuracy = self.evaluate_global_model()
            print(f"Global Model Accuracy after round {round_num + 1}: {accuracy:.4f}")

    def evaluate_global_model(self):
        self.global_model.eval()
        correct, total = 0, 0
        test_loader = self.clients[0].test_loader
        with torch.no_grad():
            for data, labels in test_loader:
                data, labels = data.to(self.device), labels.to(self.device)
                outputs = self.global_model(data)
                _, preds = torch.max(outputs, 1)
                total += labels.size(0)
                correct += (preds == labels).sum().item()
        return correct / total


# Client for Federated Learning
class Client:
    def __init__(self, client_id, model, train_loader, test_loader, device, lr=0.001):  # Lowered learning rate
        self.client_id = client_id
        self.local_model = model.to(device)
        self.train_loader = train_loader
        self.test_loader = test_loader
        self.device = device
        self.criterion = nn.CrossEntropyLoss()
        self.optimizer = optim.Adam(self.local_model.parameters(), lr=lr)
        self.scaler = GradScaler()

    def set_weights(self, global_weights):
        self.local_model.load_state_dict(global_weights)

    def get_weights(self):
        return self.local_model.state_dict()

    def train(self, epochs):
        self.local_model.train()
        for epoch in range(epochs):
            running_loss = 0.0
            for data, labels in self.train_loader:
                data, labels = data.to(self.device), labels.to(self.device)

                self.optimizer.zero_grad()

                # Mixed Precision Training
                with autocast():  # Automatically uses the current device (cuda or cpu)
                    outputs = self.local_model(data)
                    loss = self.criterion(outputs, labels)

                # Gradient clipping to avoid exploding gradients
                torch.nn.utils.clip_grad_norm_(self.local_model.parameters(), max_norm=1.0)

                self.scaler.scale(loss).backward()
                self.scaler.step(self.optimizer)
                self.scaler.update()

                running_loss += loss.item()

            print(f"Client {self.client_id}: Epoch {epoch + 1}, Loss: {running_loss / len(self.train_loader)}")


# Function to split the dataset across clients
def split_dataset(dataset, num_clients):
    # Ensure the dataset can be split evenly among the clients
    client_datasets = random_split(dataset, [len(dataset) // num_clients] * (num_clients - 1) + [len(dataset) - len(dataset) // num_clients * (num_clients - 1)])
    return client_datasets


# Main function to start Federated Learning
def main():

    csv_file = filepath  # Update this path

    # Load the dataset
    dataset = FraudDetectionDataset(csv_file=csv_file)
    total_size = len(dataset)

    # Split the dataset into training and testing sets
    train_size = int(0.8 * total_size)
    test_size = total_size - train_size
    train_dataset, test_dataset = random_split(dataset, [train_size, test_size])

    # Create data loaders for train and test datasets
    train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True, num_workers=4)
    test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False, num_workers=4)

    # Number of clients
    num_clients = 3
    client_datasets = split_dataset(train_dataset, num_clients)
    client_loaders = [DataLoader(ds, batch_size=64, shuffle=True) for ds in client_datasets]

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    input_size = 30  # Number of features in the dataset
    hidden_size = 32
    num_classes = 2  # Binary classification (fraud or not)
    global_model = FraudDetectionModel(input_size=input_size, hidden_size=hidden_size, num_classes=num_classes)

    clients = [Client(client_id=i,
                      model=FraudDetectionModel(input_size=input_size, hidden_size=hidden_size, num_classes=num_classes),
                      train_loader=client_loaders[i],
                      test_loader=test_loader,
                      device=device)
               for i in range(num_clients)]

    global_epochs = 5
    server = Server(model=global_model, clients=clients, num_rounds=3, epochs=global_epochs, device=device)
    server.distribute_and_train()

if __name__ == '__main__':
    main()

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


  self.scaler = GradScaler()
  with autocast():  # Automatically uses the current device (cuda or cpu)



Round 1/3
Client 0: Epoch 1, Loss: 0.04676369330673161
Client 0: Epoch 2, Loss: 0.003725063718521729
Client 0: Epoch 3, Loss: 0.0030485085174082383
Client 0: Epoch 4, Loss: 0.002754307229240741
Client 0: Epoch 5, Loss: 0.002603594578931849
Client 1: Epoch 1, Loss: 0.047300018584249794
Client 1: Epoch 2, Loss: 0.0035447473208680465
Client 1: Epoch 3, Loss: 0.0027840336466809497
Client 1: Epoch 4, Loss: 0.002418792843294943
Client 1: Epoch 5, Loss: 0.002174602399059205
Client 2: Epoch 1, Loss: 0.04687716601945683
Client 2: Epoch 2, Loss: 0.00392890607635314
Client 2: Epoch 3, Loss: 0.0032473633788830483
Client 2: Epoch 4, Loss: 0.0028999687196304765
Client 2: Epoch 5, Loss: 0.0026505313175804943
Global Model Accuracy after round 1: 0.9994

Round 2/3
Client 0: Epoch 1, Loss: 0.0028142921340972714
Client 0: Epoch 2, Loss: 0.0024866437007498117
Client 0: Epoch 3, Loss: 0.0023394796862994676
Client 0: Epoch 4, Loss: 0.0021440347288102135
Client 0: Epoch 5, Loss: 0.001922866201869027
Client 

In [None]:
import torch
import numpy as np
from torch.distributions.normal import Normal

# Simulate secure aggregation using additive homomorphic encryption
class SecureAggregation:
    def __init__(self, num_clients):
        self.num_clients = num_clients
        self.shared_keys = self._generate_shared_keys()

    def _generate_shared_keys(self):
        # Generate pairwise shared keys for clients (for simplicity, we use random noise)
        keys = {}
        for i in range(self.num_clients):
            for j in range(i + 1, self.num_clients):
                keys[(i, j)] = torch.randn(1).item()  # Shared key between client i and j
                keys[(j, i)] = -keys[(i, j)]  # Symmetric key
        return keys

    def encrypt(self, client_id, weights):
        # Add noise to the weights using shared keys
        encrypted_weights = {}
        for key, value in weights.items():
            noise = torch.zeros_like(value)
            for other_client in range(self.num_clients):
                if other_client != client_id:
                    noise += self.shared_keys[(client_id, other_client)]
            encrypted_weights[key] = value + noise
        return encrypted_weights

    def decrypt(self, aggregated_weights):
        # Remove noise from the aggregated weights
        decrypted_weights = {}
        for key, value in aggregated_weights.items():
            decrypted_weights[key] = value / self.num_clients  # Average the weights
        return decrypted_weights


# Server for Federated Learning with Secure Aggregation
class Server:
    def __init__(self, model, clients, num_rounds, epochs, device):
        self.global_model = model.to(device)
        self.clients = clients
        self.num_rounds = num_rounds
        self.epochs = epochs
        self.device = device
        self.secure_aggregator = SecureAggregation(len(clients))

    def aggregate_weights(self, client_weights):
        # Aggregate encrypted weights
        global_weights = self.global_model.state_dict()
        for key in global_weights.keys():
            global_weights[key] = torch.stack([client_weights[i][key].float() for i in range(len(client_weights))]).sum(0)
        # Decrypt the aggregated weights
        global_weights = self.secure_aggregator.decrypt(global_weights)
        self.global_model.load_state_dict(global_weights)

    def distribute_and_train(self):
        for round_num in range(self.num_rounds):
            print(f"\nRound {round_num + 1}/{self.num_rounds}")

            global_weights = self.global_model.state_dict()
            client_weights = []

            for client in self.clients:
                client.set_weights(global_weights)
                client.train(self.epochs)
                # Encrypt client updates before sending to the server
                encrypted_weights = self.secure_aggregator.encrypt(client.client_id, client.get_weights())
                client_weights.append(encrypted_weights)

            self.aggregate_weights(client_weights)
            accuracy = self.evaluate_global_model()
            print(f"Global Model Accuracy after round {round_num + 1}: {accuracy:.4f}")

    def evaluate_global_model(self):
        self.global_model.eval()
        correct, total = 0, 0
        test_loader = self.clients[0].test_loader
        with torch.no_grad():
            for data, labels in test_loader:
                data, labels = data.to(self.device), labels.to(self.device)
                outputs = self.global_model(data)
                _, preds = torch.max(outputs, 1)
                total += labels.size(0)
                correct += (preds == labels).sum().item()
        return correct / total


# Main function to start Federated Learning with Secure Aggregation
def main():
    csv_file = filepath  # Update this path

    # Load the dataset
    dataset = FraudDetectionDataset(csv_file=csv_file)
    total_size = len(dataset)

    # Split the dataset into training and testing sets
    train_size = int(0.8 * total_size)
    test_size = total_size - train_size
    train_dataset, test_dataset = random_split(dataset, [train_size, test_size])

    # Create data loaders for train and test datasets
    train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True, num_workers=4)
    test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False, num_workers=4)

    # Number of clients
    num_clients = 3
    client_datasets = split_dataset(train_dataset, num_clients)
    client_loaders = [DataLoader(ds, batch_size=64, shuffle=True) for ds in client_datasets]

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    input_size = 30  # Number of features in the dataset
    hidden_size = 32
    num_classes = 2  # Binary classification (fraud or not)
    global_model = FraudDetectionModel(input_size=input_size, hidden_size=hidden_size, num_classes=num_classes)

    clients = [Client(client_id=i,
                      model=FraudDetectionModel(input_size=input_size, hidden_size=hidden_size, num_classes=num_classes),
                      train_loader=client_loaders[i],
                      test_loader=test_loader,
                      device=device)
               for i in range(num_clients)]

    global_epochs = 5
    server = Server(model=global_model, clients=clients, num_rounds=3, epochs=global_epochs, device=device)
    server.distribute_and_train()

if __name__ == '__main__':
    main()

  self.scaler = GradScaler()
  with autocast():  # Automatically uses the current device (cuda or cpu)



Round 1/3
Client 0: Epoch 1, Loss: 0.05722971495889008
Client 0: Epoch 2, Loss: 0.0039030594959942955
Client 0: Epoch 3, Loss: 0.003224790288608524
Client 0: Epoch 4, Loss: 0.0028488459233265203
Client 0: Epoch 5, Loss: 0.0025484486758175326
Client 1: Epoch 1, Loss: 0.05759777675809224
Client 1: Epoch 2, Loss: 0.0036292128419519653
Client 1: Epoch 3, Loss: 0.0027812597017304497
Client 1: Epoch 4, Loss: 0.0024666830519692487
Client 1: Epoch 5, Loss: 0.002256768293337066
Client 2: Epoch 1, Loss: 0.0586805955574337
Client 2: Epoch 2, Loss: 0.0040113353243765755
Client 2: Epoch 3, Loss: 0.003313665021617529
Client 2: Epoch 4, Loss: 0.003043548370806246
Client 2: Epoch 5, Loss: 0.0027485832033991553
Global Model Accuracy after round 1: 0.9995

Round 2/3
Client 0: Epoch 1, Loss: 0.0028214265132472374
Client 0: Epoch 2, Loss: 0.002502795950347759
Client 0: Epoch 3, Loss: 0.0022707345801365984
Client 0: Epoch 4, Loss: 0.0020817032800995207
Client 0: Epoch 5, Loss: 0.0019227849768663787
Client

In [None]:
from google.colab import drive
drive.mount('/content/drive')
filepath = '/content/drive/MyDrive/Colab Notebooks/creditcard.csv'

import os
import torch
import numpy as np
import pandas as pd
from torch.utils.data import Dataset, DataLoader, random_split
import torch.nn as nn
import torch.optim as optim
from torch.cuda.amp import autocast, GradScaler
from sklearn.preprocessing import StandardScaler
from opacus import PrivacyEngine  # Import Opacus for differential privacy

# FraudDetectionDataset Class
class FraudDetectionDataset(Dataset):
    def __init__(self, csv_file, transform=None):
        self.data = pd.read_csv(csv_file)

        # Check for NaN values in the dataset
        if self.data.isna().sum().sum() > 0:
            print("Warning: NaN values found in the dataset. Filling with column mean.")
            self.data.fillna(self.data.mean(), inplace=True)  # Handle NaN values by filling with mean

        # Normalize the input features
        self.scaler = StandardScaler()
        self.data.iloc[:, :-1] = self.scaler.fit_transform(self.data.iloc[:, :-1])

        self.transform = transform

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        sample = self.data.iloc[idx, :-1].values.astype(np.float32)
        label = self.data.iloc[idx, -1].astype(np.int64)

        if self.transform:
            sample = self.transform(sample)

        return sample, label


# Simple Feedforward Neural Network for Tabular Data
class FraudDetectionModel(nn.Module):
    def __init__(self, input_size, hidden_size, num_classes):
        super(FraudDetectionModel, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        out = self.fc1(x)
        out = self.relu(out)
        out = self.fc2(out)  # No softmax applied here, CrossEntropyLoss expects raw logits
        return out


# Server for Federated Learning
class Server:
    def __init__(self, model, clients, num_rounds, epochs, device):
        self.global_model = model.to(device)
        self.clients = clients
        self.num_rounds = num_rounds
        self.epochs = epochs  # Global number of epochs
        self.device = device

    def aggregate_weights(self, client_weights):
        global_weights = self.global_model.state_dict()
        for key in global_weights.keys():
            global_weights[key] = torch.stack([client_weights[i][key].float() for i in range(len(client_weights))]).mean(0)
        self.global_model.load_state_dict(global_weights)

    def distribute_and_train(self):
        for round_num in range(self.num_rounds):
            print(f"\nRound {round_num + 1}/{self.num_rounds}")

            global_weights = self.global_model.state_dict()
            client_weights = []

            for client in self.clients:
                client.set_weights(global_weights)
                client.train(self.epochs)  # Pass the global epochs here
                client_weights.append(client.get_weights())

            self.aggregate_weights(client_weights)
            accuracy = self.evaluate_global_model()
            print(f"Global Model Accuracy after round {round_num + 1}: {accuracy:.4f}")

    def evaluate_global_model(self):
        self.global_model.eval()
        correct, total = 0, 0
        test_loader = self.clients[0].test_loader
        with torch.no_grad():
            for data, labels in test_loader:
                data, labels = data.to(self.device), labels.to(self.device)
                outputs = self.global_model(data)
                _, preds = torch.max(outputs, 1)
                total += labels.size(0)
                correct += (preds == labels).sum().item()
        return correct / total


# Client for Federated Learning with Differential Privacy
class Client:
    def __init__(self, client_id, model, train_loader, test_loader, device, lr=0.001):
        self.client_id = client_id
        self.local_model = model.to(device)
        self.train_loader = train_loader
        self.test_loader = test_loader
        self.device = device
        self.criterion = nn.CrossEntropyLoss()
        self.optimizer = optim.Adam(self.local_model.parameters(), lr=lr)
        self.scaler = GradScaler()

        # Add Differential Privacy
        self.privacy_engine = PrivacyEngine()
        self.local_model, self.optimizer, self.train_loader = self.privacy_engine.make_private(
            module=self.local_model,
            optimizer=self.optimizer,
            data_loader=self.train_loader,
            noise_multiplier=1.0,  # Controls the amount of noise added
            max_grad_norm=1.0,  # Clipping norm for gradients
        )

    def set_weights(self, global_weights):
        self.local_model.load_state_dict(global_weights)

    def get_weights(self):
        return self.local_model.state_dict()

    def train(self, epochs):
        self.local_model.train()
        for epoch in range(epochs):
            running_loss = 0.0
            for data, labels in self.train_loader:
                data, labels = data.to(self.device), labels.to(self.device)

                self.optimizer.zero_grad()

                # Mixed Precision Training
                with autocast():  # Automatically uses the current device (cuda or cpu)
                    outputs = self.local_model(data)
                    loss = self.criterion(outputs, labels)

                self.scaler.scale(loss).backward()
                self.scaler.step(self.optimizer)
                self.scaler.update()

                running_loss += loss.item()

            print(f"Client {self.client_id}: Epoch {epoch + 1}, Loss: {running_loss / len(self.train_loader)}")


# Function to split the dataset across clients
def split_dataset(dataset, num_clients):
    # Ensure the dataset can be split evenly among the clients
    client_datasets = random_split(dataset, [len(dataset) // num_clients] * (num_clients - 1) + [len(dataset) - len(dataset) // num_clients * (num_clients - 1)])
    return client_datasets


# Main function to start Federated Learning
def main():
    csv_file = filepath  # Update this path

    # Load the dataset
    dataset = FraudDetectionDataset(csv_file=csv_file)
    total_size = len(dataset)

    # Split the dataset into training and testing sets
    train_size = int(0.8 * total_size)
    test_size = total_size - train_size
    train_dataset, test_dataset = random_split(dataset, [train_size, test_size])

    # Create data loaders for train and test datasets
    train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True, num_workers=4)
    test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False, num_workers=4)

    # Number of clients
    num_clients = 3
    client_datasets = split_dataset(train_dataset, num_clients)
    client_loaders = [DataLoader(ds, batch_size=64, shuffle=True) for ds in client_datasets]

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    input_size = 30  # Number of features in the dataset
    hidden_size = 64
    num_classes = 2  # Binary classification (fraud or not)
    global_model = FraudDetectionModel(input_size=input_size, hidden_size=hidden_size, num_classes=num_classes)

    clients = [Client(client_id=i,
                      model=FraudDetectionModel(input_size=input_size, hidden_size=hidden_size, num_classes=num_classes),
                      train_loader=client_loaders[i],
                      test_loader=test_loader,
                      device=device)
               for i in range(num_clients)]

    global_epochs = 20
    server = Server(model=global_model, clients=clients, num_rounds=10, epochs=global_epochs, device=device)
    server.distribute_and_train()

if __name__ == '__main__':
    main()

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


ModuleNotFoundError: No module named 'opacus'

In [None]:
import torch
import numpy as np
from torch.distributions.normal import Normal

# Differential Privacy Parameters
CLIP_VALUE = 1.0  # Gradient clipping value
NOISE_SCALE = 0.1  # Scale of Gaussian noise
DELTA = 1e-5  # Privacy parameter (δ)
EPSILON = 1.0  # Privacy parameter (ε)

# Secure Aggregation with Differential Privacy
class SecureAggregation:
    def __init__(self, num_clients):
        self.num_clients = num_clients
        self.shared_keys = self._generate_shared_keys()

    def _generate_shared_keys(self):
        keys = {}
        for i in range(self.num_clients):
            for j in range(i + 1, self.num_clients):
                keys[(i, j)] = torch.randn(1).item()  # Shared key between client i and j
                keys[(j, i)] = -keys[(i, j)]  # Symmetric key
        return keys

    def encrypt(self, client_id, weights):
        encrypted_weights = {}
        for key, value in weights.items():
            noise = torch.zeros_like(value)
            for other_client in range(self.num_clients):
                if other_client != client_id:
                    noise += self.shared_keys[(client_id, other_client)]
            encrypted_weights[key] = value + noise
        return encrypted_weights

    def decrypt(self, aggregated_weights):
        decrypted_weights = {}
        for key, value in aggregated_weights.items():
            decrypted_weights[key] = value / self.num_clients  # Average the weights
        return decrypted_weights


# Client for Federated Learning with Differential Privacy
class Client:
    def __init__(self, client_id, model, train_loader, test_loader, device, lr=0.001):
        self.client_id = client_id
        self.local_model = model.to(device)
        self.train_loader = train_loader
        self.test_loader = test_loader
        self.device = device
        self.criterion = nn.CrossEntropyLoss()
        self.optimizer = optim.Adam(self.local_model.parameters(), lr=lr)
        self.scaler = GradScaler()

    def set_weights(self, global_weights):
        self.local_model.load_state_dict(global_weights)

    def get_weights(self):
        return self.local_model.state_dict()

    def train(self, epochs):
        self.local_model.train()
        for epoch in range(epochs):
            running_loss = 0.0
            for data, labels in self.train_loader:
                data, labels = data.to(self.device), labels.to(self.device)

                self.optimizer.zero_grad()

                # Mixed Precision Training
                with autocast():
                    outputs = self.local_model(data)
                    loss = self.criterion(outputs, labels)

                # Gradient clipping for differential privacy
                torch.nn.utils.clip_grad_norm_(self.local_model.parameters(), max_norm=CLIP_VALUE)

                # Add Gaussian noise for differential privacy
                for param in self.local_model.parameters():
                    if param.grad is not None:
                        noise = torch.normal(mean=0.0, std=NOISE_SCALE, size=param.grad.shape, device=self.device)
                        param.grad += noise

                self.scaler.scale(loss).backward()
                self.scaler.step(self.optimizer)
                self.scaler.update()

                running_loss += loss.item()

            print(f"Client {self.client_id}: Epoch {epoch + 1}, Loss: {running_loss / len(self.train_loader)}")


# Server for Federated Learning with Secure Aggregation and Differential Privacy
class Server:
    def __init__(self, model, clients, num_rounds, epochs, device):
        self.global_model = model.to(device)
        self.clients = clients
        self.num_rounds = num_rounds
        self.epochs = epochs
        self.device = device
        self.secure_aggregator = SecureAggregation(len(clients))

    def aggregate_weights(self, client_weights):
        global_weights = self.global_model.state_dict()
        for key in global_weights.keys():
            global_weights[key] = torch.stack([client_weights[i][key].float() for i in range(len(client_weights))]).sum(0)
        global_weights = self.secure_aggregator.decrypt(global_weights)
        self.global_model.load_state_dict(global_weights)

    def distribute_and_train(self):
        for round_num in range(self.num_rounds):
            print(f"\nRound {round_num + 1}/{self.num_rounds}")

            global_weights = self.global_model.state_dict()
            client_weights = []

            for client in self.clients:
                client.set_weights(global_weights)
                client.train(self.epochs)
                encrypted_weights = self.secure_aggregator.encrypt(client.client_id, client.get_weights())
                client_weights.append(encrypted_weights)

            self.aggregate_weights(client_weights)
            accuracy = self.evaluate_global_model()
            print(f"Global Model Accuracy after round {round_num + 1}: {accuracy:.4f}")

    def evaluate_global_model(self):
        self.global_model.eval()
        correct, total = 0, 0
        test_loader = self.clients[0].test_loader
        with torch.no_grad():
            for data, labels in test_loader:
                data, labels = data.to(self.device), labels.to(self.device)
                outputs = self.global_model(data)
                _, preds = torch.max(outputs, 1)
                total += labels.size(0)
                correct += (preds == labels).sum().item()
        return correct / total


# Main function to start Federated Learning with Secure Aggregation and Differential Privacy
def main():
    csv_file = filepath  # Update this path

    # Load the dataset
    dataset = FraudDetectionDataset(csv_file=csv_file)
    total_size = len(dataset)

    # Split the dataset into training and testing sets
    train_size = int(0.8 * total_size)
    test_size = total_size - train_size
    train_dataset, test_dataset = random_split(dataset, [train_size, test_size])

    # Create data loaders for train and test datasets
    train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True, num_workers=4)
    test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False, num_workers=4)

    # Number of clients
    num_clients = 3
    client_datasets = split_dataset(train_dataset, num_clients)
    client_loaders = [DataLoader(ds, batch_size=64, shuffle=True) for ds in client_datasets]

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    input_size = 30  # Number of features in the dataset
    hidden_size = 32
    num_classes = 2  # Binary classification (fraud or not)
    global_model = FraudDetectionModel(input_size=input_size, hidden_size=hidden_size, num_classes=num_classes)

    clients = [Client(client_id=i,
                      model=FraudDetectionModel(input_size=input_size, hidden_size=hidden_size, num_classes=num_classes),
                      train_loader=client_loaders[i],
                      test_loader=test_loader,
                      device=device)
               for i in range(num_clients)]

    global_epochs = 5
    server = Server(model=global_model, clients=clients, num_rounds=5, epochs=global_epochs, device=device)
    server.distribute_and_train()

if __name__ == '__main__':
    main()

  self.scaler = GradScaler()
  with autocast():



Round 1/5
Client 0: Epoch 1, Loss: 0.04519153670939399
Client 0: Epoch 2, Loss: 0.003746561360399913
Client 0: Epoch 3, Loss: 0.003017685484174359
Client 0: Epoch 4, Loss: 0.0026940576882802246
Client 0: Epoch 5, Loss: 0.0025173515090563415
Client 1: Epoch 1, Loss: 0.04581581307347572
Client 1: Epoch 2, Loss: 0.0037730547909216095
Client 1: Epoch 3, Loss: 0.0029800826710831845
Client 1: Epoch 4, Loss: 0.002567451820289932
Client 1: Epoch 5, Loss: 0.002315095854136958
Client 2: Epoch 1, Loss: 0.046018445402679596
Client 2: Epoch 2, Loss: 0.003697291535396747
Client 2: Epoch 3, Loss: 0.003166590970466204
Client 2: Epoch 4, Loss: 0.0028102758441358906
Client 2: Epoch 5, Loss: 0.0025524502297825705
Global Model Accuracy after round 1: 0.9996

Round 2/5
Client 0: Epoch 1, Loss: 0.002775246895006846
Client 0: Epoch 2, Loss: 0.002435279204685707
Client 0: Epoch 3, Loss: 0.002263515327744054
Client 0: Epoch 4, Loss: 0.0021037722627428126
Client 0: Epoch 5, Loss: 0.001961041460432667
Client 1:

In [None]:
from google.colab import drive
drive.mount('/content/drive')
filepath = '/content/drive/MyDrive/Colab Notebooks/creditcard.csv'

import os
import torch
import numpy as np
import pandas as pd
from torch.utils.data import Dataset, DataLoader, random_split
import torch.nn as nn
import torch.optim as optim
from torch.cuda.amp import autocast, GradScaler
from sklearn.preprocessing import StandardScaler
from imblearn.over_sampling import SMOTE
from collections import Counter

# FraudDetectionDataset Class
class FraudDetectionDataset(Dataset):
    def __init__(self, csv_file, transform=None):
        self.data = pd.read_csv(csv_file)

        # Check for NaN values in the dataset
        if self.data.isna().sum().sum() > 0:
            print("Warning: NaN values found in the dataset. Filling with column mean.")
            self.data.fillna(self.data.mean(), inplace=True)  # Handle NaN values by filling with mean

        # Normalize the input features
        self.scaler = StandardScaler()
        self.data.iloc[:, :-1] = self.scaler.fit_transform(self.data.iloc[:, :-1])

        self.transform = transform

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        sample = self.data.iloc[idx, :-1].values.astype(np.float32)
        label = self.data.iloc[idx, -1].astype(np.int64)

        if self.transform:
            sample = self.transform(sample)

        return sample, label


# Simple Feedforward Neural Network for Tabular Data
class FraudDetectionModel(nn.Module):
    def __init__(self, input_size, hidden_size, num_classes):
        super(FraudDetectionModel, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        out = self.fc1(x)
        out = self.relu(out)
        out = self.fc2(out)  # No softmax applied here, CrossEntropyLoss expects raw logits
        return out


# Server for Federated Learning
class Server:
    def __init__(self, model, clients, num_rounds, epochs, device):
        self.global_model = model.to(device)
        self.clients = clients
        self.num_rounds = num_rounds
        self.epochs = epochs  # Global number of epochs
        self.device = device

    def aggregate_weights(self, client_weights):
        global_weights = self.global_model.state_dict()
        for key in global_weights.keys():
            global_weights[key] = torch.stack([client_weights[i][key].float() for i in range(len(client_weights))]).mean(0)
        self.global_model.load_state_dict(global_weights)

    def distribute_and_train(self):
        for round_num in range(self.num_rounds):
            print(f"\nRound {round_num + 1}/{self.num_rounds}")

            global_weights = self.global_model.state_dict()
            client_weights = []

            for client in self.clients:
                client.set_weights(global_weights)
                client.train(self.epochs)  # Pass the global epochs here
                client_weights.append(client.get_weights())

            self.aggregate_weights(client_weights)
            accuracy = self.evaluate_global_model()
            print(f"Global Model Accuracy after round {round_num + 1}: {accuracy:.4f}")

    def evaluate_global_model(self):
        self.global_model.eval()
        correct, total = 0, 0
        test_loader = self.clients[0].test_loader
        with torch.no_grad():
            for data, labels in test_loader:
                data, labels = data.to(self.device), labels.to(self.device)
                outputs = self.global_model(data)
                _, preds = torch.max(outputs, 1)
                total += labels.size(0)
                correct += (preds == labels).sum().item()
        return correct / total


# Client for Federated Learning
class Client:
    def __init__(self, client_id, model, train_loader, test_loader, device, lr=0.001):  # Lowered learning rate
        self.client_id = client_id
        self.local_model = model.to(device)
        self.train_loader = train_loader
        self.test_loader = test_loader
        self.device = device
        self.criterion = nn.CrossEntropyLoss()
        self.optimizer = optim.Adam(self.local_model.parameters(), lr=lr)
        self.scaler = GradScaler()

    def set_weights(self, global_weights):
        self.local_model.load_state_dict(global_weights)

    def get_weights(self):
        return self.local_model.state_dict()

    def train(self, epochs):
        self.local_model.train()
        for epoch in range(epochs):
            running_loss = 0.0
            for data, labels in self.train_loader:
                data, labels = data.to(self.device), labels.to(self.device)

                self.optimizer.zero_grad()

                # Mixed Precision Training
                with autocast():  # Automatically uses the current device (cuda or cpu)
                    outputs = self.local_model(data)
                    loss = self.criterion(outputs, labels)

                # Gradient clipping to avoid exploding gradients
                torch.nn.utils.clip_grad_norm_(self.local_model.parameters(), max_norm=1.0)

                self.scaler.scale(loss).backward()
                self.scaler.step(self.optimizer)
                self.scaler.update()

                running_loss += loss.item()

            print(f"Client {self.client_id}: Epoch {epoch + 1}, Loss: {running_loss / len(self.train_loader)}")


# Function to split the dataset across clients
def split_dataset(dataset, num_clients):
    # Ensure the dataset can be split evenly among the clients
    client_datasets = random_split(dataset, [len(dataset) // num_clients] * (num_clients - 1) + [len(dataset) - len(dataset) // num_clients * (num_clients - 1)])
    return client_datasets


# Function to apply SMOTE to the dataset
def apply_smote(dataset):
    X = dataset.data.iloc[:, :-1].values
    y = dataset.data.iloc[:, -1].values
    print(f"Class distribution before SMOTE: {Counter(y)}")
    smote = SMOTE(random_state=42)
    X_resampled, y_resampled = smote.fit_resample(X, y)
    print(f"Class distribution after SMOTE: {Counter(y_resampled)}")
    resampled_data = pd.DataFrame(X_resampled, columns=dataset.data.columns[:-1])
    resampled_data['Class'] = y_resampled
    dataset.data = resampled_data
    return dataset


# Main function to start Federated Learning
def main():
    csv_file = filepath  # Update this path

    # Load the dataset
    dataset = FraudDetectionDataset(csv_file=csv_file)
    total_size = len(dataset)

    # Apply SMOTE to the dataset
    dataset = apply_smote(dataset)

    # Split the dataset into training and testing sets
    train_size = int(0.8 * total_size)
    test_size = total_size - train_size
    train_dataset, test_dataset = random_split(dataset, [train_size, test_size])

    # Create data loaders for train and test datasets
    train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True, num_workers=4)
    test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False, num_workers=4)

    # Number of clients
    num_clients = 3
    client_datasets = split_dataset(train_dataset, num_clients)
    client_loaders = [DataLoader(ds, batch_size=64, shuffle=True) for ds in client_datasets]

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    input_size = 30  # Number of features in the dataset
    hidden_size = 32
    num_classes = 2  # Binary classification (fraud or not)
    global_model = FraudDetectionModel(input_size=input_size, hidden_size=hidden_size, num_classes=num_classes)

    # Define class weights
    class_weights = torch.tensor([1.0, 100.0])  # Higher weight for the minority class (fraud)
    clients = [Client(client_id=i,
                      model=FraudDetectionModel(input_size=input_size, hidden_size=hidden_size, num_classes=num_classes),
                      train_loader=client_loaders[i],
                      test_loader=test_loader,
                      device=device,
                      lr=0.001)
               for i in range(num_clients)]

    global_epochs = 5
    server = Server(model=global_model, clients=clients, num_rounds=3, epochs=global_epochs, device=device)
    server.distribute_and_train()

if __name__ == '__main__':
    main()

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Class distribution before SMOTE: Counter({0: 284315, 1: 492})
Class distribution after SMOTE: Counter({0: 284315, 1: 284315})


ValueError: Sum of input lengths does not equal the length of the input dataset!

In [None]:
###Orignal

In [None]:

import torch
import numpy as np
import pandas as pd
from torch.utils.data import Dataset, DataLoader, random_split
import torch.nn as nn
import torch.optim as optim
from torch.cuda.amp import autocast, GradScaler
from sklearn.preprocessing import StandardScaler
from imblearn.over_sampling import SMOTE
from collections import Counter
from sklearn.ensemble import IsolationForest
from sklearn.svm import OneClassSVM

# FraudDetectionDataset Class
class FraudDetectionDataset(Dataset):
    def __init__(self, csv_file, transform=None, augment=False):
        self.data = pd.read_csv(csv_file)

        # Check for NaN values in the dataset
        if self.data.isna().sum().sum() > 0:
            print("Warning: NaN values found in the dataset. Filling with column mean.")
            self.data.fillna(self.data.mean(), inplace=True)  # Handle NaN values by filling with mean

        # Normalize the input features
        self.scaler = StandardScaler()
        self.data.iloc[:, :-1] = self.scaler.fit_transform(self.data.iloc[:, :-1])

        # Data Augmentation: Add noise to fraud cases
        if augment:
            self.augment_fraud_cases()

        self.transform = transform

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        sample = self.data.iloc[idx, :-1].values.astype(np.float32)
        label = self.data.iloc[idx, -1].astype(np.int64)

        if self.transform:
            sample = self.transform(sample)

        return sample, label

    def augment_fraud_cases(self):
        """Add noise to fraud cases to create synthetic fraud samples."""
        fraud_indices = self.data[self.data['Class'] == 1].index
        num_frauds = len(fraud_indices)
        if num_frauds == 0:
            return

        # Generate synthetic fraud cases by adding Gaussian noise
        fraud_samples = self.data.iloc[fraud_indices, :-1].values
        noise = np.random.normal(0, 0.1, fraud_samples.shape)  # Small noise
        synthetic_samples = fraud_samples + noise

        # Append synthetic fraud cases to the dataset
        synthetic_data = pd.DataFrame(synthetic_samples, columns=self.data.columns[:-1])
        synthetic_data['Class'] = 1
        self.data = pd.concat([self.data, synthetic_data], ignore_index=True)
        print(f"Augmented {num_frauds} fraud cases. New fraud count: {len(self.data[self.data['Class'] == 1])}")


# Autoencoder for Anomaly Detection
class Autoencoder(nn.Module):
    def __init__(self, input_size, hidden_size):
        super(Autoencoder, self).__init__()
        self.encoder = nn.Sequential(
            nn.Linear(input_size, hidden_size),
            nn.ReLU(),
            nn.Linear(hidden_size, hidden_size // 2),
            nn.ReLU()
        )
        self.decoder = nn.Sequential(
            nn.Linear(hidden_size // 2, hidden_size),
            nn.ReLU(),
            nn.Linear(hidden_size, input_size),
            nn.Sigmoid()  # Ensure output is in the same range as input
        )

    def forward(self, x):
        encoded = self.encoder(x)
        decoded = self.decoder(encoded)
        return decoded


# Server for Federated Learning
class Server:
    def __init__(self, model, clients, num_rounds, epochs, device):
        self.global_model = model.to(device)
        self.clients = clients
        self.num_rounds = num_rounds
        self.epochs = epochs  # Global number of epochs
        self.device = device

    def aggregate_weights(self, client_weights):
        global_weights = self.global_model.state_dict()
        for key in global_weights.keys():
            global_weights[key] = torch.stack([client_weights[i][key].float() for i in range(len(client_weights))]).mean(0)
        self.global_model.load_state_dict(global_weights)

    def distribute_and_train(self):
        for round_num in range(self.num_rounds):
            print(f"\nRound {round_num + 1}/{self.num_rounds}")

            global_weights = self.global_model.state_dict()
            client_weights = []

            for client in self.clients:
                client.set_weights(global_weights)
                client.train(self.epochs)  # Pass the global epochs here
                client_weights.append(client.get_weights())

            self.aggregate_weights(client_weights)
            accuracy = self.evaluate_global_model()
            print(f"Global Model Accuracy after round {round_num + 1}: {accuracy:.4f}")

    def evaluate_global_model(self):
        self.global_model.eval()
        correct, total = 0, 0
        test_loader = self.clients[0].test_loader
        with torch.no_grad():
            for data, labels in test_loader:
                data, labels = data.to(self.device), labels.to(self.device)
                outputs = self.global_model(data)
                _, preds = torch.max(outputs, 1)
                total += labels.size(0)
                correct += (preds == labels).sum().item()
        return correct / total


# Client for Federated Learning
class Client:
    def __init__(self, client_id, model, train_loader, test_loader, device, lr=0.001):  # Lowered learning rate
        self.client_id = client_id
        self.local_model = model.to(device)
        self.train_loader = train_loader
        self.test_loader = test_loader
        self.device = device
        self.criterion = nn.CrossEntropyLoss()
        self.optimizer = optim.Adam(self.local_model.parameters(), lr=lr)
        self.scaler = GradScaler()

    def set_weights(self, global_weights):
        self.local_model.load_state_dict(global_weights)

    def get_weights(self):
        return self.local_model.state_dict()

    def train(self, epochs):
        self.local_model.train()
        for epoch in range(epochs):
            running_loss = 0.0
            for data, labels in self.train_loader:
                data, labels = data.to(self.device), labels.to(self.device)

                self.optimizer.zero_grad()

                # Mixed Precision Training
                with autocast():  # Automatically uses the current device (cuda or cpu)
                    outputs = self.local_model(data)
                    loss = self.criterion(outputs, labels)

                # Gradient clipping to avoid exploding gradients
                torch.nn.utils.clip_grad_norm_(self.local_model.parameters(), max_norm=1.0)

                self.scaler.scale(loss).backward()
                self.scaler.step(self.optimizer)
                self.scaler.update()

                running_loss += loss.item()

            print(f"Client {self.client_id}: Epoch {epoch + 1}, Loss: {running_loss / len(self.train_loader)}")


# Function to split the dataset across clients
def split_dataset(dataset, num_clients):
    # Ensure the dataset can be split evenly among the clients
    client_datasets = random_split(dataset, [len(dataset) // num_clients] * (num_clients - 1) + [len(dataset) - len(dataset) // num_clients * (num_clients - 1)])
    return client_datasets


# Function to apply SMOTE to the dataset
def apply_smote(dataset):
    X = dataset.data.iloc[:, :-1].values
    y = dataset.data.iloc[:, -1].values
    print(f"Class distribution before SMOTE: {Counter(y)}")
    smote = SMOTE(random_state=42)
    X_resampled, y_resampled = smote.fit_resample(X, y)
    print(f"Class distribution after SMOTE: {Counter(y_resampled)}")
    resampled_data = pd.DataFrame(X_resampled, columns=dataset.data.columns[:-1])
    resampled_data['Class'] = y_resampled
    dataset.data = resampled_data
    return dataset


def main():
    csv_file = filepath  # Update this path

    # Load the dataset
    dataset = FraudDetectionDataset(csv_file=csv_file, augment=True)  # Enable data augmentation
    total_size = len(dataset)

    # Apply SMOTE to the dataset
    dataset = apply_smote(dataset)

    # Recalculate total_size after SMOTE
    total_size = len(dataset)

    # Split the dataset into training and testing sets
    train_size = int(0.8 * total_size)
    test_size = total_size - train_size
    train_dataset, test_dataset = random_split(dataset, [train_size, test_size])

    # Create data loaders for train and test datasets
    train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True, num_workers=4)
    test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False, num_workers=4)

    # Number of clients
    num_clients = 3
    client_datasets = split_dataset(train_dataset, num_clients)
    client_loaders = [DataLoader(ds, batch_size=64, shuffle=True) for ds in client_datasets]

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    input_size = 30  # Number of features in the dataset
    hidden_size = 32
    num_classes = 2  # Binary classification (fraud or not)
    global_model = FraudDetectionModel(input_size=input_size, hidden_size=hidden_size, num_classes=num_classes)

    # Define class weights
    class_weights = torch.tensor([1.0, 100.0])  # Higher weight for the minority class (fraud)
    clients = [Client(client_id=i,
                      model=FraudDetectionModel(input_size=input_size, hidden_size=hidden_size, num_classes=num_classes),
                      train_loader=client_loaders[i],
                      test_loader=test_loader,
                      device=device,
                      lr=0.001)
               for i in range(num_clients)]

    global_epochs = 5
    server = Server(model=global_model, clients=clients, num_rounds=5, epochs=global_epochs, device=device)
    server.distribute_and_train()

    # Anomaly Detection using Autoencoder
    print("\nTraining Autoencoder for Anomaly Detection...")
    autoencoder = Autoencoder(input_size=input_size, hidden_size=hidden_size).to(device)
    criterion = nn.MSELoss()
    optimizer = optim.Adam(autoencoder.parameters(), lr=0.001)

    # Train the autoencoder on normal transactions
    normal_data = dataset.data[dataset.data['Class'] == 0].iloc[:, :-1].values
    normal_loader = DataLoader(torch.tensor(normal_data, dtype=torch.float32), batch_size=64, shuffle=True)

    for epoch in range(10):
        for data in normal_loader:
            data = data.to(device)
            optimizer.zero_grad()
            reconstructed = autoencoder(data)
            loss = criterion(reconstructed, data)
            loss.backward()
            optimizer.step()
        print(f"Autoencoder Epoch {epoch + 1}, Loss: {loss.item()}")

    # Evaluate the autoencoder on test data
    test_data = torch.tensor(dataset.data.iloc[:, :-1].values, dtype=torch.float32).to(device)
    with torch.no_grad():
        reconstructed = autoencoder(test_data)
        reconstruction_error = torch.mean((reconstructed - test_data) ** 2, dim=1).cpu().numpy()

    # Classify anomalies based on reconstruction error
    threshold = np.percentile(reconstruction_error, 95)  # 95th percentile as threshold
    predictions = (reconstruction_error > threshold).astype(int)
    print(f"Anomaly Detection Results: {Counter(predictions)}")

if __name__ == '__main__':
    main()

Augmented 492 fraud cases. New fraud count: 984
Class distribution before SMOTE: Counter({0: 284315, 1: 984})
Class distribution after SMOTE: Counter({0: 284315, 1: 284315})

Round 1/5


  self.scaler = GradScaler()
  with autocast():  # Automatically uses the current device (cuda or cpu)


Client 0: Epoch 1, Loss: 0.09080070789258943
Client 0: Epoch 2, Loss: 0.029322844448220378
Client 0: Epoch 3, Loss: 0.017563416083040746
Client 0: Epoch 4, Loss: 0.01274447896073765
Client 0: Epoch 5, Loss: 0.009920708085154871
Client 1: Epoch 1, Loss: 0.09220980838852844
Client 1: Epoch 2, Loss: 0.02991821269921267
Client 1: Epoch 3, Loss: 0.01844533606106219
Client 1: Epoch 4, Loss: 0.013398093406409654
Client 1: Epoch 5, Loss: 0.010723488028713217
Client 2: Epoch 1, Loss: 0.09201494176805271
Client 2: Epoch 2, Loss: 0.029627687333914045
Client 2: Epoch 3, Loss: 0.017594660283361055
Client 2: Epoch 4, Loss: 0.012740199375732602
Client 2: Epoch 5, Loss: 0.009875220140905949
Global Model Accuracy after round 1: 0.9974

Round 2/5
Client 0: Epoch 1, Loss: 0.010588092249770253
Client 0: Epoch 2, Loss: 0.008186413402146491
Client 0: Epoch 3, Loss: 0.007081497171088142
Client 0: Epoch 4, Loss: 0.006191899566930809
Client 0: Epoch 5, Loss: 0.005480994451849993
Client 1: Epoch 1, Loss: 0.0105

In [None]:
###Secure AGG

In [None]:
import torch
import numpy as np
import pandas as pd
from torch.utils.data import Dataset, DataLoader, random_split
import torch.nn as nn
import torch.optim as optim
from torch.cuda.amp import autocast, GradScaler
from sklearn.preprocessing import StandardScaler
from imblearn.over_sampling import SMOTE
from collections import Counter
import random

# FraudDetectionDataset Class
class FraudDetectionDataset(Dataset):
    def __init__(self, csv_file, transform=None, augment=False):
        self.data = pd.read_csv(csv_file)

        # Check for NaN values in the dataset
        if self.data.isna().sum().sum() > 0:
            print("Warning: NaN values found in the dataset. Filling with column mean.")
            self.data.fillna(self.data.mean(), inplace=True)  # Handle NaN values by filling with mean

        # Normalize the input features
        self.scaler = StandardScaler()
        self.data.iloc[:, :-1] = self.scaler.fit_transform(self.data.iloc[:, :-1])

        # Data Augmentation: Add noise to fraud cases
        if augment:
            self.augment_fraud_cases()

        self.transform = transform

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        sample = self.data.iloc[idx, :-1].values.astype(np.float32)
        label = self.data.iloc[idx, -1].astype(np.int64)

        if self.transform:
            sample = self.transform(sample)

        return sample, label

    def augment_fraud_cases(self):
        """Add noise to fraud cases to create synthetic fraud samples."""
        fraud_indices = self.data[self.data['Class'] == 1].index
        num_frauds = len(fraud_indices)
        if num_frauds == 0:
            return

        # Generate synthetic fraud cases by adding Gaussian noise
        fraud_samples = self.data.iloc[fraud_indices, :-1].values
        noise = np.random.normal(0, 0.1, fraud_samples.shape)  # Small noise
        synthetic_samples = fraud_samples + noise

        # Append synthetic fraud cases to the dataset
        synthetic_data = pd.DataFrame(synthetic_samples, columns=self.data.columns[:-1])
        synthetic_data['Class'] = 1
        self.data = pd.concat([self.data, synthetic_data], ignore_index=True)
        print(f"Augmented {num_frauds} fraud cases. New fraud count: {len(self.data[self.data['Class'] == 1])}")


# Simple Feedforward Neural Network for Tabular Data
class FraudDetectionModel(nn.Module):
    def __init__(self, input_size, hidden_size, num_classes):
        super(FraudDetectionModel, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        out = self.fc1(x)
        out = self.relu(out)
        out = self.fc2(out)  # No softmax applied here, CrossEntropyLoss expects raw logits
        return out


# Server for Federated Learning with Secure Aggregation
class Server:
    def __init__(self, model, clients, num_rounds, epochs, device):
        self.global_model = model.to(device)
        self.clients = clients
        self.num_rounds = num_rounds
        self.epochs = epochs  # Global number of epochs
        self.device = device

    def secure_aggregate(self, client_weights):
        """Securely aggregate client weights using additive secret sharing."""
        global_weights = self.global_model.state_dict()
        for key in global_weights.keys():
            # Initialize aggregated weight with zeros
            aggregated_weight = torch.zeros_like(global_weights[key])

            # Sum all client weights
            for client_weight in client_weights:
                aggregated_weight += client_weight[key]

            # Average the aggregated weights
            global_weights[key] = aggregated_weight / len(client_weights)

        # Update the global model
        self.global_model.load_state_dict(global_weights)

    def distribute_and_train(self):
        for round_num in range(self.num_rounds):
            print(f"\nRound {round_num + 1}/{self.num_rounds}")

            global_weights = self.global_model.state_dict()
            client_weights = []

            for client in self.clients:
                client.set_weights(global_weights)
                client.train(self.epochs)  # Pass the global epochs here
                client_weights.append(client.get_weights())

            # Securely aggregate client weights
            self.secure_aggregate(client_weights)

            # Evaluate the global model
            accuracy = self.evaluate_global_model()
            print(f"Global Model Accuracy after round {round_num + 1}: {accuracy:.4f}")

    def evaluate_global_model(self):
        self.global_model.eval()
        correct, total = 0, 0
        test_loader = self.clients[0].test_loader
        with torch.no_grad():
            for data, labels in test_loader:
                data, labels = data.to(self.device), labels.to(self.device)
                outputs = self.global_model(data)
                _, preds = torch.max(outputs, 1)
                total += labels.size(0)
                correct += (preds == labels).sum().item()
        return correct / total


# Client for Federated Learning
class Client:
    def __init__(self, client_id, model, train_loader, test_loader, device, lr=0.001):  # Lowered learning rate
        self.client_id = client_id
        self.local_model = model.to(device)
        self.train_loader = train_loader
        self.test_loader = test_loader
        self.device = device
        self.criterion = nn.CrossEntropyLoss()
        self.optimizer = optim.Adam(self.local_model.parameters(), lr=lr)
        self.scaler = GradScaler()

    def set_weights(self, global_weights):
        self.local_model.load_state_dict(global_weights)

    def get_weights(self):
        return self.local_model.state_dict()

    def train(self, epochs):
        self.local_model.train()
        for epoch in range(epochs):
            running_loss = 0.0
            for data, labels in self.train_loader:
                data, labels = data.to(self.device), labels.to(self.device)

                self.optimizer.zero_grad()

                # Mixed Precision Training
                with autocast():  # Automatically uses the current device (cuda or cpu)
                    outputs = self.local_model(data)
                    loss = self.criterion(outputs, labels)

                # Gradient clipping to avoid exploding gradients
                torch.nn.utils.clip_grad_norm_(self.local_model.parameters(), max_norm=1.0)

                self.scaler.scale(loss).backward()
                self.scaler.step(self.optimizer)
                self.scaler.update()

                running_loss += loss.item()

            print(f"Client {self.client_id}: Epoch {epoch + 1}, Loss: {running_loss / len(self.train_loader)}")


# Function to split the dataset across clients
def split_dataset(dataset, num_clients):
    # Ensure the dataset can be split evenly among the clients
    client_datasets = random_split(dataset, [len(dataset) // num_clients] * (num_clients - 1) + [len(dataset) - len(dataset) // num_clients * (num_clients - 1)])
    return client_datasets


# Function to apply SMOTE to the dataset
def apply_smote(dataset):
    X = dataset.data.iloc[:, :-1].values
    y = dataset.data.iloc[:, -1].values
    print(f"Class distribution before SMOTE: {Counter(y)}")
    smote = SMOTE(random_state=42)
    X_resampled, y_resampled = smote.fit_resample(X, y)
    print(f"Class distribution after SMOTE: {Counter(y_resampled)}")
    resampled_data = pd.DataFrame(X_resampled, columns=dataset.data.columns[:-1])
    resampled_data['Class'] = y_resampled
    dataset.data = resampled_data
    return dataset


# Main function to start Federated Learning
def main():
    csv_file = filepath  # Update this path

    # Load the dataset
    dataset = FraudDetectionDataset(csv_file=csv_file, augment=True)  # Enable data augmentation
    total_size = len(dataset)

    # Apply SMOTE to the dataset
    dataset = apply_smote(dataset)

    # Recalculate total_size after SMOTE
    total_size = len(dataset)

    # Split the dataset into training and testing sets
    train_size = int(0.8 * total_size)
    test_size = total_size - train_size
    train_dataset, test_dataset = random_split(dataset, [train_size, test_size])

    # Create data loaders for train and test datasets
    train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True, num_workers=4)
    test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False, num_workers=4)

    # Number of clients
    num_clients = 3
    client_datasets = split_dataset(train_dataset, num_clients)
    client_loaders = [DataLoader(ds, batch_size=64, shuffle=True) for ds in client_datasets]

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    input_size = 30  # Number of features in the dataset
    hidden_size = 32
    num_classes = 2  # Binary classification (fraud or not)
    global_model = FraudDetectionModel(input_size=input_size, hidden_size=hidden_size, num_classes=num_classes)

    # Define class weights
    class_weights = torch.tensor([1.0, 100.0])  # Higher weight for the minority class (fraud)
    clients = [Client(client_id=i,
                      model=FraudDetectionModel(input_size=input_size, hidden_size=hidden_size, num_classes=num_classes),
                      train_loader=client_loaders[i],
                      test_loader=test_loader,
                      device=device,
                      lr=0.001)
               for i in range(num_clients)]

    global_epochs = 5
    server = Server(model=global_model, clients=clients, num_rounds=5, epochs=global_epochs, device=device)
    server.distribute_and_train()

if __name__ == '__main__':
    main()

Augmented 492 fraud cases. New fraud count: 984
Class distribution before SMOTE: Counter({0: 284315, 1: 984})
Class distribution after SMOTE: Counter({0: 284315, 1: 284315})

Round 1/5


  self.scaler = GradScaler()
  with autocast():  # Automatically uses the current device (cuda or cpu)


Client 0: Epoch 1, Loss: 0.0911927540342516
Client 0: Epoch 2, Loss: 0.0285020705988387
Client 0: Epoch 3, Loss: 0.01779090823912168
Client 0: Epoch 4, Loss: 0.013366406072738663
Client 0: Epoch 5, Loss: 0.01068540356111642
Client 1: Epoch 1, Loss: 0.08772563370581292
Client 1: Epoch 2, Loss: 0.028277989000647884
Client 1: Epoch 3, Loss: 0.017362854239149687
Client 1: Epoch 4, Loss: 0.013083282024351554
Client 1: Epoch 5, Loss: 0.01047235895066133
Client 2: Epoch 1, Loss: 0.0894671554606455
Client 2: Epoch 2, Loss: 0.028320704672666352
Client 2: Epoch 3, Loss: 0.017484353131266853
Client 2: Epoch 4, Loss: 0.012929835650413632
Client 2: Epoch 5, Loss: 0.010052049334872247
Global Model Accuracy after round 1: 0.9974

Round 2/5
Client 0: Epoch 1, Loss: 0.010643444938672596
Client 0: Epoch 2, Loss: 0.008573431066690342
Client 0: Epoch 3, Loss: 0.007100808934123405
Client 0: Epoch 4, Loss: 0.006226293237895591
Client 0: Epoch 5, Loss: 0.005511420580876779
Client 1: Epoch 1, Loss: 0.01050207

In [None]:
# start of Secure AGG+ Diff Privacy

In [None]:
# Install Opacus
!pip install opacus

import torch
import numpy as np
import pandas as pd
from torch.utils.data import Dataset, DataLoader, random_split
import torch.nn as nn
import torch.optim as optim
from torch.cuda.amp import autocast, GradScaler
from sklearn.preprocessing import StandardScaler
from imblearn.over_sampling import SMOTE
from collections import Counter
from opacus import PrivacyEngine

# FraudDetectionDataset Class
class FraudDetectionDataset(Dataset):
    def __init__(self, csv_file, transform=None, augment=False):
        self.data = pd.read_csv(csv_file)

        # Check for NaN values in the dataset
        if self.data.isna().sum().sum() > 0:
            print("Warning: NaN values found in the dataset. Filling with column mean.")
            self.data.fillna(self.data.mean(), inplace=True)  # Handle NaN values by filling with mean

        # Normalize the input features
        self.scaler = StandardScaler()
        self.data.iloc[:, :-1] = self.scaler.fit_transform(self.data.iloc[:, :-1])

        # Data Augmentation: Add noise to fraud cases
        if augment:
            self.augment_fraud_cases()

        self.transform = transform

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        sample = self.data.iloc[idx, :-1].values.astype(np.float32)
        label = self.data.iloc[idx, -1].astype(np.int64)

        if self.transform:
            sample = self.transform(sample)

        return sample, label

    def augment_fraud_cases(self):
        """Add noise to fraud cases to create synthetic fraud samples."""
        fraud_indices = self.data[self.data['Class'] == 1].index
        num_frauds = len(fraud_indices)
        if num_frauds == 0:
            return

        # Generate synthetic fraud cases by adding Gaussian noise
        fraud_samples = self.data.iloc[fraud_indices, :-1].values
        noise = np.random.normal(0, 0.1, fraud_samples.shape)  # Small noise
        synthetic_samples = fraud_samples + noise

        # Append synthetic fraud cases to the dataset
        synthetic_data = pd.DataFrame(synthetic_samples, columns=self.data.columns[:-1])
        synthetic_data['Class'] = 1
        self.data = pd.concat([self.data, synthetic_data], ignore_index=True)
        print(f"Augmented {num_frauds} fraud cases. New fraud count: {len(self.data[self.data['Class'] == 1])}")


# Simple Feedforward Neural Network for Tabular Data
class FraudDetectionModel(nn.Module):
    def __init__(self, input_size, hidden_size, num_classes):
        super(FraudDetectionModel, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        out = self.fc1(x)
        out = self.relu(out)
        out = self.fc2(out)  # No softmax applied here, CrossEntropyLoss expects raw logits
        return out


# Server for Federated Learning with Secure Aggregation
class Server:
    def __init__(self, model, clients, num_rounds, epochs, device):
        self.global_model = model.to(device)
        self.clients = clients
        self.num_rounds = num_rounds
        self.epochs = epochs  # Global number of epochs
        self.device = device

    def secure_aggregate(self, client_weights):
        """Securely aggregate client weights using additive secret sharing."""
        global_weights = self.global_model.state_dict()
        for key in global_weights.keys():
            # Initialize aggregated weight with zeros
            aggregated_weight = torch.zeros_like(global_weights[key])

            # Sum all client weights
            for client_weight in client_weights:
                aggregated_weight += client_weight[key]

            # Average the aggregated weights
            global_weights[key] = aggregated_weight / len(client_weights)

        # Update the global model
        self.global_model.load_state_dict(global_weights)

    def distribute_and_train(self):
        for round_num in range(self.num_rounds):
            print(f"\nRound {round_num + 1}/{self.num_rounds}")

            global_weights = self.global_model.state_dict()
            client_weights = []

            for client in self.clients:
                client.set_weights(global_weights)
                client.train(self.epochs)  # Pass the global epochs here
                client_weights.append(client.get_weights())

            # Securely aggregate client weights
            self.secure_aggregate(client_weights)

            # Evaluate the global model
            accuracy = self.evaluate_global_model()
            print(f"Global Model Accuracy after round {round_num + 1}: {accuracy:.4f}")

    def evaluate_global_model(self):
        self.global_model.eval()
        correct, total = 0, 0
        test_loader = self.clients[0].test_loader
        with torch.no_grad():
            for data, labels in test_loader:
                data, labels = data.to(self.device), labels.to(self.device)
                outputs = self.global_model(data)
                _, preds = torch.max(outputs, 1)
                total += labels.size(0)
                correct += (preds == labels).sum().item()
        return correct / total


# Client for Federated Learning with Differential Privacy
class Client:
    def __init__(self, client_id, model, train_loader, test_loader, device, lr=0.001, epsilon=1.0, delta=1e-5):
        self.client_id = client_id
        self.local_model = model.to(device)
        self.train_loader = train_loader
        self.test_loader = test_loader
        self.device = device
        self.criterion = nn.CrossEntropyLoss()
        self.optimizer = optim.Adam(self.local_model.parameters(), lr=lr)
        self.scaler = GradScaler()

        # Differential Privacy
        self.epsilon = epsilon
        self.delta = delta
        self.privacy_engine = PrivacyEngine()
        self.local_model, self.optimizer, self.train_loader = self.privacy_engine.make_private(
            module=self.local_model,
            optimizer=self.optimizer,
            data_loader=self.train_loader,
            noise_multiplier=1.0,  # Controls the amount of noise
            max_grad_norm=1.0,  # Clips gradients to avoid exploding gradients
        )

    def set_weights(self, global_weights):
        self.local_model.load_state_dict(global_weights)

    def get_weights(self):
        return self.local_model.state_dict()

    def train(self, epochs):
        self.local_model.train()
        for epoch in range(epochs):
            running_loss = 0.0
            for data, labels in self.train_loader:
                data, labels = data.to(self.device), labels.to(self.device)

                self.optimizer.zero_grad()

                # Mixed Precision Training
                with autocast():  # Automatically uses the current device (cuda or cpu)
                    outputs = self.local_model(data)
                    loss = self.criterion(outputs, labels)

                # Gradient clipping is handled by Opacus
                self.scaler.scale(loss).backward()
                self.scaler.step(self.optimizer)
                self.scaler.update()

                running_loss += loss.item()

            print(f"Client {self.client_id}: Epoch {epoch + 1}, Loss: {running_loss / len(self.train_loader)}")

        # Print privacy budget spent
        epsilon_spent, _ = self.privacy_engine.get_privacy_spent(self.delta)
        print(f"Client {self.client_id}: Privacy Budget Spent (ε = {epsilon_spent:.2f}, δ = {self.delta})")


# Function to split the dataset across clients
def split_dataset(dataset, num_clients):
    # Ensure the dataset can be split evenly among the clients
    client_datasets = random_split(dataset, [len(dataset) // num_clients] * (num_clients - 1) + [len(dataset) - len(dataset) // num_clients * (num_clients - 1)])
    return client_datasets


# Function to apply SMOTE to the dataset
def apply_smote(dataset):
    X = dataset.data.iloc[:, :-1].values
    y = dataset.data.iloc[:, -1].values
    print(f"Class distribution before SMOTE: {Counter(y)}")
    smote = SMOTE(random_state=42)
    X_resampled, y_resampled = smote.fit_resample(X, y)
    print(f"Class distribution after SMOTE: {Counter(y_resampled)}")
    resampled_data = pd.DataFrame(X_resampled, columns=dataset.data.columns[:-1])
    resampled_data['Class'] = y_resampled
    dataset.data = resampled_data
    return dataset


# Main function to start Federated Learning
def main():
    csv_file = filepath  # Update this path

    # Load the dataset
    dataset = FraudDetectionDataset(csv_file=csv_file, augment=True)  # Enable data augmentation
    total_size = len(dataset)

    # Apply SMOTE to the dataset
    dataset = apply_smote(dataset)

    # Recalculate total_size after SMOTE
    total_size = len(dataset)

    # Split the dataset into training and testing sets
    train_size = int(0.8 * total_size)
    test_size = total_size - train_size
    train_dataset, test_dataset = random_split(dataset, [train_size, test_size])

    # Create data loaders for train and test datasets
    train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True, num_workers=4)
    test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False, num_workers=4)

    # Number of clients
    num_clients = 3
    client_datasets = split_dataset(train_dataset, num_clients)
    client_loaders = [DataLoader(ds, batch_size=64, shuffle=True) for ds in client_datasets]

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    input_size = 30  # Number of features in the dataset
    hidden_size = 32
    num_classes = 2  # Binary classification (fraud or not)
    global_model = FraudDetectionModel(input_size=input_size, hidden_size=hidden_size, num_classes=num_classes)

    # Define class weights
    class_weights = torch.tensor([1.0, 100.0])  # Higher weight for the minority class (fraud)
    clients = [Client(client_id=i,
                      model=FraudDetectionModel(input_size=input_size, hidden_size=hidden_size, num_classes=num_classes),
                      train_loader=client_loaders[i],
                      test_loader=test_loader,
                      device=device,
                      lr=0.001,
                      epsilon=1.0,  # Privacy budget (ε)
                      delta=1e-5)   # Privacy parameter (δ)
               for i in range(num_clients)]

    global_epochs = 5
    server = Server(model=global_model, clients=clients, num_rounds=5, epochs=global_epochs, device=device)
    server.distribute_and_train()

if __name__ == '__main__':
    main()

Augmented 492 fraud cases. New fraud count: 984
Class distribution before SMOTE: Counter({0: 284315, 1: 984})
Class distribution after SMOTE: Counter({0: 284315, 1: 284315})

Round 1/5


  self.scaler = GradScaler()


RuntimeError: Error(s) in loading state_dict for GradSampleModule:
	Missing key(s) in state_dict: "_module.fc1.weight", "_module.fc1.bias", "_module.fc2.weight", "_module.fc2.bias". 
	Unexpected key(s) in state_dict: "fc1.weight", "fc1.bias", "fc2.weight", "fc2.bias". 

In [None]:
# Install Opacus
!pip install opacus



Collecting opacus
  Downloading opacus-1.5.3-py3-none-any.whl.metadata (8.4 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch>=2.0->opacus)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch>=2.0->opacus)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch>=2.0->opacus)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch>=2.0->opacus)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch>=2.0->opacus)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch>=2.0->opacus)
  Downloading nvidia_cufft_cu

NameError: name 'filepath' is not defined

In [None]:
from google.colab import drive
drive.mount('/content/drive')
filepath = '/content/drive/MyDrive/Colab Notebooks/creditcard.csv'

Mounted at /content/drive


In [None]:
import torch
import numpy as np
import pandas as pd
from torch.utils.data import Dataset, DataLoader, random_split
import torch.nn as nn
import torch.optim as optim
from torch.cuda.amp import autocast, GradScaler
from sklearn.preprocessing import StandardScaler
from imblearn.over_sampling import SMOTE
from collections import Counter
from opacus import PrivacyEngine

# FraudDetectionDataset Class
class FraudDetectionDataset(Dataset):
    def __init__(self, csv_file, transform=None, augment=False):
        self.data = pd.read_csv(csv_file)

        # Check for NaN values in the dataset
        if self.data.isna().sum().sum() > 0:
            print("Warning: NaN values found in the dataset. Filling with column mean.")
            self.data.fillna(self.data.mean(), inplace=True)  # Handle NaN values by filling with mean

        # Normalize the input features
        self.scaler = StandardScaler()
        self.data.iloc[:, :-1] = self.scaler.fit_transform(self.data.iloc[:, :-1])

        # Data Augmentation: Add noise to fraud cases
        if augment:
            self.augment_fraud_cases()

        self.transform = transform

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        sample = self.data.iloc[idx, :-1].values.astype(np.float32)
        label = self.data.iloc[idx, -1].astype(np.int64)

        if self.transform:
            sample = self.transform(sample)

        return sample, label

    def augment_fraud_cases(self):
        """Add noise to fraud cases to create synthetic fraud samples."""
        fraud_indices = self.data[self.data['Class'] == 1].index
        num_frauds = len(fraud_indices)
        if num_frauds == 0:
            return

        # Generate synthetic fraud cases by adding Gaussian noise
        fraud_samples = self.data.iloc[fraud_indices, :-1].values
        noise = np.random.normal(0, 0.1, fraud_samples.shape)  # Small noise
        synthetic_samples = fraud_samples + noise

        # Append synthetic fraud cases to the dataset
        synthetic_data = pd.DataFrame(synthetic_samples, columns=self.data.columns[:-1])
        synthetic_data['Class'] = 1
        self.data = pd.concat([self.data, synthetic_data], ignore_index=True)
        print(f"Augmented {num_frauds} fraud cases. New fraud count: {len(self.data[self.data['Class'] == 1])}")


# Simple Feedforward Neural Network for Tabular Data
class FraudDetectionModel(nn.Module):
    def __init__(self, input_size, hidden_size, num_classes):
        super(FraudDetectionModel, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        out = self.fc1(x)
        out = self.relu(out)
        out = self.fc2(out)  # No softmax applied here, CrossEntropyLoss expects raw logits
        return out


# Server for Federated Learning with Secure Aggregation
class Server:
    def __init__(self, model, clients, num_rounds, epochs, device):
        self.global_model = model.to(device)
        self.clients = clients
        self.num_rounds = num_rounds
        self.epochs = epochs  # Global number of epochs
        self.device = device

    def secure_aggregate(self, client_weights):
        """Securely aggregate client weights using additive secret sharing."""
        global_weights = self.global_model.state_dict()
        for key in global_weights.keys():
            # Initialize aggregated weight with zeros
            aggregated_weight = torch.zeros_like(global_weights[key])

            # Sum all client weights
            for client_weight in client_weights:
                aggregated_weight += client_weight[key]

            # Average the aggregated weights
            global_weights[key] = aggregated_weight / len(client_weights)

        # Update the global model
        self.global_model.load_state_dict(global_weights)

    def distribute_and_train(self):
        for round_num in range(self.num_rounds):
            print(f"\nRound {round_num + 1}/{self.num_rounds}")

            global_weights = self.global_model.state_dict()
            client_weights = []

            for client in self.clients:
                client.set_weights(global_weights)
                client.train(self.epochs)  # Pass the global epochs here
                client_weights.append(client.get_weights())

            # Securely aggregate client weights
            self.secure_aggregate(client_weights)

            # Evaluate the global model
            accuracy = self.evaluate_global_model()
            print(f"Global Model Accuracy after round {round_num + 1}: {accuracy:.4f}")

    def evaluate_global_model(self):
        self.global_model.eval()
        correct, total = 0, 0
        test_loader = self.clients[0].test_loader
        with torch.no_grad():
            for data, labels in test_loader:
                data, labels = data.to(self.device), labels.to(self.device)
                outputs = self.global_model(data)
                _, preds = torch.max(outputs, 1)
                total += labels.size(0)
                correct += (preds == labels).sum().item()
        return correct / total


# Client for Federated Learning with Differential Privacy
class Client:
    def __init__(self, client_id, model, train_loader, test_loader, device, lr=0.001, epsilon=1.0, delta=1e-5):
        self.client_id = client_id
        self.local_model = model.to(device)
        self.train_loader = train_loader
        self.test_loader = test_loader
        self.device = device
        self.criterion = nn.CrossEntropyLoss()
        self.optimizer = optim.Adam(self.local_model.parameters(), lr=lr)
        self.scaler = GradScaler()

        # Differential Privacy
        self.epsilon = epsilon
        self.delta = delta
        self.privacy_engine = PrivacyEngine()
        self.local_model, self.optimizer, self.train_loader = self.privacy_engine.make_private(
            module=self.local_model,
            optimizer=self.optimizer,
            data_loader=self.train_loader,
            noise_multiplier=1.0,  # Controls the amount of noise
            max_grad_norm=1.0,  # Clips gradients to avoid exploding gradients
        )

    def set_weights(self, global_weights):
        """Load weights into the underlying model wrapped by GradSampleModule."""
        self.local_model._module.load_state_dict(global_weights)

    def get_weights(self):
        """Extract the underlying model's state dictionary from GradSampleModule."""
        return self.local_model._module.state_dict()

    def train(self, epochs):
        self.local_model.train()
        for epoch in range(epochs):
            running_loss = 0.0
            for data, labels in self.train_loader:
                data, labels = data.to(self.device), labels.to(self.device)

                self.optimizer.zero_grad()

                # Mixed Precision Training
                with autocast():  # Automatically uses the current device (cuda or cpu)
                    outputs = self.local_model(data)
                    loss = self.criterion(outputs, labels)

                # Gradient clipping is handled by Opacus
                self.scaler.scale(loss).backward()
                self.scaler.step(self.optimizer)
                self.scaler.update()

                running_loss += loss.item()

            print(f"Client {self.client_id}: Epoch {epoch + 1}, Loss: {running_loss / len(self.train_loader)}")

        # Print privacy budget spent
        epsilon_spent, _ = self.privacy_engine.get_privacy_spent(self.delta)
        print(f"Client {self.client_id}: Privacy Budget Spent (ε = {epsilon_spent:.2f}, δ = {self.delta})")

# Function to split the dataset across clients
def split_dataset(dataset, num_clients):
    # Ensure the dataset can be split evenly among the clients
    client_datasets = random_split(dataset, [len(dataset) // num_clients] * (num_clients - 1) + [len(dataset) - len(dataset) // num_clients * (num_clients - 1)])
    return client_datasets


# Function to apply SMOTE to the dataset
def apply_smote(dataset):
    X = dataset.data.iloc[:, :-1].values
    y = dataset.data.iloc[:, -1].values
    print(f"Class distribution before SMOTE: {Counter(y)}")
    smote = SMOTE(random_state=42)
    X_resampled, y_resampled = smote.fit_resample(X, y)
    print(f"Class distribution after SMOTE: {Counter(y_resampled)}")
    resampled_data = pd.DataFrame(X_resampled, columns=dataset.data.columns[:-1])
    resampled_data['Class'] = y_resampled
    dataset.data = resampled_data
    return dataset


# Main function to start Federated Learning
def main():
    csv_file = filepath  # Update this path

    # Load the dataset
    dataset = FraudDetectionDataset(csv_file=csv_file, augment=True)  # Enable data augmentation
    total_size = len(dataset)

    # Apply SMOTE to the dataset
    dataset = apply_smote(dataset)

    # Recalculate total_size after SMOTE
    total_size = len(dataset)

    # Split the dataset into training and testing sets
    train_size = int(0.8 * total_size)
    test_size = total_size - train_size
    train_dataset, test_dataset = random_split(dataset, [train_size, test_size])

    # Create data loaders for train and test datasets
    train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True, num_workers=4)
    test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False, num_workers=4)

    # Number of clients
    num_clients = 3
    client_datasets = split_dataset(train_dataset, num_clients)
    client_loaders = [DataLoader(ds, batch_size=64, shuffle=True) for ds in client_datasets]

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    input_size = 30  # Number of features in the dataset
    hidden_size = 32
    num_classes = 2  # Binary classification (fraud or not)
    global_model = FraudDetectionModel(input_size=input_size, hidden_size=hidden_size, num_classes=num_classes)

    # Define class weights
    class_weights = torch.tensor([1.0, 100.0])  # Higher weight for the minority class (fraud)
    clients = [Client(client_id=i,
                      model=FraudDetectionModel(input_size=input_size, hidden_size=hidden_size, num_classes=num_classes),
                      train_loader=client_loaders[i],
                      test_loader=test_loader,
                      device=device,
                      lr=0.001,
                      epsilon=1.0,  # Privacy budget (ε)
                      delta=1e-5)   # Privacy parameter (δ)
               for i in range(num_clients)]

    global_epochs = 5
    server = Server(model=global_model, clients=clients, num_rounds=5, epochs=global_epochs, device=device)
    server.distribute_and_train()

if __name__ == '__main__':
    main()

Augmented 492 fraud cases. New fraud count: 984
Class distribution before SMOTE: Counter({0: 284315, 1: 984})
Class distribution after SMOTE: Counter({0: 284315, 1: 284315})


  self.scaler = GradScaler()
  with autocast():  # Automatically uses the current device (cuda or cpu)
  self._maybe_warn_non_full_backward_hook(args, result, grad_fn)



Round 1/5
Client 0: Epoch 1, Loss: 0.17277821397845033
Client 0: Epoch 2, Loss: 0.15546104765319332
Client 0: Epoch 3, Loss: 0.14085074306828194
Client 0: Epoch 4, Loss: 0.14088759959883151
Client 0: Epoch 5, Loss: 0.12499147889157414


AttributeError: 'PrivacyEngine' object has no attribute 'get_privacy_spent'

In [None]:
# Install the latest version of Opacus
!pip install opacus --upgrade

# Import the updated Opacus library
from opacus import PrivacyEngine
from opacus.accountants import RDPAccountant



In [None]:
### secure+diff

In [None]:
import torch
import numpy as np
import pandas as pd
from torch.utils.data import Dataset, DataLoader, random_split
import torch.nn as nn
import torch.optim as optim
from torch.cuda.amp import autocast, GradScaler
from sklearn.preprocessing import StandardScaler
from imblearn.over_sampling import SMOTE
from collections import Counter
from opacus import PrivacyEngine

# FraudDetectionDataset Class
class FraudDetectionDataset(Dataset):
    def __init__(self, csv_file, transform=None, augment=False):
        self.data = pd.read_csv(csv_file)

        # Check for NaN values in the dataset
        if self.data.isna().sum().sum() > 0:
            print("Warning: NaN values found in the dataset. Filling with column mean.")
            self.data.fillna(self.data.mean(), inplace=True)  # Handle NaN values by filling with mean

        # Normalize the input features
        self.scaler = StandardScaler()
        self.data.iloc[:, :-1] = self.scaler.fit_transform(self.data.iloc[:, :-1])

        # Data Augmentation: Add noise to fraud cases
        if augment:
            self.augment_fraud_cases()

        self.transform = transform

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        sample = self.data.iloc[idx, :-1].values.astype(np.float32)
        label = self.data.iloc[idx, -1].astype(np.int64)

        if self.transform:
            sample = self.transform(sample)

        return sample, label

    def augment_fraud_cases(self):
        """Add noise to fraud cases to create synthetic fraud samples."""
        fraud_indices = self.data[self.data['Class'] == 1].index
        num_frauds = len(fraud_indices)
        if num_frauds == 0:
            return

        # Generate synthetic fraud cases by adding Gaussian noise
        fraud_samples = self.data.iloc[fraud_indices, :-1].values
        noise = np.random.normal(0, 0.1, fraud_samples.shape)  # Small noise
        synthetic_samples = fraud_samples + noise

        # Append synthetic fraud cases to the dataset
        synthetic_data = pd.DataFrame(synthetic_samples, columns=self.data.columns[:-1])
        synthetic_data['Class'] = 1
        self.data = pd.concat([self.data, synthetic_data], ignore_index=True)
        print(f"Augmented {num_frauds} fraud cases. New fraud count: {len(self.data[self.data['Class'] == 1])}")


# Simple Feedforward Neural Network for Tabular Data
class FraudDetectionModel(nn.Module):
    def __init__(self, input_size, hidden_size, num_classes):
        super(FraudDetectionModel, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        out = self.fc1(x)
        out = self.relu(out)
        out = self.fc2(out)  # No softmax applied here, CrossEntropyLoss expects raw logits
        return out


# Server for Federated Learning with Secure Aggregation
class Server:
    def __init__(self, model, clients, num_rounds, epochs, device):
        self.global_model = model.to(device)
        self.clients = clients
        self.num_rounds = num_rounds
        self.epochs = epochs  # Global number of epochs
        self.device = device

    def secure_aggregate(self, client_weights):
        """Securely aggregate client weights using additive secret sharing."""
        global_weights = self.global_model.state_dict()
        for key in global_weights.keys():
            # Initialize aggregated weight with zeros
            aggregated_weight = torch.zeros_like(global_weights[key])

            # Sum all client weights
            for client_weight in client_weights:
                aggregated_weight += client_weight[key]

            # Average the aggregated weights
            global_weights[key] = aggregated_weight / len(client_weights)

        # Update the global model
        self.global_model.load_state_dict(global_weights)

    def distribute_and_train(self):
        for round_num in range(self.num_rounds):
            print(f"\nRound {round_num + 1}/{self.num_rounds}")

            global_weights = self.global_model.state_dict()
            client_weights = []

            for client in self.clients:
                client.set_weights(global_weights)
                client.train(self.epochs)  # Pass the global epochs here
                client_weights.append(client.get_weights())

            # Securely aggregate client weights
            self.secure_aggregate(client_weights)

            # Evaluate the global model
            accuracy = self.evaluate_global_model()
            print(f"Global Model Accuracy after round {round_num + 1}: {accuracy:.4f}")

    def evaluate_global_model(self):
        self.global_model.eval()
        correct, total = 0, 0
        test_loader = self.clients[0].test_loader
        with torch.no_grad():
            for data, labels in test_loader:
                data, labels = data.to(self.device), labels.to(self.device)
                outputs = self.global_model(data)
                _, preds = torch.max(outputs, 1)
                total += labels.size(0)
                correct += (preds == labels).sum().item()
        return correct / total


# Client for Federated Learning with Differential Privacy
class Client:
    def __init__(self, client_id, model, train_loader, test_loader, device, lr=0.001, epsilon=1.0, delta=1e-5):
        self.client_id = client_id
        self.local_model = model.to(device)
        self.train_loader = train_loader
        self.test_loader = test_loader
        self.device = device
        self.criterion = nn.CrossEntropyLoss()
        self.optimizer = optim.Adam(self.local_model.parameters(), lr=lr)
        self.scaler = GradScaler()

        # Differential Privacy
        self.epsilon = epsilon
        self.delta = delta
        self.privacy_engine = PrivacyEngine()
        self.local_model, self.optimizer, self.train_loader = self.privacy_engine.make_private(
            module=self.local_model,
            optimizer=self.optimizer,
            data_loader=self.train_loader,
            noise_multiplier=1.0,  # Controls the amount of noise
            max_grad_norm=1.0,  # Clips gradients to avoid exploding gradients
        )

    def set_weights(self, global_weights):
        """Load weights into the underlying model wrapped by GradSampleModule."""
        self.local_model._module.load_state_dict(global_weights)

    def get_weights(self):
        """Extract the underlying model's state dictionary from GradSampleModule."""
        return self.local_model._module.state_dict()

    def train(self, epochs):
        self.local_model.train()
        for epoch in range(epochs):
            running_loss = 0.0
            for data, labels in self.train_loader:
                data, labels = data.to(self.device), labels.to(self.device)

                self.optimizer.zero_grad()

                # Mixed Precision Training
                with autocast():  # Automatically uses the current device (cuda or cpu)
                    outputs = self.local_model(data)
                    loss = self.criterion(outputs, labels)

                # Gradient clipping is handled by Opacus
                self.scaler.scale(loss).backward()
                self.scaler.step(self.optimizer)
                self.scaler.update()

                running_loss += loss.item()

            print(f"Client {self.client_id}: Epoch {epoch + 1}, Loss: {running_loss / len(self.train_loader)}")

        # Print privacy budget spent
        epsilon_spent = self.privacy_engine.accountant.get_epsilon(delta=self.delta)
        print(f"Client {self.client_id}: Privacy Budget Spent (ε = {epsilon_spent:.2f}, δ = {self.delta})")

# Function to split the dataset across clients
def split_dataset(dataset, num_clients):
    # Ensure the dataset can be split evenly among the clients
    client_datasets = random_split(dataset, [len(dataset) // num_clients] * (num_clients - 1) + [len(dataset) - len(dataset) // num_clients * (num_clients - 1)])
    return client_datasets


# Function to apply SMOTE to the dataset
def apply_smote(dataset):
    X = dataset.data.iloc[:, :-1].values
    y = dataset.data.iloc[:, -1].values
    print(f"Class distribution before SMOTE: {Counter(y)}")
    smote = SMOTE(random_state=42)
    X_resampled, y_resampled = smote.fit_resample(X, y)
    print(f"Class distribution after SMOTE: {Counter(y_resampled)}")
    resampled_data = pd.DataFrame(X_resampled, columns=dataset.data.columns[:-1])
    resampled_data['Class'] = y_resampled
    dataset.data = resampled_data
    return dataset


# Main function to start Federated Learning
def main():
    csv_file = filepath  # Update this path

    # Load the dataset
    dataset = FraudDetectionDataset(csv_file=csv_file, augment=True)  # Enable data augmentation
    total_size = len(dataset)

    # Apply SMOTE to the dataset
    dataset = apply_smote(dataset)

    # Recalculate total_size after SMOTE
    total_size = len(dataset)

    # Split the dataset into training and testing sets
    train_size = int(0.8 * total_size)
    test_size = total_size - train_size
    train_dataset, test_dataset = random_split(dataset, [train_size, test_size])

    # Create data loaders for train and test datasets
    train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True, num_workers=4)
    test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False, num_workers=4)

    # Number of clients
    num_clients = 3
    client_datasets = split_dataset(train_dataset, num_clients)
    client_loaders = [DataLoader(ds, batch_size=64, shuffle=True) for ds in client_datasets]

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    input_size = 30  # Number of features in the dataset
    hidden_size = 32
    num_classes = 2  # Binary classification (fraud or not)
    global_model = FraudDetectionModel(input_size=input_size, hidden_size=hidden_size, num_classes=num_classes)

    # Define class weights
    class_weights = torch.tensor([1.0, 100.0])  # Higher weight for the minority class (fraud)
    clients = [Client(client_id=i,
                      model=FraudDetectionModel(input_size=input_size, hidden_size=hidden_size, num_classes=num_classes),
                      train_loader=client_loaders[i],
                      test_loader=test_loader,
                      device=device,
                      lr=0.001,
                      epsilon=1.0,  # Privacy budget (ε)
                      delta=1e-5)   # Privacy parameter (δ)
               for i in range(num_clients)]

    global_epochs = 5
    server = Server(model=global_model, clients=clients, num_rounds=5, epochs=global_epochs, device=device)
    server.distribute_and_train()

if __name__ == '__main__':
    main()

Augmented 492 fraud cases. New fraud count: 984
Class distribution before SMOTE: Counter({0: 284315, 1: 984})
Class distribution after SMOTE: Counter({0: 284315, 1: 284315})


  self.scaler = GradScaler()



Round 1/5


  with autocast():  # Automatically uses the current device (cuda or cpu)
  self._maybe_warn_non_full_backward_hook(args, result, grad_fn)


Client 0: Epoch 1, Loss: 0.17956766443492114
Client 0: Epoch 2, Loss: 0.15139026022255853
Client 0: Epoch 3, Loss: 0.14142154916587485
Client 0: Epoch 4, Loss: 0.1304839939811318
Client 0: Epoch 5, Loss: 0.12281733217891325
Client 0: Privacy Budget Spent (ε = 0.21, δ = 1e-05)
Client 1: Epoch 1, Loss: 0.18285399995152818
Client 1: Epoch 2, Loss: 0.15146229197796757
Client 1: Epoch 3, Loss: 0.14186519309166104
Client 1: Epoch 4, Loss: 0.1366659958814202
Client 1: Epoch 5, Loss: 0.11876968909993194
Client 1: Privacy Budget Spent (ε = 0.21, δ = 1e-05)
Client 2: Epoch 1, Loss: 0.17558632513089695
Client 2: Epoch 2, Loss: 0.14794762418742924
Client 2: Epoch 3, Loss: 0.14353775774081579
Client 2: Epoch 4, Loss: 0.13173141022495738
Client 2: Epoch 5, Loss: 0.12459572047991815
Client 2: Privacy Budget Spent (ε = 0.21, δ = 1e-05)
Global Model Accuracy after round 1: 0.9619

Round 2/5
Client 0: Epoch 1, Loss: 0.11639149931194614
Client 0: Epoch 2, Loss: 0.11584302913762269
Client 0: Epoch 3, Loss

In [None]:
#only Diff priv

In [None]:
import torch
import numpy as np
import pandas as pd
from torch.utils.data import Dataset, DataLoader, random_split
import torch.nn as nn
import torch.optim as optim
from torch.cuda.amp import autocast, GradScaler
from sklearn.preprocessing import StandardScaler
from imblearn.over_sampling import SMOTE
from collections import Counter
from sklearn.ensemble import IsolationForest
from sklearn.svm import OneClassSVM
from opacus import PrivacyEngine

# FraudDetectionDataset Class
class FraudDetectionDataset(Dataset):
    def __init__(self, csv_file, transform=None, augment=False):
        self.data = pd.read_csv(csv_file)

        # Check for NaN values in the dataset
        if self.data.isna().sum().sum() > 0:
            print("Warning: NaN values found in the dataset. Filling with column mean.")
            self.data.fillna(self.data.mean(), inplace=True)  # Handle NaN values by filling with mean

        # Normalize the input features
        self.scaler = StandardScaler()
        self.data.iloc[:, :-1] = self.scaler.fit_transform(self.data.iloc[:, :-1])

        # Data Augmentation: Add noise to fraud cases
        if augment:
            self.augment_fraud_cases()

        self.transform = transform

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        sample = self.data.iloc[idx, :-1].values.astype(np.float32)
        label = self.data.iloc[idx, -1].astype(np.int64)

        if self.transform:
            sample = self.transform(sample)

        return sample, label

    def augment_fraud_cases(self):
        """Add noise to fraud cases to create synthetic fraud samples."""
        fraud_indices = self.data[self.data['Class'] == 1].index
        num_frauds = len(fraud_indices)
        if num_frauds == 0:
            return

        # Generate synthetic fraud cases by adding Gaussian noise
        fraud_samples = self.data.iloc[fraud_indices, :-1].values
        noise = np.random.normal(0, 0.1, fraud_samples.shape)  # Small noise
        synthetic_samples = fraud_samples + noise

        # Append synthetic fraud cases to the dataset
        synthetic_data = pd.DataFrame(synthetic_samples, columns=self.data.columns[:-1])
        synthetic_data['Class'] = 1
        self.data = pd.concat([self.data, synthetic_data], ignore_index=True)
        print(f"Augmented {num_frauds} fraud cases. New fraud count: {len(self.data[self.data['Class'] == 1])}")


# Autoencoder for Anomaly Detection
class Autoencoder(nn.Module):
    def __init__(self, input_size, hidden_size):
        super(Autoencoder, self).__init__()
        self.encoder = nn.Sequential(
            nn.Linear(input_size, hidden_size),
            nn.ReLU(),
            nn.Linear(hidden_size, hidden_size // 2),
            nn.ReLU()
        )
        self.decoder = nn.Sequential(
            nn.Linear(hidden_size // 2, hidden_size),
            nn.ReLU(),
            nn.Linear(hidden_size, input_size),
            nn.Sigmoid()  # Ensure output is in the same range as input
        )

    def forward(self, x):
        encoded = self.encoder(x)
        decoded = self.decoder(encoded)
        return decoded


# Server for Federated Learning
class Server:
    def __init__(self, model, clients, num_rounds, epochs, device):
        self.global_model = model.to(device)
        self.clients = clients
        self.num_rounds = num_rounds
        self.epochs = epochs  # Global number of epochs
        self.device = device

    def aggregate_weights(self, client_weights):
        global_weights = self.global_model.state_dict()
        for key in global_weights.keys():
            global_weights[key] = torch.stack([client_weights[i][key].float() for i in range(len(client_weights))]).mean(0)
        self.global_model.load_state_dict(global_weights)

    def distribute_and_train(self):
        for round_num in range(self.num_rounds):
            print(f"\nRound {round_num + 1}/{self.num_rounds}")

            global_weights = self.global_model.state_dict()
            client_weights = []

            for client in self.clients:
                client.set_weights(global_weights)
                client.train(self.epochs)  # Pass the global epochs here
                client_weights.append(client.get_weights())

            self.aggregate_weights(client_weights)
            accuracy = self.evaluate_global_model()
            print(f"Global Model Accuracy after round {round_num + 1}: {accuracy:.4f}")

    def evaluate_global_model(self):
        self.global_model.eval()
        correct, total = 0, 0
        test_loader = self.clients[0].test_loader
        with torch.no_grad():
            for data, labels in test_loader:
                data, labels = data.to(self.device), labels.to(self.device)
                outputs = self.global_model(data)
                _, preds = torch.max(outputs, 1)
                total += labels.size(0)
                correct += (preds == labels).sum().item()
        return correct / total


# Client for Federated Learning
class Client:
    def __init__(self, client_id, model, train_loader, test_loader, device, lr=0.001, epsilon=1.0, delta=1e-5):  # Lowered learning rate
        self.client_id = client_id
        self.local_model = model.to(device)
        self.train_loader = train_loader
        self.test_loader = test_loader
        self.device = device
        self.criterion = nn.CrossEntropyLoss()
        self.optimizer = optim.Adam(self.local_model.parameters(), lr=lr)
        self.scaler = GradScaler()
        self.epsilon = epsilon
        self.delta = delta

        # Initialize PrivacyEngine
        self.privacy_engine = PrivacyEngine()
        self.local_model, self.optimizer, self.train_loader = self.privacy_engine.make_private(
            module=self.local_model,
            optimizer=self.optimizer,
            data_loader=self.train_loader,
            noise_multiplier=1.1,  # Adjust this value based on your privacy budget
            max_grad_norm=1.0,  # Gradient clipping norm
        )

    def set_weights(self, global_weights):
        self.local_model.load_state_dict(global_weights)

    def get_weights(self):
        return self.local_model.state_dict()

    def train(self, epochs):
        self.local_model.train()
        for epoch in range(epochs):
            running_loss = 0.0
            for data, labels in self.train_loader:
                data, labels = data.to(self.device), labels.to(self.device)

                self.optimizer.zero_grad()

                # Mixed Precision Training
                with autocast():  # Automatically uses the current device (cuda or cpu)
                    outputs = self.local_model(data)
                    loss = self.criterion(outputs, labels)

                # Gradient clipping to avoid exploding gradients
                torch.nn.utils.clip_grad_norm_(self.local_model.parameters(), max_norm=1.0)

                self.scaler.scale(loss).backward()
                self.scaler.step(self.optimizer)
                self.scaler.update()

                running_loss += loss.item()

            print(f"Client {self.client_id}: Epoch {epoch + 1}, Loss: {running_loss / len(self.train_loader)}")


# Function to split the dataset across clients
def split_dataset(dataset, num_clients):
    # Ensure the dataset can be split evenly among the clients
    client_datasets = random_split(dataset, [len(dataset) // num_clients] * (num_clients - 1) + [len(dataset) - len(dataset) // num_clients * (num_clients - 1)])
    return client_datasets


# Function to apply SMOTE to the dataset
def apply_smote(dataset):
    X = dataset.data.iloc[:, :-1].values
    y = dataset.data.iloc[:, -1].values
    print(f"Class distribution before SMOTE: {Counter(y)}")
    smote = SMOTE(random_state=42)
    X_resampled, y_resampled = smote.fit_resample(X, y)
    print(f"Class distribution after SMOTE: {Counter(y_resampled)}")
    resampled_data = pd.DataFrame(X_resampled, columns=dataset.data.columns[:-1])
    resampled_data['Class'] = y_resampled
    dataset.data = resampled_data
    return dataset


def main():
    csv_file = filepath  # Update this path

    # Load the dataset
    dataset = FraudDetectionDataset(csv_file=csv_file, augment=True)  # Enable data augmentation
    total_size = len(dataset)

    # Apply SMOTE to the dataset
    dataset = apply_smote(dataset)

    # Recalculate total_size after SMOTE
    total_size = len(dataset)

    # Split the dataset into training and testing sets
    train_size = int(0.8 * total_size)
    test_size = total_size - train_size
    train_dataset, test_dataset = random_split(dataset, [train_size, test_size])

    # Create data loaders for train and test datasets
    train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True, num_workers=4)
    test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False, num_workers=4)

    # Number of clients
    num_clients = 3
    client_datasets = split_dataset(train_dataset, num_clients)
    client_loaders = [DataLoader(ds, batch_size=64, shuffle=True) for ds in client_datasets]

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    input_size = 30  # Number of features in the dataset
    hidden_size = 32
    num_classes = 2  # Binary classification (fraud or not)
    global_model = FraudDetectionModel(input_size=input_size, hidden_size=hidden_size, num_classes=num_classes)

    # Define class weights
    class_weights = torch.tensor([1.0, 100.0])  # Higher weight for the minority class (fraud)
    clients = [Client(client_id=i,
                      model=FraudDetectionModel(input_size=input_size, hidden_size=hidden_size, num_classes=num_classes),
                      train_loader=client_loaders[i],
                      test_loader=test_loader,
                      device=device,
                      lr=0.001,
                      epsilon=1.0,  # Privacy budget
                      delta=1e-5)  # Privacy parameter
               for i in range(num_clients)]

    global_epochs = 5
    server = Server(model=global_model, clients=clients, num_rounds=5, epochs=global_epochs, device=device)
    server.distribute_and_train()

    # Anomaly Detection using Autoencoder
    print("\nTraining Autoencoder for Anomaly Detection...")
    autoencoder = Autoencoder(input_size=input_size, hidden_size=hidden_size).to(device)
    criterion = nn.MSELoss()
    optimizer = optim.Adam(autoencoder.parameters(), lr=0.001)

    # Train the autoencoder on normal transactions
    normal_data = dataset.data[dataset.data['Class'] == 0].iloc[:, :-1].values
    normal_loader = DataLoader(torch.tensor(normal_data, dtype=torch.float32), batch_size=64, shuffle=True)

    for epoch in range(10):
        for data in normal_loader:
            data = data.to(device)
            optimizer.zero_grad()
            reconstructed = autoencoder(data)
            loss = criterion(reconstructed, data)
            loss.backward()
            optimizer.step()
        print(f"Autoencoder Epoch {epoch + 1}, Loss: {loss.item()}")

    # Evaluate the autoencoder on test data
    test_data = torch.tensor(dataset.data.iloc[:, :-1].values, dtype=torch.float32).to(device)
    with torch.no_grad():
        reconstructed = autoencoder(test_data)
        reconstruction_error = torch.mean((reconstructed - test_data) ** 2, dim=1).cpu().numpy()

    # Classify anomalies based on reconstruction error
    threshold = np.percentile(reconstruction_error, 95)  # 95th percentile as threshold
    predictions = (reconstruction_error > threshold).astype(int)
    print(f"Anomaly Detection Results: {Counter(predictions)}")

if __name__ == '__main__':
    main()

Augmented 492 fraud cases. New fraud count: 984
Class distribution before SMOTE: Counter({0: 284315, 1: 984})
Class distribution after SMOTE: Counter({0: 284315, 1: 284315})

Round 1/5


  self.scaler = GradScaler()


RuntimeError: Error(s) in loading state_dict for GradSampleModule:
	Missing key(s) in state_dict: "_module.fc1.weight", "_module.fc1.bias", "_module.fc2.weight", "_module.fc2.bias". 
	Unexpected key(s) in state_dict: "fc1.weight", "fc1.bias", "fc2.weight", "fc2.bias". 

In [None]:
###Only Differential Privacy

In [None]:
import torch
import numpy as np
import pandas as pd
from torch.utils.data import Dataset, DataLoader, random_split
import torch.nn as nn
import torch.optim as optim
from torch.cuda.amp import autocast, GradScaler
from sklearn.preprocessing import StandardScaler
from imblearn.over_sampling import SMOTE
from collections import Counter
from opacus import PrivacyEngine

# FraudDetectionDataset Class (unchanged)
class FraudDetectionDataset(Dataset):
    def __init__(self, csv_file, transform=None, augment=False):
        self.data = pd.read_csv(csv_file)

        # Check for NaN values in the dataset
        if self.data.isna().sum().sum() > 0:
            print("Warning: NaN values found in the dataset. Filling with column mean.")
            self.data.fillna(self.data.mean(), inplace=True)  # Handle NaN values by filling with mean

        # Normalize the input features
        self.scaler = StandardScaler()
        self.data.iloc[:, :-1] = self.scaler.fit_transform(self.data.iloc[:, :-1])

        # Data Augmentation: Add noise to fraud cases
        if augment:
            self.augment_fraud_cases()

        self.transform = transform

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        sample = self.data.iloc[idx, :-1].values.astype(np.float32)
        label = self.data.iloc[idx, -1].astype(np.int64)

        if self.transform:
            sample = self.transform(sample)

        return sample, label

    def augment_fraud_cases(self):
        """Add noise to fraud cases to create synthetic fraud samples."""
        fraud_indices = self.data[self.data['Class'] == 1].index
        num_frauds = len(fraud_indices)
        if num_frauds == 0:
            return

        # Generate synthetic fraud cases by adding Gaussian noise
        fraud_samples = self.data.iloc[fraud_indices, :-1].values
        noise = np.random.normal(0, 0.1, fraud_samples.shape)  # Small noise
        synthetic_samples = fraud_samples + noise

        # Append synthetic fraud cases to the dataset
        synthetic_data = pd.DataFrame(synthetic_samples, columns=self.data.columns[:-1])
        synthetic_data['Class'] = 1
        self.data = pd.concat([self.data, synthetic_data], ignore_index=True)
        print(f"Augmented {num_frauds} fraud cases. New fraud count: {len(self.data[self.data['Class'] == 1])}")


# Autoencoder for Anomaly Detection (unchanged)
class Autoencoder(nn.Module):
    def __init__(self, input_size, hidden_size):
        super(Autoencoder, self).__init__()
        self.encoder = nn.Sequential(
            nn.Linear(input_size, hidden_size),
            nn.ReLU(),
            nn.Linear(hidden_size, hidden_size // 2),
            nn.ReLU()
        )
        self.decoder = nn.Sequential(
            nn.Linear(hidden_size // 2, hidden_size),
            nn.ReLU(),
            nn.Linear(hidden_size, input_size),
            nn.Sigmoid()  # Ensure output is in the same range as input
        )

    def forward(self, x):
        encoded = self.encoder(x)
        decoded = self.decoder(encoded)
        return decoded


# Server for Federated Learning (updated for key handling)
class Server:
    def __init__(self, model, clients, num_rounds, epochs, device):
        self.global_model = model.to(device)
        self.clients = clients
        self.num_rounds = num_rounds
        self.epochs = epochs  # Global number of epochs
        self.device = device

    def aggregate_weights(self, client_weights):
        global_weights = self.global_model.state_dict()
        for key in global_weights.keys():
            # Handle the _module prefix added by Opacus
            opacus_key = f"_module.{key}"  # Add prefix to match client keys
            if opacus_key in client_weights[0]:  # Check if the key exists in client weights
                global_weights[key] = torch.stack([client_weights[i][opacus_key].float() for i in range(len(client_weights))]).mean(0)
        self.global_model.load_state_dict(global_weights)

    def distribute_and_train(self):
        for round_num in range(self.num_rounds):
            print(f"\nRound {round_num + 1}/{self.num_rounds}")

            global_weights = self.global_model.state_dict()
            client_weights = []

            for client in self.clients:
                client.set_weights(global_weights)
                client.train(self.epochs)  # Pass the global epochs here
                client_weights.append(client.get_weights())

            self.aggregate_weights(client_weights)
            accuracy = self.evaluate_global_model()
            print(f"Global Model Accuracy after round {round_num + 1}: {accuracy:.4f}")

    def evaluate_global_model(self):
        self.global_model.eval()
        correct, total = 0, 0
        test_loader = self.clients[0].test_loader
        with torch.no_grad():
            for data, labels in test_loader:
                data, labels = data.to(self.device), labels.to(self.device)
                outputs = self.global_model(data)
                _, preds = torch.max(outputs, 1)
                total += labels.size(0)
                correct += (preds == labels).sum().item()
        return correct / total


# Client for Federated Learning (updated for key handling)
class Client:
    def __init__(self, client_id, model, train_loader, test_loader, device, lr=0.001, epsilon=1.0, delta=1e-5):
        self.client_id = client_id
        self.local_model = model.to(device)
        self.train_loader = train_loader
        self.test_loader = test_loader
        self.device = device
        self.criterion = nn.CrossEntropyLoss()
        self.optimizer = optim.Adam(self.local_model.parameters(), lr=lr)
        self.scaler = GradScaler()
        self.epsilon = epsilon
        self.delta = delta

        # Initialize PrivacyEngine
        self.privacy_engine = PrivacyEngine()
        self.local_model, self.optimizer, self.train_loader = self.privacy_engine.make_private(
            module=self.local_model,
            optimizer=self.optimizer,
            data_loader=self.train_loader,
            noise_multiplier=1.1,  # Adjust this value based on your privacy budget
            max_grad_norm=1.0,  # Gradient clipping norm
        )

    def set_weights(self, global_weights):
        # Handle the _module prefix added by Opacus
        new_global_weights = {}
        for key, value in global_weights.items():
            new_global_weights[f"_module.{key}"] = value
        self.local_model.load_state_dict(new_global_weights)

    def get_weights(self):
        return self.local_model.state_dict()

    def train(self, epochs):
        self.local_model.train()
        for epoch in range(epochs):
            running_loss = 0.0
            for data, labels in self.train_loader:
                data, labels = data.to(self.device), labels.to(self.device)

                self.optimizer.zero_grad()

                # Mixed Precision Training
                with autocast():  # Automatically uses the current device (cuda or cpu)
                    outputs = self.local_model(data)
                    loss = self.criterion(outputs, labels)

                # Gradient clipping to avoid exploding gradients
                torch.nn.utils.clip_grad_norm_(self.local_model.parameters(), max_norm=1.0)

                self.scaler.scale(loss).backward()
                self.scaler.step(self.optimizer)
                self.scaler.update()

                running_loss += loss.item()

            print(f"Client {self.client_id}: Epoch {epoch + 1}, Loss: {running_loss / len(self.train_loader)}")

# Function to split the dataset across clients (unchanged)
def split_dataset(dataset, num_clients):
    client_datasets = random_split(dataset, [len(dataset) // num_clients] * (num_clients - 1) + [len(dataset) - len(dataset) // num_clients * (num_clients - 1)])
    return client_datasets


# Function to apply SMOTE to the dataset (unchanged)
def apply_smote(dataset):
    X = dataset.data.iloc[:, :-1].values
    y = dataset.data.iloc[:, -1].values
    print(f"Class distribution before SMOTE: {Counter(y)}")
    smote = SMOTE(random_state=42)
    X_resampled, y_resampled = smote.fit_resample(X, y)
    print(f"Class distribution after SMOTE: {Counter(y_resampled)}")
    resampled_data = pd.DataFrame(X_resampled, columns=dataset.data.columns[:-1])
    resampled_data['Class'] = y_resampled
    dataset.data = resampled_data
    return dataset


def main():
    csv_file = filepath  # Update this path

    # Load the dataset
    dataset = FraudDetectionDataset(csv_file=csv_file, augment=True)  # Enable data augmentation
    total_size = len(dataset)

    # Apply SMOTE to the dataset
    dataset = apply_smote(dataset)

    # Recalculate total_size after SMOTE
    total_size = len(dataset)

    # Split the dataset into training and testing sets
    train_size = int(0.8 * total_size)
    test_size = total_size - train_size
    train_dataset, test_dataset = random_split(dataset, [train_size, test_size])

    # Create data loaders for train and test datasets
    train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True, num_workers=4)
    test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False, num_workers=4)

    # Number of clients
    num_clients = 3
    client_datasets = split_dataset(train_dataset, num_clients)
    client_loaders = [DataLoader(ds, batch_size=64, shuffle=True) for ds in client_datasets]

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    input_size = 30  # Number of features in the dataset
    hidden_size = 32
    num_classes = 2  # Binary classification (fraud or not)
    global_model = nn.Sequential(
        nn.Linear(input_size, hidden_size),
        nn.ReLU(),
        nn.Linear(hidden_size, num_classes)
    )

    # Define class weights
    class_weights = torch.tensor([1.0, 100.0])  # Higher weight for the minority class (fraud)
    clients = [Client(client_id=i,
                      model=nn.Sequential(
                          nn.Linear(input_size, hidden_size),
                          nn.ReLU(),
                          nn.Linear(hidden_size, num_classes)
                      ),
                      train_loader=client_loaders[i],
                      test_loader=test_loader,
                      device=device,
                      lr=0.001,
                      epsilon=1.0,  # Privacy budget
                      delta=1e-5)  # Privacy parameter
               for i in range(num_clients)]

    global_epochs = 5
    server = Server(model=global_model, clients=clients, num_rounds=5, epochs=global_epochs, device=device)
    server.distribute_and_train()


if __name__ == '__main__':
    main()

Augmented 492 fraud cases. New fraud count: 984
Class distribution before SMOTE: Counter({0: 284315, 1: 984})
Class distribution after SMOTE: Counter({0: 284315, 1: 284315})

Round 1/5


  self.scaler = GradScaler()
  with autocast():  # Automatically uses the current device (cuda or cpu)
  self._maybe_warn_non_full_backward_hook(args, result, grad_fn)


Client 0: Epoch 1, Loss: 0.18033358278745226
Client 0: Epoch 2, Loss: 0.15285203651960286
Client 0: Epoch 3, Loss: 0.14601472868001528
Client 0: Epoch 4, Loss: 0.13689095406954663
Client 0: Epoch 5, Loss: 0.13687463956278403
Client 1: Epoch 1, Loss: 0.17944727539181127
Client 1: Epoch 2, Loss: 0.1566948464074229
Client 1: Epoch 3, Loss: 0.15002335946022569
Client 1: Epoch 4, Loss: 0.14922836426429878
Client 1: Epoch 5, Loss: 0.137775135146285
Client 2: Epoch 1, Loss: 0.1849932010448636
Client 2: Epoch 2, Loss: 0.15585070755475158
Client 2: Epoch 3, Loss: 0.14908570754565278
Client 2: Epoch 4, Loss: 0.13488055786197978
Client 2: Epoch 5, Loss: 0.11996503692517697
Global Model Accuracy after round 1: 0.9653

Round 2/5
Client 0: Epoch 1, Loss: 0.11624598353729095
Client 0: Epoch 2, Loss: 0.1072626434433331
Client 0: Epoch 3, Loss: 0.10956493340842062
Client 0: Epoch 4, Loss: 0.09914796235901156
Client 0: Epoch 5, Loss: 0.09753628153692123
Client 1: Epoch 1, Loss: 0.12271475492002057
Clien

In [None]:
!git clone https://github.com/parulxdev/privacy-protection-models-finance-healthcare.git


Cloning into 'privacy-protection-models-finance-healthcare'...


In [None]:
!cp *.ipynb privacy-protection-models-finance-healthcare/



cp: cannot stat '*.ipynb': No such file or directory


In [None]:
from google.colab import drive
drive.mount('/content/drive')



Mounted at /content/drive


In [None]:
!ls "/content/drive/MyDrive/"



 1152122130006.jpg
 1152122130006.PNG
'14701012024 (1).pdf'
'14701012024 (2).pdf'
'14701012024 (3).pdf'
'14701012024 (4).pdf'
'14701012024-ParulVarandani(CSE2)-ProgC-Assignment1.pdf'
 14701012024.pdf
'3. PERFEKT.pdf'
'Admit Card_Scholarship Exam - PARUL.pdf'
 AI_class9
 Assembly_ParulVarandani.pdf
'basic maths.pdf'
 Classroom
'CLASS-X\BBET-2020-C-X (PAPER-1)-AT+PCM-SAMPLE PAPER.pdf'
'CLASS-X\BBET-2020-C-X (PAPER-2)-PCM-SAMPLE PAPER.pdf'
'Colab Notebooks'
'Copy of Scan 23 Nov 21 · 09·28·38.pdf'
 CSE2_120_147_154_IOT_MiniProject.mp4
'Differentiation CPP.pdf'
'Differentiation Discussion + Integration .pdf'
'Differentiation .pdf'
'Einwilligung Foto-Video-Aufnahmen EN_Final.docx'
'English Activity.docx'
'English Activity.gdoc'
'Getting started.pdf'
'good lines.gdoc'
 IEEE
 IMG_20210625_120818~2.jpg
'Integration 2nd Class .pdf'
'Integration Discussion .pdf'
 INTEGRATION.pdf
'Jugendkongress_ParulVarandani_DeclarationForm (1).pdf'
 Jugendkongress_ParulVarandani_DeclarationForm.pdf
'Limit basic

In [None]:
!git clone https://github.com/parulxdev/privacy-protection-models-finance-healthcare.git


fatal: destination path 'privacy-protection-models-finance-healthcare' already exists and is not an empty directory.


In [None]:
!cp "/content/drive/MyDrive/Projects/Finance_Final/"*.ipynb privacy-protection-models-finance-healthcare/


cp: cannot stat '/content/drive/MyDrive/Projects/Finance_Final/*.ipynb': No such file or directory


In [None]:
!rm -rf privacy-protection-models-finance-healthcare
!git clone https://github.com/parulxdev/privacy-protection-models-finance-healthcare.git


Cloning into 'privacy-protection-models-finance-healthcare'...


In [None]:
cp: cannot stat '/content/drive/MyDrive/Projects/Finance_Final/*.ipynb': No such file or directory


cp: target 'directory' is not a directory


In [None]:
!pwd
!ls


/content
drive  privacy-protection-models-finance-healthcare  sample_data
