In [None]:
from google.colab import drive
drive.mount('/content/drive')
filepath = '/content/drive/MyDrive/Colab Notebooks/data.csv'

In [None]:
#BaseCode

In [None]:
import torch
import numpy as np
import pandas as pd
from torch.utils.data import Dataset, DataLoader, random_split
import torch.nn as nn
import torch.optim as optim
from torch.cuda.amp import autocast, GradScaler
from sklearn.preprocessing import StandardScaler, LabelEncoder
from collections import Counter
from torch.optim.lr_scheduler import ReduceLROnPlateau

# BreastCancerDataset Class
class BreastCancerDataset(Dataset):
    def __init__(self, csv_file, transform=None, scaler=None):
        self.data = pd.read_csv(csv_file)
        self.data = self.data.drop(columns=['id', 'Unnamed: 32'])  # Drop unnecessary columns

        # Encode the diagnosis column (M = malignant, B = benign)
        self.label_encoder = LabelEncoder()
        self.data['diagnosis'] = self.label_encoder.fit_transform(self.data['diagnosis'])

        # Handle NaN values
        if self.data.isna().sum().sum() > 0:
            print("Warning: NaN values found in the dataset. Handling NaN values...")
            self.data['diagnosis'] = self.data['diagnosis'].fillna(-1)
            self.data = self.data.fillna(self.data.mean())

        # Normalize the input features
        if scaler is None:
            self.scaler = StandardScaler()
            self.data.iloc[:, :-1] = self.scaler.fit_transform(self.data.iloc[:, :-1])
        else:
            self.scaler = scaler
            self.data.iloc[:, :-1] = self.scaler.transform(self.data.iloc[:, :-1])

        self.transform = transform

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        sample = self.data.iloc[idx, :-1].values.astype(np.float32)
        label = self.data.iloc[idx, -1].astype(np.int64)
        if self.transform:
            sample = self.transform(sample)
        return sample, label


# Model for Breast Cancer Classification
class BreastCancerModel(nn.Module):
    def __init__(self, input_size, hidden_size, num_classes):
        super(BreastCancerModel, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.bn1 = nn.BatchNorm1d(hidden_size)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(0.1)  # Adjusted dropout rate
        self.fc2 = nn.Linear(hidden_size, num_classes)

        # Initialize weights properly
        self._initialize_weights()

    def _initialize_weights(self):
        for m in self.modules():
            if isinstance(m, nn.Linear):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)

    def forward(self, x):
        out = self.fc1(x)
        out = self.bn1(out)
        out = self.relu(out)
        out = self.dropout(out)
        out = self.fc2(out)
        return out


# Server for Federated Learning
class Server:
    def __init__(self, model, clients, num_rounds, epochs, device):
        self.global_model = model.to(device)
        self.clients = clients
        self.num_rounds = num_rounds
        self.epochs = epochs
        self.device = device

    def aggregate_weights(self, client_weights):
        global_weights = self.global_model.state_dict()
        for key in global_weights.keys():
            global_weights[key] = torch.stack([client_weights[i][key].float() for i in range(len(client_weights))]).mean(0)
        self.global_model.load_state_dict(global_weights)

    def distribute_and_train(self):
        for round_num in range(self.num_rounds):
            print(f"\nRound {round_num + 1}/{self.num_rounds}")
            global_weights = self.global_model.state_dict()
            client_weights = []
            for client in self.clients:
                client.set_weights(global_weights)
                client.train(self.epochs)
                client_weights.append(client.get_weights())
            self.aggregate_weights(client_weights)
            accuracy = self.evaluate_global_model()
            print(f"Global Model Accuracy after round {round_num + 1}: {accuracy:.4f}")

    def evaluate_global_model(self):
        self.global_model.eval()
        correct, total = 0, 0
        test_loader = self.clients[0].test_loader
        with torch.no_grad():
            for data, labels in test_loader:
                data, labels = data.to(self.device), labels.to(self.device)
                outputs = self.global_model(data)
                _, preds = torch.max(outputs, 1)
                total += labels.size(0)
                correct += (preds == labels).sum().item()
        return correct / total


# Client for Federated Learning
class Client:
    def __init__(self, client_id, model, train_loader, test_loader, device, lr=0.0001):  # Reduced learning rate
        self.client_id = client_id
        self.local_model = model.to(device)
        self.train_loader = train_loader
        self.test_loader = test_loader
        self.device = device
        self.criterion = nn.CrossEntropyLoss()
        self.optimizer = optim.Adam(self.local_model.parameters(), lr=lr, weight_decay=1e-4)
        self.scheduler = ReduceLROnPlateau(self.optimizer, mode='min', factor=0.1, patience=2)
        self.scaler = GradScaler()

    def set_weights(self, global_weights):
        self.local_model.load_state_dict(global_weights)

    def get_weights(self):
        return self.local_model.state_dict()

    def train(self, epochs):
        self.local_model.train()
        for epoch in range(epochs):
            running_loss = 0.0
            for data, labels in self.train_loader:
                data, labels = data.to(self.device), labels.to(self.device)
                self.optimizer.zero_grad()
                with autocast():
                    outputs = self.local_model(data)
                    loss = self.criterion(outputs, labels)
                # Gradient clipping with a smaller max_norm
                torch.nn.utils.clip_grad_norm_(self.local_model.parameters(), max_norm=1.0)
                self.scaler.scale(loss).backward()
                self.scaler.step(self.optimizer)
                self.scaler.update()
                running_loss += loss.item()
            self.scheduler.step(running_loss / len(self.train_loader))
            print(f"Client {self.client_id}: Epoch {epoch + 1}, Loss: {running_loss / len(self.train_loader)}")


# Function to split the dataset across clients
def split_dataset(dataset, num_clients):
    client_datasets = random_split(dataset, [len(dataset) // num_clients] * (num_clients - 1) + [len(dataset) - len(dataset) // num_clients * (num_clients - 1)])
    return client_datasets


def main():
    csv_file = filepath  # Update this path

    # Load the dataset
    dataset = BreastCancerDataset(csv_file=csv_file)
    total_size = len(dataset)

    # Split the dataset into training, validation, and testing sets
    train_size = int(0.7 * total_size)  # 70% for training
    val_size = int(0.15 * total_size)  # 15% for validation
    test_size = total_size - train_size - val_size  # 15% for testing
    train_dataset, val_dataset, test_dataset = random_split(dataset, [train_size, val_size, test_size])

    # Apply the same scaler to validation and test sets
    val_dataset.dataset.scaler = train_dataset.dataset.scaler
    test_dataset.dataset.scaler = train_dataset.dataset.scaler

    # Create data loaders
    train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True, num_workers=2)
    val_loader = DataLoader(val_dataset, batch_size=64, shuffle=False, num_workers=2)
    test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False, num_workers=2)

    # Debugging: Check the shape of the first batch
    for data, labels in train_loader:
        print(f"Data shape: {data.shape}")  # Should be (batch_size, 30)
        print(f"Labels shape: {labels.shape}")  # Should be (batch_size,)
        break

    # Number of clients
    num_clients = 3
    client_datasets = split_dataset(train_dataset, num_clients)
    client_loaders = [DataLoader(ds, batch_size=64, shuffle=True) for ds in client_datasets]

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    input_size = 30  # Number of features in the dataset
    hidden_size = 32  # Increased hidden size
    num_classes = 2  # Binary classification (benign or malignant)
    global_model = BreastCancerModel(input_size=input_size, hidden_size=hidden_size, num_classes=num_classes)

    # Define class weights (adjust based on class distribution)
    class_weights = torch.tensor([1.0, 2.0])  # Example: Give more weight to the minority class
    clients = [Client(client_id=i,
                      model=BreastCancerModel(input_size=input_size, hidden_size=hidden_size, num_classes=num_classes),
                      train_loader=client_loaders[i],
                      test_loader=test_loader,
                      device=device,
                      lr=0.0001)  # Reduced learning rate
               for i in range(num_clients)]

    global_epochs = 5
    server = Server(model=global_model, clients=clients, num_rounds=5, epochs=global_epochs, device=device)
    server.distribute_and_train()


if __name__ == '__main__':
    main()

In [None]:
#secure Aggregation

In [None]:
import torch
import numpy as np
import pandas as pd
from torch.utils.data import Dataset, DataLoader, random_split
import torch.nn as nn
import torch.optim as optim
from torch.cuda.amp import autocast, GradScaler
from sklearn.preprocessing import StandardScaler, LabelEncoder
from collections import Counter
from torch.optim.lr_scheduler import ReduceLROnPlateau

# BreastCancerDataset Class
class BreastCancerDataset(Dataset):
    def __init__(self, csv_file, transform=None, scaler=None):
        self.data = pd.read_csv(csv_file)
        self.data = self.data.drop(columns=['id', 'Unnamed: 32'])  # Drop unnecessary columns

        # Encode the diagnosis column (M = malignant, B = benign)
        self.label_encoder = LabelEncoder()
        self.data['diagnosis'] = self.label_encoder.fit_transform(self.data['diagnosis'])

        # Handle NaN values
        if self.data.isna().sum().sum() > 0:
            print("Warning: NaN values found in the dataset. Handling NaN values...")
            self.data['diagnosis'] = self.data['diagnosis'].fillna(-1)
            self.data = self.data.fillna(self.data.mean())

        # Normalize the input features
        if scaler is None:
            self.scaler = StandardScaler()
            self.data.iloc[:, :-1] = self.scaler.fit_transform(self.data.iloc[:, :-1])
        else:
            self.scaler = scaler
            self.data.iloc[:, :-1] = self.scaler.transform(self.data.iloc[:, :-1])

        self.transform = transform

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        sample = self.data.iloc[idx, :-1].values.astype(np.float32)
        label = self.data.iloc[idx, -1].astype(np.int64)
        if self.transform:
            sample = self.transform(sample)
        return sample, label


# Model for Breast Cancer Classification
class BreastCancerModel(nn.Module):
    def __init__(self, input_size, hidden_size, num_classes):
        super(BreastCancerModel, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.bn1 = nn.BatchNorm1d(hidden_size)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(0.1)  # Adjusted dropout rate
        self.fc2 = nn.Linear(hidden_size, num_classes)

        # Initialize weights properly
        self._initialize_weights()

    def _initialize_weights(self):
        for m in self.modules():
            if isinstance(m, nn.Linear):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)

    def forward(self, x):
        out = self.fc1(x)
        out = self.bn1(out)
        out = self.relu(out)
        out = self.dropout(out)
        out = self.fc2(out)
        return out


# Server for Federated Learning
class Server:
    def __init__(self, model, clients, num_rounds, epochs, device):
        self.global_model = model.to(device)
        self.clients = clients
        self.num_rounds = num_rounds
        self.epochs = epochs
        self.device = device

    def aggregate_weights(self, client_weights):
        global_weights = self.global_model.state_dict()
        for key in global_weights.keys():
            global_weights[key] = torch.stack([client_weights[i][key].float() for i in range(len(client_weights))]).mean(0)
        self.global_model.load_state_dict(global_weights)

    def distribute_and_train(self):
        for round_num in range(self.num_rounds):
            print(f"\nRound {round_num + 1}/{self.num_rounds}")
            global_weights = self.global_model.state_dict()
            client_weights = []
            for client in self.clients:
                client.set_weights(global_weights)
                client.train(self.epochs)
                client_weights.append(client.get_weights())
            self.aggregate_weights(client_weights)
            accuracy = self.evaluate_global_model()
            print(f"Global Model Accuracy after round {round_num + 1}: {accuracy:.4f}")

    def evaluate_global_model(self):
        self.global_model.eval()
        correct, total = 0, 0
        test_loader = self.clients[0].test_loader
        with torch.no_grad():
            for data, labels in test_loader:
                data, labels = data.to(self.device), labels.to(self.device)
                outputs = self.global_model(data)
                _, preds = torch.max(outputs, 1)
                total += labels.size(0)
                correct += (preds == labels).sum().item()
        return correct / total


# Client for Federated Learning
class Client:
    def __init__(self, client_id, model, train_loader, test_loader, device, lr=0.0001):  # Reduced learning rate
        self.client_id = client_id
        self.local_model = model.to(device)
        self.train_loader = train_loader
        self.test_loader = test_loader
        self.device = device
        self.criterion = nn.CrossEntropyLoss()
        self.optimizer = optim.Adam(self.local_model.parameters(), lr=lr, weight_decay=1e-4)
        self.scheduler = ReduceLROnPlateau(self.optimizer, mode='min', factor=0.1, patience=2)
        self.scaler = GradScaler()

    def set_weights(self, global_weights):
        self.local_model.load_state_dict(global_weights)

    def get_weights(self):
        return self.local_model.state_dict()

    def train(self, epochs):
        self.local_model.train()
        for epoch in range(epochs):
            running_loss = 0.0
            for data, labels in self.train_loader:
                data, labels = data.to(self.device), labels.to(self.device)
                self.optimizer.zero_grad()
                with autocast():
                    outputs = self.local_model(data)
                    loss = self.criterion(outputs, labels)
                # Gradient clipping with a smaller max_norm
                torch.nn.utils.clip_grad_norm_(self.local_model.parameters(), max_norm=3.0)
                self.scaler.scale(loss).backward()
                self.scaler.step(self.optimizer)
                self.scaler.update()
                running_loss += loss.item()
            self.scheduler.step(running_loss / len(self.train_loader))
            print(f"Client {self.client_id}: Epoch {epoch + 1}, Loss: {running_loss / len(self.train_loader)}")


# Function to split the dataset across clients
def split_dataset(dataset, num_clients):
    client_datasets = random_split(dataset, [len(dataset) // num_clients] * (num_clients - 1) + [len(dataset) - len(dataset) // num_clients * (num_clients - 1)])
    return client_datasets


def main():
    csv_file = filepath  # Update this path

    # Load the dataset
    dataset = BreastCancerDataset(csv_file=csv_file)
    total_size = len(dataset)

    # Split the dataset into training, validation, and testing sets
    train_size = int(0.7 * total_size)  # 70% for training
    val_size = int(0.15 * total_size)  # 15% for validation
    test_size = total_size - train_size - val_size  # 15% for testing
    train_dataset, val_dataset, test_dataset = random_split(dataset, [train_size, val_size, test_size])

    # Apply the same scaler to validation and test sets
    val_dataset.dataset.scaler = train_dataset.dataset.scaler
    test_dataset.dataset.scaler = train_dataset.dataset.scaler

    # Create data loaders
    train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True, num_workers=2)
    val_loader = DataLoader(val_dataset, batch_size=64, shuffle=False, num_workers=2)
    test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False, num_workers=2)

    # Debugging: Check the shape of the first batch
    for data, labels in train_loader:
        print(f"Data shape: {data.shape}")  # Should be (batch_size, 30)
        print(f"Labels shape: {labels.shape}")  # Should be (batch_size,)
        break

    # Number of clients
    num_clients = 3
    client_datasets = split_dataset(train_dataset, num_clients)
    client_loaders = [DataLoader(ds, batch_size=64, shuffle=True) for ds in client_datasets]

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    input_size = 30  # Number of features in the dataset
    hidden_size = 64  # Increased hidden size
    num_classes = 2  # Binary classification (benign or malignant)
    global_model = BreastCancerModel(input_size=input_size, hidden_size=hidden_size, num_classes=num_classes)

    # Define class weights (adjust based on class distribution)
    class_weights = torch.tensor([1.0, 2.0])  # Example: Give more weight to the minority class
    clients = [Client(client_id=i,
                      model=BreastCancerModel(input_size=input_size, hidden_size=hidden_size, num_classes=num_classes),
                      train_loader=client_loaders[i],
                      test_loader=test_loader,
                      device=device,
                      lr=0.0001)  # Reduced learning rate
               for i in range(num_clients)]

    global_epochs = 5  # Increased number of epochs
    server = Server(model=global_model, clients=clients, num_rounds=5, epochs=global_epochs, device=device)
    server.distribute_and_train()


if __name__ == '__main__':
    main()

In [None]:
# Differential Privacy

In [None]:
import torch
import numpy as np
import pandas as pd
from torch.utils.data import Dataset, DataLoader, random_split
import torch.nn as nn
import torch.optim as optim
from torch.cuda.amp import autocast, GradScaler
from sklearn.preprocessing import StandardScaler, LabelEncoder
from collections import Counter
from opacus import PrivacyEngine

# BreastCancerDataset Class (updated)
class BreastCancerDataset(Dataset):
    def __init__(self, csv_file, transform=None, augment=False, scaler=None):
        self.data = pd.read_csv(csv_file)

        # Drop unnecessary columns
        self.data = self.data.drop(columns=['id', 'Unnamed: 32'])

        # Handle NaN values
        self.data.iloc[:, 1:] = self.data.iloc[:, 1:].fillna(self.data.iloc[:, 1:].mean())  # Fill feature columns with mean
        self.data['diagnosis'] = self.data['diagnosis'].fillna('Unknown')  # Fill diagnosis column with placeholder

        # Encode diagnosis column (M=1, B=0)
        self.label_encoder = LabelEncoder()
        self.data['diagnosis'] = self.label_encoder.fit_transform(self.data['diagnosis'])

        # Normalize the input features
        self.scaler = scaler if scaler else StandardScaler()
        self.data.iloc[:, 1:] = self.scaler.fit_transform(self.data.iloc[:, 1:])

        # Data Augmentation: Add noise to malignant cases
        if augment:
            self.augment_malignant_cases()

        self.transform = transform

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        sample = self.data.iloc[idx, 1:].values.astype(np.float32)  # Features are columns 1:
        label = self.data.iloc[idx, 0].astype(np.int64)  # Label is column 0 (diagnosis)

        if self.transform:
            sample = self.transform(sample)

        return sample, label

    def augment_malignant_cases(self):
        """Add noise to malignant cases to create synthetic samples."""
        malignant_indices = self.data[self.data['diagnosis'] == 1].index
        num_malignant = len(malignant_indices)
        if num_malignant == 0:
            return

        # Generate synthetic malignant cases by adding Gaussian noise
        malignant_samples = self.data.iloc[malignant_indices, 1:].values
        noise = np.random.normal(0, 0.1, malignant_samples.shape)  # Small noise
        synthetic_samples = malignant_samples + noise

        # Append synthetic malignant cases to the dataset
        synthetic_data = pd.DataFrame(synthetic_samples, columns=self.data.columns[1:])
        synthetic_data['diagnosis'] = 1
        self.data = pd.concat([self.data, synthetic_data], ignore_index=True)
        print(f"Augmented {num_malignant} malignant cases. New malignant count: {len(self.data[self.data['diagnosis'] == 1])}")


# Autoencoder for Anomaly Detection (unchanged)
class Autoencoder(nn.Module):
    def __init__(self, input_size, hidden_size):
        super(Autoencoder, self).__init__()
        self.encoder = nn.Sequential(
            nn.Linear(input_size, hidden_size),
            nn.ReLU(),
            nn.Linear(hidden_size, hidden_size // 2),
            nn.ReLU()
        )
        self.decoder = nn.Sequential(
            nn.Linear(hidden_size // 2, hidden_size),
            nn.ReLU(),
            nn.Linear(hidden_size, input_size),
            nn.Sigmoid()  # Ensure output is in the same range as input
        )

    def forward(self, x):
        encoded = self.encoder(x)
        decoded = self.decoder(encoded)
        return decoded


# BreastCancerModel Class (updated)


class BreastCancerModel(nn.Module):
    def __init__(self, input_size, hidden_size, num_classes):
        super(BreastCancerModel, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.gn1 = nn.GroupNorm(1, hidden_size)  # GroupNorm with 1 group (equivalent to LayerNorm)
        self.dropout1 = nn.Dropout(0.5)
        self.fc2 = nn.Linear(hidden_size, hidden_size // 2)
        self.gn2 = nn.GroupNorm(1, hidden_size // 2)  # GroupNorm with 1 group
        self.dropout2 = nn.Dropout(0.5)
        self.fc3 = nn.Linear(hidden_size // 2, num_classes)

    def forward(self, x):
        x = torch.relu(self.gn1(self.fc1(x)))
        x = self.dropout1(x)
        x = torch.relu(self.gn2(self.fc2(x)))
        x = self.dropout2(x)
        x = self.fc3(x)
        return x

# Server for Federated Learning (unchanged)
class Server:
    def __init__(self, model, clients, num_rounds, epochs, device):
        self.global_model = model.to(device)
        self.clients = clients
        self.num_rounds = num_rounds
        self.epochs = epochs  # Global number of epochs
        self.device = device

    def aggregate_weights(self, client_weights):
        global_weights = self.global_model.state_dict()
        for key in global_weights.keys():
            # Handle the _module prefix added by Opacus
            opacus_key = f"_module.{key}"  # Add prefix to match client keys
            if opacus_key in client_weights[0]:  # Check if the key exists in client weights
                global_weights[key] = torch.stack([client_weights[i][opacus_key].float() for i in range(len(client_weights))]).mean(0)
        self.global_model.load_state_dict(global_weights)

    def distribute_and_train(self):
        for round_num in range(self.num_rounds):
            print(f"\nRound {round_num + 1}/{self.num_rounds}")

            global_weights = self.global_model.state_dict()
            client_weights = []

            for client in self.clients:
                client.set_weights(global_weights)
                client.train(self.epochs)  # Pass the global epochs here
                client_weights.append(client.get_weights())

            self.aggregate_weights(client_weights)
            accuracy = self.evaluate_global_model()
            print(f"Global Model Accuracy after round {round_num + 1}: {accuracy:.4f}")

    def evaluate_global_model(self):
        self.global_model.eval()
        correct, total = 0, 0
        test_loader = self.clients[0].test_loader
        with torch.no_grad():
            for data, labels in test_loader:
                data, labels = data.to(self.device), labels.to(self.device)
                outputs = self.global_model(data)
                _, preds = torch.max(outputs, 1)
                total += labels.size(0)
                correct += (preds == labels).sum().item()
        return correct / total


# Client for Federated Learning (unchanged)
class Client:
    def __init__(self, client_id, model, train_loader, test_loader, device, lr=0.001, epsilon=1.0, delta=1e-5):
        self.client_id = client_id
        self.local_model = model.to(device)
        self.train_loader = train_loader
        self.test_loader = test_loader
        self.device = device
        self.criterion = nn.CrossEntropyLoss()
        self.optimizer = optim.Adam(self.local_model.parameters(), lr=lr)
        self.scaler = GradScaler()
        self.epsilon = epsilon
        self.delta = delta

        # Initialize PrivacyEngine
        self.privacy_engine = PrivacyEngine()
        self.local_model, self.optimizer, self.train_loader = self.privacy_engine.make_private(
            module=self.local_model,
            optimizer=self.optimizer,
            data_loader=self.train_loader,
            noise_multiplier=1.1,  # Adjust this value based on your privacy budget
            max_grad_norm=1.0,  # Gradient clipping norm
        )

    def set_weights(self, global_weights):
        # Handle the _module prefix added by Opacus
        new_global_weights = {}
        for key, value in global_weights.items():
            new_global_weights[f"_module.{key}"] = value
        self.local_model.load_state_dict(new_global_weights)

    def get_weights(self):
        return self.local_model.state_dict()

    def train(self, epochs):
        self.local_model.train()
        for epoch in range(epochs):
            running_loss = 0.0
            for data, labels in self.train_loader:
                data, labels = data.to(self.device), labels.to(self.device)

                self.optimizer.zero_grad()

                # Mixed Precision Training
                with autocast():  # Automatically uses the current device (cuda or cpu)
                    outputs = self.local_model(data)
                    loss = self.criterion(outputs, labels)

                # Gradient clipping to avoid exploding gradients
                torch.nn.utils.clip_grad_norm_(self.local_model.parameters(), max_norm=1.0)

                self.scaler.scale(loss).backward()
                self.scaler.step(self.optimizer)
                self.scaler.update()

                running_loss += loss.item()

            print(f"Client {self.client_id}: Epoch {epoch + 1}, Loss: {running_loss / len(self.train_loader)}")

# Function to split the dataset across clients (unchanged)
def split_dataset(dataset, num_clients):
    client_datasets = random_split(dataset, [len(dataset) // num_clients] * (num_clients - 1) + [len(dataset) - len(dataset) // num_clients * (num_clients - 1)])
    return client_datasets




def main():
    csv_file = filepath  # Update this path

    # Load the dataset
    scaler = StandardScaler()  # External scaler for consistent normalization
    dataset = BreastCancerDataset(csv_file=csv_file, augment=True, scaler=scaler)  # Enable data augmentation
    total_size = len(dataset)

    # Split the dataset into training and testing sets
    train_size = int(0.8 * total_size)
    test_size = total_size - train_size
    train_dataset, test_dataset = random_split(dataset, [train_size, test_size])

    # Create data loaders for train and test datasets
    train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True, num_workers=4)
    test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False, num_workers=4)

    # Number of clients
    num_clients = 3
    client_datasets = split_dataset(train_dataset, num_clients)
    client_loaders = [DataLoader(ds, batch_size=64, shuffle=True) for ds in client_datasets]

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    input_size = 30  # Number of features in the dataset
    hidden_size = 32
    num_classes = 2  # Binary classification (malignant or benign)
    global_model = BreastCancerModel(input_size, hidden_size, num_classes)

    # Define class weights
    class_weights = torch.tensor([1.0, 2.0])  # Adjusted weights for breast cancer dataset
    clients = [
        Client(
            client_id=i,
            model=BreastCancerModel(input_size, hidden_size, num_classes),
            train_loader=client_loaders[i],
            test_loader=test_loader,
            device=device,
            lr=0.001,
            epsilon=1.0,  # Privacy budget
            delta=1e-5  # Privacy parameter
        )
        for i in range(num_clients)
    ]

    global_epochs = 5
    server = Server(model=global_model, clients=clients, num_rounds=5, epochs=global_epochs, device=device)
    server.distribute_and_train()


if __name__ == '__main__':
    main()


In [None]:
# Secyre Aggregation + Diffrential Privacy

In [None]:
import torch
import numpy as np
import pandas as pd
from torch.utils.data import Dataset, DataLoader, random_split
import torch.nn as nn
import torch.optim as optim
from torch.cuda.amp import autocast, GradScaler
from sklearn.preprocessing import StandardScaler, LabelEncoder
from collections import Counter
from torch.optim.lr_scheduler import ReduceLROnPlateau
from opacus import PrivacyEngine  # Import Opacus for differential privacy

# BreastCancerDataset Class (unchanged)
class BreastCancerDataset(Dataset):
    def __init__(self, csv_file, transform=None, scaler=None):
        self.data = pd.read_csv(csv_file)
        self.data = self.data.drop(columns=['id', 'Unnamed: 32'])  # Drop unnecessary columns

        # Encode the diagnosis column (M = malignant, B = benign)
        self.label_encoder = LabelEncoder()
        self.data['diagnosis'] = self.label_encoder.fit_transform(self.data['diagnosis'])

        # Handle NaN values
        if self.data.isna().sum().sum() > 0:
            print("Warning: NaN values found in the dataset. Handling NaN values...")
            self.data['diagnosis'] = self.data['diagnosis'].fillna(-1)
            self.data = self.data.fillna(self.data.mean())

        # Normalize the input features
        if scaler is None:
            self.scaler = StandardScaler()
            self.data.iloc[:, :-1] = self.scaler.fit_transform(self.data.iloc[:, :-1])
        else:
            self.scaler = scaler
            self.data.iloc[:, :-1] = self.scaler.transform(self.data.iloc[:, :-1])

        self.transform = transform

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        sample = self.data.iloc[idx, :-1].values.astype(np.float32)
        label = self.data.iloc[idx, -1].astype(np.int64)
        if self.transform:
            sample = self.transform(sample)
        return sample, label


# BreastCancerModel with GroupNorm (updated)
class BreastCancerModel(nn.Module):
    def __init__(self, input_size, hidden_size, num_classes):
        super(BreastCancerModel, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.gn1 = nn.GroupNorm(num_groups=1, num_channels=hidden_size)  # GroupNorm instead of BatchNorm
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(0.1)
        self.fc2 = nn.Linear(hidden_size, num_classes)

        # Initialize weights properly
        self._initialize_weights()

    def _initialize_weights(self):
        for m in self.modules():
            if isinstance(m, nn.Linear):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)

    def forward(self, x):
        out = self.fc1(x)
        out = self.gn1(out)  # Use GroupNorm
        out = self.relu(out)
        out = self.dropout(out)
        out = self.fc2(out)
        return out


# Server for Federated Learning (unchanged)
class Server:
    def __init__(self, model, clients, num_rounds, epochs, device):
        self.global_model = model.to(device)
        self.clients = clients
        self.num_rounds = num_rounds
        self.epochs = epochs
        self.device = device

    def aggregate_weights(self, client_weights):
        global_weights = self.global_model.state_dict()
        for key in global_weights.keys():
            global_weights[key] = torch.stack([client_weights[i][key].float() for i in range(len(client_weights))]).mean(0)
        self.global_model.load_state_dict(global_weights)

    def distribute_and_train(self):
        for round_num in range(self.num_rounds):
            print(f"\nRound {round_num + 1}/{self.num_rounds}")
            global_weights = self.global_model.state_dict()
            client_weights = []
            for client in self.clients:
                client.set_weights(global_weights)
                client.train(self.epochs)
                client_weights.append(client.get_weights())
            self.aggregate_weights(client_weights)
            accuracy = self.evaluate_global_model()
            print(f"Global Model Accuracy after round {round_num + 1}: {accuracy:.4f}")

    def evaluate_global_model(self):
        self.global_model.eval()
        correct, total = 0, 0
        test_loader = self.clients[0].test_loader
        with torch.no_grad():
            for data, labels in test_loader:
                data, labels = data.to(self.device), labels.to(self.device)
                outputs = self.global_model(data)
                _, preds = torch.max(outputs, 1)
                total += labels.size(0)
                correct += (preds == labels).sum().item()
        return correct / total


# Client for Federated Learning (updated with state dictionary fixes)
class Client:
    def __init__(self, client_id, model, train_loader, test_loader, device, lr=0.0001):
        self.client_id = client_id
        self.local_model = model.to(device)
        self.train_loader = train_loader
        self.test_loader = test_loader
        self.device = device
        self.criterion = nn.CrossEntropyLoss()
        self.optimizer = optim.Adam(self.local_model.parameters(), lr=lr, weight_decay=1e-4)
        self.scheduler = ReduceLROnPlateau(self.optimizer, mode='min', factor=0.1, patience=2)
        self.scaler = GradScaler()

        # Add differential privacy
        self.privacy_engine = PrivacyEngine()
        self.local_model, self.optimizer, self.train_loader = self.privacy_engine.make_private(
            module=self.local_model,
            optimizer=self.optimizer,
            data_loader=self.train_loader,
            noise_multiplier=0.5,  # Adjust based on privacy budget
            max_grad_norm=3.0,  # Clip gradients to this norm
        )

    def set_weights(self, global_weights):
        # Prepend '_module.' to keys to match GradSampleModule's state_dict
        global_weights_fixed = {f"_module.{k}": v for k, v in global_weights.items()}
        self.local_model.load_state_dict(global_weights_fixed)

    def get_weights(self):
        # Remove '_module.' prefix from keys to match the original model's state_dict
        local_weights_fixed = {k.replace("_module.", ""): v for k, v in self.local_model.state_dict().items()}
        return local_weights_fixed

    def train(self, epochs):
        self.local_model.train()
        for epoch in range(epochs):
            running_loss = 0.0
            for data, labels in self.train_loader:
                data, labels = data.to(self.device), labels.to(self.device)
                self.optimizer.zero_grad()
                with autocast():
                    outputs = self.local_model(data)
                    loss = self.criterion(outputs, labels)
                self.scaler.scale(loss).backward()
                self.scaler.step(self.optimizer)
                self.scaler.update()
                running_loss += loss.item()
            self.scheduler.step(running_loss / len(self.train_loader))
            print(f"Client {self.client_id}: Epoch {epoch + 1}, Loss: {running_loss / len(self.train_loader)}")


# Function to split the dataset across clients (unchanged)
def split_dataset(dataset, num_clients):
    client_datasets = random_split(dataset, [len(dataset) // num_clients] * (num_clients - 1) + [len(dataset) - len(dataset) // num_clients * (num_clients - 1)])
    return client_datasets


def main():
    csv_file = filepath  # Update this path

    # Load the dataset
    dataset = BreastCancerDataset(csv_file=csv_file)
    total_size = len(dataset)

    # Split the dataset into training, validation, and testing sets
    train_size = int(0.7 * total_size)  # 70% for training
    val_size = int(0.15 * total_size)  # 15% for validation
    test_size = total_size - train_size - val_size  # 15% for testing
    train_dataset, val_dataset, test_dataset = random_split(dataset, [train_size, val_size, test_size])

    # Apply the same scaler to validation and test sets
    val_dataset.dataset.scaler = train_dataset.dataset.scaler
    test_dataset.dataset.scaler = train_dataset.dataset.scaler

    # Create data loaders
    train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True, num_workers=2)
    val_loader = DataLoader(val_dataset, batch_size=64, shuffle=False, num_workers=2)
    test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False, num_workers=2)

    # Debugging: Check the shape of the first batch
    for data, labels in train_loader:
        print(f"Data shape: {data.shape}")  # Should be (batch_size, 30)
        print(f"Labels shape: {labels.shape}")  # Should be (batch_size,)
        break

    # Number of clients
    num_clients = 3
    client_datasets = split_dataset(train_dataset, num_clients)
    client_loaders = [DataLoader(ds, batch_size=64, shuffle=True) for ds in client_datasets]

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    input_size = 30  # Number of features in the dataset
    hidden_size = 64  # Increased hidden size
    num_classes = 2  # Binary classification (benign or malignant)
    global_model = BreastCancerModel(input_size=input_size, hidden_size=hidden_size, num_classes=num_classes)

    # Define class weights (adjust based on class distribution)
    class_weights = torch.tensor([1.0, 2.0])  # Example: Give more weight to the minority class
    clients = [Client(client_id=i,
                      model=BreastCancerModel(input_size=input_size, hidden_size=hidden_size, num_classes=num_classes),
                      train_loader=client_loaders[i],
                      test_loader=test_loader,
                      device=device,
                      lr=0.0001)  # Reduced learning rate
               for i in range(num_clients)]

    global_epochs = 10 # Increased number of epochs
    server = Server(model=global_model, clients=clients, num_rounds=5, epochs=global_epochs, device=device)
    server.distribute_and_train()


if __name__ == '__main__':
    main()

In [1]:
https://github.com/parulxdev/privacy-protection-models-finance-healthcare.git


SyntaxError: invalid syntax (ipython-input-2110472279.py, line 1)