<a href="https://colab.research.google.com/github/nikspatil0120/Ethics_AI_Experiments/blob/main/Exp2_EthicsAI.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.preprocessing import LabelEncoder
# Seed PyTorch's RNG up front so that weight initialisation and the Gaussian
# noise drawn later in the notebook are repeatable on "Restart & Run All".
SEED = 42
torch.manual_seed(SEED)

# Prefer a CUDA GPU when PyTorch can see one; otherwise run on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device

device(type='cpu')

In [None]:
# Load the health dataset from a CSV file given by a path relative to the
# current working directory, then preview the first rows.
data = pd.read_csv("federated_health_dataset.csv")
data.head()


Unnamed: 0,client_id,age,bmi,blood_pressure,cholesterol_level,glucose_level,insulin_level,physical_activity_score,diet_quality_score,risk_of_diabetes
0,client_1,58,33.114602,126.69434,223.581019,148.596955,114.45687,5.536434,1.097763,0
1,client_1,71,29.118665,137.043635,191.385797,138.994652,97.089644,3.572767,3.742142,0
2,client_1,48,26.465296,127.521401,188.76016,88.724671,76.673548,3.252099,3.350063,0
3,client_1,34,29.483152,135.909579,175.43969,28.635894,108.14197,1.747565,5.32288,0
4,client_1,62,21.94839,109.965378,181.242854,92.869358,42.654259,8.433531,5.220981,0


In [None]:
# Print column dtypes and non-null counts to spot missing values and
# non-numeric columns before modelling.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 10 columns):
 #   Column                   Non-Null Count  Dtype  
---  ------                   --------------  -----  
 0   client_id                1000 non-null   object 
 1   age                      1000 non-null   int64  
 2   bmi                      1000 non-null   float64
 3   blood_pressure           1000 non-null   float64
 4   cholesterol_level        1000 non-null   float64
 5   glucose_level            1000 non-null   float64
 6   insulin_level            1000 non-null   float64
 7   physical_activity_score  1000 non-null   float64
 8   diet_quality_score       1000 non-null   float64
 9   risk_of_diabetes         1000 non-null   int64  
dtypes: float64(7), int64(2), object(1)
memory usage: 78.3+ KB


In [None]:


def encode_dataframe(df):
    """Return a copy of ``df`` with its object-dtype columns label-encoded.

    Every column whose dtype compares equal to ``'object'`` is replaced by the
    integer codes produced by a freshly fitted ``LabelEncoder``; all other
    columns are carried over untouched. The caller's frame is not modified.
    """
    encoded = df.copy()
    for name in encoded.columns:
        # Only object-dtype columns need converting to numbers.
        if encoded[name].dtype != 'object':
            continue
        encoded[name] = LabelEncoder().fit_transform(encoded[name])
    return encoded

# Replace the raw frame with its label-encoded copy.
# NOTE(review): per the data.info() output above, client_id is the only
# object column, so it is turned into an integer code and stays in the frame
# as an input feature (training uses every column except the last) - confirm
# that this is intended.
data = encode_dataframe(data)


In [None]:
# Cast the last column (used as the class label by the training and
# evaluation code) to an integer dtype.
data.iloc[:, -1] = data.iloc[:, -1].astype(int)


In [None]:
def split_dataset(data, num_clients, random_state=42):
    """Shuffle ``data`` and partition it into ``num_clients`` row shards.

    Every row of ``data`` lands in exactly one shard. When the row count is
    not a multiple of ``num_clients`` the leftover rows are handed out one
    each to the first shards, so shard sizes differ by at most one (the
    previous floor-division split silently discarded those leftover rows).

    Args:
        data: DataFrame to partition.
        num_clients: number of shards to produce; must be at least 1.
        random_state: seed forwarded to ``DataFrame.sample`` for the shuffle.
            Defaults to 42, the value that used to be hard-coded.

    Returns:
        A list of ``num_clients`` DataFrames (slices of the shuffled frame).

    Raises:
        ValueError: if ``num_clients`` is smaller than 1.
    """
    if num_clients < 1:
        raise ValueError(f"num_clients must be at least 1, got {num_clients}")

    shuffled = data.sample(frac=1, random_state=random_state).reset_index(drop=True)
    base_size, remainder = divmod(len(shuffled), num_clients)

    clients = []
    start = 0
    for i in range(num_clients):
        # The first `remainder` shards each absorb one of the leftover rows.
        end = start + base_size + (1 if i < remainder else 0)
        clients.append(shuffled.iloc[start:end])
        start = end

    return clients
# Number of simulated clients for the standalone split below.
num_clients = 3
# NOTE(review): federated_learning() performs its own split_dataset() call,
# and client_datasets is not referenced by any later cell visible here.
client_datasets = split_dataset(data, num_clients)

In [None]:
class HealthModel(nn.Module):
    """Small feed-forward classifier: Linear -> ReLU -> Linear.

    Args:
        input_dim: number of input features per sample.
        hidden_dim: width of the hidden layer. Defaults to 32, the value that
            used to be hard-coded.
        num_classes: number of output logits. Defaults to 2, the value that
            used to be hard-coded.
    """

    def __init__(self, input_dim, hidden_dim=32, num_classes=2):
        super().__init__()
        # The attribute names below determine the state_dict keys that are
        # exchanged and averaged between models, so they must stay stable.
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_dim, num_classes)

    def forward(self, x):
        """Return raw (un-normalised) class logits for the input batch ``x``."""
        hidden = self.relu(self.fc1(x))
        return self.fc2(hidden)


In [None]:
def local_train(model, client_df, epochs=2, lr=0.01):
    """Run full-batch SGD on one client's data and return the trained weights.

    All columns of ``client_df`` except the last are used as float32 features;
    the last column is used as the integer class label. Each epoch is a single
    gradient step over the whole frame using cross-entropy loss.

    Args:
        model: the ``nn.Module`` to train; it is updated in place and left in
            training mode.
        client_df: DataFrame holding this client's samples.
        epochs: number of full-batch gradient steps.
        lr: SGD learning rate.

    Returns:
        ``model.state_dict()`` after training.

    Raises:
        ValueError: if ``client_df`` has no rows. Cross-entropy over an empty
            batch is NaN, which would silently corrupt the weights.
    """
    if len(client_df) == 0:
        raise ValueError("client_df is empty; cannot train on zero samples")

    model.train()

    # Put the batch on the device the model already lives on instead of
    # relying on a notebook-level global, so model and data always agree.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else torch.device("cpu")

    X = torch.tensor(client_df.iloc[:, :-1].values, dtype=torch.float32).to(target_device)
    y = torch.tensor(client_df.iloc[:, -1].values, dtype=torch.long).to(target_device)

    optimizer = optim.SGD(model.parameters(), lr=lr)
    loss_fn = nn.CrossEntropyLoss()

    for _ in range(epochs):
        optimizer.zero_grad()
        loss = loss_fn(model(X), y)
        loss.backward()
        optimizer.step()

    return model.state_dict()


In [None]:
def federated_average(weights_list):
    """Average a list of state dicts entry by entry.

    The keys are taken from the first state dict; for each key the matching
    tensors from every dict are stacked and reduced with an unweighted mean
    over the stacking dimension.
    """
    parameter_names = weights_list[0].keys()
    return {
        name: torch.stack([state[name] for state in weights_list]).mean(dim=0)
        for name in parameter_names
    }


In [None]:
def add_noise(weights, noise_scale):
    """Return a copy of ``weights`` with zero-mean Gaussian noise added.

    Noise is sampled with ``torch.randn_like`` so it is created directly with
    each tensor's own shape, dtype and device (no dependence on a notebook
    global, no CPU-to-device copy). Entries that are not floating point
    (for example integer counters) are copied through unchanged, because
    adding real-valued noise to them is meaningless.

    Args:
        weights: mapping from parameter name to tensor (a state dict).
        noise_scale: standard deviation of the noise; must be non-negative.

    Returns:
        A new dict with the same keys; the input tensors are not modified.

    Raises:
        ValueError: if ``noise_scale`` is negative.
    """
    if noise_scale < 0:
        raise ValueError(f"noise_scale must be non-negative, got {noise_scale}")

    noisy_weights = {}

    for key, tensor in weights.items():
        if not torch.is_floating_point(tensor):
            noisy_weights[key] = tensor.clone()
            continue

        # Standard normal samples scaled to the requested standard deviation.
        noisy_weights[key] = tensor + torch.randn_like(tensor) * noise_scale

    return noisy_weights

In [None]:
def federated_learning(data, num_clients, rounds, noise_scale):
    """Simulate federated averaging with noise added to the aggregate.

    ``data`` is split into ``num_clients`` shards. In every round each client
    starts from a copy of the current global weights, trains locally on its
    shard, and the resulting state dicts are averaged. Gaussian noise with
    standard deviation ``noise_scale`` is added to the average before it is
    loaded back into the global model.

    Returns the global model after ``rounds`` rounds.
    """
    shards = split_dataset(data, num_clients)
    # Every column except the last one is a model input.
    n_features = data.shape[1] - 1

    global_model = HealthModel(n_features).to(device)

    def train_replica(shard):
        # Fresh model initialised from the current global weights, then
        # trained on this client's shard only.
        replica = HealthModel(n_features).to(device)
        replica.load_state_dict(global_model.state_dict())
        return local_train(replica, shard)

    for r in range(rounds):
        updates = [train_replica(shards[idx]) for idx in range(num_clients)]

        aggregated = federated_average(updates)
        global_model.load_state_dict(add_noise(aggregated, noise_scale))
        print(f"Round {r + 1} completed")

    return global_model

In [None]:
# Re-seed PyTorch before every run so all three experiments start from the
# same random initialisation. Otherwise accuracy differences would mix the
# effect of the noise scale with the effect of a different starting point.
EXPERIMENT_SEED = 42

print("No Noise")
torch.manual_seed(EXPERIMENT_SEED)
model_0 = federated_learning(data, num_clients=3, rounds=5, noise_scale=0.0)

print("\nNoise = 0.5")
torch.manual_seed(EXPERIMENT_SEED)
model_05 = federated_learning(data, num_clients=3, rounds=5, noise_scale=0.5)

print("\nNoise = 1.0")
torch.manual_seed(EXPERIMENT_SEED)
model_10 = federated_learning(data, num_clients=3, rounds=5, noise_scale=1.0)

No Noise
Round 1 completed
Round 2 completed
Round 3 completed
Round 4 completed
Round 5 completed

Noise = 0.5
Round 1 completed
Round 2 completed
Round 3 completed
Round 4 completed
Round 5 completed

Noise = 1.0
Round 1 completed
Round 2 completed
Round 3 completed
Round 4 completed
Round 5 completed


In [None]:
def evaluate(model, data):
    """Return the classification accuracy of ``model`` on ``data``.

    All columns of ``data`` except the last are used as float32 features; the
    last column is the integer class label. The predicted class is the argmax
    of the model's outputs along dimension 1.

    Args:
        model: the ``nn.Module`` to score; it is switched to eval mode and
            left in that mode.
        data: DataFrame with feature columns followed by the label column.

    Returns:
        Fraction of rows predicted correctly, as a Python float.
    """
    model.eval()

    # Put the batch on the device the model already lives on instead of
    # relying on a notebook-level global, so model and data always agree.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else torch.device("cpu")

    X = torch.tensor(data.iloc[:, :-1].values, dtype=torch.float32).to(target_device)
    y = torch.tensor(data.iloc[:, -1].values, dtype=torch.long).to(target_device)

    # No gradients are needed for scoring.
    with torch.no_grad():
        predicted = torch.argmax(model(X), dim=1)

    return (predicted == y).float().mean().item()


In [None]:
# Score the three global models in the order they were trained.
# NOTE(review): `data` is the same frame that was handed to
# federated_learning(), so these are training-set accuracies, not held-out
# estimates.
acc_0, acc_05, acc_10 = (
    evaluate(trained_model, data)
    for trained_model in (model_0, model_05, model_10)
)

print("Accuracy (No Noise):", acc_0)
print("Accuracy (Noise 0.5):", acc_05)
print("Accuracy (Noise 1.0):", acc_10)


Accuracy (No Noise): 1.0
Accuracy (Noise 0.5): 1.0
Accuracy (Noise 1.0): 0.9800000190734863
