In [None]:
import numpy as np
import pandas as pd

# Seed the global NumPy RNG so the synthetic dataset is identical on every run.
np.random.seed(42)

# Number of synthetic student records to generate.
n_samples = 100

# (low, high) bounds of the uniform distribution each column is drawn from.
# The insertion order matters: columns are sampled in this order from the
# shared global RNG, so reordering entries would change the generated values.
column_ranges = {
    "study_hours": (0, 10),
    "attendance": (50, 100),
    "internal_marks": (10, 50),
    "final_score": (35, 100),
}

data = {
    name: np.random.uniform(low, high, n_samples)
    for name, (low, high) in column_ranges.items()
}

df = pd.DataFrame(data)


In [None]:
# Preview the first five rows of the generated dataset.
# NOTE(review): the saved output below contains a NaN, which suggests this cell
# was last executed after the missing-value cell — re-run the notebook top to
# bottom to confirm the displayed preview matches the execution order.
df.head()

Unnamed: 0,study_hours,attendance,internal_marks,final_score
0,3.745401,51.571459,35.681266,38.359312
1,9.507143,81.820521,13.365599,69.538051
2,7.319939,65.717799,16.465149,70.141283
3,5.986585,,45.942168,76.432944
4,1.560186,95.378324,34.257162,82.195937


Introduce Missing Values

In [None]:
# Blank out a random 10% of the rows in every column to simulate missing data.
# A dedicated, locally seeded generator makes this cell idempotent: re-running
# it selects the same rows again instead of drawing fresh ones from the global
# RNG and piling additional NaNs on top of the earlier ones.
MISSING_FRACTION = 0.1
missing_rng = np.random.RandomState(42)

for col in df.columns:
    # Each column gets its own independent draw of row labels to blank.
    rows_to_blank = df.sample(frac=MISSING_FRACTION, random_state=missing_rng).index
    df.loc[rows_to_blank, col] = np.nan


Handling Missing Values

In [None]:
# Impute missing entries with the mean of the column they belong to.
# `df` itself is left untouched; the imputed copy is bound to `df_filled`.
column_means = df.mean()
df_filled = df.fillna(value=column_means)

Normalizing Features

In [None]:
from sklearn.preprocessing import MinMaxScaler

# Input columns to rescale; the target column `final_score` is not listed
# and therefore keeps its original scale.
features = ["study_hours", "attendance", "internal_marks"]

# Fit the min-max scaler on the feature columns and write the rescaled
# values back into the same columns of `df_filled`.
scaler = MinMaxScaler()
scaled_values = scaler.fit_transform(df_filled[features])
df_filled[features] = scaled_values

Partitioning Data for Federated Learning

In [None]:
def partition_data(df, num_clients):
    """Split a DataFrame into contiguous, non-overlapping per-client slices.

    Rows are assigned in their existing order. When ``len(df)`` is not an
    exact multiple of ``num_clients``, the leftover rows are handed out one
    each to the first clients, so no row is dropped and slice sizes differ by
    at most one. For evenly divisible inputs every slice has the same size.

    Parameters
    ----------
    df : pandas.DataFrame
        The data to partition.
    num_clients : int
        Number of slices to produce; must be at least 1.

    Returns
    -------
    list of pandas.DataFrame
        ``num_clients`` positional slices of ``df`` (taken with ``.iloc``),
        in client order.

    Raises
    ------
    ValueError
        If ``num_clients`` is less than 1.
    """
    if num_clients < 1:
        raise ValueError("num_clients must be a positive integer")

    # base_size rows go to every client; the first `remainder` clients get one extra.
    base_size, remainder = divmod(len(df), num_clients)

    client_data = []
    start = 0
    for client_idx in range(num_clients):
        end = start + base_size + (1 if client_idx < remainder else 0)
        client_data.append(df.iloc[start:end])
        start = end

    return client_data

Creating Local Client Datasets

In [None]:
# Number of simulated federated-learning clients.
num_clients = 5

# Give each client its own slice of the imputed, scaled dataset.
clients = partition_data(df_filled, num_clients)

# Report the (rows, columns) shape of every client's local dataset.
for i, client_df in enumerate(clients):
    local_shape = client_df.shape
    print(f"\nClient {i+1} data shape:", local_shape)


Client 1 data shape: (20, 4)

Client 2 data shape: (20, 4)

Client 3 data shape: (20, 4)

Client 4 data shape: (20, 4)

Client 5 data shape: (20, 4)
