In [1]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler

# Seed NumPy's global random state once so that every random draw made
# through np.random in this notebook is repeatable on a fresh top-to-bottom run.
SEED = 42
np.random.seed(SEED)

In [2]:
# Build a synthetic housing dataset, one random column per feature.
# The columns are drawn in a fixed order (size, bedrooms, age, distance) so
# the sequence of values taken from the global random state stays the same.
num_samples = 200

data = {}
data["size"] = np.random.uniform(low=500, high=2000, size=num_samples)  # square feet
data["bedrooms"] = np.random.randint(low=1, high=5, size=num_samples)   # upper bound is exclusive: 1..4
data["age"] = np.random.uniform(low=0, high=30, size=num_samples)
data["distance"] = np.random.uniform(low=1, high=20, size=num_samples)

df = pd.DataFrame(data)

In [3]:
# Ground-truth price (the model never sees this formula): a linear
# combination of the feature columns plus Gaussian noise with standard
# deviation 10000.
price_noise = np.random.randn(num_samples) * 10000
base_price = (
    50 * df["size"]
    + 10000 * df["bedrooms"]
    - 2000 * df["age"]
    - 3000 * df["distance"]
)
df["price"] = base_price + price_noise

In [10]:
# Preview the first five rows of the dataset.
# NOTE: the saved output below was produced at execution count 10, i.e. after
# the feature-scaling cell had already run, so it shows min-max scaled
# features rather than the raw values this cell yields on a top-to-bottom run.
df.head(n=5)

Unnamed: 0,size,bedrooms,age,distance,price
0,0.376025,1.0,0.041587,0.693906,40676.026866
1,0.96314,1.0,0.529989,0.529401,71338.788918
2,0.740267,0.666667,0.539439,0.299422,48214.230558
3,0.604399,0.0,0.637995,0.811279,-11578.279341
4,0.153354,1.0,0.72827,0.680273,1169.169266


In [4]:
# Min-max scale the input feature columns in place; the "price" target
# column is not part of `features` and is left untouched.
features = ["size", "bedrooms", "age", "distance"]
scaler = MinMaxScaler()
scaler.fit(df[features])
df[features] = scaler.transform(df[features])

In [5]:
def split_clients(df, num_clients, random_state=None):
    """Shuffle a DataFrame and partition its rows across clients.

    Parameters
    ----------
    df : pandas.DataFrame
        Dataset to distribute. It is not modified.
    num_clients : int
        Number of partitions to create. Forwarded to ``np.array_split``, so
        partition sizes differ by at most one row and a value larger than
        ``len(df)`` yields some empty partitions.
    random_state : int, numpy RandomState/Generator or None, optional
        Forwarded to ``DataFrame.sample``. The default ``None`` draws from
        NumPy's global random state, as before.

    Returns
    -------
    list of pandas.DataFrame
        One frame per client, in shuffled row order.

    Raises
    ------
    ValueError
        If ``num_clients`` is not positive (raised by ``np.array_split``).
    """
    shuffled = df.sample(frac=1, random_state=random_state).reset_index(drop=True)

    # Split the row *positions* and slice with iloc instead of handing the
    # DataFrame to np.array_split: NumPy would call DataFrame.swapaxes, which
    # is deprecated (FutureWarning) and not guaranteed to keep returning
    # DataFrames.
    position_chunks = np.array_split(np.arange(len(shuffled)), num_clients)
    return [shuffled.iloc[chunk] for chunk in position_chunks]

In [6]:
def initialize_model(num_features):
    """Draw random starting parameters for a linear model.

    Both values come from the standard normal distribution via NumPy's
    global random state; the weight vector is drawn before the bias.

    Parameters
    ----------
    num_features : int
        Length of the weight vector.

    Returns
    -------
    tuple
        ``(weights, bias)`` where ``weights`` is a 1-D array of length
        ``num_features`` and ``bias`` is a float.
    """
    weights = np.random.standard_normal(num_features)
    bias = np.random.standard_normal()
    return weights, bias

In [7]:
def local_train(X, y, W, b, lr=0.01, epochs=50):
    """Train a linear model on one client's data with full-batch gradient descent.

    The model is ``y_pred = X @ W + b``. Each epoch takes one step of size
    ``lr`` along the negative gradient of half the mean squared error.

    Parameters
    ----------
    X : array-like, shape (n_samples, n_features)
        Feature matrix.
    y : array-like, shape (n_samples,)
        Targets.
    W : array-like, shape (n_features,)
        Starting weights. The caller's array is never modified; training
        runs on a copy.
    b : float
        Starting bias.
    lr : float, optional
        Learning rate.
    epochs : int, optional
        Number of gradient steps.

    Returns
    -------
    tuple
        ``(W, b)`` after training. If there are no samples, the starting
        parameters are returned unchanged (``W`` still as a copy).
    """
    # Work on a private copy so the in-place updates below cannot leak into
    # the caller's array (e.g. the shared global weights).
    W = np.array(W)
    if not np.issubdtype(W.dtype, np.inexact):
        # Integer/bool weights cannot absorb fractional in-place updates.
        W = W.astype(np.float64)

    n = len(y)
    if n == 0:
        # No data means no gradient; avoid dividing by zero.
        return W, b

    for _ in range(epochs):
        y_pred = np.dot(X, W) + b
        error = y_pred - y

        dW = (1/n) * np.dot(X.T, error)
        db = (1/n) * np.sum(error)

        W -= lr * dW
        b -= lr * db

    return W, b

In [8]:
def weighted_fedavg(client_models, client_sizes):
    """Combine client models into one by sample-weighted averaging (FedAvg).

    Each client's parameters are weighted by its share of the total number
    of samples.

    Parameters
    ----------
    client_models : sequence of (W, b)
        Per-client weight vector and bias.
    client_sizes : sequence of int
        Number of training samples per client, in the same order as
        ``client_models``.

    Returns
    -------
    tuple
        ``(new_W, new_b)``, the weighted averages of the weights and biases.

    Raises
    ------
    ValueError
        If ``client_models`` and ``client_sizes`` differ in length.
    """
    if len(client_models) != len(client_sizes):
        # zip() would silently drop the extras while total_samples still
        # counted them, producing weights that do not sum to one.
        raise ValueError(
            "client_models and client_sizes must have the same length "
            f"(got {len(client_models)} and {len(client_sizes)})"
        )

    total_samples = sum(client_sizes)

    first_W = np.asarray(client_models[0][0])
    # Accumulate in a floating dtype: an integer accumulator cannot take the
    # fractional contributions added below. Floating inputs keep their dtype.
    if np.issubdtype(first_W.dtype, np.inexact):
        acc_dtype = first_W.dtype
    else:
        acc_dtype = np.float64
    new_W = np.zeros(first_W.shape, dtype=acc_dtype)
    new_b = 0

    for (W, b), size in zip(client_models, client_sizes):
        weight_factor = size / total_samples
        new_W += weight_factor * np.asarray(W)
        new_b += weight_factor * b

    return new_W, new_b

In [9]:
# Federated training driver: partition the data across simulated clients,
# then alternate local training and weighted averaging for a fixed number
# of rounds, printing the global parameters after each round.
num_clients = 4
num_rounds = 10

# Shuffle the dataset and split it into one partition per client.
clients = split_clients(df, num_clients)

num_features = len(features)
# Random starting point for the shared (global) linear model.
global_W, global_b = initialize_model(num_features)

for round_num in range(num_rounds):
    # Collected fresh every round: each client's trained parameters and its
    # sample count (used as the averaging weight).
    client_models = []
    client_sizes = []

    print(f"\n--- Round {round_num+1} ---")

    for client_df in clients:
        X = client_df[features].values
        y = client_df["price"].values

        # Every client starts from its own copy of the current global
        # weights, so local updates cannot alter global_W.
        local_W, local_b = local_train(X, y, global_W.copy(), global_b)

        client_models.append((local_W, local_b))
        client_sizes.append(len(y))

    # Replace the global model with the sample-weighted average of the
    # clients' locally trained models.
    global_W, global_b = weighted_fedavg(client_models, client_sizes)

    print("Global Weights:", global_W)
    print("Global Bias:", global_b)


--- Round 1 ---
Global Weights: [7508.09511766 6385.43278236  920.67790475 1525.81071282]
Global Bias: 8313.053011367783

--- Round 2 ---
Global Weights: [12200.13013059  9726.7036206   -688.97420036   601.10157009]
Global Bias: 11436.96711599908

--- Round 3 ---
Global Weights: [15779.86818899 11899.64059293 -3100.94838004 -1140.80427928]
Global Bias: 12702.591919673734

--- Round 4 ---
Global Weights: [18864.02542458 13582.75421399 -5699.94582016 -3116.02064997]
Global Bias: 13306.048555400568

--- Round 5 ---
Global Weights: [21678.93143333 15024.61681957 -8270.65923377 -5117.53834881]
Global Bias: 13676.357457885546

--- Round 6 ---
Global Weights: [ 24310.42188991  16319.01067374 -10741.16034987  -7073.61836865]
Global Bias: 13966.988507192571

--- Round 7 ---
Global Weights: [ 26793.63277547  17503.80561187 -13090.72210555  -8960.99321985]
Global Bias: 14232.463730286174

--- Round 8 ---
Global Weights: [ 29145.27304743  18596.50393952 -15316.71867723 -10773.68815578]
Global Bia

  return bound(*args, **kwds)
