In [53]:
import collections
import tensorflow as tf
import tensorflow_federated as tff
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler

In [30]:
# Load Preprocessed Silo Data 
silo_paths = [
    '../datasets/diabetes/processed_silos/hospital_1.csv',
    '../datasets/diabetes/processed_silos/hospital_2.csv',
    '../datasets/diabetes/processed_silos/hospital_3.csv',
    '../datasets/diabetes/processed_silos/hospital_4.csv',
    '../datasets/diabetes/processed_silos/hospital_5.csv'
]

def load_silo_data(path):
    df = pd.read_csv(path)
    
    # Correct binary label: 0 = NO, 1 = <30 or >30
    df['readmitted_binary'] = df['readmitted'].apply(lambda x: 0 if x == 0 else 1)

    # Drop target and leaky column(s)
    df = df.drop(columns=['discharge_disposition_id'])

    y = df['readmitted'].values
    X = df.drop(columns=['readmitted']).values
    return tf.data.Dataset.from_tensor_slices((X.astype(np.float32), y.astype(np.int32))).batch(32)

datasets = [load_silo_data(p) for p in silo_paths]

In [47]:
# --- Define Model Function ---
def create_keras_model():
    return tf.keras.models.Sequential([
        tf.keras.layers.Input(shape=(datasets[0].element_spec[0].shape[1],)),
        tf.keras.layers.Dense(64, activation='relu'),
        tf.keras.layers.Dense(1, activation='sigmoid')
    ])
    
def model_fn():
    keras_model = create_keras_model()
    return tff.learning.models.from_keras_model(
        keras_model,
        input_spec=datasets[0].element_spec,
        loss=tf.keras.losses.BinaryCrossentropy(),
        metrics=[tf.keras.metrics.BinaryAccuracy()]
    )

In [48]:
# --- Federated Averaging Process ---
iterative_process = tff.learning.algorithms.build_weighted_fed_avg(
    model_fn=model_fn,
    client_optimizer_fn=tff.learning.optimizers.build_sgdm(learning_rate=0.01),
    server_optimizer_fn=tff.learning.optimizers.build_sgdm(learning_rate=1.0)
)

state = iterative_process.initialize()

2025-06-10 20:04:22.623431: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:880] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2025-06-10 20:04:22.623504: I tensorflow/core/grappler/devices.cc:66] Number of eligible GPUs (core count >= 8, compute capability >= 0.0): 0
2025-06-10 20:04:22.623743: I tensorflow/core/grappler/clusters/single_machine.cc:361] Starting new session
2025-06-10 20:04:22.624464: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:880] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2025-06-10 20:04:22.624511: W tensorflow/core/common_runtime/gpu/gpu_device.cc:2211] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/

In [39]:
# --- Federated Training Loop ---
NUM_ROUNDS = 10
for round_num in range(1, NUM_ROUNDS + 1):
    state, metrics = iterative_process.next(state, datasets)
    print(f'Round {round_num}, Metrics={metrics}')


2025-06-10 19:57:15.890529: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:880] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2025-06-10 19:57:15.890594: I tensorflow/core/grappler/devices.cc:66] Number of eligible GPUs (core count >= 8, compute capability >= 0.0): 0
2025-06-10 19:57:15.890783: I tensorflow/core/grappler/clusters/single_machine.cc:361] Starting new session
2025-06-10 19:57:15.891474: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:880] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2025-06-10 19:57:15.891495: W tensorflow/core/common_runtime/gpu/gpu_device.cc:2211] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/

Round 1, Metrics=OrderedDict([('distributor', ()), ('client_work', OrderedDict([('train', OrderedDict([('binary_accuracy', 0.3465), ('loss', -2.7683804), ('num_examples', 30000), ('num_batches', 940)]))])), ('aggregator', OrderedDict([('mean_value', ()), ('mean_weight', ())])), ('finalizer', OrderedDict([('update_non_finite', 0)]))])
Round 2, Metrics=OrderedDict([('distributor', ()), ('client_work', OrderedDict([('train', OrderedDict([('binary_accuracy', 0.35326666), ('loss', -7.344969), ('num_examples', 30000), ('num_batches', 940)]))])), ('aggregator', OrderedDict([('mean_value', ()), ('mean_weight', ())])), ('finalizer', OrderedDict([('update_non_finite', 0)]))])
Round 3, Metrics=OrderedDict([('distributor', ()), ('client_work', OrderedDict([('train', OrderedDict([('binary_accuracy', 0.35326666), ('loss', -7.556746), ('num_examples', 30000), ('num_batches', 940)]))])), ('aggregator', OrderedDict([('mean_value', ()), ('mean_weight', ())])), ('finalizer', OrderedDict([('update_non_fin