# Federated Learning - Automobile Dataset Example

[Link to dataset source](https://archive.ics.uci.edu/dataset/10/automobile)

[Link to Colab (deprecated)](https://colab.research.google.com/drive/1GmAhxnKVvrhWffospDEe0rc-QB_tjfhE?usp=sharing)


In [1]:
import os
import random
import logging
import warnings
import numpy as np
from pathlib import Path
from tensorflow import convert_to_tensor
import tensorflow as tf
import matplotlib.pyplot as plt
from src.tf_utils import df_to_tfds
from src.data_examples.ex1_data_loader import ExampleDataLoader
from src.data_examples.ex1_build import eval_example_data

In [2]:
# Seed every random number generator the notebook relies on. Python's
# `random` drives the inference-sample generator; numpy and TensorFlow are
# seeded as well so that model training does not vary between
# Restart & Run All executions (assumes the model builder uses TF's global
# seed - TODO confirm against eval_example_data).
RAND_SEED = 1337
random.seed(RAND_SEED)
np.random.seed(RAND_SEED)
tf.random.set_seed(RAND_SEED)

# Output directories for saved models and per-run metric CSV files.
saved_model_path = Path('saved_models')
saved_model_path.mkdir(exist_ok=True)
metrics_csv_path = Path('metrics')
metrics_csv_path.mkdir(exist_ok=True)

# Keep the notebook output readable: only TF errors, no Python warnings.
tf.get_logger().setLevel(logging.ERROR)
warnings.filterwarnings("ignore")

In [3]:
# Per-experiment registries keyed by model name: training histories,
# trained models and RMSE values.
result_histories, result_models, result_rmse = {}, {}, {}

In [4]:
# Fetch (or reuse the cached copy of) the dataset, load it and clean it.
data = ExampleDataLoader()
data.download().load().clean()

# Binary classification target: 1 when `symboling` is positive, else 0.
target_feature_label = 'symboling_threshold'

data.df[target_feature_label] = [int(value > 0) for value in data.df['symboling']]

using cached file cache\static\public\10\automobile.zip
extracting zip file content:
 	size: 144	filename: Index
 	size: 1197	filename: app.css
 	size: 25936	filename: imports-85.data
 	size: 4747	filename: imports-85.names
 	size: 3757	filename: misc


In [5]:
def generate_random_sample_from_spec(data_spec, features_override=None):
  """Draw one random sample from a feature specification.

  Args:
    data_spec: mapping of feature name to its spec. A tuple is treated as a
      numeric ``(low, high)`` range, a list as a set of choices, and any
      other value is copied into the sample unchanged.
    features_override: optional collection of feature names. When non-empty,
      only the features listed here are sampled; otherwise every feature in
      ``data_spec`` is sampled.

  Returns:
    dict mapping each selected feature name to its sampled value.
  """
  ret = {}
  for k, v in data_spec.items():
    if features_override and k not in features_override:
      continue
    if isinstance(v, tuple):
      low, high = v[0], v[1]
      # Offset by `low` so the draw lands inside [low, high) rather than
      # inside [0, high - low).
      ret[k] = low + random.random() * (high - low)
    elif isinstance(v, list):
      ret[k] = random.choice(v)
    else:
      ret[k] = v
  return ret

In [6]:
# Feature names are converted from underscores to dashes before they are used
# to filter `data.data_spec` (presumably the spec is keyed by the dashed
# names - verify against ExampleDataLoader).
__inference_sample_spec = [
  name.replace('_', '-')
  for name in data.features_categorical + data.features_numeric_continuous
]
__inference_sample = generate_random_sample_from_spec(data.data_spec, __inference_sample_spec)

# Convert the keys back to underscores and wrap each value in a 1-element tensor.
inference_sample = {
  key.replace('-', '_'): convert_to_tensor([value])
  for key, value in __inference_sample.items()
}

inference_sample

## 1. Centralized (Conventional) Training 

In [7]:
# Hyper-parameters for the centralized baseline.
n_epoch, batch_size = 50, 24

model_name = 'ex1ch1_auto_classifier_centralized'
ex1ch1_model_path = saved_model_path.joinpath(model_name)

# Start empty result lists for this experiment.
result_histories[model_name], result_models[model_name] = [], []

In [8]:
# 80 % of the rows go to training; the remaining 20 % are halved into
# test and validation sets.
df_train = data.df.sample(frac=0.8, random_state=RAND_SEED)
df_val_test = data.df.drop(index=df_train.index)
df_test = df_val_test.sample(frac=0.5, random_state=RAND_SEED)
df_val = df_val_test.drop(index=df_test.index)

# Wrap each split as a batched dataset (same order as before: train, test, val).
tfds_train, tfds_test, tfds_val = (
  df_to_tfds(frame, target_feature_label, batch_size=batch_size)
  for frame in (df_train, df_test, df_val)
)

tuple(frame.shape for frame in (df_train, df_test, df_val))

((127, 27), (16, 27), (16, 27))

In [9]:
# Train the centralized model on the training split, using the validation
# split as the second dataset argument.
res_model, logger, history = eval_example_data(
  tfds_train, tfds_val, data,
  epoch=n_epoch, model_name=model_name,
)

logging to ex1ch1_auto_classifier_centralized_metrics.csv


In [10]:
# Persist the trained model and record it, with its history, under this experiment.
res_model.save(ex1ch1_model_path)
result_histories[model_name].append(history)
result_models[model_name].append(res_model)

# Report the held-out test metrics.
loss, accuracy, mse = res_model.evaluate(tfds_test)
print('Loss:', loss, 'Accuracy:', accuracy, 'MSE:', mse)



Loss: 0.1370590329170227 Accuracy: 0.9375 MSE: 0.3150026798248291


## 2. Federated Model - Model Ensembling

In [11]:
# Hyper-parameters for the model-ensembling experiment.
n_client = 5
n_epoch, batch_size = 50, 24

model_name = 'ex1ch1_auto_classifier_federated_model_ensemble'
ex1ch2_model_path = saved_model_path.joinpath(model_name)

# Start empty result lists for this experiment.
result_histories[model_name], result_models[model_name] = [], []


In [12]:
# Test metrics of each client model, in client order.
__metrics = []

# Split the full frame into `n_client` consecutive chunks and train one
# independent model per chunk.
for n, data_df in enumerate(np.array_split(data.df, n_client)):

  client_model_name = f'{model_name}_{n}'
  ex1ch2_model_path_c = saved_model_path / client_model_name

  # Per-client 80/10/10 train/test/val split.
  df_train    = data_df.sample(frac=0.8, random_state=RAND_SEED)
  df_val_test = data_df.drop(df_train.index)
  df_test     = df_val_test.sample(frac=0.5, random_state=RAND_SEED)
  df_val      = df_val_test.drop(df_test.index)

  # Underscore-prefixed so the centralized `tfds_*` datasets stay untouched.
  _tfds_train  = df_to_tfds(df_train, target_feature_label, batch_size=batch_size)
  _tfds_test   = df_to_tfds(df_test,  target_feature_label, batch_size=batch_size)
  _tfds_val    = df_to_tfds(df_val,   target_feature_label, batch_size=batch_size)

  res_model, logger, history = eval_example_data(
    _tfds_train,
    _tfds_val,
    data,
    epoch=n_epoch,
    model_name=client_model_name
  )

  res_model.save(ex1ch2_model_path_c)
  result_histories[model_name].append(history)
  result_models[model_name].append(res_model)

  # Each client model is scored on its own client's test split.
  __metrics.append(res_model.evaluate(_tfds_test))


for n, (loss, accuracy, mse) in enumerate(__metrics):
  print(
    # The trailing comma keeps the label a separate argument; without it the
    # adjacent string literals are concatenated into "... - 0Loss:".
    f'{model_name} - {n}',
    'Loss:', loss,
    'Accuracy:', accuracy,
    'MSE:', mse
  )

logging to ex1ch1_auto_classifier_federated_model_ensemble_0_metrics.csv




logging to ex1ch1_auto_classifier_federated_model_ensemble_1_metrics.csv




logging to ex1ch1_auto_classifier_federated_model_ensemble_2_metrics.csv




logging to ex1ch1_auto_classifier_federated_model_ensemble_3_metrics.csv




logging to ex1ch1_auto_classifier_federated_model_ensemble_4_metrics.csv




ex1ch1_auto_classifier_federated_model_ensemble - 0Loss: 0.050481170415878296 Accuracy: 1.0 MSE: 0.05273614823818207
ex1ch1_auto_classifier_federated_model_ensemble - 1Loss: 0.21150441467761993 Accuracy: 1.0 MSE: 0.04154187813401222
ex1ch1_auto_classifier_federated_model_ensemble - 2Loss: 0.044761817902326584 Accuracy: 1.0 MSE: 0.2261928915977478
ex1ch1_auto_classifier_federated_model_ensemble - 3Loss: 0.08383435010910034 Accuracy: 1.0 MSE: 0.14228679239749908
ex1ch1_auto_classifier_federated_model_ensemble - 4Loss: 0.17041529715061188 Accuracy: 1.0 MSE: 0.45423534512519836


In [13]:
# Inspect the first output tensor of the first client model of the current experiment.
result_models[model_name][0].outputs[0]

<KerasTensor: shape=(None, 1) dtype=float32 (created by layer 'dense_3')>

In [14]:
def fn_eval(model, tfds):
  """Return the result of `model.evaluate(tfds)`; usable as `fn` in `evaluate_ensemble`."""
  return model.evaluate(tfds)

def fn_predict(model, tfds):
  """Return the result of `model.predict(tfds)`; usable as `fn` in `evaluate_ensemble`."""
  return model.predict(tfds)

def evaluate_ensemble(models, tfds, fn):
  """Apply `fn(model, tfds)` to every model and average the results.

  Args:
    models: iterable of models passed one by one to `fn`.
    tfds: dataset handed unchanged to every `fn` call.
    fn: callable taking `(model, tfds)` and returning a numeric result.

  Returns:
    The element-wise mean of the per-model results (`np.mean` over axis 0).
  """
  per_model = [fn(member, tfds) for member in models]
  return np.mean(per_model, axis=0)

# Score every client model on the shared centralized test set and average
# their metrics. This replaces an earlier single-model call:
# loss, accuracy, mse = stacked_model.evaluate(tfds_test)
loss, accuracy, mse = evaluate_ensemble(result_models[model_name], tfds_test, fn_eval)

print('Loss:', loss, 'Accuracy:', accuracy, 'MSE:', mse)

Loss: 1.6764457821846008 Accuracy: 0.75 MSE: 1.0169736981391906


- Comparison: conventional vs. federated
- Data distribution: 1-1, weighted sum
-  

In [15]:
# `result_histories` only contains the experiments trained in this notebook,
# so the history is looked up through the current `model_name` rather than a
# hard-coded key. Index 1 selects the second client's training history.
hist = result_histories[model_name][1]

plt.plot(hist.history['accuracy'])
plt.plot(hist.history['val_accuracy'])
plt.title('Model Accuracy')
plt.ylabel('accuracy')
plt.xlabel('epoch')
plt.legend(['train', 'val'], loc='upper left')

plt.show()