## Step 1: Import libraries

In [None]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import datetime as dt
import tensorflow as tf
import yaml
import argparse

from keras.callbacks import TensorBoard
from statsmodels.tsa.seasonal import seasonal_decompose
from keras.callbacks import EarlyStopping

from libs.FLDigitalTwin import FLDigitalTwin

In [None]:
# Command-line options for one experiment run, as (flag, value type, default).
# The parsed values are combined with config.yaml in the "Load configs" cell.
_CLI_OPTIONS = (
    ("--dataset", str, "traffic"),
    ("--prefix", str, "normal"),
    ("--percent_mc", str, "01"),
    ("--missing_mode", str, "noadjacency"),
    ("--matrix_ml", str, "10x10"),
    ("--weight_mechanism", int, 0),
    ("--is_cluster", str, "no"),
)

parser = argparse.ArgumentParser()
for _flag, _value_type, _default in _CLI_OPTIONS:
    parser.add_argument(_flag, type=_value_type, default=_default)

## Step 2: Load configs

In [None]:
# Resolve the command-line arguments for this run.
# If CONFIG_FILE exists its whitespace-separated tokens are used as the argument
# list; otherwise a built-in default argument list is used.
CONFIG_FILE = '.config_ipynb'
if os.path.isfile(CONFIG_FILE):
    with open(CONFIG_FILE) as f:
        # The first token is dropped (presumably the script/notebook name written
        # by the launcher — confirm against whatever produces this file).
        _args = f.read().split()[1:]
else:
    # Each flag and its value must be SEPARATE list items. A single
    # '--is_cluster no' token is not recognised by argparse and makes
    # parse_args() exit with "unrecognized arguments".
    _args = [
        '--dataset', 'electricity',
        '--prefix', 'avg',
        '--percent_mc', '01',
        '--missing_mode', 'noadjacency',
        '--is_cluster', 'no',
    ]

args = parser.parse_args(_args)

# Static settings come from config.yaml (parsed with the safe loader).
with open('config.yaml', 'r') as file:
    yaml_data = yaml.load(file, Loader=yaml.SafeLoader)

# CLI values take precedence; the yaml value is only consulted when the CLI value
# is falsy (e.g. an explicitly empty string). `or` keeps the yaml lookup lazy, so a
# key missing from config.yaml only matters when the fallback is actually needed.
_dataset = args.dataset or yaml_data['DATASET']
_prefix = args.prefix or yaml_data['PREFIX']
_percent_mc = args.percent_mc or yaml_data['PERCENTAGE_MISSING_CLIENT']
_missing_mode = args.missing_mode or yaml_data['MISSING_MODE']
_matrix_ml = args.matrix_ml or yaml_data['MATRIX_MISSING_LENGTH']
_is_cluster = args.is_cluster or yaml_data['IS_CLUSTER']

config = {
    'DATASET': _dataset,
    'NUM_DATA_SHEETS': yaml_data['NUM_DATA_SHEETS'],
    'NUM_CLIENTS': yaml_data['NUM_CLIENTS'],
    "LOOK_BACK": yaml_data['LOOK_BACK'],
    'EPOCHS': yaml_data['EPOCHS'],
    'CLIENT_EPOCHS': yaml_data['CLIENT_EPOCHS'],
    'BATCH_SIZE': yaml_data['BATCH_SIZE'],
    'SAVE_INTERVAL': yaml_data['SAVE_INTERVAL'],
    'LEARNING_RATE': yaml_data['LEARNING_RATE'],
    'TRAIN_SIZE': yaml_data['TRAIN_SIZE'],
    'TRAIN_ROUNDS': yaml_data['TRAIN_ROUNDS'],
    'DATA_DIR': yaml_data['DATA_DIR'],
    'CLIENT_MATRIX_DIR': yaml_data['CLIENT_MATRIX_DIR'],
    'FL_OUTPUT_DIR': yaml_data['FL_OUTPUT_DIR'],
    'GENERAL_OUTPUT_DIR': yaml_data['GENERAL_OUTPUT_DIR'],
    'PREFIX': _prefix,
    'PERCENTAGE_MISSING_CLIENT': _percent_mc,
    'MISSING_MODE': _missing_mode,
    'MATRIX_MISSING_LENGTH': _matrix_ml,
    'IS_CLUSTER': _is_cluster,
    # No yaml fallback for this one: a falsy CLI value simply means 0.
    'WEIGHT_MECHANISM': args.weight_mechanism or 0,
    # Directory where per-client, per-round weight CSVs are tracked.
    'WEIGHT_TRACKING_DIR': os.path.join('model_weight_track', _dataset, _prefix, _matrix_ml, _missing_mode),
}

print(config)
print(args)

## Step 3: Set tf random seed

In [None]:
# Fix TensorFlow's global random seed for reproducibility.
# NOTE(review): only TensorFlow is seeded here; NumPy / Python RNGs are not.
RANDOM_SEED = 3
tf.random.set_seed(RANDOM_SEED)

## Step 4: Initialize FL-DT Class

In [None]:
# Build the project helper from the resolved config. Every later cell goes through
# this object for data loading, model creation, training, plotting and JSON export.
FLDT = FLDigitalTwin(config=config)

## Step 5.1: Load dataset

In [None]:
# Workbook holding every client's series for the selected dataset.
dataset_name = config['DATASET']
file_path = f"{config['DATA_DIR']}/{dataset_name}/standard_{dataset_name}.xlsx"

# Client keys are 0-based (client_0, ...) while worksheet names are 1-based (Sheet_1, ...).
sheets = {
    f'client_{sheet_idx}': f'Sheet_{sheet_idx + 1}'
    for sheet_idx in range(config['NUM_DATA_SHEETS'])
}

# Two dictionaries keyed by client name: the main data and the validation data.
dictionary = FLDT.load_data(file_path, sheets)
val_dictionary = FLDT.load_val_data(file_path, sheets)

## Step 5.2: Example for client_1 data

In [None]:
# Single client used for the walkthrough cells that follow (Steps 5.2-5.7).
client_name = "client_1"
df, df_val = dictionary[client_name], val_dictionary[client_name]
# Disabled alternative: rebuild "value" as a row-wise sum of the middle columns.
# df["value"] = df.iloc[:, 1:-1].sum(axis=1)

# Distribution plot of the selected client's data.
FLDT.visualize_data(df, "Energy Distribution (kWh)", "Value (kWh)", "Frequency")

## Step 5.3: Split train/test data

In [None]:
# Scale the example client's "value" series and split it into train / test parts.
# sc_X is the fitted scaler, kept so predictions can be mapped back later.
look_back = config['LOOK_BACK']
training_set, test_data, sc_X = FLDT.scale_split_datasets(df["value"], config['TRAIN_SIZE'], look_back)

# Turn each part into (inputs, targets) pairs using a window of `look_back` steps.
x_train, y_train = FLDT.create_rnn_dataset(training_set, look_back)
x_test, y_test = FLDT.create_rnn_dataset(test_data, look_back)

## Step 5.4: Create models

In [None]:
# Stand-alone model for the example client.
ts_model = FLDT.create_model(input_shape=(1, config['LOOK_BACK']), output_shape=1)

# One TensorBoard log directory per run, suffixed with the start timestamp.
run_stamp = dt.datetime.now().strftime("%Y%m%d-%H%M%S")
log_dir = f"logs/{config['PREFIX']}fit/{run_stamp}"
tensorboard_callback = TensorBoard(log_dir=log_dir, histogram_freq=1)

# Optional: wrap the fit call in `with tf.device('/GPU:0'):` to pin it to a GPU.
fit_options = dict(
    epochs=config['CLIENT_EPOCHS'],
    batch_size=config['BATCH_SIZE'],
    verbose=1,
    callbacks=[tensorboard_callback],
)
ts_model.fit(x_train, y_train, **fit_options)

# Report test-set metrics, then keep the raw (still scaled) predictions for plotting.
ts_model.evaluate(x_test, y_test, verbose=1)
predict_on_train, predict_on_test = (ts_model.predict(split) for split in (x_train, x_test))

## Step 5.5: Try plotting

In [None]:
# Map the scaled predictions back to original units.
# The results go into NEW names on purpose: overwriting predict_on_train /
# predict_on_test would make this cell non-idempotent (re-running it would apply
# the inverse scaling a second time).
predict_on_train_unscaled = sc_X.inverse_transform(predict_on_train)
predict_on_test_unscaled = sc_X.inverse_transform(predict_on_test)

# Plot predictions against the original series.
plot_original, plot_train, plot_test = FLDT.plot_data_preparation(
    df["value"], predict_on_train_unscaled, predict_on_test_unscaled, config['LOOK_BACK']
)
FLDT.plot_the_data(plot_original, plot_train, plot_test, "Model Predictions vs Actual")

# Additive seasonal decomposition with a 365-sample period.
# NOTE(review): period=365 presumably assumes daily samples with a yearly cycle — confirm.
# `result` is reused by the Excel-export cell below.
result = seasonal_decompose(df["value"], model="additive", period=365)
result.plot()
plt.suptitle("Seasonal Decomposition of Value")
plt.show()

# Larger four-panel view of the same decomposition, one component per row.
fig, axes = plt.subplots(4, 1, figsize=(15, 12), sharex=True)
components = (
    ("Observed", result.observed),
    ("Trend", result.trend),
    ("Seasonal", result.seasonal),
    ("Residual", result.resid),
)
for ax, (panel_title, series) in zip(axes, components):
    series.plot(ax=ax, title=panel_title)
    ax.set_ylabel("Value Summation")
axes[-1].set_xlabel("Time")  # x axis is shared, so only the bottom panel is labelled
plt.tight_layout()
plt.show()

## Step 5.6: Try saving to excel file

In [None]:
# All example-client artefacts go under <GENERAL_OUTPUT_DIR>/<client>/<PREFIX>/.
client_output_dir = os.path.join(config['GENERAL_OUTPUT_DIR'], client_name, config['PREFIX'])
os.makedirs(client_output_dir, exist_ok=True)

# Residual component of the seasonal decomposition computed in the plotting cell.
residuals = result.resid
pd.DataFrame(residuals).to_excel(
    os.path.join(client_output_dir, '_client.xlsx'),
    sheet_name="Decomposition_Residuals",
)

# One-row table of summary statistics for the residuals.
residual_stats = pd.DataFrame(
    {
        "Mean": [residuals.mean()],
        "Median": [residuals.median()],
        "Standard Deviation": [residuals.std()],
        "Max": [residuals.max()],
        "Min": [residuals.min()],
    }
)
residual_stats.to_excel(os.path.join(client_output_dir, 'residual_statistics.xlsx'), index=False)

print("Summary Statistics of Residuals:")
print(residual_stats)

## Step 5.7: Evaluate models

In [None]:
# Test-set metrics of the example model, in the order evaluate() returns them.
# The four column labels assume the model reports loss, MSE, MAE and MAPE.
metric_names = ["Loss", "MSE", "MAE", "MAPE"]
metrics = ts_model.evaluate(x_test, y_test, verbose=1)
metrics_df = pd.DataFrame([metrics], columns=metric_names)

# Written next to the residual files; the directory is created by the previous cell.
metrics_path = os.path.join(
    config['GENERAL_OUTPUT_DIR'], client_name, config['PREFIX'], 'evaluation_metrics.xlsx'
)
metrics_df.to_excel(metrics_path, index=False)

print("Evaluation Metrics:")
print(metrics_df)

## Step 5.8: Create train/test set

In [None]:
# Per-client datasets. Each list entry is the 5-tuple
# (x_train, y_train, x_test, y_test, scaler) for one client sheet.
look_back = config['LOOK_BACK']
train_test_dataset, val_dataset = [], []
for sheet_idx in range(config['NUM_DATA_SHEETS']):
    client_key = f'client_{sheet_idx}'
    # Main data first, then validation data, for the same client.
    for frames, bucket in ((dictionary, train_test_dataset), (val_dictionary, val_dataset)):
        x_tr, y_tr, x_te, y_te, scaler = FLDT.create_train_test_dataset(frames[client_key], look_back)
        bucket.append((x_tr, y_tr, x_te, y_te, scaler))

# Central model data: built from client_0's validation frame.
# (An earlier variant concatenated all clients into one central frame; it is disabled.)
central_train_x, central_train_y, central_test_x, central_test_y, _central_sc_cl = FLDT.create_train_test_dataset(
    val_dictionary['client_0'], look_back
)

## Step 5.9: Create model arrays

In [None]:
# One model per federated client, followed by the central (aggregated) model.
# All are created with the same input/output shape.
model_input_shape = (1, config['LOOK_BACK'])
model_arr = [
    FLDT.create_model(input_shape=model_input_shape, output_shape=1)
    for _ in range(config['NUM_CLIENTS'])
]

central_model = FLDT.create_model(input_shape=model_input_shape, output_shape=1)

## Step 5.10: Train models

In [None]:
# for i in range(config['NUM_CLIENTS']):
#     _x_train, _y_train, _, _, _ = train_test_dataset[i]
#     FLDT.train_model(model_arr[i], _x_train, _y_train, os.path.join('logs', 'fit', config['PREFIX'], str(i)))

## Step 5.11: Evaluate models

In [None]:
# for i in range(config['NUM_CLIENTS']):
#     _x_train, _y_train, _x_test, _y_test, _ = train_test_dataset[i]
#     FLDT.evaluate_model(model_arr[i], _x_train, _y_train, _x_test, _y_test)

## Step 6: Execute the full client update scenario

In [None]:
def _average_weights(weight_sets):
    """Return the layer-by-layer element-wise mean of several models' weight lists.

    weight_sets: non-empty list with one entry per model, each entry being that
    model's list of weight arrays (all models must share the same layer layout).
    Raises ValueError when weight_sets is empty.
    """
    if not weight_sets:
        raise ValueError("cannot average an empty collection of model weights")
    return [np.mean(layer_group, axis=0) for layer_group in zip(*weight_sets)]


# Participation matrix: indexed as client_matrix[round, client]; the value 'N'
# marks a client whose update is missing in that round.
client_matrix_prefix = 'client_matrix_' + config['MATRIX_MISSING_LENGTH']
if config['IS_CLUSTER'] != 'no':
    client_matrix_prefix = config['IS_CLUSTER'] + '_' + client_matrix_prefix
client_matrix_path = os.path.join(
    config['CLIENT_MATRIX_DIR'],
    client_matrix_prefix + '_' + config['PERCENTAGE_MISSING_CLIENT'] + '_' + config['MISSING_MODE'] + '.csv',
)
client_matrix = np.loadtxt(client_matrix_path, delimiter=',', dtype=str)
print(client_matrix)

# Run mode, taken from the resolved config so that directory creation and the
# per-client branching below always agree.
is_normal = config['PREFIX'] == 'normal'
is_weight = config['PREFIX'] == 'weight'

if is_weight:
    # exist_ok avoids the check-then-create race of a separate os.path.exists() test.
    os.makedirs(config['WEIGHT_TRACKING_DIR'], exist_ok=True)

history_client_normal_dict = {}  # round -> client -> training history
central_model_evaluation_rounds = []  # RMSE of the aggregated model before each round

# Round 0 starts from the mean of the clients' initial weights.
global_weights = _average_weights([model.get_weights() for model in model_arr])

for r in range(config['TRAIN_ROUNDS']):
    print(f'----------------------------------Round {r}-------------------------------------')

    # Scratch model that holds the weights a client contributes this round, so they
    # can be written to CSV. It always has weights set before it is saved.
    fake_model = FLDT.create_model(input_shape=(1, config['LOOK_BACK']), output_shape=1)

    history_client_normal_dict[str(r)] = {}
    weights_per_round = []

    # Score the current global weights on the central test split (second metric is MSE).
    central_model.set_weights(global_weights)
    _, mse, _, _ = central_model.evaluate(central_test_x, central_test_y, verbose=0)
    central_model_evaluation_rounds.append(mse ** 0.5)

    for i in range(len(model_arr)):
        print(f"Processing with dataset {i}...")
        _x_train, _y_train, _x_test, _y_test, _ = train_test_dataset[i]
        if is_normal:
            print("Run with FedNorm enabled...")

        # Local training happens for every client in every mode, including clients
        # flagged as missing; the flag only changes which weights are aggregated.
        # NOTE(review): presumably this loads global_weights into model_arr[i] before
        # training — confirm in libs.FLDigitalTwin.
        hist_dict_normal = FLDT._train_fl_full_updates(model_arr, _x_train, _y_train, _x_test, _y_test, i, global_weights)

        # Missing-client handling starts at round 2 because the 'weight' mode needs
        # the CSVs of the two previous rounds.
        client_missing = (not is_normal) and r >= 2 and client_matrix[r, i] == 'N'

        if not client_missing:
            contributed_weights = model_arr[i].get_weights()
        else:
            print(f"Client {i} has no update at round {r}...")
            if is_weight:
                print("Run with FedDT enabled...")

                prev_csv_1 = os.path.join(config['WEIGHT_TRACKING_DIR'], f'model_client_{i}_round_{r - 2}.csv')
                prev_csv_2 = os.path.join(config['WEIGHT_TRACKING_DIR'], f'model_client_{i}_round_{r - 1}.csv')

                print(f"Calculating weights using the sum of round {r - 1} and {r - 2}...")
                contributed_weights = FLDT.sum_weights_from_two_csvs(prev_csv_1, prev_csv_2, model_arr[i])
            else:
                print("Run with FedAvg enabled...")
                # A missing client contributes all-zero weights. np.zeros keeps the
                # float64 dtype of the former `0.0 * np.random.rand(...)` without
                # consuming random draws.
                contributed_weights = [np.zeros(w.shape) for w in model_arr[i].get_weights()]

        weights_per_round.append(contributed_weights)
        fake_model.set_weights(contributed_weights)

        if is_weight:
            # Persist what this client contributed so later rounds can rebuild a
            # missing update from rounds r-1 and r-2.
            csv_file = os.path.join(config['WEIGHT_TRACKING_DIR'], f'model_client_{i}_round_{r}.csv')
            FLDT.save_weights_to_csv(fake_model, csv_file)
            print(f"Saved weights to {csv_file}")

        history_client_normal_dict[str(r)][f'client_{i}'] = hist_dict_normal

    # Aggregate: unweighted mean over all clients' contributed weights.
    global_weights = _average_weights(weights_per_round)

In [None]:
# Root of all history outputs; cluster runs get their own sub-folder.
# (outputs_prefix is reused by the central-history cell further down.)
outputs_prefix = 'model_history' if args.is_cluster == 'no' else "model_history/" + args.is_cluster

# 'normal' runs have no missing-client settings, so their path is shorter.
if config['PREFIX'] == 'normal':
    history_client_save_dir = os.path.join(outputs_prefix, config['DATASET'], config['PREFIX'], 'clients')
else:
    history_client_save_dir = os.path.join(
        outputs_prefix,
        config['DATASET'],
        config['PREFIX'],
        'clients',
        config['MATRIX_MISSING_LENGTH'],
        config['PERCENTAGE_MISSING_CLIENT'],
        config['MISSING_MODE'],
    )

if not os.path.exists(history_client_save_dir):
    os.makedirs(history_client_save_dir)

# Per-round, per-client training histories collected during the federated loop.
FLDT.to_json(f"{history_client_save_dir}/losses_rmses.json", history_client_normal_dict)

In [None]:
# Get loss of central model after training all round with all clients
# central_x_train, central_y_train, central_x_test, central_y_test, _ = train_test_dataset[0]
# central_model = FLDT.create_model(input_shape=(1, config['LOOK_BACK']), output_shape=1)
# # weights = [model.get_weights() for model in model_arr]
# # new_weights = [
# #             np.mean([w[i] for w in weights], axis=0) for i in range(len(weights[0]))
# #         ]
# central_model.set_weights(global_weights)
# rmse_arr = []
# for val_data in train_test_dataset:
#     _, _, val_x_test, val_y_test, _ = val_data
#     predictions = central_model.predict(val_x_test)
#     rmse_arr.append((mean_squared_error(val_y_test, predictions)**(1/2)))

# losses_rmses_dict = {
#     "rmses": rmse_arr
# }

# print(f"Central model cross prediction/test RMSE: {np.mean(rmse_arr)}")
# central_history = central_model.fit(central_x_train, central_y_train, validation_data=(central_x_test, central_y_test), epochs=config['EPOCHS'], batch_size=config['BATCH_SIZE'], verbose=1)

# losses = central_history.history['loss']
# rmses = [x**(1/2) for x in central_history.history['mse']]
# maes = central_history.history['mae']
# val_losses = central_history.history['val_loss']
# val_rmses = [x**(1/2) for x in central_history.history['val_mse']]
# val_maes = central_history.history['val_mae']

# Load the final aggregated weights into the central model.
central_model.set_weights(global_weights)

# Score it on every client's validation test split.
# evaluate() yields four values per client: loss, MSE, MAE, MAPE.
client_scores = [
    central_model.evaluate(val_x_test, val_y_test, verbose=1)
    for _, _, val_x_test, val_y_test, _ in val_dataset
]
losses = [loss for loss, _, _, _ in client_scores]
rmses = [mse ** 0.5 for _, mse, _, _ in client_scores]
maes = [mae for _, _, mae, _ in client_scores]

# Score it on the central test split as well.
central_loss, central_mse, central_mae, central_mape = central_model.evaluate(
    central_test_x, central_test_y, verbose=1
)

# Collected results; a "refit" section is added by the next cell.
losses_rmses_dict = {
    "central_round_predictions": central_model_evaluation_rounds,
    "client_predictions": {"losses": losses, "rmses": rmses, "maes": maes},
    "central_predictions": {
        "loss": central_loss,
        "rmse": central_mse ** 0.5,
        "mae": central_mae,
    },
}

In [None]:
# Continue training the central model (currently holding the aggregated weights)
# on the central training split, validating on the central test split.
# Stops once val_loss has not improved by at least 0.001 for 5 epochs and then
# restores the best weights seen.
# NOTE: this mutates central_model, so re-running the cell trains it further.
early_stopping = EarlyStopping(
    monitor="val_loss",
    min_delta=0.001,
    patience=5,
    verbose=1,
    mode="auto",
    restore_best_weights=True,
)
central_history = central_model.fit(
    central_train_x,
    central_train_y,
    validation_data=(central_test_x, central_test_y),
    epochs=config['EPOCHS'],
    batch_size=config['BATCH_SIZE'],
    verbose=1,
    callbacks=[early_stopping],
)

# Per-epoch curves; RMSE is derived from the recorded MSE values.
refit_log = central_history.history
refit_losses = refit_log['loss']
refit_rmses = [epoch_mse ** 0.5 for epoch_mse in refit_log['mse']]
refit_maes = refit_log['mae']
refit_val_losses = refit_log['val_loss']
refit_val_rmses = [epoch_mse ** 0.5 for epoch_mse in refit_log['val_mse']]
refit_val_maes = refit_log['val_mae']

losses_rmses_dict["refit"] = dict(
    losses=refit_losses,
    rmses=refit_rmses,
    maes=refit_maes,
    val_losses=refit_val_losses,
    val_rmses=refit_val_rmses,
    val_maes=refit_val_maes,
)

In [None]:
# Output folder for the central model's results; 'normal' runs use the short
# path without the missing-client settings.
if config['PREFIX'] == 'normal':
    history_central_save_dir = os.path.join(outputs_prefix, config['DATASET'], config['PREFIX'], 'central')
else:
    history_central_save_dir = os.path.join(
        outputs_prefix,
        config['DATASET'],
        config['PREFIX'],
        'central',
        config['MATRIX_MISSING_LENGTH'],
        config['PERCENTAGE_MISSING_CLIENT'],
        config['MISSING_MODE'],
    )

if not os.path.exists(history_central_save_dir):
    os.makedirs(history_central_save_dir)

# Round-by-round, per-client, central and refit metrics in one JSON file.
FLDT.to_json(f"{history_central_save_dir}/losses_rmses.json", losses_rmses_dict)

In [None]:
def _plot_refit_curves(train_values, test_values, metric_label):
    """Plot per-epoch train and test curves of one refit metric.

    train_values / test_values: per-epoch sequences of the metric.
    metric_label: used as the y-axis label and inside the figure title.
    Epochs are numbered from 1 on the x axis.
    """
    plt.figure(figsize=(10, 5))
    for curve, curve_label in ((train_values, 'Train'), (test_values, 'Test')):
        plt.plot(range(1, len(curve) + 1), curve, label=curve_label)
    plt.xlabel('Epoch')
    plt.ylabel(metric_label)
    plt.legend()
    plt.title(f'Central Model {metric_label} Refit')
    plt.show()


# Refit curves of the central model: loss first, then RMSE.
_plot_refit_curves(refit_losses, refit_val_losses, 'Loss')
_plot_refit_curves(refit_rmses, refit_val_rmses, 'RMSE')

In [None]:
# Each client model predicts on its own training inputs; the predictions are mapped
# back through that client's scaler.
# Dataset tuple layout: (x_train, y_train, x_test, y_test, scaler).
train_predictions_arr = [
    FLDT.inverse_transform_predictions(
        model_arr[client_idx].predict(train_test_dataset[client_idx][0]),
        train_test_dataset[client_idx][4],
    )
    for client_idx in range(config['NUM_CLIENTS'])
]

In [None]:
# Each client model predicts on its own test inputs; the predictions are mapped
# back through that client's scaler.
# Dataset tuple layout: (x_train, y_train, x_test, y_test, scaler).
test_predictions_arr = [
    FLDT.inverse_transform_predictions(
        model_arr[client_idx].predict(train_test_dataset[client_idx][2]),
        train_test_dataset[client_idx][4],
    )
    for client_idx in range(config['NUM_CLIENTS'])
]

In [None]:
# One figure per client: original series with train and test predictions.
for client_idx in range(config['NUM_CLIENTS']):
    actual_series = dictionary[f'client_{client_idx}']['value']
    figure_title = f"FL Model - Client {client_idx}"
    FLDT.prepare_and_plot(
        actual_series,
        train_predictions_arr[client_idx],
        test_predictions_arr[client_idx],
        config['LOOK_BACK'],
        figure_title,
    )

In [None]:
# Destination folder for the prediction workbooks; 'normal' runs use the short
# path without the missing-client settings.
# NOTE(review): unlike the history folders, PREFIX and MATRIX_MISSING_LENGTH are
# concatenated into ONE path component here (no separator) — confirm this is intended.
if config['PREFIX'] == 'normal':
    output_dir = os.path.join(config['FL_OUTPUT_DIR'], config['PREFIX'])
else:
    output_dir = os.path.join(
        config['FL_OUTPUT_DIR'],
        config['PREFIX'] + config['MATRIX_MISSING_LENGTH'],
        config['PERCENTAGE_MISSING_CLIENT'],
        config['MISSING_MODE'],
    )

# exist_ok=True already covers the "directory exists" case, so no separate
# os.path.exists() check is needed.
os.makedirs(output_dir, exist_ok=True)

# File stem -> data, three entries per client (test predictions, train
# predictions, original series). Built completely before anything is written.
predictions_files = {}
for i in range(config['NUM_CLIENTS']):
    predictions_files[f'client{i}_test_predictions'] = test_predictions_arr[i]
    predictions_files[f'client{i}_train_predictions'] = train_predictions_arr[i]
    predictions_files[f'client{i}_original'] = dictionary[f'client_{i}']['value']

# One .xlsx workbook per entry.
for filename, data in predictions_files.items():
    pd.DataFrame(data).to_excel(os.path.join(output_dir, filename + '.xlsx'))