## Prepare Data

In [57]:
import os
import glob 

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from sklearn import datasets, linear_model
from sklearn.metrics import mean_squared_error, r2_score

from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import GridSearchCV

In [58]:
df = pd.read_parquet("../metrics/epoch_metrics")

In [59]:
grouped = df.groupby(['dataset_name', 'strat_name', 'num_clients', 'batch_size', 'theta', 'nn_name'])

In [60]:
df['total_time'] = grouped['epoch_duration_sec'].cumsum()

In [61]:
def dataset_size(ds_name):
    if ds_name == 'CIFAR10':
        return 50_000
    if ds_name == 'MNIST':
        return 60_000
    return None

In [62]:
df['ds_samples'] = df['dataset_name'].apply(dataset_size)

In [63]:
def model_num_weights(nn_name):
    if nn_name == 'LeNet-5':
        return 61_706
    if nn_name == 'AdvancedCNN':
        return 2_592_202
    if nn_name == 'DenseNet121':
        return 6_964_106
    if nn_name == 'DenseNet201':
        return 18_112_138
    return None

In [64]:
df['num_weights'] = df['nn_name'].apply(model_num_weights)

In [106]:
def local_state_size(strat_name):
    if strat_name == 'naive':
        return 1
    if strat_name == 'linear':
        return 2
    if strat_name == 'sketch':
        return 1000
    return None

In [108]:
df['local_state_size'] = df['strat_name'].apply(local_state_size)

In [67]:
def sample_size(ds_name):
    if ds_name == 'CIFAR10':
        return 32 * 32 * 3
    if ds_name == 'MNIST':
        return 28 * 28
    return None

In [68]:
df['sample_size'] = df['dataset_name'].apply(sample_size)

In [212]:
def local_state_computation_cost(row):
    if row['strat_name'] == 'naive':
        return row['num_weights']  # O(d)
    if row['strat_name'] == 'linear':
        return row['num_weights']  #  O(2*d)
    if row['strat_name'] == 'sketch':
        return row['num_weights'] * 5 * 250  #  O(width * d)
    return None

In [213]:
df['local_state_computation_cost'] = df.apply(local_state_computation_cost, axis=1)

## Regression

In [173]:
def linear_regression(data_X_df, data_y_df):    
    # Split the data into training and testing sets
    X_train, X_test, y_train, y_test = train_test_split(data_X_df, data_y_df, test_size=0.1)

    regr_model = linear_model.LinearRegression()

    regr_model.fit(X_train, y_train)

    y_pred = regr_model.predict(X_test)

    # Evaluate the model
    print("Linear Regression")
    print(f"Coefficients: {regr_model.coef_}\n")
    print("Linear Regression")
    print(f"RMS error: {np.sqrt(mean_squared_error(y_test, y_pred)):.2f}")
    print(f"Coefficient of determination: {r2_score(y_test, y_pred):.2f}")
    
    return regr_model

## NaiveFDA


In [208]:
df_strat = df[df.strat_name == 'naive']

data_X_df = pd.DataFrame(
    {
        'train_computation_cost': df_strat['total_fda_steps'] * df_strat['sample_size'] * df_strat['num_weights'] * df_strat['batch_size'],
        #'local_state_computation_cost': df_strat['local_state_computation_cost'],
        
        'sync_communication_cost': df_strat['total_rounds'] * df_strat['num_weights'] * np.ceil(np.log2(df_strat['num_clients'])),
        #'monitor_communication_cost': df_strat['total_fda_steps'] * df_strat['monitoring_size'] * np.ceil(np.log2(df_strat['num_clients']))
    }
)

data_y_df = df_strat['total_time']

In [209]:
regr_model = linear_regression(data_X_df, data_y_df)

Linear Regression
Coefficients: [1.14948881e-12 1.75574294e-07]

Linear Regression
RMS error: 105.80
Coefficient of determination: 1.00


## LinearFDA

In [198]:
df_strat = df[df.strat_name == 'linear']

data_X_df = pd.DataFrame(
    {
        'train_computation_cost': df_strat['total_fda_steps'] * df_strat['sample_size'] * df_strat['num_weights'] * df_strat['batch_size'],
        #'local_state_computation_cost': df_strat['local_state_computation_cost'],
        
        'sync_communication_cost': df_strat['total_rounds'] * df_strat['num_weights'] * np.ceil(np.log2(df_strat['num_clients'])),
        'monitor_communication_cost': df_strat['total_fda_steps'] * df_strat['monitoring_size'] * np.ceil(np.log2(df_strat['num_clients']))
    }
)

data_y_df = df_strat['total_time']

In [199]:
regr_model = linear_regression(data_X_df, data_y_df)

Linear Regression
Coefficients: [ 1.17767574e-12  3.60731480e-07 -1.03784648e-02]

Linear Regression
RMS error: 71.70
Coefficient of determination: 1.00


## SketchFDA

In [196]:
df_strat = df[df.strat_name == 'sketch']

data_X_df = pd.DataFrame(
    {
        'train_computation_cost': df_strat['total_fda_steps'] * df_strat['sample_size'] * df_strat['num_weights'] * df_strat['batch_size'],
        #'local_state_computation_cost': df_strat['local_state_computation_cost'],
        
        'sync_communication_cost': df_strat['total_rounds'] * df_strat['num_weights'] * np.ceil(np.log2(df_strat['num_clients'])),
        'monitor_communication_cost': df_strat['total_fda_steps'] * df_strat['monitoring_size'] * np.ceil(np.log2(df_strat['num_clients']))
    }
)

data_y_df = df_strat['total_time']

In [197]:
regr_model = linear_regression(data_X_df, data_y_df)

Linear Regression
Coefficients: [1.28072892e-12 1.23004150e-07 4.05282662e-06]

Linear Regression
RMS error: 27.00
Coefficient of determination: 1.00


## All FDA Strategies together

In [137]:
data_X_df = pd.DataFrame(
    {
        'computation_cost': df['total_fda_steps'] * df['sample_size'] * df['num_weights'] * df['batch_size'],
        'local_state_computation_cost': df['local_state_computation_cost'],
        
        'sync_communication_cost': df['total_rounds'] * df['num_weights'] * np.ceil(np.log2(df['num_clients'])),
        'monitor_communication_cost': df['total_fda_steps'] * df['monitoring_size'] * np.ceil(np.log2(df['num_clients']))
    }
)

data_y_df = df['total_time']

In [138]:
regr_model = linear_regression(data_X_df, data_y_df)

Linear Regression
Coefficients: [1.14903881e-12 1.14444198e-08 1.98555138e-07 4.19920164e-06]

Linear Regression
RMS error: 100.77
Coefficient of determination: 1.00
