<a href="https://colab.research.google.com/github/kdemertzis/TS_demer/blob/main/Fed%2Bonnx.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [9]:
# Importing necessary libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import onnxruntime as rt
from pycaret.regression import *

# Loading the training dataset (the CSV is read without a header row)
url = 'https://raw.githubusercontent.com/kdemertzis/TS_demer/main/Data/Fed.csv'
df = pd.read_csv(url, header=None)

# Setting column names to "0", "1", ... so the last column can be used as target
df.columns = [str(i) for i in range(df.shape[1])]

# Splitting the dataset into at most n_chunks contiguous, near-equal chunks.
# Splitting the row positions with np.array_split keeps chunk sizes within one
# row of each other. The previous floor-division step produced an extra, tiny
# remainder chunk whenever the row count was not a multiple of 10, and a zero
# step (ValueError in range()) when there were fewer than 10 rows.
n_chunks = 10
chunk_size = int(df.shape[0] / n_chunks)  # nominal rows per chunk; kept in case other cells read it
row_groups = np.array_split(np.arange(df.shape[0]), n_chunks)
df_chunks = [df.iloc[rows, :] for rows in row_groups if len(rows) > 0]

# Initialize list to store the global model produced by each round
global_models = []

# Repeat the process until convergence
# NOTE(review): the loop has no iteration cap; it only ends once two
# consecutive global models differ by <= 0.01 in the metric read below.
converged = False
while not converged:
    models = []
    # Train a model on each chunk; the last column of the chunk is the target
    for chunk in df_chunks:
        exp_reg = setup(data=chunk, target=chunk.columns[-1])
        best_model = compare_models(n_select=1)
        models.append(best_model)

    # Aggregate the models
    # NOTE(review): aggregate_models is not defined in this cell and does not
    # appear to be exported by pycaret.regression; the recorded output of this
    # cell is a NameError. Provide an implementation before running.
    global_model = aggregate_models(models)
    global_models.append(global_model)

    # Check for convergence
    if len(global_models) >= 2:
        # Compute the difference in accuracy between the latest and the previous global models
        # NOTE(review): PyCaret documents evaluate_model as rendering an
        # interactive widget without returning metrics, so the ['Accuracy']
        # subscript presumably fails; 'Accuracy' is also a classification
        # metric while this cell imports pycaret.regression - confirm.
        accuracy_diff = evaluate_model(global_models[-1])['Accuracy'] - evaluate_model(global_models[-2])['Accuracy']
        if accuracy_diff <= 0.01:
            converged = True

# Save and return the final global model
final_model = global_models[-1]
# NOTE(review): export_model is not a documented pycaret.regression function;
# confirm which ONNX exporter is intended here.
model_onnx = export_model(final_model, 'onnx')


NameError: ignored

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import onnxruntime as rt
from pycaret.regression import *
import math

# Loading the training dataset (the CSV is read without a header row)
url = 'https://raw.githubusercontent.com/kdemertzis/TS_demer/main/Data/Fed.csv'
df = pd.read_csv(url, header=None)

# Setting column names to "0", "1", ... (string labels)
df.columns = [str(i) for i in range(df.shape[1])]

# Split the dataset into smaller chunks: ceil(rows / 4) rows per chunk, which
# yields at most 4 chunks. An empty df makes chunk_size 0 and range() raises
# ValueError.
chunk_size = math.ceil(df.shape[0] / 4)
df_chunks = [df[i:i + chunk_size] for i in range(0, df.shape[0], chunk_size)]

# Initialize list to store the model updates from each device or participant
model_updates = []

# Train models locally on each device or participant
for chunk in df_chunks:
  # Initializing PyCaret with the chunk's last column as the target
  exp_reg = setup(data=chunk, target=chunk.columns[-1])

  # Comparing models and keeping the single best one
  best_model = compare_models(n_select = 1) 

  # Evaluating the model (return value is not used)
  evaluate_model(best_model)

  # Generating performance reports for the first models
  # NOTE(review): models_list and generate_model_report are not defined in
  # this cell - confirm where they are meant to come from. The list is also
  # re-created on every pass, so only the last chunk's reports survive the loop.
  performance_reports_list = []
  for model in models_list:
      performance_reports_list.append(generate_model_report(model))

  # Store the model update
  model_updates.append(best_model)

# Aggregate the model updates from each device or participant to create an overall global model
# The first update seeds global_model; each later one is folded in pairwise.
# NOTE(review): aggregate_models is not defined in this cell - confirm its source.
global_model = None
for model in model_updates:
  if global_model is None:
    global_model = model
  else:
    global_model = aggregate_models(global_model, model)

# Repeat steps 2 and 3 multiple times until the global model converges to a satisfactory accuracy
# (You can set a threshold for accuracy, or you can run the loop for a certain number of iterations)
# NOTE(review): no such loop is implemented here; training runs exactly once.

# Generating performance report for the final federated model
# NOTE(review): final_federated_model is never assigned in this cell; the
# aggregated model above is named global_model - confirm which is intended.
federated_model_performance_report = generate_model_report(final_federated_model)

# Saving performance reports as HTML files
# (file names are numbered from 1)
for i, report in enumerate(performance_reports_list):
    report.save_report(file_name = f"First_Model_{i+1}_Performance_Report")

federated_model_performance_report.save_report(file_name = "Federated_Model_Performance_Report")

# Exporting the final global model to ONNX format
# NOTE(review): export_model is not a documented pycaret.regression function - confirm.
model_onnx = export_model(global_model, 'onnx')

# Loading the test dataset (the CSV is read without a header row)
url_test = 'https://raw.githubusercontent.com/kdemertzis/TS_demer/main/Data/Fed_test.csv'
df_test = pd.read_csv(url_test, header=None)

# Setting column names to "0", "1", ... (string labels)
df_test.columns = [str(i) for i in range(df_test.shape[1])]

# Initializing ONNX Runtime
# NOTE(review): InferenceSession expects a model path or serialized model
# bytes - confirm that model_onnx is one of those.
sess = rt.InferenceSession(model_onnx)

# Extracting input and output nodes (first input / first output only)
input_name = sess.get_inputs()[0].name
output_name = sess.get_outputs()[0].name

# Running inference with ONNX runtime on every column except the last one
# NOTE(review): df_test.values keeps the dtype pandas inferred; ONNX models
# commonly require float32 input - confirm whether a cast is needed.
prediction = sess.run([output_name], {input_name: df_test.values[:,:-1]})[0]

# Printing the prediction
print("Prediction: ", prediction)


In [None]:
# Generating performance reports for the first models
# NOTE(review): models_list and generate_model_report are not defined at
# module level in the visible cells - confirm where they come from.
performance_reports_list = []
for model in models_list:
    performance_reports_list.append(generate_model_report(model))

# Generating performance report for the final federated model
# NOTE(review): final_federated_model is not assigned in the visible cells - confirm.
federated_model_performance_report = generate_model_report(final_federated_model)

# Saving performance reports as HTML files
# (file names are numbered from 1, in the order the reports were generated)
for i, report in enumerate(performance_reports_list):
    report.save_report(file_name = f"First_Model_{i+1}_Performance_Report")

federated_model_performance_report.save_report(file_name = "Federated_Model_Performance_Report")


In [None]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
import onnxruntime as rt
from pycaret.regression import *

def federated_training(df, target, participants, chunk_size, early_stopping_rounds, regularization_coef, accuracy_threshold):
    """Run federated-averaging rounds over per-participant PyCaret models.

    The rows of ``df`` are split into ``participants`` chunks, each chunk is
    min-max scaled on its own, and one random-forest model is created per
    chunk. Every round refits the participant models, shrinks their
    coefficients, averages them into the federated model and scores it. The
    loop stops as soon as the score reaches ``accuracy_threshold``.

    Args:
        df: Training data (a DataFrame), including the target column.
        target: Label of the target column in ``df``.
        participants: Number of chunks / local models.
        chunk_size: Unused; kept so existing callers keep working.
        early_stopping_rounds: Maximum number of training rounds.
        regularization_coef: Shrinkage factor; in round ``k`` the coefficients
            are divided by ``1 + regularization_coef * k``.
        accuracy_threshold: Score at which the reports are exported and
            training stops.

    Returns:
        The federated model of the last completed round, or ``None`` when
        ``early_stopping_rounds`` is 0.
    """
    import copy  # local import: only needed to snapshot the federated model

    # Split the rows into near-equal chunks and scale each one independently.
    # fit_transform returns a bare ndarray, so the result is wrapped back into
    # a DataFrame to keep the column labels that `target` refers to.
    chunks = []
    for part in np.array_split(df, participants):
        scaler = MinMaxScaler()
        chunks.append(pd.DataFrame(scaler.fit_transform(part), columns=part.columns, index=part.index))

    # Initialize the first models, one per participant
    models_list = []
    for chunk in chunks:
        # NOTE(review): `silent` is a PyCaret 2.x argument; newer releases
        # appear to have removed it - confirm the pinned version.
        setup(data=chunk, target=target, silent=True)
        models_list.append(create_model("rf"))

    # Initialize the federated model
    federated_model = None

    # Train the models and aggregate the model weights
    for iteration in range(early_stopping_rounds):
        for i in range(participants):
            # NOTE(review): fit_model is not defined in this cell and is not a
            # documented pycaret.regression function - confirm its source.
            models_list[i] = fit_model(models_list[i], early_stopping_rounds=1)

            # Regularize the model weights (stronger shrinkage in later rounds)
            # NOTE(review): `.model.coef_` presumes a linear estimator behind a
            # wrapper; a random forest has no coefficients - confirm model type.
            models_list[i].model.coef_ = models_list[i].model.coef_ / (1 + regularization_coef * iteration)

        # Aggregate the model weights: plain mean over all participants.
        # The federated model is a deep copy, so writing the average into it
        # never mutates participant 0 (the previous aliasing added that
        # participant's weights to themselves).
        if federated_model is None:
            federated_model = copy.deepcopy(models_list[0])
        federated_model.model.coef_ = np.mean([m.model.coef_ for m in models_list], axis=0)

        # Evaluate the performance on a validation set
        # NOTE(review): PyCaret documents evaluate_model as rendering a widget
        # without returning metrics, so this subscript presumably fails - confirm.
        accuracy = evaluate_model(federated_model, fold=5)["Accuracy"]
        print(f"Iteration {iteration}: Accuracy = {accuracy}")

        # Check if accuracy has reached the threshold
        if accuracy >= accuracy_threshold:
            # Exporting the analytical report for the first models
            # NOTE(review): 'summary' is not among the documented plot_model
            # plot names and plot_model may not return a figure - confirm.
            for i, model in enumerate(models_list):
                model_id = "Model " + str(i)
                model_performance_report = plot_model(model, plot='summary')
                model_performance_report.savefig(model_id + ' performance report.png')

            # Exporting the analytical report for the final federated model
            final_model_performance_report = plot_model(federated_model, plot='summary')
            final_model_performance_report.savefig('Final federated model performance report.png')
            break

    return federated_model

           


In [11]:
!pip install lazypredict

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting lazypredict
  Downloading lazypredict-0.2.12-py2.py3-none-any.whl (12 kB)
Installing collected packages: lazypredict
Successfully installed lazypredict-0.2.12


In [12]:
# Importing necessary libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import onnxruntime as rt
import lazypredict

# Loading the training dataset (the CSV is read without a header row)
url = 'https://raw.githubusercontent.com/kdemertzis/TS_demer/main/Data/Fed.csv'
df = pd.read_csv(url, header=None)

# Setting column names to "0", "1", ... (string labels)
df.columns = [str(i) for i in range(df.shape[1])]

# Splitting the data into smaller chunks
# NOTE(review): number_of_devices is never assigned in this cell; the
# NameError recorded as this cell's output is consistent with that.
df_chunks = np.array_split(df, number_of_devices)

# Training models locally on each device (the last column is the target)
# NOTE(review): lazypredict's documented entry points are LazyRegressor and
# LazyClassifier in lazypredict.Supervised; train/Aggregate/get_accuracy/
# to_onnx/summary used below do not appear in its documentation - confirm.
models = []
for i in range(number_of_devices):
  df_chunk = df_chunks[i]
  model = lazypredict.train(df_chunk, target_col=df.columns[-1])
  models.append(model)

# Aggregating the model updates to create a global model
global_model = lazypredict.Aggregate(models)

# Checking accuracy of global model
# NOTE(review): df_test is only loaded further down in this cell, so on a
# fresh kernel it is undefined at this point.
accuracy = lazypredict.get_accuracy(global_model, df_test)

# Setting accuracy threshold
accuracy_threshold = 0.9

# Repeating steps 2 and 3 until the global model converges to a satisfactory accuracy
# NOTE(review): each pass retrains on the same unchanged chunks and there is
# no iteration cap; if training is deterministic this never terminates.
while accuracy < accuracy_threshold:
  models = []
  for i in range(number_of_devices):
    df_chunk = df_chunks[i]
    model = lazypredict.train(df_chunk, target_col=df.columns[-1])
    models.append(model)
  global_model = lazypredict.Aggregate(models)
  accuracy = lazypredict.get_accuracy(global_model, df_test)

# Exporting the final global model to ONNX format
model_onnx = lazypredict.to_onnx(global_model)

# Initializing ONNX Runtime
# NOTE(review): InferenceSession expects a model path or serialized model
# bytes - confirm that model_onnx is one of those.
sess = rt.InferenceSession(model_onnx)

# Extracting input and output nodes (first input / first output only)
input_name = sess.get_inputs()[0].name
output_name = sess.get_outputs()[0].name

# Loading the test dataset (the CSV is read without a header row)
url_test = 'https://raw.githubusercontent.com/kdemertzis/TS_demer/main/Data/Fed_test.csv'
df_test = pd.read_csv(url_test, header=None)

# Setting column names to "0", "1", ... (string labels)
df_test.columns = [str(i) for i in range(df_test.shape[1])]

# Running inference with ONNX runtime on every column except the last one
prediction = sess.run([output_name], {input_name: df_test.values[:,:-1]})[0]

# Printing the prediction
print("Prediction: ", prediction)

# Generating analytical reports of performance of the first models and final federated model
# (only the first local model, models[0], is summarized)
lazypredict.summary(models[0])
lazypredict.summary(global_model)


NameError: ignored

In [13]:
# Importing necessary libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import onnxruntime as rt
import lazypredict

# Loading the training dataset (the CSV is read without a header row)
url = 'https://raw.githubusercontent.com/kdemertzis/TS_demer/main/Data/Fed.csv'
df = pd.read_csv(url, header=None)

# Setting column names to "0", "1", ... (string labels)
df.columns = [str(i) for i in range(df.shape[1])]

# Initializing LazyPredict on the full dataset, with the last column as target
# NOTE(review): lazypredict's documented entry points are LazyRegressor and
# LazyClassifier in lazypredict.Supervised; train/summary/plot_regression/
# predict do not appear in its documentation, which is consistent with the
# AttributeError recorded as this cell's output - confirm the intended API.
global_model = lazypredict.train(data=df, target=df.columns[-1])

# Generating analytical reports of performance
lazypredict.summary(global_model)

# Plotting performance metrics
lazypredict.plot_regression(global_model, metrics=['r2', 'MAE', 'MSE'])

# Loading the test dataset (the CSV is read without a header row)
url_test = 'https://raw.githubusercontent.com/kdemertzis/TS_demer/main/Data/Fed_test.csv'
df_test = pd.read_csv(url_test, header=None)

# Setting column names to "0", "1", ... (string labels)
df_test.columns = [str(i) for i in range(df_test.shape[1])]

# Generating predictions on the test dataset from every column except the last one
prediction = lazypredict.predict(global_model, df_test[df_test.columns[:-1]])

# Printing the prediction
print("Prediction: ", prediction)


AttributeError: ignored

In [None]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
import onnxruntime as rt
from pycaret.regression import *

def federated_training(df, target, participants, chunk_size, early_stopping_rounds, regularization_coef, accuracy_threshold):
    """Run federated-averaging rounds over per-participant PyCaret models.

    The rows of ``df`` are split into ``participants`` chunks, each chunk is
    min-max scaled on its own, and one random-forest model is created per
    chunk. Every round refits the participant models, shrinks their
    coefficients, averages them into the federated model and scores it. The
    loop stops as soon as the score reaches ``accuracy_threshold``.

    Args:
        df: Training data (a DataFrame), including the target column.
        target: Label of the target column in ``df``.
        participants: Number of chunks / local models.
        chunk_size: Unused; kept so existing callers keep working.
        early_stopping_rounds: Maximum number of training rounds.
        regularization_coef: Shrinkage factor; in round ``k`` the coefficients
            are divided by ``1 + regularization_coef * k``.
        accuracy_threshold: Score at which the reports are exported and
            training stops.

    Returns:
        The federated model of the last completed round, or ``None`` when
        ``early_stopping_rounds`` is 0.
    """
    import copy  # local import: only needed to snapshot the federated model

    # Split the rows into near-equal chunks and scale each one independently.
    # fit_transform returns a bare ndarray, so the result is wrapped back into
    # a DataFrame to keep the column labels that `target` refers to.
    chunks = []
    for part in np.array_split(df, participants):
        scaler = MinMaxScaler()
        chunks.append(pd.DataFrame(scaler.fit_transform(part), columns=part.columns, index=part.index))

    # Initialize the first models, one per participant
    models_list = []
    for chunk in chunks:
        # NOTE(review): `silent` is a PyCaret 2.x argument; newer releases
        # appear to have removed it - confirm the pinned version.
        setup(data=chunk, target=target, silent=True)
        models_list.append(create_model("rf"))

    # Initialize the federated model
    federated_model = None

    # Train the models and aggregate the model weights
    for iteration in range(early_stopping_rounds):
        for i in range(participants):
            # NOTE(review): fit_model is not defined in this cell and is not a
            # documented pycaret.regression function - confirm its source.
            models_list[i] = fit_model(models_list[i], early_stopping_rounds=1)

            # Regularize the model weights (stronger shrinkage in later rounds)
            # NOTE(review): `.model.coef_` presumes a linear estimator behind a
            # wrapper; a random forest has no coefficients - confirm model type.
            models_list[i].model.coef_ = models_list[i].model.coef_ / (1 + regularization_coef * iteration)

        # Aggregate the model weights: plain mean over all participants.
        # The federated model is a deep copy, so writing the average into it
        # never mutates participant 0 (the previous aliasing added that
        # participant's weights to themselves).
        if federated_model is None:
            federated_model = copy.deepcopy(models_list[0])
        federated_model.model.coef_ = np.mean([m.model.coef_ for m in models_list], axis=0)

        # Evaluate the performance on a validation set
        # NOTE(review): PyCaret documents evaluate_model as rendering a widget
        # without returning metrics, so this subscript presumably fails - confirm.
        accuracy = evaluate_model(federated_model, fold=5)["Accuracy"]
        print(f"Iteration {iteration}: Accuracy = {accuracy}")

        # Check if accuracy has reached the threshold
        # (this branch used tab indentation under space-indented code, which
        # Python 3 rejects with a TabError; it is now indented with spaces)
        if accuracy >= accuracy_threshold:
            # Exporting the analytical report for the first models
            # NOTE(review): 'summary' is not among the documented plot_model
            # plot names and plot_model may not return a figure - confirm.
            for i, model in enumerate(models_list):
                model_id = "Model " + str(i)
                model_performance_report = plot_model(model, plot='summary')
                model_performance_report.savefig(model_id + ' performance report.png')

            # Exporting the analytical report for the final federated model
            final_model_performance_report = plot_model(federated_model, plot='summary')
            final_model_performance_report.savefig('Final federated model performance report.png')
            break

    return federated_model


In [14]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
import xgboost as xgb

def federated_training(df, target, participants, chunk_size, early_stopping_rounds, regularization_coef, accuracy_threshold):
    """Train one XGBoost regressor per participant and score a federated model.

    The rows of ``df`` are split into ``participants`` chunks, each chunk is
    min-max scaled on its own and divided 80/20 into a training and a
    validation part. Every round refits each participant's model on its own
    training part with an L2 penalty that grows with the round number, then
    scores the federated model on the pooled validation parts. Training stops
    as soon as the score reaches ``accuracy_threshold``.

    Args:
        df: Training data (a DataFrame), including the target column.
        target: Label of the target column in ``df``.
        participants: Number of chunks / local models.
        chunk_size: Unused; kept so existing callers keep working.
        early_stopping_rounds: Maximum number of training rounds.
        regularization_coef: In round ``k`` the models are fitted with
            ``reg_lambda = regularization_coef * k``.
        accuracy_threshold: Score (``XGBRegressor.score``) at which the model
            is exported and training stops.

    Returns:
        The federated model of the last completed round, or ``None`` when
        ``early_stopping_rounds`` is 0.
    """
    # Split the rows into near-equal chunks and scale each one independently.
    # fit_transform returns a bare ndarray, so the result is wrapped back into
    # a DataFrame; otherwise .drop(target, axis=1) below would fail.
    chunks = []
    for part in np.array_split(df, participants):
        scaler = MinMaxScaler()
        chunks.append(pd.DataFrame(scaler.fit_transform(part), columns=part.columns, index=part.index))

    # Initialize the first models. Each participant keeps its own split;
    # previously only the last participant's split survived this loop and was
    # then used to refit and validate every model.
    models_list = []
    splits = []
    for chunk in chunks:
        X_train, X_val, y_train, y_val = train_test_split(chunk.drop(target, axis=1), chunk[target], test_size=0.2, random_state=0)
        model = xgb.XGBRegressor(random_state=0, n_jobs=-1)
        model.fit(X_train, y_train)
        models_list.append(model)
        splits.append((X_train, X_val, y_train, y_val))

    # Pooled validation data from all participants, used to score each round
    X_val_all = pd.concat([split[1] for split in splits])
    y_val_all = pd.concat([split[3] for split in splits])

    # Initialize the federated model
    federated_model = None

    # Train the models and aggregate the model weights
    for iteration in range(early_stopping_rounds):
        for model, (X_train, _, y_train, _) in zip(models_list, splits):
            # Regularize the model weights: the penalty has to be set as an
            # estimator parameter before fitting. Booster.set_attr only stores
            # string metadata and takes keyword arguments.
            model.set_params(reg_lambda=regularization_coef * iteration)
            model.fit(X_train, y_train)

        # Aggregate the model weights
        if federated_model is None:
            # NOTE(review): this aliases participant 0's estimator, so later
            # refits of that participant also change the federated model.
            federated_model = models_list[0]
        else:
            for i in range(participants):
                # NOTE(review): the documented xgboost Booster API has no
                # combine() method, so this call presumably raises
                # AttributeError - confirm the intended aggregation scheme.
                federated_model._Booster.combine(models_list[i]._Booster, weight=1 / participants)

        # Evaluate the performance on a validation set
        accuracy = federated_model.score(X_val_all, y_val_all)
        print(f"Iteration {iteration}: Accuracy = {accuracy}")

        # Check if accuracy has reached the threshold
        if accuracy >= accuracy_threshold:
            # Exporting the model to the ONNX format
            # NOTE(review): XGBRegressor does not document a to_onnx() method;
            # ONNX export normally goes through a converter package - confirm.
            federated_model.to_onnx(model=federated_model, path="federated_model.onnx")
            break

    return federated_model


NameError: ignored