<a href="https://colab.research.google.com/github/kdemertzis/TS_demer/blob/main/fedpcaretonnx.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
# Step 1: Install necessary packages
!pip install pycaret numpy pandas

# Step 2: Generate synthetic data
import numpy as np
import pandas as pd
from sklearn.datasets import make_classification

np.random.seed(42)

n_samples = 1000
n_features = 10
n_classes = 2

X, y = make_classification(n_samples=n_samples, n_features=n_features, n_classes=n_classes, random_state=42)
data = pd.DataFrame(X, columns=[f'feature_{i}' for i in range(n_features)])
data['target'] = y

# Step 3: Split the data for each partner
from sklearn.model_selection import train_test_split

n_partners = 5
partners_data = []

for i in range(n_partners):
    train_data, data = train_test_split(data, train_size=0.2, random_state=i)
    partners_data.append(train_data)

# Step 4: Train models for each partner
from pycaret.classification import setup, compare_models, finalize_model

partners_models = []

for i, partner_data in enumerate(partners_data):
    print(f"Training model for partner {i + 1}")
    grid = setup(data=partner_data, target='target', verbose=False)

    best_model = compare_models(sort='AUC', n_select=1, verbose=False)
    partners_models.append(finalize_model(best_model))

# Step 5: Aggregate models and train a global model
from sklearn.ensemble import VotingClassifier

voting_clf = VotingClassifier(estimators=[(f'partner_{i}', model) for i, model in enumerate(partners_models)], voting='soft')
global_model = voting_clf.fit(partners_data[0].drop(columns=['target']), partners_data[0]['target'])

# Step 6: Evaluate the global model
from sklearn.metrics import classification_report, accuracy_score, roc_auc_score

X_test = data.drop(columns=['target'])
y_test = data['target']

y_pred = global_model.predict(X_test)
y_prob = global_model.predict_proba(X_test)[:, 1]

print("Global Model Evaluation:")
print("Accuracy:", accuracy_score(y_test, y_pred))
print("AUC:", roc_auc_score(y_test, y_prob))
print("Classification Report:\n", classification_report(y_test, y_pred))

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Training model for partner 1
Training model for partner 2
Training model for partner 3
Training model for partner 4
Training model for partner 5
Global Model Evaluation:
Accuracy: 0.9298780487804879
AUC: 0.9694441312436577
Classification Report:
               precision    recall  f1-score   support

           0       0.92      0.95      0.94       181
           1       0.94      0.90      0.92       147

    accuracy                           0.93       328
   macro avg       0.93      0.93      0.93       328
weighted avg       0.93      0.93      0.93       328



In [3]:
# Step 1: Install necessary packages
!pip install pycaret numpy pandas

# Step 2: Generate synthetic data
import numpy as np
import pandas as pd
from sklearn.datasets import make_classification

np.random.seed(42)

n_samples = 1000
n_features = 10
n_classes = 2

X, y = make_classification(n_samples=n_samples, n_features=n_features, n_classes=n_classes, random_state=42)
data = pd.DataFrame(X, columns=[f'feature_{i}' for i in range(n_features)])
data['target'] = y

# Step 3: Split the data for each partner
from sklearn.model_selection import train_test_split

n_partners = 5
partners_data = []

for i in range(n_partners):
    train_data, data = train_test_split(data, train_size=0.2, random_state=i)
    partners_data.append(train_data)

# Step 4: Train models for each partner
from pycaret.classification import setup, compare_models, finalize_model, pull

partners_models = []
partners_best_scores = []

for i, partner_data in enumerate(partners_data):
    print(f"Training model for partner {i + 1}")
    grid = setup(data=partner_data, target='target', verbose=False)
    best_model = compare_models(sort='AUC', n_select=1, verbose=False)
    partners_models.append(finalize_model(best_model))

    # Store and print partner's best model performance
    best_model_score = pull().iloc[0]['AUC']
    partners_best_scores.append(best_model_score)
    print(f"Best Model for Partner {i + 1}: {best_model.__class__.__name__}, AUC: {best_model_score:.4f}")

# Step 5: Aggregate models and train a global model
from sklearn.ensemble import VotingClassifier

voting_clf = VotingClassifier(estimators=[(f'partner_{i}', model) for i, model in enumerate(partners_models)], voting='soft')
global_model = voting_clf.fit(partners_data[0].drop(columns=['target']), partners_data[0]['target'])

# Step 6: Evaluate the global model
from sklearn.metrics import classification_report, accuracy_score, roc_auc_score

X_test = data.drop(columns=['target'])
y_test = data['target']

y_pred = global_model.predict(X_test)
y_prob = global_model.predict_proba(X_test)[:, 1]

print("\nGlobal Model Evaluation:")
print("Accuracy:", accuracy_score(y_test, y_pred))
print("AUC:", roc_auc_score(y_test, y_prob))
print("Classification Report:\n", classification_report(y_test, y_pred))

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Training model for partner 1
Best Model for Partner 1: LGBMClassifier, AUC: 0.9188
Training model for partner 2
Best Model for Partner 2: RandomForestClassifier, AUC: 0.9300
Training model for partner 3
Best Model for Partner 3: AdaBoostClassifier, AUC: 0.9950
Training model for partner 4
Best Model for Partner 4: LogisticRegression, AUC: 0.9354
Training model for partner 5
Best Model for Partner 5: GaussianNB, AUC: 0.9556

Global Model Evaluation:
Accuracy: 0.9237804878048781
AUC: 0.9621904010222874
Classification Report:
               precision    recall  f1-score   support

           0       0.92      0.94      0.93       181
           1       0.93      0.90      0.91       147

    accuracy                           0.92       328
   macro avg       0.92      0.92      0.92       328
weighted avg       0.92      0.92      0.92       328



In [None]:
# Step 1: Install necessary packages
!pip install pycaret numpy pandas

# Step 2: Generate synthetic data
import numpy as np
import pandas as pd
from sklearn.datasets import make_classification

np.random.seed(42)

n_samples = 1000
n_features = 10
n_classes = 2

X, y = make_classification(n_samples=n_samples, n_features=n_features, n_classes=n_classes, random_state=42)
data = pd.DataFrame(X, columns=[f'feature_{i}' for i in range(n_features)])
data['target'] = y

# Step 3: Split the data for each partner
from sklearn.model_selection import train_test_split

n_partners = 5
partners_data = []

for i in range(n_partners):
    train_data, data = train_test_split(data, train_size=0.2, random_state=i)
    partners_data.append(train_data)

# Step 4: Train and tune models for each partner
from pycaret.classification import setup, compare_models, tune_model, finalize_model, pull

partners_models = []
partners_best_scores = []

for i, partner_data in enumerate(partners_data):
    print(f"Training model for partner {i + 1}")
    grid = setup(data=partner_data, target='target', verbose=False)
    best_model = compare_models(sort='AUC', n_select=1, verbose=False)

    # Perform hyperparameter tuning
    print(f"Tuning {best_model.__class__.__name__} for partner {i + 1}")
    tuned_model = tune_model(best_model, optimize='AUC', n_iter=50, verbose=False)
    
    partners_models.append(finalize_model(tuned_model))

    # Store and print partner's best model performance
    best_model_score = pull().iloc[0]['AUC']
    partners_best_scores.append(best_model_score)
    print(f"Best Tuned Model for Partner {i + 1}: {tuned_model.__class__.__name__}, AUC: {best_model_score:.4f}")

# Step 5: Aggregate models and train a global model
from sklearn.ensemble import VotingClassifier

voting_clf = VotingClassifier(estimators=[(f'partner_{i}', model) for i, model in enumerate(partners_models)], voting='soft')
global_model = voting_clf.fit(partners_data[0].drop(columns=['target']), partners_data[0]['target'])

# Step 6: Evaluate the global model
from sklearn.metrics import classification_report, accuracy_score, roc_auc_score

X_test = data.drop(columns=['target'])
y_test = data['target']

y_pred = global_model.predict(X_test)
y_prob = global_model.predict_proba(X_test)[:, 1]

print("\nGlobal Model Evaluation:")
print("Accuracy:", accuracy_score(y_test, y_pred))
print("AUC:", roc_auc_score(y_test, y_prob))
print("Classification Report:\n", classification_report(y_test, y_pred))

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Training model for partner 1
Tuning ExtraTreesClassifier for partner 1
Best Tuned Model for Partner 1: ExtraTreesClassifier, AUC: 0.8889
Training model for partner 2
Tuning LGBMClassifier for partner 2


In [None]:
!pip install onnxmltools skl2onnx

In [None]:
# Import necessary libraries for ONNX conversion
import onnxmltools
from skl2onnx.common.data_types import FloatTensorType

# Convert the global model to ONNX format
initial_type = [('input', FloatTensorType([None, n_features]))]
onnx_model = onnxmltools.convert_sklearn(global_model, initial_types=initial_type, target_opset=13)

# Save the ONNX model to a file
with open("ensemble_model.onnx", "wb") as f:
    f.write(onnx_model.SerializeToString())

print("Ensemble model successfully converted to ONNX format and saved as 'ensemble_model.onnx'")