In [17]:
from joblib import load, dump
from io import BytesIO
import numpy as np
import pandas as pd

def get_model_params(model):
    """Qui stiamo serailizzando l'intera foresta"""
    buffer = BytesIO()
    dump(model, buffer)
    buffer.seek(0)
    return [np.frombuffer(buffer.getvalue(), dtype=np.uint8)]

In [20]:
from sklearn.ensemble import RandomForestClassifier

df = pd.DataFrame({'A': np.random.normal(10), 'B': np.random.normal(10), 'label': np.random.choice(['1', '0'], size=10)})
df.head(
)

Unnamed: 0,A,B,label
0,9.648776,8.845481,1
1,9.648776,8.845481,0
2,9.648776,8.845481,1
3,9.648776,8.845481,1
4,9.648776,8.845481,0


In [21]:

rf = RandomForestClassifier(n_estimators=100, max_depth=5)
rf.fit(df, df['label'])
params = get_model_params(rf)
print(params)

[array([128,   4, 149, ..., 117,  98,  46], dtype=uint8)]


In [22]:
print(rf.estimators_)

[DecisionTreeClassifier(max_depth=5, max_features='sqrt',
                       random_state=1181922886), DecisionTreeClassifier(max_depth=5, max_features='sqrt', random_state=301951942), DecisionTreeClassifier(max_depth=5, max_features='sqrt', random_state=932831322), DecisionTreeClassifier(max_depth=5, max_features='sqrt', random_state=473100014), DecisionTreeClassifier(max_depth=5, max_features='sqrt',
                       random_state=1354442794), DecisionTreeClassifier(max_depth=5, max_features='sqrt', random_state=927508512), DecisionTreeClassifier(max_depth=5, max_features='sqrt',
                       random_state=1397883333), DecisionTreeClassifier(max_depth=5, max_features='sqrt',
                       random_state=2006122220), DecisionTreeClassifier(max_depth=5, max_features='sqrt',
                       random_state=1097190279), DecisionTreeClassifier(max_depth=5, max_features='sqrt', random_state=602763055), DecisionTreeClassifier(max_depth=5, max_features='sqrt',
  

In [24]:
def set_model_params(model, params):
    """Deserializzazione"""
    buffer = BytesIO(params[0].tobytes())
    aggregated_model = load(buffer)
    model.n_estimators = len(aggregated_model.estimators_)
    model.estimators_ = aggregated_model.estimators_

    return model

In [25]:
model = set_model_params(rf, params)
print(model)

RandomForestClassifier(max_depth=5)


In [4]:
"""new-new-new-federation: A Flower / sklearn app."""

import numpy as np
import pandas as pd
from flwr_datasets import FederatedDataset
from flwr_datasets.partitioner import IidPartitioner
from sklearn.ensemble import RandomForestClassifier
from joblib import load, dump
from io import BytesIO

fds = None  # Cache FederatedDataset


def load_data(partition_id: int, num_partitions: int):
    """Load partition Kitsune data."""
    global fds
    if fds is None:
        partitioner = IidPartitioner(num_partitions=num_partitions)
        fds = FederatedDataset(
            dataset="n3p7un/KitsuneSystemAttackData_osScanDataset",
            partitioners={"train": partitioner},
        )

    dataset = fds.load_partition(partition_id, "train").with_format("numpy")

    # Get feature column names (exclude label and Unnamed: 0)
    feature_columns = [col for col in dataset.column_names if col not in ["label", "Unnamed: 0"]]

    # Extract features as a list of 1D arrays and stack them into 2D
    feature_arrays = [dataset[col] for col in feature_columns]
    X = np.column_stack(feature_arrays)  # Now shape (num_samples, num_features)
    y = dataset["label"]

    # Split data
    X_train, X_test = X[: int(0.8 * len(X))], X[int(0.8 * len(X)):]
    y_train, y_test = y[: int(0.8 * len(y))], y[int(0.8 * len(y)):]

    #print(f"X_train shape: {X_train.shape}")  # Should be (samples, features)
    #print(f"X_test shape: {X_test.shape}")  # Should be 2D
    return X_train, X_test, y_train, y_test

load_data(1, 2)



KeyboardInterrupt: 

In [None]:
# Load centralized test dataset from fds partition
test = fds.load_partition('test')[0].with_format('numpy')
    # Get feature column names (exclude label and Unnamed: 0)
feature_columns = [col for col in dataset.column_names if col not in ["label", "Unnamed: 0"]]
    # Extract features as a list of 1D arrays and stack them into 2D
feature_arrays = [dataset[col] for col in feature_columns]
X_test = np.column_stack(feature_arrays)
y_test = dataset["label"]