# Flower and Summaries


In [None]:
## Jump to the next block of code if you already installed the packages.

# Linux
!pip install -q flwr[simulation] torch torchvision matplotlib

# MacOs
#!pip3 install -U 'flwr[simulation]' torch torchvision scipy

In [None]:
import numpy as np

import flwr as fl
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.datasets import load_iris

In [None]:
# Number of simulated clients. The same value is used as the number of data
# partitions, so every client id maps to exactly one partition.
NUM_CLIENTS = 3

def load_data():
    """Load the Iris dataset into a pandas DataFrame.

    Returns:
        A DataFrame with one column per entry of ``iris.feature_names`` plus a
        ``species`` column holding the integer labels from ``iris.target``.
    """
    # Imported locally so the function works even if the notebook cell that
    # imports pandas at module level has not been executed yet.
    import pandas as pd

    iris = load_iris()
    iris_df = pd.DataFrame(data=iris.data, columns=iris.feature_names)

    # Label column (integer-encoded flower species).
    # To use names instead, map {0: 'setosa', 1: 'versicolor', 2: 'virginica'}.
    iris_df['species'] = iris.target

    return iris_df

def partition_data(X, num_clients):
    """Split ``X`` into ``num_clients`` contiguous, non-overlapping slices.

    Every element of ``X`` ends up in exactly one partition. When ``len(X)``
    is not a multiple of ``num_clients`` the first ``len(X) % num_clients``
    partitions receive one extra element, so no trailing rows are dropped.

    Args:
        X: Any object supporting ``len()`` and ``X[start:stop]`` slicing
            (e.g. a list or a pandas DataFrame).
        num_clients: Number of partitions to create; must be at least 1.

    Returns:
        A list of ``num_clients`` slices of ``X``, in order.

    Raises:
        ValueError: If ``num_clients`` is smaller than 1.
    """
    if num_clients < 1:
        raise ValueError(f"num_clients must be >= 1, got {num_clients}")

    base_size, remainder = divmod(len(X), num_clients)
    partitions = []
    start = 0
    for index in range(num_clients):
        # The first `remainder` partitions absorb the leftover elements.
        stop = start + base_size + (1 if index < remainder else 0)
        partitions.append(X[start:stop])
        start = stop
    return partitions

In [None]:
import pandas as pd
from collections import namedtuple

class PandasClient(fl.client.NumPyClient):
    """Flower client that summarises its local data as per-column histograms.

    No model is trained: ``fit`` is used only to send histogram counts and
    bin edges for every column of the local data back to the server.
    """

    def __init__(self, X, cid):
        """Store the local data partition and the client id.

        Args:
            X: Local data; ``fit`` iterates ``X.columns`` and indexes ``X`` by
                column name.
            cid: Client identifier (stored only; not used by the methods here).
        """
        self.X = X
        self.cid = cid

    def compute_hist(self, df, col_name):
        """Return ``(counts, bin_edges)`` of ``np.histogram`` for one column.

        Args:
            df: Object indexable by ``col_name``.
            col_name: Name of the column to summarise.
        """
        # NOTE(review): np.histogram is called with its defaults, so the bin
        # edges are derived from this client's own data range and will differ
        # between clients whose value ranges differ.
        hist, bins = np.histogram(df[col_name])
        return hist, bins

    def fit(self, parameters, config):
        """Compute a histogram for every local column.

        Args:
            parameters: Ignored; there are no model weights.
            config: Ignored.

        Returns:
            A 3-tuple ``(arrays, num_examples, metrics)`` where ``arrays`` is
            ``[counts, edges]``: ``counts[i]`` and ``edges[i]`` are the
            histogram counts and bin edges of the i-th column of ``self.X``,
            ``num_examples`` is ``len(self.X)`` and ``metrics`` is empty.
        """
        hist_list = []
        bins_list = []
        # Execute the query locally, one column at a time.
        for col in self.X.columns:
            hist, bins = self.compute_hist(self.X, col)
            hist_list.append(hist)
            bins_list.append(bins)

        # Return genuine ndarrays (one row per column) rather than nested
        # Python lists, instead of relying on implicit coercion when the
        # result is serialized.
        array_of_arrays = [np.asarray(hist_list), np.asarray(bins_list)]

        return array_of_arrays, len(self.X), {}


In [None]:
def client_fn(cid: str) -> fl.client.Client:
    """Build the Flower client that owns the data partition numbered ``cid``.

    The full dataset is loaded and split into ``NUM_CLIENTS`` partitions on
    every call; the client keeps only the partition whose index is ``int(cid)``.
    """
    # The number of partitions must match the number of clients.
    all_partitions = partition_data(load_data(), NUM_CLIENTS)

    # The client id doubles as the partition index, so each client sees a
    # different slice of the data.
    local_data = all_partitions[int(cid)]

    # A single Flower client represents a single organization/device.
    # For Flower version > 1.8 return PandasClient(local_data, cid).to_client()
    # instead; the plain client below is for Flower version < 1.8.
    return PandasClient(local_data, cid)

In [None]:
from typing import Dict, List, Optional, Tuple, Union

import numpy as np

import flwr as fl
from flwr.common import (
    EvaluateIns,
    EvaluateRes,
    FitIns,
    FitRes,
    Parameters,
    Scalar,
    ndarrays_to_parameters,
    parameters_to_ndarrays,
)
from flwr.server.client_manager import ClientManager
from flwr.server.client_proxy import ClientProxy
from flwr.server.strategy import Strategy


class AggregateHistogram(Strategy):
    """Server strategy that combines client histograms instead of model weights.

    Clients are expected to return two arrays from ``fit``: histogram counts
    and bin edges, each with one row per data column. Only the first two
    rows (sepal length and sepal width) are aggregated.
    """

    def initialize_parameters(
            self, client_manager: Optional[ClientManager] = None
    ) -> Optional[Parameters]:
        """Return ``None``: this strategy supplies no initial parameters."""
        return None

    def configure_fit(
            self, server_round: int, parameters: Parameters, client_manager: ClientManager
    ) -> List[Tuple[ClientProxy, FitIns]]:
        """Sample clients and send each the same fit instruction with an empty config."""
        config = {}
        fit_ins = FitIns(parameters, config)
        clients = client_manager.sample(num_clients=NUM_CLIENTS, min_num_clients=2)
        return [(client, fit_ins) for client in clients]

    def aggregate_fit(
            self,
            server_round: int,
            results: List[Tuple[ClientProxy, FitRes]],
            failures: List[Union[Tuple[ClientProxy, FitRes], BaseException]],
    ) -> Tuple[Optional[Parameters], Dict[str, Scalar]]:
        """Combine the sepal length / sepal width histograms of all clients.

        Args:
            server_round: Current round number (unused).
            results: Successful client results; each ``FitRes.parameters``
                decodes to ``[counts, edges]`` with one row per column.
            failures: Failed client calls (ignored).

        Returns:
            ``(parameters, metrics)``. ``metrics`` holds the summed counts
            under ``'sepal_length_h'`` / ``'sepal_width_h'`` and the bin edges
            under ``'sepal_length_b'`` / ``'sepal_width_b'``. ``parameters``
            encodes the labelled, concatenated counts. When ``results`` is
            empty, ``(None, {})`` is returned.
        """
        # Nothing to aggregate (e.g. every client failed): report no update
        # instead of crashing while concatenating scalar placeholders.
        if not results:
            return None, {}

        # Convert results
        values_aggregated = [
            parameters_to_ndarrays(fit_res.parameters) for _, fit_res in results
        ]
        num_results = len(values_aggregated)

        # For simplification, only the first two columns of the dataset are
        # used: row 0 is sepal length, row 1 is sepal width.
        sepal_length_agg_hist = sum(val[0][0] for val in values_aggregated)
        sepal_width_agg_hist = sum(val[0][1] for val in values_aggregated)

        # Bin edges are positions, not counts, so they are averaged rather
        # than summed (a sum yields edges roughly num_results times too large).
        # NOTE(review): this is exact only when all clients use identical bin
        # edges; with per-client edges the combined histogram is approximate.
        sepal_length_agg_bin = sum(val[1][0] for val in values_aggregated) / num_results
        sepal_width_agg_bin = sum(val[1][1] for val in values_aggregated) / num_results

        dict_result = {
            'sepal_length_h': sepal_length_agg_hist,
            'sepal_width_h': sepal_width_agg_hist,
            'sepal_length_b': sepal_length_agg_bin,
            'sepal_width_b': sepal_width_agg_bin
        }

        # Labels and counts are concatenated into one flat array.
        ndarr = np.concatenate(
            (["Sepal Length"], sepal_length_agg_hist, ["Sepal Width"], sepal_width_agg_hist)
        )
        return ndarrays_to_parameters(ndarr), dict_result

    def evaluate(
            self, server_round: int, parameters: Parameters
    ) -> Optional[Tuple[float, Dict[str, Scalar]]]:
        """Return a constant loss of 0 and no metrics; nothing is evaluated."""
        return 0, {}

    def configure_evaluate(
            self, server_round: int, parameters: Parameters, client_manager: ClientManager
    ) -> List[Tuple[ClientProxy, EvaluateIns]]:
        """Return no evaluate instructions: client-side evaluation is not used."""
        return []

    def aggregate_evaluate(
            self,
            server_round: int,
            results: List[Tuple[ClientProxy, EvaluateRes]],
            failures: List[Union[Tuple[ClientProxy, EvaluateRes], BaseException]],
    ) -> Tuple[Optional[float], Dict[str, Scalar]]:
        """Return no aggregated loss and no metrics."""
        return None, {}


In [None]:
def fit_config(server_round) -> Dict:
    """Build the per-round config that tells a client the current round number."""
    return {"server_round": server_round}

# Define the strategy (server-side aggregation of the client histograms)
strategy = AggregateHistogram()

# Simulation configuration
# Check the Flower Framework documentation for more details about Flower Simulations
# and how to set up the client_resources
client_resources = {"num_cpus": 1}
# One simulated client per data partition
num_clients = NUM_CLIENTS
# Number of federated rounds
num_rounds = 1

In [None]:
# Run the simulation; the number of rounds comes from `num_rounds` instead of
# a hard-coded literal so the configuration cell stays the single source of truth.
result = fl.simulation.start_simulation(
    strategy=strategy,  # server-side logic that aggregates the client results
    client_fn=client_fn,  # a function to construct a client from its id
    num_clients=num_clients,  # total number of clients in the experiment
    config=fl.server.ServerConfig(num_rounds=num_rounds),  # number of federated rounds
    client_resources=client_resources,
)

In [None]:
# Show the object returned by start_simulation.
print(result)

In [None]:
import matplotlib.pyplot as plt

# Metrics reported by the strategy's aggregate_fit. Indexing with [key] (not
# .get) makes a missing metric fail with a clear KeyError; [0][1] takes the
# value stored in the first recorded entry.
fit_metrics = result.metrics_distributed_fit

# Aggregated histogram values
length = fit_metrics['sepal_length_h'][0][1]
width = fit_metrics['sepal_width_h'][0][1]

# Aggregated bin values
length_b = fit_metrics['sepal_length_b'][0][1]
width_b = fit_metrics['sepal_width_b'][0][1]

# Calculate the centers of the bins for plotting
bin_centers_l = (length_b[:-1] + length_b[1:]) / 2
bin_centers_w = (width_b[:-1] + width_b[1:]) / 2

# Create the figure and subplots
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 5))

# Each series has its own subplot, so bars are centered on their bin (no
# side-by-side offset) and each bar is exactly as wide as its bin.
panels = (
    (ax1, 'Sepal Length', length, length_b, bin_centers_l, 'blue', 'Length'),
    (ax2, 'Sepal Width', width, width_b, bin_centers_w, 'green', 'Width'),
)
for ax, title, counts, edges, centers, color, label in panels:
    ax.bar(centers, counts, width=np.diff(edges), color=color, alpha=0.7,
           edgecolor='black', label=label)
    ax.set_title(title)
    ax.set_xlabel('Value')
    ax.set_ylabel('Frequency')
    ax.set_xticks(centers)

# Adjust layout to avoid overlapping
plt.tight_layout()

# Show the graph
plt.show()
