In [21]:
import torch
import pandas as pd
import numpy as np
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder
import matplotlib.pyplot as plt
import seaborn as sns

from simple_einet.einet import Einet, EinetConfig
from simple_einet.layers.distributions.piecewise_linear import PiecewiseLinear
from simple_einet.dist import DataType, Domain

### Import, Preprocess and Split the Dataset 

Traditional non-federated learning mode

In [39]:
data = fetch_openml('adult', version=2, as_frame=True)
df = data.frame

df = df.replace('?', np.nan)
df_clean = df.dropna()
X = df_clean.drop('class', axis=1)
y = df_clean['class']


In [40]:
numeric_features = X.select_dtypes(include=['int64', 'float64']).columns.tolist()
categorical_features = X.select_dtypes(include=['category', 'object']).columns.tolist()

print(f"Numerical feat: ({len(numeric_features)}): {numeric_features}")
print(f"Categorical feat: ({len(categorical_features)}): {categorical_features}")

Numerical feat: (6): ['age', 'fnlwgt', 'education-num', 'capital-gain', 'capital-loss', 'hours-per-week']
Categorical feat: (8): ['workclass', 'education', 'marital-status', 'occupation', 'relationship', 'race', 'sex', 'native-country']


In [19]:
preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), numeric_features),
        ('cat', LabelEncoder(), categorical_features)
    ]
)
X_numeric = StandardScaler().fit_transform(X[numeric_features])
X_numeric_df = pd.DataFrame(X_numeric, columns=numeric_features, index=X.index)

X_categorical_encoded = pd.DataFrame(index=X.index)
for col in categorical_features:
    le = LabelEncoder()
    X_categorical_encoded[col] = le.fit_transform(X[col].astype(str))
    
X_processed = pd.concat([X_numeric_df, X_categorical_encoded], axis=1)
y_encoded = LabelEncoder().fit_transform(y)

print(f"X shape after preprocessed: {X_processed.shape}")
print(f"Target unique: {np.unique(y_encoded)}")

X shape after preprocessed: (45222, 14)
Target unique: [0 1]


In [20]:
X_train, X_test, y_train, y_test = train_test_split(
    X_processed.values, y_encoded, test_size=0.33, random_state=42, stratify=y_encoded
)

X_train_tensor = torch.tensor(X_train).float()
X_test_tensor = torch.tensor(X_test).float()
y_train_tensor = torch.tensor(y_train).long()
y_test_tensor = torch.tensor(y_test).long()

print(f"X Train shape: {X_train_tensor.shape}")
print(f"X Test shape: {X_test_tensor.shape}")

X Train shape: torch.Size([30298, 14])
X Test shape: torch.Size([14924, 14])


### Construct the domain used for Einet with Piecewise Distribution

In [23]:
domains = []
all_features = numeric_features + categorical_features

for i, feature in enumerate(all_features):
    if feature in numeric_features:
        # 數值型特徵使用連續域
        domains.append(Domain(data_type=DataType.CONTINUOUS))
    else:
        # 類別型特徵使用離散域
        # 獲取唯一值作為離散值域
        unique_values = sorted(X_processed[feature].unique())
        domains.append(Domain(data_type=DataType.DISCRETE, values=unique_values))

print(f"Defined {len(domains)} feature domains.")

Defined 14 feature domains.


### Configure Einet

In [32]:
# 重塑訓練資料為 EiNet 所需的格式 [batch_size, channels, features]
# 這裡 channels=1，因為我們沒有多通道資料
X_train_reshaped = X_train_tensor.unsqueeze(1)  # [batch_size, 1, features]

# 配置 EiNet 使用 PiecewiseLinear 分布
config = EinetConfig(
    num_features=X_train_tensor.shape[1],  # 特徵數量
    depth=2,  # 網路深度
    num_sums=10,  # sum nodes 數量
    num_leaves=10,  # leaf nodes 數量  
    num_repetitions=5,  # 重複數量
    num_classes=2,  # 分類類別數（<=50K, >50K）
    leaf_type=PiecewiseLinear,  # 使用 PiecewiseLinear 分布
    leaf_kwargs={'alpha': 0.1},  # Laplace 平滑參數
    dropout=0.0
)

# 初始化模型
model = Einet(config)
print(f"模型參數數量: {sum(p.numel() for p in model.parameters())}")

模型參數數量: 1119


In [33]:
model.leaf.base_leaf.initialize(X_train_reshaped, domains)

Initializing PiecewiseLinear Leaf Layer: 100%|██████████| 5/5 [00:00<00:00,  8.41it/s]


### Train the model

In [194]:
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
cross_entropy = torch.nn.CrossEntropyLoss()

def accuracy(model, X, y):
    with torch.no_grad():
        outputs = model(X)
        predictions = outputs.argmax(-1) 
        correct = (predictions == y).sum()
        total = y.shape[0]
        return 100. * correct / total


def f1(model, X, y, num_classes=None):
    with torch.no_grad():
        outputs = model(X)
        predictions = outputs.argmax(-1)
        # 假設 y 和 predictions 為 1D tensor
        if num_classes is None:
            num_classes = int(torch.max(y).item()) + 1
        f1_scores = []
        for c in range(num_classes):
            tp = ((predictions == c) & (y == c)).sum().item()
            fp = ((predictions == c) & (y != c)).sum().item()
            fn = ((predictions != c) & (y == c)).sum().item()
            if tp + fp + fn == 0:
                f1 = 0.0  # 防止0除
            else:
                precision = tp / (tp + fp) if (tp + fp) != 0 else 0.0
                recall = tp / (tp + fn) if (tp + fn) != 0 else 0.0
                if precision + recall == 0:
                    f1 = 0.0
                else:
                    f1 = 2 * (precision * recall) / (precision + recall)
            f1_scores.append(f1)
        # 取 macro-average F1
        return 100. * sum(f1_scores) / len(f1_scores)


X_test_reshaped = X_test_tensor.unsqueeze(1)

num_epochs = 10
print("Start training...")
for epoch in range(num_epochs):
    optimizer.zero_grad()
    log_likelihoods = model(X_train_reshaped)
    loss = cross_entropy(log_likelihoods, y_train_tensor)
    loss.backward()
    optimizer.step()
    
    if (epoch + 1) % 5 == 0:
        acc_train = accuracy(model, X_train_reshaped, y_train_tensor)
        acc_test = accuracy(model, X_test_reshaped, y_test_tensor)
        print(f"Epoch: {epoch+1:2d}, Loss: {loss.item():.4f}, "
              f"Train Acc: {acc_train:.2f}%, Test Acc: {acc_test:.2f}%")

print("Finished training！")

Start training...
Epoch:  5, Loss: 0.4959, Train Acc: 64.07%, Test Acc: 62.89%
Epoch: 10, Loss: 0.4865, Train Acc: 64.86%, Test Acc: 64.05%
Finished training！


### Going Federated by constructing a Data Partitioner for 
- Horizontal 
- Vertical
- Hybrid 

In [36]:
import random
from typing import Dict, List

In [84]:
# 健壯版聯邦學習資料分割器（與之前相同）
class FederatedDataPartitionerRobust:
    """健壯版聯邦學習資料分割器"""
    
    def __init__(self, X, y, feature_names, numeric_features, categorical_features):
        self.X = X
        self.y = y
        self.feature_names = feature_names
        self.numeric_features = numeric_features
        self.categorical_features = categorical_features
        
    def horizontal_partition(self, num_clients: int = 3, random_state: int = 42) -> Dict:
        """水平分割：相同特徵，不同樣本"""
        print(f"🔄 執行水平分割，分成 {num_clients} 個客戶端...")
        
        np.random.seed(random_state)
        n_samples = len(self.X)
        indices = np.arange(n_samples)
        np.random.shuffle(indices)
        
        clients = {}
        samples_per_client = n_samples // num_clients
        
        for i in range(num_clients):
            start_idx = i * samples_per_client
            if i == num_clients - 1:
                end_idx = n_samples
            else:
                end_idx = (i + 1) * samples_per_client
                
            client_indices = indices[start_idx:end_idx]
            
            clients[f'client_{i}'] = {
                'X': self.X[client_indices],
                'y': self.y[client_indices],
                'features': self.feature_names,
                'numeric_features': self.numeric_features,
                'categorical_features': self.categorical_features,
                'n_samples': len(client_indices),
                'n_features': len(self.feature_names),
                'sample_indices': client_indices,
                'feature_indices': list(range(len(self.feature_names))),
                'feature_overlap': self.feature_names  # 完全重疊
            }
            
            print(f"  客戶端 {i}: {len(client_indices)} 樣本, {len(self.feature_names)} 特徵")
            
        return {
            'type': 'horizontal',
            'clients': clients,
            'total_samples': n_samples,
            'total_features': len(self.feature_names)
        }
    
    def vertical_partition(self, num_clients: int = 3, random_state: int = 42) -> Dict:
        """垂直分割：相同樣本，不同特徵"""
        print(f"🔄 執行垂直分割，分成 {num_clients} 個客戶端...")
        
        random.seed(random_state)
        all_features = self.feature_names.copy()
        random.shuffle(all_features)
        
        features_per_client = len(all_features) // num_clients
        clients = {}
        
        for i in range(num_clients):
            start_idx = i * features_per_client
            if i == num_clients - 1:
                end_idx = len(all_features)
            else:
                end_idx = (i + 1) * features_per_client
                
            client_features = all_features[start_idx:end_idx]
            client_numeric = [f for f in client_features if f in self.numeric_features]
            client_categorical = [f for f in client_features if f in self.categorical_features]
            
            feature_indices = [self.feature_names.index(f) for f in client_features]
            
            clients[f'client_{i}'] = {
                'X': self.X[:, feature_indices],
                'y': self.y,
                'features': client_features,
                'numeric_features': client_numeric,
                'categorical_features': client_categorical,
                'n_samples': len(self.X),
                'n_features': len(client_features),
                'feature_indices': feature_indices,
                'sample_indices': list(range(len(self.X))),
                'feature_overlap': []  # 垂直分割無特徵重疊
            }
            
            print(f"  客戶端 {i}: {len(self.X)} 樣本, {len(client_features)} 特徵")
            
        return {
            'type': 'vertical', 
            'clients': clients,
            'total_samples': len(self.X),
            'total_features': len(self.feature_names)
        }
    
    def hybrid_partition(self, num_clients: int = 4, 
                        sample_overlap_ratio: float = 0.3,
                        feature_overlap_ratio: float = 0.2,
                        random_state: int = 42) -> Dict:
        """健壯版混合分割：正確實現樣本和特徵重疊"""
        print(f"🔄 執行健壯版混合分割，分成 {num_clients} 個客戶端...")
        print(f"  樣本重疊比例: {sample_overlap_ratio:.1%}")
        print(f"  特徵重疊比例: {feature_overlap_ratio:.1%}")
        
        np.random.seed(random_state)
        random.seed(random_state)
        
        n_samples = len(self.X)
        n_features = len(self.feature_names)
        
        # 樣本分配策略
        base_samples_per_client = max(1, int(n_samples * 0.5 / num_clients))
        overlap_sample_count = int(n_samples * sample_overlap_ratio)
        all_sample_indices = np.arange(n_samples, dtype=int)
        np.random.shuffle(all_sample_indices)
        
        base_samples_end = min(base_samples_per_client * num_clients, n_samples)
        base_sample_indices = all_sample_indices[:base_samples_end]
        
        if overlap_sample_count > 0 and base_samples_end < n_samples:
            remaining_samples = all_sample_indices[base_samples_end:]
            overlap_sample_indices = remaining_samples[:min(overlap_sample_count, len(remaining_samples))]
        else:
            overlap_sample_indices = np.array([], dtype=int)
        
        # 特徵分配策略
        base_features_per_client = max(1, int(n_features * 0.6 / num_clients))
        overlap_feature_count = int(n_features * feature_overlap_ratio)
        all_feature_indices = np.arange(n_features, dtype=int)
        np.random.shuffle(all_feature_indices)
        
        base_features_end = min(base_features_per_client * num_clients, n_features)
        base_feature_indices = all_feature_indices[:base_features_end]
        
        if overlap_feature_count > 0 and base_features_end < n_features:
            remaining_features = all_feature_indices[base_features_end:]
            overlap_feature_indices = remaining_features[:min(overlap_feature_count, len(remaining_features))]
        else:
            overlap_feature_indices = np.array([], dtype=int)
        
        print(f"  基礎樣本: {len(base_sample_indices)} 個，重疊樣本池: {len(overlap_sample_indices)} 個")
        print(f"  基礎特徵: {len(base_feature_indices)} 個，重疊特徵池: {len(overlap_feature_indices)} 個")
        
        # 為每個客戶端分配資料
        clients = {}
        
        for i in range(num_clients):
            # 樣本分配
            client_base_start = i * base_samples_per_client
            client_base_end = min((i + 1) * base_samples_per_client, len(base_sample_indices))
            client_base_samples = base_sample_indices[client_base_start:client_base_end]
            
            if len(overlap_sample_indices) > 0:
                overlap_sample_size = min(len(overlap_sample_indices), len(client_base_samples) // 2)
                if overlap_sample_size > 0:
                    client_overlap_samples = np.random.choice(
                        overlap_sample_indices, size=overlap_sample_size, replace=False
                    )
                else:
                    client_overlap_samples = np.array([], dtype=int)
            else:
                client_overlap_samples = np.array([], dtype=int)
            
            if len(client_overlap_samples) > 0:
                client_sample_indices = np.concatenate([client_base_samples, client_overlap_samples])
            else:
                client_sample_indices = client_base_samples.copy()
            client_sample_indices = np.unique(client_sample_indices)
            
            # 特徵分配
            client_base_feat_start = i * base_features_per_client
            client_base_feat_end = min((i + 1) * base_features_per_client, len(base_feature_indices))
            client_base_features = base_feature_indices[client_base_feat_start:client_base_feat_end]
            
            if len(overlap_feature_indices) > 0:
                guaranteed_overlap_size = max(1, len(overlap_feature_indices) // 2)
                guaranteed_overlap_features = overlap_feature_indices[:guaranteed_overlap_size]
                client_overlap_features = guaranteed_overlap_features
            else:
                client_overlap_features = np.array([], dtype=int)
            
            if len(client_overlap_features) > 0:
                client_feature_indices = np.concatenate([client_base_features, client_overlap_features])
            else:
                client_feature_indices = client_base_features.copy()
            client_feature_indices = np.unique(client_feature_indices.astype(int))
            
            # 確保至少有一個特徵
            if len(client_feature_indices) == 0:
                client_feature_indices = np.array([0], dtype=int)
            
            # 獲取特徵名稱
            client_features = [self.feature_names[idx] for idx in client_feature_indices]
            client_numeric = [f for f in client_features if f in self.numeric_features]
            client_categorical = [f for f in client_features if f in self.categorical_features]
            
            # 計算重疊特徵
            overlap_features_names = []
            if len(client_overlap_features) > 0:
                overlap_features_names = [self.feature_names[idx] for idx in client_overlap_features]
            
            clients[f'client_{i}'] = {
                'X': self.X[np.ix_(client_sample_indices, client_feature_indices)],
                'y': self.y[client_sample_indices],
                'features': client_features,
                'numeric_features': client_numeric,
                'categorical_features': client_categorical,
                'n_samples': len(client_sample_indices),
                'n_features': len(client_features),
                'feature_indices': client_feature_indices,
                'sample_indices': client_sample_indices,
                'base_sample_count': len(client_base_samples),
                'overlap_sample_count': len(client_overlap_samples),
                'base_feature_count': len(client_base_features),
                'overlap_feature_count': len(client_overlap_features),
                'feature_overlap': overlap_features_names
            }
            
            print(f"  客戶端 {i}: {len(client_sample_indices)} 樣本 × {len(client_features)} 特徵")
        
        return {
            'type': 'hybrid_robust',
            'clients': clients,
            'total_samples': n_samples,
            'total_features': n_features,
            'sample_overlap_ratio': sample_overlap_ratio,
            'feature_overlap_ratio': feature_overlap_ratio
        }


In [208]:
from tqdm import tqdm
from sklearn.metrics import accuracy_score, f1_score
import time


def _accuracy(model, X, y):
    with torch.no_grad():
        outputs = model(X)
        predictions = outputs.argmax(-1) 
        correct = (predictions == y).sum()
        total = y.shape[0]
        return 100. * correct / total


def _f1_score(model, X, y, num_classes=None):
    with torch.no_grad():
        outputs = model(X)
        predictions = outputs.argmax(-1)
        # 假設 y 和 predictions 為 1D tensor
        if num_classes is None:
            num_classes = int(torch.max(y).item()) + 1
        f1_scores = []
        for c in range(num_classes):
            tp = ((predictions == c) & (y == c)).sum().item()
            fp = ((predictions == c) & (y != c)).sum().item()
            fn = ((predictions != c) & (y == c)).sum().item()
            if tp + fp + fn == 0:
                f1 = 0.0  # 防止0除
            else:
                precision = tp / (tp + fp) if (tp + fp) != 0 else 0.0
                recall = tp / (tp + fn) if (tp + fn) != 0 else 0.0
                if precision + recall == 0:
                    f1 = 0.0
                else:
                    f1 = 2 * (precision * recall) / (precision + recall)
            f1_scores.append(f1)
        # 取 macro-average F1
        return 100. * sum(f1_scores) / len(f1_scores)
    
# 基於 simple-einet API 的聯邦學習訓練器
class FederatedEiNetTrainer:
    """
    聯邦 EiNet 訓練器 - 使用 simple-einet API 風格
    """
    
    def __init__(self, partition_info: Dict):
        self.partition_info = partition_info
        self.client_models = {}
        self.client_domains = {}
        self.training_history = {}
        
    def create_domains(self, features: List[str], numeric_features: List[str], 
                      categorical_features: List[str], X_processed: pd.DataFrame) -> List:
        """為給定特徵創建 domains"""
        domains = []
        
        for feature in features:
            if feature in numeric_features:
                if feature in X_processed.columns:
                    min_val = float(X_processed[feature].min())
                    max_val = float(X_processed[feature].max())
                    domains.append(Domain.continuous_range(min_val, max_val))
                else:
                    # 如果特徵不在 X_processed 中，使用預設範圍
                    domains.append(Domain.continuous_range(-3.0, 3.0))
            else:
                if feature in X_processed.columns:
                    values = sorted(X_processed[feature].unique().tolist())
                    domains.append(Domain.discrete_bins(values))
                else:
                    # 如果特徵不在 X_processed 中，使用預設值
                    domains.append(Domain.discrete_bins([0, 1]))
                
        return domains
    
    def train_client(self, client_id: str, client_data: Dict, X_processed: pd.DataFrame,
                    epochs: int = 100, verbose: bool = False) -> Dict:
        """訓練單個客戶端的 EiNet 模型"""
        
        X_client = client_data['X']
        y_client = client_data['y']
        
        X_client_reshaped = X_client.unsqueeze(1)
        
        # 創建該客戶端特徵的 domains
        domains = self.create_domains(
            client_data['features'],
            client_data['numeric_features'],
            client_data['categorical_features'],
            X_processed,
        )
        
        num_features = client_data['n_features']
        
        # 動態調整模型複雜度
        if num_features < 3:
            depth, num_sums, num_leaves = 1, 4, 4
        elif num_features < 6:
            depth, num_sums, num_leaves = 1, 8, 8  
        else:
            depth, num_sums, num_leaves = 2, 12, 12
            
        config = EinetConfig(
            num_features=num_features,
            depth=depth,
            num_sums=num_sums,
            num_leaves=num_leaves,
            num_repetitions=3,
            num_classes=2,
            leaf_type=PiecewiseLinear,
            leaf_kwargs={'alpha': 0.1},
            dropout=0.0
        )
        
        model = Einet(config)
        model.leaf.base_leaf.initialize(X_client_reshaped, domains)
        
        cross_entropy = torch.nn.CrossEntropyLoss()
        optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
        
        if verbose:
            print(f"    📊 模型配置: depth={depth}, sums={num_sums}, leaves={num_leaves}")
            print(f"    🔧 特徵域: {len(domains)} 個 domain")
        
        start_time = time.time()
        
        for epoch in range(epochs):
            optimizer.zero_grad()
            ll = model(X_client_reshaped)
            loss = cross_entropy(ll, y_client)
            loss.backward()
            optimizer.step()
            
            if (epoch + 1) % 5 == 0:
                acc_train = _accuracy(model, X_client_reshaped, y_client)
                f1_train = _f1_score(model, X_client_reshaped, y_client)
                print(f"Epoch: {epoch+1:2d}, Loss: {loss.item():.4f}, "
                      f"Train Acc: {acc_train:.2f}%, F1: {f1_train:.2f}%")
            
        training_time = time.time() - start_time
        
        train_accuracy = _accuracy(model, X_client_reshaped, y_client)
        train_f1 = _f1_score(model, X_client_reshaped, y_client)
        
        if verbose:
            print(f"    ✅ 訓練準確率: {train_accuracy:.3f}")
            print(f"    📈 訓練 F1 分數: {train_f1:.3f}")
            print(f"    ⏱️  訓練時間: {training_time:.3f} 秒")
        
        return {
            'client_id': client_id,
            'model': model,
            'domains': domains,
            'train_accuracy': train_accuracy,
            'train_f1': train_f1,
            'training_time': training_time,
            'config': config
        }
    
    def train_federated_learning(self, X_processed: pd.DataFrame, epochs: int = 100, 
                               verbose: bool = True) -> Dict:
        """執行聯邦學習訓練"""
        
        print(f"\n🚀 開始 {self.partition_info['type']} 聯邦學習訓練...")
        print(f"訓練參數: epochs={epochs}")
        
        start_time = time.time()
        results = {}
        
        # 訓練每個客戶端
        for client_id, client_data in self.partition_info['clients'].items():
            if verbose:
                print(f"\n📍 訓練 {client_id}...")
                print(f"   資料規模: {client_data['n_samples']} 樣本 × {client_data['n_features']} 特徵")
                if client_data.get('feature_overlap'):
                    print(f"   🔗 重疊特徵: {len(client_data['feature_overlap'])} 個 {client_data['feature_overlap']}")
                
            # 訓練客戶端模型
            client_result = self.train_client(
                client_id, client_data, X_processed, epochs, verbose=verbose
            )
            
            # 儲存結果
            self.client_models[client_id] = client_result['model']
            self.client_domains[client_id] = client_result['domains']
            
            results[client_id] = {
                'train_accuracy': client_result['train_accuracy'],
                'train_f1': client_result['train_f1'],
                'training_time': client_result['training_time'],
                'n_samples': client_data['n_samples'],
                'n_features': client_data['n_features'],
                'feature_overlap': client_data.get('feature_overlap', []),
                'config': client_result['config'],
                'domains_count': len(client_result['domains'])
            }
            
            if verbose:
                print(f"   🎯 {client_id} 訓練完成")
        
        # 計算整體統計
        total_samples = sum(r['n_samples'] for r in results.values())
        weighted_accuracy = sum(
            r['train_accuracy'] * r['n_samples'] for r in results.values()
        ) / total_samples
        
        weighted_f1 = sum(
            r['train_f1'] * r['n_samples'] for r in results.values()
        ) / total_samples
        
        total_time = time.time() - start_time
        
        if verbose:
            print(f"\n📊 {self.partition_info['type']} 聯邦學習完成！")
            print(f"   ⏱️  總訓練時間: {total_time:.2f} 秒")
            print(f"   🎯 加權平均訓練準確率: {weighted_accuracy:.3f}")
            print(f"   📈 加權平均 F1 分數: {weighted_f1:.3f}")
            print(f"   🏢 參與客戶端: {len(results)} 個")
            print(f"   📊 總樣本數: {total_samples}")
        
        return {
            'type': self.partition_info['type'],
            'client_results': results,
            'weighted_accuracy': weighted_accuracy,
            'weighted_f1': weighted_f1,
            'total_training_time': total_time,
            'total_samples': total_samples,
            'num_clients': len(results)
        }
    
    def evaluate_on_test(self, X_test, y_test, test_feature_names) -> Dict:
        """在測試集上評估聯邦模型"""
        
        print(f"\n📋 在測試集上評估聯邦 EiNet 模型...")
        
        client_evaluations = {}
        predictions_ensemble = []
        probabilities_ensemble = []
        
        for client_id, model in self.client_models.items():
            client_data = self.partition_info['clients'][client_id]
            
            # 找到客戶端特徵在測試集中的對應索引
            client_feature_indices = []
            for feature in client_data['features']:
                if feature in test_feature_names:
                    client_feature_indices.append(test_feature_names.index(feature))
            
            if len(client_feature_indices) == 0:
                print(f"   ⚠️  {client_id}: 沒有對應的測試特徵")
                continue
                
            # 提取客戶端對應的測試特徵
            X_test_client = X_test[:, client_feature_indices]
            
            # 按照 simple-einet 風格 reshape
            X_test_client_reshaped = torch.tensor(X_test_client).unsqueeze(1)
            
            # 預測
            try:
                acc = accuracy(model, X_test_client_reshaped, y_test)
                fscore = f1(model, X_test_client_reshaped, torch.from_numpy(y_test))
                
                probs = torch.exp(model(X_test_client_reshaped))
                predictions = probs.argmax(dim=-1)  
                
                client_evaluations[client_id] = {
                    'accuracy': acc,
                    'f1_score': fscore,
                    'n_test_features': len(client_feature_indices),
                    'predictions': predictions,
                }
                
                predictions_ensemble.append(predictions.detach().numpy())
                probabilities_ensemble.append(probs.detach().numpy())
                
                print(f"   {client_id}: 準確率 {acc:.3f}, F1 {fscore:.3f} ({len(client_feature_indices)} 特徵)")
                
            except Exception as e:
                print(f"   ❌ {client_id}: 評估失敗 - {str(e)}")
        
        # 集成預測（簡單投票和平均機率）
        if predictions_ensemble and probabilities_ensemble:
            # 投票集成
            predictions_array = np.array(predictions_ensemble)
            ensemble_predictions_vote = np.apply_along_axis(
                lambda x: np.bincount(x).argmax(), axis=0, arr=predictions_array
            )
            
            # 機率平均集成
            ensemble_probabilities = np.mean(probabilities_ensemble, axis=0)
            ensemble_predictions_prob = np.argmax(ensemble_probabilities, axis=1)
            
            vote_accuracy = accuracy_score(y_test, ensemble_predictions_vote)
            vote_f1 = f1_score(y_test, ensemble_predictions_vote, average='weighted')
            # 
            prob_accuracy = accuracy_score(y_test, ensemble_predictions_prob)
            prob_f1 = f1_score(y_test, ensemble_predictions_prob, average='weighted')
            
            # 選擇更好的集成方法
            if prob_accuracy >= vote_accuracy:
                ensemble_accuracy = prob_accuracy
                ensemble_f1 = prob_f1
                ensemble_predictions = ensemble_predictions_prob
                ensemble_method = "機率平均"
            else:
                ensemble_accuracy = vote_accuracy
                ensemble_f1 = vote_f1
                ensemble_predictions = ensemble_predictions_vote
                ensemble_method = "投票"
                
        else:
            ensemble_accuracy = 0.0
            ensemble_f1 = 0.0
            ensemble_predictions = None
            ensemble_method = "無"
        
        print(f"\n🎯 集成結果 ({ensemble_method}):")
        print(f"   準確率: {ensemble_accuracy:.3f}")
        print(f"   F1 分數: {ensemble_f1:.3f}")
        
        return {
            'client_evaluations': client_evaluations,
            'ensemble_accuracy': ensemble_accuracy,
            'ensemble_f1': ensemble_f1,
            'ensemble_predictions': ensemble_predictions,
            'ensemble_method': ensemble_method
        }


In [209]:
partitioner = FederatedDataPartitionerRobust(
    X=X_train_tensor, 
    y=y_train_tensor,
    feature_names=X_processed.columns.tolist(),
    numeric_features=numeric_features,
    categorical_features=categorical_features
)

In [213]:
# 實驗 1: 水平聯邦學習 - Simple-EiNet 風格
print("\n" + "="*60)
print("🔵 實驗 1: 水平聯邦學習 (使用 Simple-EiNet API)")
print("相同特徵，不同樣本")
print("="*60)

horizontal_partition = partitioner.horizontal_partition(num_clients=3, random_state=42)
horizontal_trainer = FederatedEiNetTrainer(horizontal_partition)
horizontal_results = horizontal_trainer.train_federated_learning(
    X_processed, epochs=50, verbose=True
)

# 在測試集上評估
horizontal_eval = horizontal_trainer.evaluate_on_test(
    X_test, y_test, X_processed.columns.tolist()
)


🔵 實驗 1: 水平聯邦學習 (使用 Simple-EiNet API)
相同特徵，不同樣本
🔄 執行水平分割，分成 3 個客戶端...
  客戶端 0: 10099 樣本, 14 特徵
  客戶端 1: 10099 樣本, 14 特徵
  客戶端 2: 10100 樣本, 14 特徵

🚀 開始 horizontal 聯邦學習訓練...
訓練參數: epochs=50

📍 訓練 client_0...
   資料規模: 10099 樣本 × 14 特徵
   🔗 重疊特徵: 14 個 ['age', 'fnlwgt', 'education-num', 'capital-gain', 'capital-loss', 'hours-per-week', 'workclass', 'education', 'marital-status', 'occupation', 'relationship', 'race', 'sex', 'native-country']


Initializing PiecewiseLinear Leaf Layer: 100%|██████████| 3/3 [00:00<00:00, 12.29it/s]


    📊 模型配置: depth=2, sums=12, leaves=12
    🔧 特徵域: 14 個 domain
Epoch:  5, Loss: 0.5638, Train Acc: 74.47%, F1: 45.40%
Epoch: 10, Loss: 0.5520, Train Acc: 74.75%, F1: 45.59%
Epoch: 15, Loss: 0.5409, Train Acc: 75.63%, F1: 46.02%
Epoch: 20, Loss: 0.5303, Train Acc: 75.91%, F1: 46.35%
Epoch: 25, Loss: 0.5198, Train Acc: 76.26%, F1: 46.83%
Epoch: 30, Loss: 0.5095, Train Acc: 76.42%, F1: 47.15%
Epoch: 35, Loss: 0.4995, Train Acc: 76.56%, F1: 47.46%
Epoch: 40, Loss: 0.4898, Train Acc: 76.70%, F1: 47.84%
Epoch: 45, Loss: 0.4807, Train Acc: 76.94%, F1: 48.91%
Epoch: 50, Loss: 0.4723, Train Acc: 77.34%, F1: 50.69%
    ✅ 訓練準確率: 77.344
    📈 訓練 F1 分數: 50.689
    ⏱️  訓練時間: 8.634 秒
   🎯 client_0 訓練完成

📍 訓練 client_1...
   資料規模: 10099 樣本 × 14 特徵
   🔗 重疊特徵: 14 個 ['age', 'fnlwgt', 'education-num', 'capital-gain', 'capital-loss', 'hours-per-week', 'workclass', 'education', 'marital-status', 'occupation', 'relationship', 'race', 'sex', 'native-country']


Initializing PiecewiseLinear Leaf Layer: 100%|██████████| 3/3 [00:00<00:00, 12.64it/s]


    📊 模型配置: depth=2, sums=12, leaves=12
    🔧 特徵域: 14 個 domain
Epoch:  5, Loss: 0.6626, Train Acc: 66.94%, F1: 53.84%
Epoch: 10, Loss: 0.6321, Train Acc: 68.78%, F1: 54.67%
Epoch: 15, Loss: 0.6051, Train Acc: 71.68%, F1: 56.54%
Epoch: 20, Loss: 0.5810, Train Acc: 72.51%, F1: 56.97%
Epoch: 25, Loss: 0.5598, Train Acc: 73.18%, F1: 57.51%
Epoch: 30, Loss: 0.5411, Train Acc: 73.99%, F1: 57.74%
Epoch: 35, Loss: 0.5246, Train Acc: 75.06%, F1: 56.62%
Epoch: 40, Loss: 0.5099, Train Acc: 75.97%, F1: 57.24%
Epoch: 45, Loss: 0.4968, Train Acc: 76.53%, F1: 57.91%
Epoch: 50, Loss: 0.4849, Train Acc: 77.02%, F1: 58.76%
    ✅ 訓練準確率: 77.018
    📈 訓練 F1 分數: 58.756
    ⏱️  訓練時間: 7.875 秒
   🎯 client_1 訓練完成

📍 訓練 client_2...
   資料規模: 10100 樣本 × 14 特徵
   🔗 重疊特徵: 14 個 ['age', 'fnlwgt', 'education-num', 'capital-gain', 'capital-loss', 'hours-per-week', 'workclass', 'education', 'marital-status', 'occupation', 'relationship', 'race', 'sex', 'native-country']


Initializing PiecewiseLinear Leaf Layer: 100%|██████████| 3/3 [00:00<00:00, 13.28it/s]


    📊 模型配置: depth=2, sums=12, leaves=12
    🔧 特徵域: 14 個 domain
Epoch:  5, Loss: 0.5673, Train Acc: 64.93%, F1: 59.41%
Epoch: 10, Loss: 0.5474, Train Acc: 65.49%, F1: 56.98%
Epoch: 15, Loss: 0.5314, Train Acc: 75.15%, F1: 58.84%
Epoch: 20, Loss: 0.5189, Train Acc: 75.67%, F1: 50.61%
Epoch: 25, Loss: 0.5093, Train Acc: 75.38%, F1: 47.99%
Epoch: 30, Loss: 0.5016, Train Acc: 75.33%, F1: 47.29%
Epoch: 35, Loss: 0.4949, Train Acc: 75.48%, F1: 47.75%
Epoch: 40, Loss: 0.4885, Train Acc: 75.74%, F1: 48.87%
Epoch: 45, Loss: 0.4822, Train Acc: 76.03%, F1: 50.12%
Epoch: 50, Loss: 0.4764, Train Acc: 77.02%, F1: 54.90%
    ✅ 訓練準確率: 77.020
    📈 訓練 F1 分數: 54.904
    ⏱️  訓練時間: 7.911 秒
   🎯 client_2 訓練完成

📊 horizontal 聯邦學習完成！
   ⏱️  總訓練時間: 25.77 秒
   🎯 加權平均訓練準確率: 77.127
   📈 加權平均 F1 分數: 54.783
   🏢 參與客戶端: 3 個
   📊 總樣本數: 30298

📋 在測試集上評估聯邦 EiNet 模型...
   client_0: 準確率 76.910, F1 51.522 (14 特徵)
   client_1: 準確率 75.603, F1 57.866 (14 特徵)
   client_2: 準確率 76.983, F1 53.435 (14 特徵)

🎯 集成結果 (投票):
   準確率: 0.7

In [212]:
# 實驗 2: 垂直聯邦學習 - Simple-EiNet 風格
print("\n" + "="*60)
print("🟢 實驗 2: 垂直聯邦學習 (使用 Simple-EiNet API)")
print("相同樣本，不同特徵")
print("="*60)

vertical_partition = partitioner.vertical_partition(num_clients=3, random_state=42)
vertical_trainer = FederatedEiNetTrainer(vertical_partition)
vertical_results = vertical_trainer.train_federated_learning(
    X_processed, epochs=50, verbose=True
)

# 在測試集上評估
vertical_eval = vertical_trainer.evaluate_on_test(
    X_test, y_test, X_processed.columns.tolist()
)


🟢 實驗 2: 垂直聯邦學習 (使用 Simple-EiNet API)
相同樣本，不同特徵
🔄 執行垂直分割，分成 3 個客戶端...
  客戶端 0: 30298 樣本, 4 特徵
  客戶端 1: 30298 樣本, 4 特徵
  客戶端 2: 30298 樣本, 6 特徵

🚀 開始 vertical 聯邦學習訓練...
訓練參數: epochs=50

📍 訓練 client_0...
   資料規模: 30298 樣本 × 4 特徵


Initializing PiecewiseLinear Leaf Layer: 100%|██████████| 3/3 [00:00<00:00, 34.67it/s]

    📊 模型配置: depth=1, sums=8, leaves=8
    🔧 特徵域: 4 個 domain





Epoch:  5, Loss: 0.6917, Train Acc: 55.01%, F1: 49.16%
Epoch: 10, Loss: 0.6772, Train Acc: 59.30%, F1: 52.30%
Epoch: 15, Loss: 0.6640, Train Acc: 59.97%, F1: 53.70%
Epoch: 20, Loss: 0.6522, Train Acc: 60.14%, F1: 54.51%
Epoch: 25, Loss: 0.6416, Train Acc: 64.24%, F1: 59.41%
Epoch: 30, Loss: 0.6320, Train Acc: 64.32%, F1: 59.50%
Epoch: 35, Loss: 0.6234, Train Acc: 64.69%, F1: 59.73%
Epoch: 40, Loss: 0.6156, Train Acc: 64.67%, F1: 59.72%
Epoch: 45, Loss: 0.6083, Train Acc: 64.73%, F1: 59.74%
Epoch: 50, Loss: 0.6014, Train Acc: 67.48%, F1: 64.59%
    ✅ 訓練準確率: 67.476
    📈 訓練 F1 分數: 64.593
    ⏱️  訓練時間: 3.350 秒
   🎯 client_0 訓練完成

📍 訓練 client_1...
   資料規模: 30298 樣本 × 4 特徵


Initializing PiecewiseLinear Leaf Layer: 100%|██████████| 3/3 [00:00<00:00, 30.33it/s]

    📊 模型配置: depth=1, sums=8, leaves=8
    🔧 特徵域: 4 個 domain





Epoch:  5, Loss: 0.6369, Train Acc: 62.34%, F1: 52.83%
Epoch: 10, Loss: 0.6219, Train Acc: 67.71%, F1: 57.57%
Epoch: 15, Loss: 0.6105, Train Acc: 67.95%, F1: 57.68%
Epoch: 20, Loss: 0.6018, Train Acc: 68.72%, F1: 58.14%
Epoch: 25, Loss: 0.5946, Train Acc: 69.28%, F1: 58.55%
Epoch: 30, Loss: 0.5884, Train Acc: 69.56%, F1: 59.97%
Epoch: 35, Loss: 0.5829, Train Acc: 69.61%, F1: 60.04%
Epoch: 40, Loss: 0.5780, Train Acc: 70.26%, F1: 60.22%
Epoch: 45, Loss: 0.5736, Train Acc: 71.45%, F1: 61.06%
Epoch: 50, Loss: 0.5695, Train Acc: 71.48%, F1: 61.10%
    ✅ 訓練準確率: 71.480
    📈 訓練 F1 分數: 61.100
    ⏱️  訓練時間: 3.154 秒
   🎯 client_1 訓練完成

📍 訓練 client_2...
   資料規模: 30298 樣本 × 6 特徵


Initializing PiecewiseLinear Leaf Layer: 100%|██████████| 3/3 [00:00<00:00,  8.92it/s]


    📊 模型配置: depth=2, sums=12, leaves=12
    🔧 特徵域: 6 個 domain
Epoch:  5, Loss: 0.8716, Train Acc: 23.88%, F1: 19.76%
Epoch: 10, Loss: 0.7564, Train Acc: 40.39%, F1: 40.39%
Epoch: 15, Loss: 0.6650, Train Acc: 68.05%, F1: 60.72%
Epoch: 20, Loss: 0.5985, Train Acc: 76.10%, F1: 59.36%
Epoch: 25, Loss: 0.5550, Train Acc: 77.05%, F1: 56.94%
Epoch: 30, Loss: 0.5302, Train Acc: 77.06%, F1: 56.22%
Epoch: 35, Loss: 0.5182, Train Acc: 76.61%, F1: 53.76%
Epoch: 40, Loss: 0.5135, Train Acc: 76.75%, F1: 52.88%
Epoch: 45, Loss: 0.5118, Train Acc: 76.78%, F1: 53.06%
Epoch: 50, Loss: 0.5108, Train Acc: 76.80%, F1: 53.25%
    ✅ 訓練準確率: 76.804
    📈 訓練 F1 分數: 53.249
    ⏱️  訓練時間: 9.415 秒
   🎯 client_2 訓練完成

📊 vertical 聯邦學習完成！
   ⏱️  總訓練時間: 16.83 秒
   🎯 加權平均訓練準確率: 71.920
   📈 加權平均 F1 分數: 59.647
   🏢 參與客戶端: 3 個
   📊 總樣本數: 90894

📋 在測試集上評估聯邦 EiNet 模型...
   client_0: 準確率 67.281, F1 64.491 (4 特徵)
   client_1: 準確率 71.268, F1 60.285 (4 特徵)
   client_2: 準確率 76.622, F1 52.684 (6 特徵)

🎯 集成結果 (投票):
   準確率: 0.784
   

In [214]:
# 實驗 3: 混合聯邦學習 - Simple-EiNet 風格
print("\n" + "="*60)
print("🟡 實驗 3: 混合聯邦學習 (使用 Simple-EiNet API)")
print("不同樣本 + 不同特徵，具有重疊")
print("="*60)

hybrid_partition = partitioner.hybrid_partition(
    num_clients=4, 
    sample_overlap_ratio=0.3,
    feature_overlap_ratio=0.2,
    random_state=42
)
hybrid_trainer = FederatedEiNetTrainer(hybrid_partition)
hybrid_results = hybrid_trainer.train_federated_learning(
    X_processed, epochs=50, verbose=True
)

# 在測試集上評估
hybrid_eval = hybrid_trainer.evaluate_on_test(
    X_test, y_test, X_processed.columns.tolist()
)


🟡 實驗 3: 混合聯邦學習 (使用 Simple-EiNet API)
不同樣本 + 不同特徵，具有重疊
🔄 執行健壯版混合分割，分成 4 個客戶端...
  樣本重疊比例: 30.0%
  特徵重疊比例: 20.0%
  基礎樣本: 15148 個，重疊樣本池: 9089 個
  基礎特徵: 8 個，重疊特徵池: 2 個
  客戶端 0: 5680 樣本 × 3 特徵
  客戶端 1: 5680 樣本 × 3 特徵
  客戶端 2: 5680 樣本 × 3 特徵
  客戶端 3: 5680 樣本 × 3 特徵

🚀 開始 hybrid_robust 聯邦學習訓練...
訓練參數: epochs=50

📍 訓練 client_0...
   資料規模: 5680 樣本 × 3 特徵
   🔗 重疊特徵: 1 個 ['age']


Initializing PiecewiseLinear Leaf Layer: 100%|██████████| 3/3 [00:00<00:00, 49.64it/s]

    📊 模型配置: depth=1, sums=8, leaves=8
    🔧 特徵域: 3 個 domain
Epoch:  5, Loss: 0.7731, Train Acc: 55.97%, F1: 54.08%





Epoch: 10, Loss: 0.7505, Train Acc: 59.05%, F1: 55.79%
Epoch: 15, Loss: 0.7307, Train Acc: 59.91%, F1: 55.88%
Epoch: 20, Loss: 0.7134, Train Acc: 60.11%, F1: 56.02%
Epoch: 25, Loss: 0.6986, Train Acc: 60.09%, F1: 56.02%
Epoch: 30, Loss: 0.6859, Train Acc: 60.07%, F1: 55.86%
Epoch: 35, Loss: 0.6749, Train Acc: 60.76%, F1: 56.26%
Epoch: 40, Loss: 0.6654, Train Acc: 61.48%, F1: 56.65%
Epoch: 45, Loss: 0.6569, Train Acc: 62.02%, F1: 56.72%
Epoch: 50, Loss: 0.6492, Train Acc: 62.61%, F1: 57.12%
    ✅ 訓練準確率: 62.606
    📈 訓練 F1 分數: 57.117
    ⏱️  訓練時間: 1.063 秒
   🎯 client_0 訓練完成

📍 訓練 client_1...
   資料規模: 5680 樣本 × 3 特徵
   🔗 重疊特徵: 1 個 ['age']


Initializing PiecewiseLinear Leaf Layer: 100%|██████████| 3/3 [00:00<00:00, 81.25it/s]

    📊 模型配置: depth=1, sums=8, leaves=8
    🔧 特徵域: 3 個 domain
Epoch:  5, Loss: 0.7064, Train Acc: 57.27%, F1: 49.79%





Epoch: 10, Loss: 0.6878, Train Acc: 62.36%, F1: 53.40%
Epoch: 15, Loss: 0.6711, Train Acc: 66.78%, F1: 57.53%
Epoch: 20, Loss: 0.6562, Train Acc: 65.85%, F1: 58.05%
Epoch: 25, Loss: 0.6429, Train Acc: 67.82%, F1: 61.41%
Epoch: 30, Loss: 0.6311, Train Acc: 67.89%, F1: 62.62%
Epoch: 35, Loss: 0.6205, Train Acc: 67.92%, F1: 62.70%
Epoch: 40, Loss: 0.6110, Train Acc: 68.12%, F1: 62.85%
Epoch: 45, Loss: 0.6023, Train Acc: 68.36%, F1: 62.89%
Epoch: 50, Loss: 0.5943, Train Acc: 68.66%, F1: 62.15%
    ✅ 訓練準確率: 68.662
    📈 訓練 F1 分數: 62.150
    ⏱️  訓練時間: 1.027 秒
   🎯 client_1 訓練完成

📍 訓練 client_2...
   資料規模: 5680 樣本 × 3 特徵
   🔗 重疊特徵: 1 個 ['age']


Initializing PiecewiseLinear Leaf Layer: 100%|██████████| 3/3 [00:00<00:00, 50.65it/s]

    📊 模型配置: depth=1, sums=8, leaves=8
    🔧 特徵域: 3 個 domain





Epoch:  5, Loss: 0.7451, Train Acc: 58.59%, F1: 50.85%
Epoch: 10, Loss: 0.7055, Train Acc: 60.18%, F1: 51.34%
Epoch: 15, Loss: 0.6716, Train Acc: 60.81%, F1: 51.52%
Epoch: 20, Loss: 0.6433, Train Acc: 61.16%, F1: 51.77%
Epoch: 25, Loss: 0.6202, Train Acc: 62.15%, F1: 51.82%
Epoch: 30, Loss: 0.6017, Train Acc: 64.75%, F1: 55.23%
Epoch: 35, Loss: 0.5868, Train Acc: 66.90%, F1: 55.14%
Epoch: 40, Loss: 0.5749, Train Acc: 67.75%, F1: 58.56%
Epoch: 45, Loss: 0.5651, Train Acc: 68.54%, F1: 59.19%
Epoch: 50, Loss: 0.5570, Train Acc: 70.92%, F1: 60.07%
    ✅ 訓練準確率: 70.915
    📈 訓練 F1 分數: 60.072
    ⏱️  訓練時間: 1.032 秒
   🎯 client_2 訓練完成

📍 訓練 client_3...
   資料規模: 5680 樣本 × 3 特徵
   🔗 重疊特徵: 1 個 ['age']


Initializing PiecewiseLinear Leaf Layer: 100%|██████████| 3/3 [00:00<00:00, 43.41it/s]

    📊 模型配置: depth=1, sums=8, leaves=8
    🔧 特徵域: 3 個 domain





Epoch:  5, Loss: 0.6588, Train Acc: 67.31%, F1: 57.06%
Epoch: 10, Loss: 0.6324, Train Acc: 68.56%, F1: 58.35%
Epoch: 15, Loss: 0.6096, Train Acc: 69.68%, F1: 59.31%
Epoch: 20, Loss: 0.5900, Train Acc: 73.47%, F1: 62.20%
Epoch: 25, Loss: 0.5733, Train Acc: 74.89%, F1: 63.33%
Epoch: 30, Loss: 0.5590, Train Acc: 74.88%, F1: 63.75%
Epoch: 35, Loss: 0.5467, Train Acc: 76.23%, F1: 65.46%
Epoch: 40, Loss: 0.5361, Train Acc: 76.36%, F1: 65.69%
Epoch: 45, Loss: 0.5268, Train Acc: 76.97%, F1: 66.71%
Epoch: 50, Loss: 0.5187, Train Acc: 77.32%, F1: 67.40%
    ✅ 訓練準確率: 77.324
    📈 訓練 F1 分數: 67.399
    ⏱️  訓練時間: 1.074 秒
   🎯 client_3 訓練完成

📊 hybrid_robust 聯邦學習完成！
   ⏱️  總訓練時間: 4.55 秒
   🎯 加權平均訓練準確率: 69.877
   📈 加權平均 F1 分數: 61.685
   🏢 參與客戶端: 4 個
   📊 總樣本數: 22720

📋 在測試集上評估聯邦 EiNet 模型...
   client_0: 準確率 63.488, F1 58.239 (3 特徵)
   client_1: 準確率 69.097, F1 62.560 (3 特徵)
   client_2: 準確率 68.152, F1 59.245 (3 特徵)
   client_3: 準確率 76.722, F1 64.905 (3 特徵)

🎯 集成結果 (投票):
   準確率: 0.777
   F1 分數: 0.752


In [217]:
# 全面的實驗結果分析和對比
print("\n" + "="*80)
print("📊 Simple-EiNet 聲邦學習實驗結果總結與分析")
print("="*80)

# 收集所有結果
all_results = {
    '水平聯邦學習 (EiNet)': {
        'results': horizontal_results,
        'eval': horizontal_eval,
        'description': '相同特徵，不同樣本',
        'api_type': 'Simple-EiNet API'
    },
    '垂直聯邦學習 (EiNet)': {
        'results': vertical_results,
        'eval': vertical_eval,
        'description': '相同樣本，不同特徵',
        'api_type': 'Simple-EiNet API'
    },
    '混合聯邦學習 (EiNet)': {
        'results': hybrid_results,
        'eval': hybrid_eval,
        'description': '不同樣本+特徵，具有重疊',
        'api_type': 'Simple-EiNet API'
    }
}

# 創建對比表格
print("\n🎯 Simple-EiNet API 實驗結果對比:")
print("-" * 110)
print(f"{'方法':<20} {'描述':<22} {'客戶端數':<8} {'訓練準確率':<12} {'測試準確率':<12} {'訓練時間(s)':<12} {'集成方法':<12}")
print("-" * 110)

for method_name, data in all_results.items():
    results = data['results']
    eval_data = data['eval']
    description = data['description']
    
    ensemble_method = eval_data.get('ensemble_method', 'N/A')
    
    print(f"{method_name:<20} {description:<22} {results['num_clients']:<8} "
          f"{results['weighted_accuracy']:<12.3f} {eval_data['ensemble_accuracy']:<12.3f} "
          f"{results['total_training_time']:<12.2f} {ensemble_method:<12}")

print("-" * 110)

# Simple-EiNet API 特色分析
print("\n🔧 Simple-EiNet API 特色功能展示:")

print("\n1️⃣ **Domain 自動設定**")
for method_name, data in all_results.items():
    results = data['results']
    client_results = results['client_results']
    print(f"   {method_name}:")
    for client_id, client_result in client_results.items():
        domains_count = client_result['domains_count']
        n_features = client_result['n_features']
        print(f"     {client_id}: {domains_count} 個 Domain (對應 {n_features} 個特徵)")

print("\n2️⃣ **EiNet 模型配置自動調整**")
for method_name, data in all_results.items():
    results = data['results']
    client_results = results['client_results']
    print(f"   {method_name}:")
    for client_id, client_result in client_results.items():
        config = client_result['config']
        print(f"     {client_id}: depth={config.depth}, sums={config.num_sums}, leaves={config.num_leaves}")

# 性能分析
print("\n📈 性能分析:")
print("\n   🏆 **最佳表現**:")
best_test_acc = max([data['eval']['ensemble_accuracy'] for data in all_results.values()])
best_method = [name for name, data in all_results.items() 
               if data['eval']['ensemble_accuracy'] == best_test_acc][0]
print(f"      • 測試準確率最高: {best_method} ({best_test_acc:.3f})")

fastest_time = min([data['results']['total_training_time'] for data in all_results.values()])
fastest_method = [name for name, data in all_results.items() 
                  if data['results']['total_training_time'] == fastest_time][0]
print(f"      • 訓練速度最快: {fastest_method} ({fastest_time:.2f}s)")



📊 Simple-EiNet 聲邦學習實驗結果總結與分析

🎯 Simple-EiNet API 實驗結果對比:
--------------------------------------------------------------------------------------------------------------
方法                   描述                     客戶端數     訓練準確率        測試準確率        訓練時間(s)      集成方法        
--------------------------------------------------------------------------------------------------------------
水平聯邦學習 (EiNet)       相同特徵，不同樣本              3        77.127       0.767        25.77        投票          
垂直聯邦學習 (EiNet)       相同樣本，不同特徵              3        71.920       0.784        16.83        投票          
混合聯邦學習 (EiNet)       不同樣本+特徵，具有重疊           4        69.877       0.777        4.55         投票          
--------------------------------------------------------------------------------------------------------------

🔧 Simple-EiNet API 特色功能展示:

1️⃣ **Domain 自動設定**
   水平聯邦學習 (EiNet):
     client_0: 14 個 Domain (對應 14 個特徵)
     client_1: 14 個 Domain (對應 14 個特徵)
     client_2: 14 個 Domain (對應 14 個特徵)
   垂

In [230]:
horizontal_results

{'type': 'horizontal',
 'client_results': {'client_0': {'train_accuracy': tensor(77.3443),
   'train_f1': 50.68946154750993,
   'training_time': 8.634259939193726,
   'n_samples': 10099,
   'n_features': 14,
   'feature_overlap': ['age',
    'fnlwgt',
    'education-num',
    'capital-gain',
    'capital-loss',
    'hours-per-week',
    'workclass',
    'education',
    'marital-status',
    'occupation',
    'relationship',
    'race',
    'sex',
    'native-country'],
   'config': EinetConfig(num_features=14, num_channels=1, num_sums=12, num_leaves=12, num_repetitions=3, num_classes=2, depth=2, dropout=0.0, leaf_type=<class 'simple_einet.layers.distributions.piecewise_linear.PiecewiseLinear'>, leaf_kwargs={'alpha': 0.1}, layer_type='linsum', structure='top-down'),
   'domains_count': 14},
  'client_1': {'train_accuracy': tensor(77.0175),
   'train_f1': 58.75558478665693,
   'training_time': 7.875133991241455,
   'n_samples': 10099,
   'n_features': 14,
   'feature_overlap': ['age',
 

In [231]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import pandas as pd

# Create data from the provided JSON
methods = ['Horizontal FL', 'Vertical FL', 'Hybrid FL']
train_acc = [horizontal_results['weighted_accuracy'], vertical_results['weighted_accuracy'], hybrid_results['weighted_accuracy']]
f1_acc = [horizontal_results['weighted_f1'], vertical_results['weighted_f1'], hybrid_results['weighted_f1']]
train_time = [horizontal_results['total_training_time'], vertical_results['total_training_time'], hybrid_results['total_training_time']]
clients = [horizontal_results['num_clients'], vertical_results['num_clients'], hybrid_results['num_clients']]
samples = [horizontal_results['total_samples'], vertical_results['total_samples'], hybrid_results['total_samples']]  # in thousands

colors = ['#F3AA60', '#EF6262', '#468897']

# Create subplots with 2x2 layout
fig = make_subplots(
    rows=2, cols=2,
    subplot_titles=('Accuracy Comparison', 'Training Time (s)', 
                   'Client Count', 'Sample Size (k)', ''),
    specs=[[{"secondary_y": False}, {"secondary_y": False},],
           [{"secondary_y": False}, {"secondary_y": False},]]
)

# Panel 1: Accuracy comparison (grouped bars)
for i, method in enumerate(methods):
    fig.add_trace(
        go.Bar(name=f'Train {method}', x=[method], y=[train_acc[i]], 
               marker_color=colors[i], opacity=0.8, legendgroup=f'group{i}',
               showlegend=False),
        row=1, col=1
    )

# Panel 2: Training time
fig.add_trace(
    go.Bar(x=methods, y=train_time, marker_color=colors,
           name='Train Time', showlegend=False),
    row=1, col=2
)

# Panel 3: Client count
fig.add_trace(
    go.Bar(x=methods, y=clients, marker_color=colors,
           name='Clients', showlegend=False),
    row=2, col=1
)

# Panel 4: Sample size
fig.add_trace(
    go.Bar(x=methods, y=samples, marker_color=colors,
           name='Samples', showlegend=False),
    row=2, col=2
)

# Add method legend
for i, method in enumerate(methods):
    fig.add_trace(
        go.Scatter(x=[None], y=[None], mode='markers',
                  marker=dict(size=10, color=colors[i]),
                  name=method, showlegend=True)
    )

# Update layout
fig.update_layout(
    title='FL Methods Multi-Panel Comparison',
    legend=dict(orientation='h', yanchor='bottom', y=1.08, xanchor='center', x=0.5),
    barmode='group'
)

# Update axes labels
fig.update_yaxes(title_text="Accuracy & F1(%)", row=1, col=1)
fig.update_yaxes(title_text="Time (s)", row=1, col=2)
fig.update_yaxes(title_text="Count", row=2, col=1)
fig.update_yaxes(title_text="Samples (k)", row=2, col=2)

fig.update_traces(cliponaxis=False)

# Save the chart
fig.show()

In [238]:
fig = go.Figure()
for i, method in enumerate(methods):
    fig.add_trace(
            go.Bar(name=f'Train {method}', x=[method], y=[train_acc[i]], 
                   marker_color=colors[i], opacity=0.8, legendgroup=f'group{i}',
                   showlegend=False),
        )
fig.update_yaxes(range=[0, 100]) 
fig.update_layout(title='F1', width=500, )
fig.show()
