In [None]:
# Import necessary libraries
import numpy as np
import copy
from sklearn.cross_decomposition import CCA
import tensorflow as tf
from tensorflow.keras import layers, models

# Custom utilities for dataset loading and model training
# Example:
# from utils import load_data, preprocess_data


In [None]:
def direct_averaging(model_A, model_B):
    """Merge two models by averaging their weights tensor by tensor.

    Layers are paired positionally with ``zip(model_A.layers, model_B.layers)``.
    If one model has more layers than the other, the surplus layers are not
    visited and the returned copy keeps ``model_A``'s weights there.

    Args:
        model_A: Model whose deep copy receives the averaged weights.
        model_B: Model averaged into the copy of ``model_A``.

    Returns:
        A deep copy of ``model_A`` in which every paired layer holds
        ``(A + B) / 2`` for each of its weight tensors.

    Raises:
        ValueError: If a paired layer holds weight tensors of different
            number or shape in the two models.
    """
    merged_model = copy.deepcopy(model_A)
    for layer_idx, (A_layer, B_layer) in enumerate(zip(model_A.layers, model_B.layers)):
        A_weights = [np.asarray(w) for w in A_layer.get_weights()]
        B_weights = [np.asarray(w) for w in B_layer.get_weights()]

        shapes_A = [w.shape for w in A_weights]
        shapes_B = [w.shape for w in B_weights]
        if shapes_A != shapes_B:
            raise ValueError(
                f"Cannot average layer {layer_idx}: weight shapes differ "
                f"({shapes_A} vs {shapes_B})"
            )
        if not A_weights:
            continue  # layer has no weights, nothing to average

        # Average each tensor on its own: a layer's weight list is ragged
        # (e.g. a 2-D kernel next to a 1-D bias), so it cannot be packed into
        # one ndarray and averaged in a single expression.
        merged_weights = [(a + b) / 2 for a, b in zip(A_weights, B_weights)]
        merged_model.layers[layer_idx].set_weights(merged_weights)
    return merged_model


In [None]:
def ensemble_models(models, test_images):
    """Average the predictions of several models on the same inputs.

    Args:
        models: Iterable of objects exposing ``predict(test_images)``.
        test_images: Inputs forwarded unchanged to every ``predict`` call.

    Returns:
        The mean of the per-model prediction arrays, taken across models
        (axis 0 of the stacked predictions).
    """
    per_model_outputs = []
    for member in models:
        per_model_outputs.append(member.predict(test_images))

    # Axis 0 indexes the models, so the mean collapses the ensemble dimension.
    return np.mean(per_model_outputs, axis=0)


In [None]:
from scipy.optimize import linear_sum_assignment

def _unit_distance_matrix(kernel_A, kernel_B):
    """Return ``D`` with ``D[i, j]`` = L2 distance between output unit ``i`` of
    ``kernel_A`` and output unit ``j`` of ``kernel_B`` (units on the last axis)."""
    units_A = np.moveaxis(kernel_A, -1, 0).reshape(kernel_A.shape[-1], -1).astype(np.float64)
    units_B = np.moveaxis(kernel_B, -1, 0).reshape(kernel_B.shape[-1], -1).astype(np.float64)
    # ||a - b||^2 = ||a||^2 + ||b||^2 - 2 a.b  -- avoids an (n, n, d) temporary.
    squared = (
        np.sum(units_A ** 2, axis=1)[:, None]
        + np.sum(units_B ** 2, axis=1)[None, :]
        - 2.0 * (units_A @ units_B.T)
    )
    # Round-off can push exact matches slightly below zero.
    return np.sqrt(np.maximum(squared, 0.0))


def _expand_input_permutation(unit_perm, input_size):
    """Lift a permutation of ``len(unit_perm)`` units to an input axis of ``input_size``."""
    n_units = len(unit_perm)
    if input_size % n_units:
        raise ValueError(
            f"Cannot propagate a permutation of {n_units} units to an input "
            f"axis of size {input_size}"
        )
    # When the input axis is a flattened feature map, each group of n_units
    # consecutive entries is permuted the same way.
    # NOTE(review): this assumes a channels-last flatten -- confirm for the
    # models being merged.
    block_offsets = np.arange(input_size // n_units)[:, None] * n_units
    return (block_offsets + np.asarray(unit_perm)[None, :]).ravel()


def permute_models(model_A, model_B):
    """Align ``model_B``'s units to ``model_A``'s by permutation, then average.

    For every layer whose first weight tensor is a kernel (``ndim >= 2``, output
    units on the last axis), a pairwise L2 cost matrix between the output units
    of A and B is solved with the Hungarian algorithm.  B's output units (and
    any weight tensor of that layer sharing the same last-axis size, e.g. the
    bias) are reordered accordingly, and the same permutation is applied to the
    input axis of the next kernel so B's function is unchanged.  The last
    kernel layer keeps its output order, because its units are the model
    outputs.  Layers holding only per-unit vectors are permuted with the
    current permutation when their length matches it.

    NOTE(review): the permutation is carried from one layer to the next in
    ``model.layers`` order, which is only the data-flow order for a plain
    sequential stack of Dense/Conv-style ``[kernel, bias]`` layers.  Models
    with branches, skip connections or recurrent layers need a dedicated
    alignment -- confirm before using this on such architectures.

    Args:
        model_A: Reference model; its deep copy receives the merged weights.
        model_B: Model whose units are permuted to match ``model_A``.

    Returns:
        A deep copy of ``model_A`` whose paired layers hold the average of
        A's weights and B's aligned weights.

    Raises:
        ValueError: If paired layers have weight tensors of different number
            or shape, or a permutation cannot be propagated to the next
            kernel's input axis.
    """
    merged_model = copy.deepcopy(model_A)

    # The final kernel layer must keep its output order, so locate it first.
    last_kernel_idx = -1
    for layer_idx, (A_layer, _) in enumerate(zip(model_A.layers, model_B.layers)):
        layer_weights = A_layer.get_weights()
        if layer_weights and np.ndim(layer_weights[0]) >= 2:
            last_kernel_idx = layer_idx

    unit_perm = None  # permutation applied to the previous layer's output units
    for layer_idx, (A_layer, B_layer) in enumerate(zip(model_A.layers, model_B.layers)):
        A_weights = [np.asarray(w) for w in A_layer.get_weights()]
        B_weights = [np.asarray(w) for w in B_layer.get_weights()]

        shapes_A = [w.shape for w in A_weights]
        shapes_B = [w.shape for w in B_weights]
        if shapes_A != shapes_B:
            raise ValueError(
                f"Cannot align layer {layer_idx}: weight shapes differ "
                f"({shapes_A} vs {shapes_B})"
            )
        if not A_weights:
            continue  # layer has no weights, nothing to align or average

        if A_weights[0].ndim >= 2:
            n_units = A_weights[0].shape[-1]

            # Undo, on the input side, the reordering applied to the previous
            # layer's outputs.
            if unit_perm is not None:
                input_perm = _expand_input_permutation(unit_perm, B_weights[0].shape[-2])
                B_weights[0] = np.take(B_weights[0], input_perm, axis=-2)

            if layer_idx == last_kernel_idx:
                unit_perm = None
            else:
                # Solve the assignment problem using the Hungarian algorithm;
                # unit_perm[i] is the B unit matched to A's unit i.
                cost_matrix = _unit_distance_matrix(A_weights[0], B_weights[0])
                _, unit_perm = linear_sum_assignment(cost_matrix)
                B_weights = [
                    np.take(w, unit_perm, axis=-1)
                    if w.ndim >= 1 and w.shape[-1] == n_units
                    else w
                    for w in B_weights
                ]
        elif unit_perm is not None:
            # Per-unit vectors follow the permutation of the units they describe.
            B_weights = [
                w[unit_perm] if w.shape == unit_perm.shape else w
                for w in B_weights
            ]

        # Average aligned weights, one tensor at a time.
        merged_weights = [(a + b) / 2 for a, b in zip(A_weights, B_weights)]
        merged_model.layers[layer_idx].set_weights(merged_weights)

    return merged_model


In [None]:
def ot_fusion_models(model_A, model_B):
    """Stand-in for OT Fusion.

    No optimal-transport step is implemented here: both models are forwarded
    unchanged to ``permute_models`` and its result is returned as-is.
    """
    fused_model = permute_models(model_A, model_B)
    return fused_model


In [None]:
def matching_weights(model_A, model_B, weight_A=0.5):
    """Merge two models as a weighted average of their weights.

    Layers are paired positionally with ``zip(model_A.layers, model_B.layers)``
    and each weight tensor is combined as
    ``weight_A * A + (1 - weight_A) * B``.

    Args:
        model_A: Model whose deep copy receives the merged weights.
        model_B: Model blended into the copy of ``model_A``.
        weight_A: Contribution of ``model_A``; the default ``0.5`` gives the
            plain average.

    Returns:
        A deep copy of ``model_A`` holding the blended weights.

    Raises:
        ValueError: If a paired layer holds weight tensors of different
            number or shape in the two models.
    """
    merged_model = copy.deepcopy(model_A)

    for layer_idx, (A_layer, B_layer) in enumerate(zip(model_A.layers, model_B.layers)):
        A_weights = [np.asarray(w) for w in A_layer.get_weights()]
        B_weights = [np.asarray(w) for w in B_layer.get_weights()]

        shapes_A = [w.shape for w in A_weights]
        shapes_B = [w.shape for w in B_weights]
        if shapes_A != shapes_B:
            raise ValueError(
                f"Cannot merge layer {layer_idx}: weight shapes differ "
                f"({shapes_A} vs {shapes_B})"
            )
        if not A_weights:
            continue  # layer has no weights, nothing to merge

        # get_weights() returns Python lists, so `A_weights + B_weights` would
        # concatenate them; blend tensor by tensor instead.
        merged_weights = [
            weight_A * a + (1.0 - weight_A) * b
            for a, b in zip(A_weights, B_weights)
        ]
        merged_model.layers[layer_idx].set_weights(merged_weights)

    return merged_model


In [None]:
def zipit_merge(model_A, model_B):
    """Stand-in for ZipIt!.

    No feature-zipping step is implemented here: both models are forwarded
    unchanged to ``direct_averaging`` and its result is returned as-is.
    """
    averaged_model = direct_averaging(model_A, model_B)
    return averaged_model


In [None]:
# Updated CCA Merge Method
def compute_cca_transformations(A_layer_outputs, B_layer_outputs):
    """Fit CCA on two activation matrices and return its rotation matrices.

    Args:
        A_layer_outputs: 2-D array-like, one row per sample and one column per
            unit of model A's layer.
        B_layer_outputs: 2-D array-like with the same number of rows, one
            column per unit of model B's layer.

    Returns:
        Tuple ``(PA, PB)``: the fitted estimator's ``x_rotations_`` and
        ``y_rotations_``.
    """
    A_layer_outputs = np.asarray(A_layer_outputs)
    B_layer_outputs = np.asarray(B_layer_outputs)

    # scikit-learn bounds n_components by the sample count as well as by both
    # feature counts, so a small calibration batch must be taken into account.
    n_components = min(
        A_layer_outputs.shape[0],
        A_layer_outputs.shape[1],
        B_layer_outputs.shape[1],
    )

    # Only the fitted rotations are needed; the projected data is not used.
    cca = CCA(n_components=n_components)
    cca.fit(A_layer_outputs, B_layer_outputs)

    # Extract transformation matrices
    PA = cca.x_rotations_
    PB = cca.y_rotations_

    return PA, PB

# Function to transform B model's weights using CCA projections
def transform_weights(A_weights, B_weights, PA, PB):
    """Apply the two projection matrices to ``B_weights``.

    Computes ``PB @ B_weights @ PA.T`` and returns the product.

    Args:
        A_weights: Accepted for signature symmetry; never read by this function.
        B_weights: Matrix to transform.
        PA: Matrix whose transpose is applied on the right.
        PB: Matrix applied on the left.

    Returns:
        The matrix product ``PB @ B_weights @ PA.T``.
    """
    left_projected = PB @ B_weights
    PA_transposed = PA.T
    return left_projected @ PA_transposed

# Function to merge models layer by layer using CCA
def cca_merge(model_A, model_B):
    """Build a merged model layer by layer from CCA-derived projections.

    For each positional layer pair, the two layers' ``.output`` attributes are
    passed to ``compute_cca_transformations`` and the merged layer's weights
    are set to whatever ``transform_weights`` returns.

    ``transform_weights`` never reads its first argument, so the value written
    back is derived from ``model_B``'s weights only; no averaging with
    ``model_A``'s weights happens in this function.

    Args:
        model_A: Model that is deep-copied to hold the result.
        model_B: Model whose layer weights are transformed and written into
            the copy.

    Returns:
        The deep copy of ``model_A`` after every paired layer has been updated.
    """
    merged_model = copy.deepcopy(model_A)

    for layer_idx, (A_layer, B_layer) in enumerate(zip(model_A.layers, model_B.layers)):
        # NOTE(review): on a Keras layer `.output` is presumably the symbolic
        # graph tensor rather than a batch of activations, while the CCA fit
        # needs numeric (n_samples, n_units) data -- confirm, and feed real
        # activations computed on a calibration batch if so.
        A_layer_output = A_layer.output
        B_layer_output = B_layer.output

        # Compute CCA transformation matrices PA and PB
        PA, PB = compute_cca_transformations(A_layer_output, B_layer_output)

        # Transform and merge the weights
        # NOTE(review): get_weights() results go straight into a matrix product
        # and one product is handed to set_weights(); this looks like it can
        # only work for a layer with exactly one 2-D weight tensor -- TODO
        # confirm handling of biases and of layers without weights.
        merged_model.layers[layer_idx].set_weights(
            transform_weights(A_layer.get_weights(), B_layer.get_weights(), PA, PB)
        )

    return merged_model


In [None]:
def evaluate_all_methods(models, test_images, test_labels):
    """Print and return the test accuracy of the base models and every merge method.

    The base models are evaluated individually and as a prediction-averaging
    ensemble; each merge method is then applied to ``models[0]`` and
    ``models[1]`` and the merged model is evaluated.

    Args:
        models: Sequence of models; the first two are the ones merged.
        test_images: Evaluation inputs.
        test_labels: Integer class labels for ``test_images``.

    Returns:
        Dict mapping each reported method label to its accuracy, in the order
        printed.
    """
    def _accuracy(model):
        # evaluate() needs a compiled model; with the 'accuracy' metric it
        # returns [loss, accuracy].
        model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
        return model.evaluate(test_images, test_labels, verbose=0)[1]

    results = {}

    # Base models average
    base_avg_acc = np.mean([_accuracy(model) for model in models])
    print(f"Base models avg. accuracy: {base_avg_acc:.4f}")
    results["Base models avg."] = base_avg_acc

    # Ensemble method
    ensemble_preds = ensemble_models(models, test_images)
    ensemble_acc = np.mean(np.argmax(ensemble_preds, axis=1) == test_labels.squeeze())
    print(f"Ensemble accuracy: {ensemble_acc:.4f}")
    results["Ensemble"] = ensemble_acc

    # Every merge method takes the same two models and is scored the same way,
    # so drive them from one table instead of repeating the stanza.
    merge_methods = [
        ("Direct averaging", direct_averaging),
        ("Permute", permute_models),
        ("OT Fusion", ot_fusion_models),
        ("Matching Weights", matching_weights),
        ("ZipIt!", zipit_merge),
        ("CCA Merge (ours)", cca_merge),
    ]
    for label, merge_fn in merge_methods:
        merged_model = merge_fn(models[0], models[1])
        merged_acc = _accuracy(merged_model)
        print(f"{label} accuracy: {merged_acc:.4f}")
        results[label] = merged_acc

    return results


In [None]:
# Load ImageNet-pretrained classifiers, classification heads included.
# NOTE(review): ResNet50, VGG16 and DenseNet121 are different architectures,
# while the merge functions in this notebook pair layers positionally with
# zip(model_A.layers, model_B.layers) -- confirm that weight-level merging
# across these models is really intended.
model_A = tf.keras.applications.ResNet50(include_top=True, weights='imagenet')
model_B = tf.keras.applications.VGG16(include_top=True, weights='imagenet')

# Third pretrained model (DenseNet121)
model_C = tf.keras.applications.DenseNet121(include_top=True, weights='imagenet')

# All three models collected in one list
models = [model_A, model_B, model_C]

# Load CIFAR-10, cast the images to float32 and divide by 255.
# NOTE(review): the models above carry ImageNet weights with include_top=True,
# so they presumably expect ImageNet-sized inputs and ImageNet class indices
# rather than CIFAR-10's -- confirm whether a resize / new head is needed
# before evaluating on this data.
(train_images, train_labels), (test_images, test_labels) = tf.keras.datasets.cifar10.load_data()
train_images = train_images.astype('float32') / 255.0
test_images = test_images.astype('float32') / 255.0


Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet50_weights_tf_dim_ordering_tf_kernels.h5
102967424/102967424 ━━━━━━━━━━━━━━━━━━━━ 1s 0us/step
Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg16/vgg16_weights_tf_dim_ordering_tf_kernels.h5
553467096/553467096 ━━━━━━━━━━━━━━━━━━━━ 6s 0us/step
Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/densenet/densenet121_weights_tf_dim_ordering_tf_kernels.h5
33188688/33188688 ━━━━━━━━━━━━━━━━━━━━ 1s 0us/step
Downloading data from https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz
170498071/170498071 ━━━━━━━━━━━━━━━━━━━━ 4s 0us/step


In [None]:
def evaluate_all_methods(models, test_images, test_labels):
    """Print and return the test accuracy of the base models and every merge method.

    This definition replaces any earlier ``evaluate_all_methods`` in the
    notebook, so it carries the full set of evaluations rather than only the
    base-model average.

    Args:
        models: Sequence of models; the first two are the ones merged.
        test_images: Evaluation inputs.
        test_labels: Integer class labels for ``test_images``.

    Returns:
        Dict mapping each reported method label to its accuracy, in the order
        printed.
    """
    compile_kwargs = dict(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

    # Ensure models are compiled
    for model in models:
        model.compile(**compile_kwargs)

    results = {}

    # Base models average
    base_avg_acc = np.mean([model.evaluate(test_images, test_labels, verbose=0)[1] for model in models])
    print(f"Base models avg. accuracy: {base_avg_acc:.4f}")
    results["Base models avg."] = base_avg_acc

    # Ensemble method
    ensemble_preds = ensemble_models(models, test_images)
    ensemble_acc = np.mean(np.argmax(ensemble_preds, axis=1) == test_labels.squeeze())
    print(f"Ensemble accuracy: {ensemble_acc:.4f}")
    results["Ensemble"] = ensemble_acc

    # Merge methods: each takes the first two models and is scored the same way.
    merge_methods = [
        ("Direct averaging", direct_averaging),
        ("Permute", permute_models),
        ("OT Fusion", ot_fusion_models),
        ("Matching Weights", matching_weights),
        ("ZipIt!", zipit_merge),
        ("CCA Merge (ours)", cca_merge),
    ]
    for label, merge_fn in merge_methods:
        merged_model = merge_fn(models[0], models[1])
        # Compile the merged copy too, so evaluate() reports [loss, accuracy].
        merged_model.compile(**compile_kwargs)
        merged_acc = merged_model.evaluate(test_images, test_labels, verbose=0)[1]
        print(f"{label} accuracy: {merged_acc:.4f}")
        results[label] = merged_acc

    return results