### Model merging - Ratatouille

In [None]:
import numpy as np
import tensorflow as tf
from tensorflow.keras import datasets, utils

In [None]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.datasets import fashion_mnist
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, Input
from tensorflow.keras.optimizers import Adam


In [None]:
# Training callbacks shared by every fit() call in this notebook.
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau

# Stop training once the validation loss has failed to improve for `patience`
# epochs and restore the best weights seen, to limit overfitting.
# NOTE(review): the fit() calls in this notebook run for only 5 epochs, so a
# patience of 10 can never trigger an early stop — confirm this is intended.
early_stop = EarlyStopping(
    monitor='val_loss',
    mode='min',
    patience=10,
    restore_best_weights=True,
    verbose=1,
)

# Halve the learning rate (factor=0.5) when the validation loss plateaus for
# 5 epochs, never going below 1e-6.
rlrop = ReduceLROnPlateau(
    monitor='val_loss',
    mode='min',
    factor=0.5,
    patience=5,
    min_lr=1e-6,
    verbose=1,
)

In [None]:
# Load the Fashion-MNIST train/test splits.
(x_train, y_train), (x_test, y_test) = datasets.fashion_mnist.load_data()

# Preprocessing: scale pixel values by 1/255
# (presumably mapping 0-255 intensities into [0, 1] — TODO confirm).
x_train = x_train / 255.0
x_test = x_test / 255.0

# Append a trailing channel axis so each image matches the (28, 28, 1) input
# shape declared by the model.
x_train = x_train[..., np.newaxis]
x_test = x_test[..., np.newaxis]

In [None]:
# Build the CNN architecture shared by every model in this notebook.
def create_model(input_shape=(28, 28, 1), num_classes=10):
    """Return a fresh, uncompiled convolutional classifier.

    The network is three 3x3 convolutions (32, 64 and 64 filters) with 2x2
    max-pooling after the first two, followed by a 64-unit dense layer and a
    final dense layer that outputs raw logits (no softmax), so it must be
    paired with a loss configured with ``from_logits=True``.

    Args:
        input_shape: Shape of one input sample, without the batch dimension.
            Defaults to ``(28, 28, 1)``.
        num_classes: Number of output logits. Defaults to ``10``.

    Returns:
        A new ``tf.keras.Sequential`` model. Every call creates new layers, so
        weights are not shared between returned models.
    """
    # Resolve Keras through the already-imported `tf` package: the notebook
    # never imports the bare names `models` / `layers`, so relying on them
    # raises NameError on a fresh kernel (Restart & Run All).
    layers = tf.keras.layers

    return tf.keras.Sequential([
        # Explicit Input object instead of the `input_shape=` layer kwarg.
        tf.keras.Input(shape=input_shape),
        layers.Conv2D(32, (3, 3), activation='relu'),
        layers.MaxPooling2D((2, 2)),
        layers.Conv2D(64, (3, 3), activation='relu'),
        layers.MaxPooling2D((2, 2)),
        layers.Conv2D(64, (3, 3), activation='relu'),
        layers.Flatten(),
        layers.Dense(64, activation='relu'),
        # No activation: the model emits logits.
        layers.Dense(num_classes),
    ])

In [None]:
# Number of models to train
num_models = 5

In [None]:
# Train `num_models` separate networks and keep each one for weight averaging.
# NOTE(review): every iteration builds a new network via create_model() and no
# random seed is set in the visible cells, so the models presumably start from
# different random initialisations; averaging the weights of such models may
# not be meaningful — TODO confirm the intended merging setup.
# NOTE(review): the test split is passed as validation data, so the
# val_loss-driven callbacks observe test data — consider a held-out split.
trained_models = []

for i in range(num_models):
    print(f"Training model {i+1}/{num_models}")

    model = create_model()
    model.compile(
        optimizer='adam',
        # from_logits=True because the model's last Dense layer has no activation.
        loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
        metrics=['accuracy'],
    )

    # Train the model.
    model.fit(
        x_train,
        y_train,
        batch_size=32,
        epochs=5,
        validation_data=(x_test, y_test),
        callbacks=[early_stop, rlrop],
        verbose=1,
    )

    # Add the trained model to the list of trained models.
    trained_models.append(model)

Training model 1/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Training model 2/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Training model 3/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Training model 4/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Training model 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [None]:

# Element-wise average of the weights of several models.
def average_weights(models):
    """Average the weight tensors of ``models`` position by position.

    Args:
        models: Iterable of objects exposing ``get_weights()``, each returning
            a list of NumPy arrays. All models must return the same number of
            arrays, with matching shapes at each position.

    Returns:
        A list of NumPy arrays, one per weight tensor, where entry ``k`` is the
        mean over all models of their ``k``-th weight array.

    Raises:
        ValueError: If ``models`` is empty, if the models do not all expose the
            same number of weight arrays, or if arrays at the same position
            have different shapes.
    """
    # Call get_weights() exactly once per model: each call returns a copy of
    # every weight array, so calling it per tensor repeats that copy needlessly.
    per_model_weights = [model.get_weights() for model in models]
    if not per_model_weights:
        raise ValueError("average_weights() requires at least one model")

    expected_count = len(per_model_weights[0])
    for position, weights in enumerate(per_model_weights):
        if len(weights) != expected_count:
            raise ValueError(
                f"model {position} has {len(weights)} weight arrays, "
                f"expected {expected_count}"
            )

    # zip(*...) groups the k-th array of every model together; stacking adds a
    # leading "model" axis that the mean then reduces away.
    return [
        np.mean(np.stack(same_position), axis=0)
        for same_position in zip(*per_model_weights)
    ]

# Compute the element-wise average of the trained models' weights
avg_weights = average_weights(trained_models)




In [None]:

# Build a fresh copy of the architecture and load the averaged weights into it.
final_model = create_model()
final_model.set_weights(avg_weights)

# Compile the merged model with the same optimizer, loss and metric used for
# the individual models so it can be trained and evaluated.
final_model.compile(
    optimizer='adam',
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    metrics=['accuracy'],
)



In [None]:
# Fine-tune the weight-averaged model with the same settings used for the
# individual models. The History object returned by fit() is kept in
# `final_history` so the per-epoch metrics stay available and the cell no
# longer prints a bare `<keras...History at 0x...>` repr as its output.
# NOTE(review): the test split is used as validation data here as well, so the
# val_loss-driven callbacks observe test data — consider a held-out split.
final_history = final_model.fit(
    x_train,
    y_train,
    batch_size=32,
    epochs=5,
    validation_data=(x_test, y_test),
    callbacks=[early_stop, rlrop],
    verbose=1,
)


Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.src.callbacks.History at 0x78fd558d7bb0>

In [None]:
# Report loss and accuracy of the fine-tuned merged model on the test split.
# NOTE(review): the same split was passed as validation data during training,
# so this figure may be optimistic — confirm against a truly held-out set.
test_loss, test_acc = final_model.evaluate(
    x_test,
    y_test,
    verbose=2,
)
print(f'\nTest accuracy: {test_acc}')

313/313 - 1s - loss: 0.2595 - accuracy: 0.9037 - 612ms/epoch - 2ms/step

Test accuracy: 0.9036999940872192
