In [1]:
import numpy as np
import tensorflow as tf
from tensorflow import keras

In [2]:
model_A= keras.models.load_model('fashion-mnist.h5')



In [3]:
model_B_on_A = keras.models.Sequential(model_A.layers[:-1])
model_B_on_A.add(keras.layers.Dense(1, activation="sigmoid"))

Note that model_A and model_B_on_A now share some layers. When you train
model_B_on_A, it will also affect model_A. If you want to avoid that, you need to clone
model_A before you reuse its layers. To do this, you must clone model A’s architecture,
then copy its weights (since clone_model() does not clone the weights):

In [4]:
model_A_clone=keras.models.clone_model(model_A)
model_A_clone.set_weights(model_A.get_weights())

Now we could just train model_B_on_A for task B, but since the new output layer was
initialized randomly, it will make large errors, at least during the first few epochs, so
there will be large error gradients that may wreck the reused weights. To avoid this,
one approach is to freeze the reused layers during the first few epochs, giving the new
layer some time to learn reasonable weights. 

In [5]:
for layer in model_B_on_A.layers[:-1]:
    layer.trainable=False
    

In [6]:
model_B_on_A.compile(loss='binary_crossentropy', optimizer='sgd', metrics=['accuracy'])

Next, we can train the model for a few epochs, then unfreeze the reused layers 

In [8]:
(x_train, y_train), (x_test, y_test) = keras.datasets.fashion_mnist.load_data()

keep_classes = [5, 6]   

train_mask = np.isin(y_train, keep_classes)
test_mask  = np.isin(y_test, keep_classes)

x_train_filtered = x_train[train_mask]
y_train_filtered = y_train[train_mask]

x_test_filtered = x_test[test_mask]
y_test_filtered = y_test[test_mask]

print("Train shape:", x_train_filtered.shape)
print("Test shape:",  x_test_filtered.shape)

Train shape: (12000, 28, 28)
Test shape: (2000, 28, 28)


In [9]:
label_map = {5: 0, 6: 1}   # 0 = Sandal, 1 = Shirt

y_train_filtered = np.array([label_map[y] for y in y_train_filtered])
y_test_filtered  = np.array([label_map[y] for y in y_test_filtered])

In [10]:
from sklearn.model_selection import train_test_split

x_train_final, x_val, y_train_final, y_val = train_test_split(
    x_train_filtered, y_train_filtered, 
    test_size=0.2, 
    random_state=42, 
    stratify=y_train_filtered  
)

print("Final Train:", x_train_final.shape)
print("Validation:", x_val.shape)
print("Test:", x_test_filtered.shape)


Final Train: (9600, 28, 28)
Validation: (2400, 28, 28)
Test: (2000, 28, 28)


In [11]:
history= model_B_on_A.fit(x_train_final , y_train_final ,epochs=10 , validation_data=(x_val, y_val))

Epoch 1/10
[1m300/300[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.9940 - loss: 8.1504 - val_accuracy: 0.9979 - val_loss: 3.6477
Epoch 2/10
[1m300/300[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.9979 - loss: 3.3674 - val_accuracy: 0.9971 - val_loss: 3.4409
Epoch 3/10
[1m300/300[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.9980 - loss: 2.8155 - val_accuracy: 0.9983 - val_loss: 2.8540
Epoch 4/10
[1m300/300[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.9981 - loss: 1.9693 - val_accuracy: 0.9975 - val_loss: 2.0067
Epoch 5/10
[1m300/300[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.9982 - loss: 1.2592 - val_accuracy: 0.9987 - val_loss: 1.4406
Epoch 6/10
[1m300/300[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.9982 - loss: 1.0831 - val_accuracy: 0.9971 - val_loss: 2.6399
Epoch 7/10
[1m300/300[0m 

In [12]:
for layer in model_B_on_A.layers[:-1]:
 layer.trainable = True


In [15]:
optimizer = keras.optimizers.SGD(learning_rate=1e-4)


In [16]:
model_B_on_A.compile(loss="binary_crossentropy", optimizer=optimizer,
 metrics=["accuracy"])


In [17]:
history= model_B_on_A.fit(x_train_final , y_train_final ,epochs=10 , validation_data=(x_val, y_val))


Epoch 1/10
[1m300/300[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - accuracy: 0.9984 - loss: 0.3219 - val_accuracy: 0.9983 - val_loss: 1.0056
Epoch 2/10
[1m300/300[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.9995 - loss: 0.1806 - val_accuracy: 0.9992 - val_loss: 0.8113
Epoch 3/10
[1m300/300[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.9999 - loss: 0.0044 - val_accuracy: 0.9987 - val_loss: 0.6974
Epoch 4/10
[1m300/300[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 1.0000 - loss: 1.6395e-12 - val_accuracy: 0.9987 - val_loss: 0.6974
Epoch 5/10
[1m300/300[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 1.0000 - loss: 1.6395e-12 - val_accuracy: 0.9987 - val_loss: 0.6974
Epoch 6/10
[1m300/300[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 1.0000 - loss: 1.6395e-12 - val_accuracy: 0.9987 - val_loss: 0.6974
Epoch 7/10
[1m3

In [18]:
model_B_on_A.evaluate(x_test, y_test)

[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.1003 - loss: 3991.5696


[3991.569580078125, 0.10029999911785126]