In [4]:
import numpy as np
import tensorflow as tf
from tensorflow import keras
import pandas as pd

# Seed both TensorFlow and NumPy so weight initialisation and shuffling are reproducible.
tf.random.set_seed(42)
np.random.seed(42)

(X_train_full, y_train_full), (X_test, y_test) = keras.datasets.fashion_mnist.load_data()

# Work on a 30,000-sample subset of the training data.
X_train_full = X_train_full[:30000]
y_train_full = y_train_full[:30000]

# Keep the first 5,000 samples of the *held-out* test set. Slicing the training
# arrays here instead would make the test set identical to the validation
# split taken below (training-data leakage into the evaluation set).
X_test = X_test[:5000]
y_test = y_test[:5000]

# Scale pixel intensities from [0, 255] to [0, 1].
X_train_full = X_train_full / 255.0
X_test = X_test / 255.0

# First 5,000 training samples become the validation set, the rest is used for training.
X_valid, X_train = X_train_full[:5000], X_train_full[5000:]
y_valid, y_train = y_train_full[:5000], y_train_full[5000:]


def split_dataset(X, y):
    """Split a labelled dataset into two tasks based on the class index.

    Task B holds only the samples labelled 5 or 6 (sandals or shirts) and is
    a binary problem whose target is 1.0 for class 6 and 0.0 for class 5.
    Task A holds every other sample; its labels above 6 are shifted down by
    two so the remaining classes are numbered contiguously (7, 8, 9 -> 5, 6, 7).

    Args:
        X: array of samples, indexable with a boolean mask along axis 0.
        y: array of integer class labels aligned with ``X``.

    Returns:
        ``((X_A, y_A), (X_B, y_B))`` where ``y_B`` has dtype ``float32``.
        ``y`` itself is left untouched: boolean indexing yields copies.
    """
    is_task_B = (y == 5) | (y == 6)
    is_task_A = ~is_task_B

    # Task A: drop classes 5 and 6, then close the gap in the label numbering.
    labels_A = y[is_task_A]
    needs_shift = labels_A > 6
    labels_A[needs_shift] -= 2

    # Task B: binary target, "is it a shirt (class 6)?"
    labels_B = (y[is_task_B] == 6).astype(np.float32)

    task_A = (X[is_task_A], labels_A)
    task_B = (X[is_task_B], labels_B)
    return (task_A, task_B)

# Split every subset into task A (8-class problem without sandals/shirts)
# and task B (binary sandal-vs-shirt problem).
(X_train_A, y_train_A), (X_train_B, y_train_B) = split_dataset(X_train, y_train)
(X_valid_A, y_valid_A), (X_valid_B, y_valid_B) = split_dataset(X_valid, y_valid)
(X_test_A, y_test_A), (X_test_B, y_test_B) = split_dataset(X_test, y_test)

# The test splits are used exactly as returned by split_dataset. Replacing them
# with np.arange placeholders of hard-coded shape would discard the real
# images and labels and break as soon as the class balance of X_test changes.
# Sanity checks: the two tasks partition the test set, and X/y stay aligned.
assert len(X_test_A) + len(X_test_B) == len(X_test)
assert len(X_test_A) == len(y_test_A)
assert len(X_test_B) == len(y_test_B)


# Model A: 8-class classifier for task A (all classes except sandals and shirts).
# Architecture: Flatten -> five SELU hidden layers -> 8-way softmax.
model_A = keras.models.Sequential()
model_A.add(keras.layers.Flatten(input_shape=[28, 28]))
for n_hidden in (300, 100, 50, 50, 50):
    model_A.add(keras.layers.Dense(n_hidden, activation="selu"))
model_A.add(keras.layers.Dense(8, activation="softmax"))

# `learning_rate` is the supported argument name; `lr` is a deprecated alias.
model_A.compile(loss="sparse_categorical_crossentropy",
                optimizer=keras.optimizers.SGD(learning_rate=1e-3),
                metrics=["accuracy"])

# Train with fit(). Calling the model directly (model_A(X, y)) only runs a
# forward pass and would leave the weights at their random initialisation.
# The epoch count is a tunable choice.
history = model_A.fit(X_train_A, y_train_A, epochs=20,
                      validation_data=(X_valid_A, y_valid_A))

# Persist the trained model so it can be reused for transfer learning.
model_A.save("my_model_A.h5")


# Model B: baseline binary classifier for task B (shirt vs. sandal), trained
# from scratch with the same hidden architecture as model A.
model_B = keras.models.Sequential()
model_B.add(keras.layers.Flatten(input_shape=[28, 28]))
for n_hidden in (300, 100, 50, 50, 50):
    model_B.add(keras.layers.Dense(n_hidden, activation="selu"))
# A single-unit output needs a sigmoid: softmax over one unit is always 1.0,
# so the network could never predict the negative class.
model_B.add(keras.layers.Dense(1, activation="sigmoid"))

# `learning_rate` is the supported argument name; `lr` is a deprecated alias.
model_B.compile(loss="binary_crossentropy",
                optimizer=keras.optimizers.SGD(learning_rate=1e-3),
                metrics=["accuracy"])

# Train with fit(). Calling the model directly (model_B(X, y)) only runs a
# forward pass and does not update any weights. The epoch count is a tunable choice.
history = model_B.fit(X_train_B, y_train_B, epochs=20,
                      validation_data=(X_valid_B, y_valid_B))

model_B.summary()
# Transfer learning: reuse model A's hidden layers for task B.
# clone_model() copies only the architecture, so the weights are copied
# explicitly afterwards.
model_A_clone = keras.models.clone_model(model_A)
model_A_clone.set_weights(model_A.get_weights())

# Build on the *clone's* layers. A Sequential built from model_A.layers would
# share layer objects with model_A, so freezing or training them here would
# also alter model_A itself.
model_B_on_A = keras.models.Sequential(model_A_clone.layers[:-1])
model_B_on_A.add(keras.layers.Dense(1, activation="sigmoid"))

# Freeze the reused layers so that only the new output layer is trained.
for layer in model_B_on_A.layers[:-1]:
    layer.trainable = False
model_B_on_A.summary()

# Compile after changing `trainable`: the flag only takes effect at compile time.
# `learning_rate` is the supported argument name; `lr` is a deprecated alias.
model_B_on_A.compile(loss="binary_crossentropy",
                     optimizer=keras.optimizers.SGD(learning_rate=1e-3),
                     metrics=["accuracy"])

history = model_B_on_A.fit(X_train_B, y_train_B, epochs=5,
                           validation_data=(X_valid_B, y_valid_B))



Model: "sequential_10"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
flatten_7 (Flatten)          (None, 784)               0         
_________________________________________________________________
dense_45 (Dense)             (None, 300)               235500    
_________________________________________________________________
dense_46 (Dense)             (None, 100)               30100     
_________________________________________________________________
dense_47 (Dense)             (None, 50)                5050      
_________________________________________________________________
dense_48 (Dense)             (None, 50)                2550      
_________________________________________________________________
dense_49 (Dense)             (None, 50)                2550      
_________________________________________________________________
dense_50 (Dense)             (None, 1)               