In [1]:
import tensorflow as tf
from tensorflow import keras
import numpy as np

In [3]:
# Load data: Fashion-MNIST comes back as a (train, test) pair of (images, labels).
from tensorflow.keras.datasets import fashion_mnist
(x_train_set, y_train_set), (x_test, y_test) = fashion_mnist.load_data()

# Split data: carve a validation set out of the training set. No test_size is
# given, so scikit-learn's default applies; random_state pins the shuffle.
from sklearn.model_selection import train_test_split
x_train, x_valid, y_train, y_valid = train_test_split(
    x_train_set, y_train_set, random_state=1
)

# Preprocessing: divide every image array by 255.0 (same scaling on all splits).
x_train, x_valid, x_test = (
    images / 255.0 for images in (x_train, x_valid, x_test)
)

In [5]:
def split_data(x, y):
    """Split a labelled dataset into a task-A part and a task-B part.

    Task B holds the samples labelled 4 or 5; its labels become float32
    values (1.0 for class 5, 0.0 for class 4). Task A holds every other
    sample; its labels above 5 are shifted down by 2 so the remaining
    classes are numbered contiguously.

    Args:
        x: NumPy array of samples, indexed along the first axis.
        y: NumPy array of integer class labels aligned with ``x``.

    Returns:
        ``((x_A, y_A), (x_B, y_B))``. ``y`` itself is not modified.
    """
    is_task_b = np.isin(y, (4, 5))
    is_task_a = ~is_task_b

    # Boolean indexing returns a copy, so the in-place shift below never
    # touches the caller's label array.
    labels_a = y[is_task_a]
    labels_a[labels_a > 5] -= 2

    labels_b = (y[is_task_b] == 5).astype(np.float32)
    return (x[is_task_a], labels_a), (x[is_task_b], labels_b)
# Split the train, validation and test sets into their task-A and task-B parts.
(x_train_A, y_train_A), (x_train_B, y_train_B) = split_data(x_train, y_train)
(x_valid_A, y_valid_A), (x_valid_B, y_valid_B) = split_data(x_valid, y_valid)
(x_test_A, y_test_A), (x_test_B, y_test_B) = split_data(x_test, y_test)

# Keep only the first 100 task-B training examples (inputs and labels alike).
x_train_B, y_train_B = x_train_B[:100], y_train_B[:100]

In [7]:
# Sanity check: shape of the task-A training inputs and the first ten task-A labels.
x_train_A.shape, y_train_A[:10]

((35968, 28, 28), array([0, 7, 6, 2, 2, 1, 1, 1, 6, 7], dtype=uint8))

In [8]:
# Sanity check: shape of the task-B training inputs and the first ten task-B labels.
x_train_B.shape, y_train_B[:10]

((100, 28, 28), array([1., 0., 0., 0., 1., 1., 1., 0., 0., 0.], dtype=float32))

In [10]:
# First set up model A, which will be trained on dataset A.

# Reset Keras' global state, then seed NumPy and TensorFlow before any model is built.
keras.backend.clear_session()
np.random.seed(1)
tf.random.set_seed(1)

In [11]:
# Model A: flatten the 28x28 input, stack four ReLU hidden layers, and finish
# with an 8-unit softmax output.
model_A = keras.models.Sequential([keras.layers.Flatten(input_shape=[28, 28])])
for hidden_i in (200, 150, 100, 50):
    model_A.add(keras.layers.Dense(units=hidden_i, activation='relu'))
model_A.add(keras.layers.Dense(units=8, activation='softmax'))

In [12]:
# Print model_A's layer-by-layer summary (output shapes and parameter counts).
model_A.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 flatten (Flatten)           (None, 784)               0         
                                                                 
 dense (Dense)               (None, 200)               157000    
                                                                 
 dense_1 (Dense)             (None, 150)               30150     
                                                                 
 dense_2 (Dense)             (None, 100)               15100     
                                                                 
 dense_3 (Dense)             (None, 50)                5050      
                                                                 
 dense_4 (Dense)             (None, 8)                 408       
                                                                 
Total params: 207,708
Trainable params: 207,708
Non-trai

In [13]:
# Compile model_A: sparse categorical cross-entropy (integer class labels),
# SGD with learning rate 1e-3, and accuracy as the reported metric.
model_A.compile(
    optimizer=keras.optimizers.SGD(learning_rate=1e-3),
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

In [14]:
# Train model_A on task A for 20 epochs, validating on the task-A validation split.
train = model_A.fit(
    x=x_train_A,
    y=y_train_A,
    validation_data=(x_valid_A, y_valid_A),
    epochs=20,
)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [15]:
# Evaluate model_A on the task-A test split; displays [loss, accuracy] per the compile settings.
model_A.evaluate(x_test_A, y_test_A)



[0.37736156582832336, 0.8666250109672546]

In [16]:
# Save the trained model_A to disk so it can be reloaded for transfer learning.
model_A.save('model_A.h5')

In [19]:
# Model B: a fresh network for the binary task-B labels -- flatten the 28x28
# input, four ReLU hidden layers, and a single sigmoid output unit.

model_B = keras.models.Sequential([keras.layers.Flatten(input_shape=[28, 28])])
for hidden_i in (300, 150, 100, 80):
    model_B.add(keras.layers.Dense(units=hidden_i, activation='relu'))
model_B.add(keras.layers.Dense(units=1, activation='sigmoid'))

In [20]:
# Print model_B's layer-by-layer summary (output shapes and parameter counts).
model_B.summary()

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 flatten_2 (Flatten)         (None, 784)               0         
                                                                 
 dense_10 (Dense)            (None, 300)               235500    
                                                                 
 dense_11 (Dense)            (None, 150)               45150     
                                                                 
 dense_12 (Dense)            (None, 100)               15100     
                                                                 
 dense_13 (Dense)            (None, 80)                8080      
                                                                 
 dense_14 (Dense)            (None, 1)                 81        
                                                                 
Total params: 303,911
Trainable params: 303,911
Non-tr

In [21]:
# Compile model_B: binary cross-entropy, SGD with learning rate 1e-3, and
# accuracy as the reported metric.
model_B.compile(
    optimizer=keras.optimizers.SGD(learning_rate=1e-3),
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

In [22]:
# Train model_B on the (truncated) task-B training set for 20 epochs,
# validating on the task-B validation split.
train = model_B.fit(
    x=x_train_B,
    y=y_train_B,
    validation_data=(x_valid_B, y_valid_B),
    epochs=20,
)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [23]:
# Evaluate model_B on the task-B test split; displays [loss, accuracy] per the compile settings.
model_B.evaluate(x_test_B, y_test_B)



[0.5798544883728027, 0.6434999704360962]

In [24]:
# Transfer learning

# 1. Load model_A back from the file it was saved to (rebinds the name model_A).
model_A = keras.models.load_model('model_A.h5')

In [25]:
# 2. Reuse model_A's lower layers (everything except its last two layers).
#
# Passing model_A.layers[:-2] straight to Sequential would hand model_tr the
# very same layer objects, so freezing or fine-tuning model_tr would also
# change model_A. Clone the architecture and copy the weights across so that
# model_A itself stays untouched.
model_A_clone = keras.models.clone_model(model_A)
model_A_clone.set_weights(model_A.get_weights())

model_tr = keras.models.Sequential(model_A_clone.layers[:-2])

In [26]:
# Print the summary of model_tr, which so far holds only the reused layers.
model_tr.summary()

Model: "sequential_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 flatten (Flatten)           (None, 784)               0         
                                                                 
 dense (Dense)               (None, 200)               157000    
                                                                 
 dense_1 (Dense)             (None, 150)               30150     
                                                                 
 dense_2 (Dense)             (None, 100)               15100     
                                                                 
Total params: 202,250
Trainable params: 202,250
Non-trainable params: 0
_________________________________________________________________


In [27]:
# 3. Replace model_A's upper layers

# Append a new 80-unit ReLU layer and a single sigmoid output unit on top of the reused layers.
model_tr.add(keras.layers.Dense(80, activation='relu'))
model_tr.add(keras.layers.Dense(1, activation='sigmoid'))

In [28]:
# Print the summary of model_tr now that the two new layers have been added.
model_tr.summary()

Model: "sequential_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 flatten (Flatten)           (None, 784)               0         
                                                                 
 dense (Dense)               (None, 200)               157000    
                                                                 
 dense_1 (Dense)             (None, 150)               30150     
                                                                 
 dense_2 (Dense)             (None, 100)               15100     
                                                                 
 dense_15 (Dense)            (None, 80)                8080      
                                                                 
 dense_16 (Dense)            (None, 1)                 81        
                                                                 
Total params: 210,411
Trainable params: 210,411
Non-tr

In [29]:
# 4. Initial training (epochs 1-4)

# Mark every layer except the last two as non-trainable.
for layer in model_tr.layers[:-2]:
    layer.trainable = False

In [30]:
# Print the summary with the per-layer Trainable column to check the flags set above.
model_tr.summary(show_trainable=True)

Model: "sequential_3"
____________________________________________________________________________
 Layer (type)                Output Shape              Param #   Trainable  
 flatten (Flatten)           (None, 784)               0         N          
                                                                            
 dense (Dense)               (None, 200)               157000    N          
                                                                            
 dense_1 (Dense)             (None, 150)               30150     N          
                                                                            
 dense_2 (Dense)             (None, 100)               15100     N          
                                                                            
 dense_15 (Dense)            (None, 80)                8080      Y          
                                                                            
 dense_16 (Dense)            (None, 1)                

In [36]:
# Compile model_tr: binary cross-entropy, SGD with learning rate 1e-3, and
# accuracy as the reported metric.
model_tr.compile(
    optimizer=keras.optimizers.SGD(learning_rate=1e-3),
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

In [37]:
# Train model_tr on task B for 4 epochs, validating on the task-B validation split.
train = model_tr.fit(
    x=x_train_B,
    y=y_train_B,
    validation_data=(x_valid_B, y_valid_B),
    epochs=4,
)

Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4


In [38]:
# 5. Later training (epochs 5-20)

# Mark every layer except the last two as trainable again.
for layer in model_tr.layers[:-2]:
    layer.trainable=True

In [39]:
# Print the summary with the per-layer Trainable column to check the flags set above.
model_tr.summary(show_trainable=True)

Model: "sequential_3"
____________________________________________________________________________
 Layer (type)                Output Shape              Param #   Trainable  
 flatten (Flatten)           (None, 784)               0         Y          
                                                                            
 dense (Dense)               (None, 200)               157000    Y          
                                                                            
 dense_1 (Dense)             (None, 150)               30150     Y          
                                                                            
 dense_2 (Dense)             (None, 100)               15100     Y          
                                                                            
 dense_15 (Dense)            (None, 80)                8080      Y          
                                                                            
 dense_16 (Dense)            (None, 1)                

In [40]:
# Compile model_tr again with the same settings: binary cross-entropy, SGD
# with learning rate 1e-3, and accuracy as the reported metric.
model_tr.compile(
    optimizer=keras.optimizers.SGD(learning_rate=1e-3),
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

In [41]:
# Train model_tr on task B for a further 16 epochs, validating on the task-B
# validation split.
train = model_tr.fit(
    x=x_train_B,
    y=y_train_B,
    validation_data=(x_valid_B, y_valid_B),
    epochs=16,
)

Epoch 1/16
Epoch 2/16
Epoch 3/16
Epoch 4/16
Epoch 5/16
Epoch 6/16
Epoch 7/16
Epoch 8/16
Epoch 9/16
Epoch 10/16
Epoch 11/16
Epoch 12/16
Epoch 13/16
Epoch 14/16
Epoch 15/16
Epoch 16/16


In [42]:
# Evaluate model_tr on the task-B test split; displays [loss, accuracy] per the compile settings.
model_tr.evaluate(x_test_B, y_test_B)



[0.33649739623069763, 0.9185000061988831]

In [None]:
# Evaluate model_B on the task-B test split. evaluate needs both the inputs and
# the labels to compute loss and accuracy; the test arrays are named
# x_test_B / y_test_B.
model_B.evaluate(x_test_B, y_test_B)