In [2]:
import tensorflow as tf
from tensorflow import keras
from PIL import Image
import matplotlib.pyplot as plt
import numpy as np
import timeit
import pandas as pd
import os
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing import image_dataset_from_directory
%matplotlib inline

In [4]:
# Image size shared by the generators' target_size and the models' input shape.
dim_x, dim_y = 128, 128

In [3]:
# List the training directory and derive each label from the filename prefix
# (the text before the first '.'): 'dog' stays 'dog', anything else becomes 'cat'.
filenames = os.listdir('./train')
labels = [
    'dog' if fname.split('.')[0] == 'dog' else 'cat'
    for fname in filenames
]

FileNotFoundError: [WinError 3] 系統找不到指定的路徑。: './train'

In [None]:
# Two identical filename/label tables built from the training listing.
_columns = {'filename': filenames, 'label': labels}
df_train = pd.DataFrame(_columns)
df_test = pd.DataFrame(_columns)
df_train.head(10)

In [None]:
# Class balance of the labelled table.
# No integer encoding is applied here: the generators are created with
# class_mode="binary", which expects the y_col values to be strings, so the
# 'dog'/'cat' labels are kept as they are.
df_train['label'].value_counts()

In [None]:
# Preview the training images at positions 10..19, titled with their label.
for idx in range(10, 20):
    sample = filenames[idx]
    image = tf.keras.preprocessing.image.load_img('./train/' + sample)
    title = 'dog' if labels[idx] == 'dog' else 'cat'
    plt.imshow(image)
    plt.title(title)
    plt.show()

In [None]:
# Stratified, shuffled 70/30 split of the labelled table; each part gets a
# fresh 0..n-1 index.
train_df, valid_df = (
    part.reset_index(drop=True)
    for part in train_test_split(
        df_train,
        test_size=0.3,
        random_state=42,
        shuffle=True,
        stratify=df_train['label'],
    )
)

In [None]:
# Augmenting generator for the training split.
# train_df is already the training part returned by train_test_split, so no
# validation_split/subset is used here: carving a second "training" subset out
# of it would silently drop 20% of the training rows.
train_data = keras.preprocessing.image.ImageDataGenerator(
    rescale=1./255,
    rotation_range=20,
    horizontal_flip=True,
    vertical_flip=True,
)
train_generator = train_data.flow_from_dataframe(
    dataframe=train_df,
    directory='./train',
    target_size=(dim_x, dim_y),
    x_col="filename",
    y_col="label",
    color_mode="rgb",
    class_mode="binary",
    batch_size=32,
    seed=42,
    shuffle=True,
)

In [None]:
# Rescale-only generator for the held-out validation split.
# valid_df is already the validation part returned by train_test_split, so no
# validation_split/subset is used here: selecting the "validation" subset of it
# would evaluate on only 20% of the held-out rows.
valid_data = keras.preprocessing.image.ImageDataGenerator(rescale=1./255)
valid_generator = valid_data.flow_from_dataframe(
    dataframe=valid_df,
    directory='./train',
    target_size=(dim_x, dim_y),
    x_col="filename",
    y_col="label",
    color_mode="rgb",
    class_mode="binary",
    batch_size=32,
    seed=42,
    shuffle=True,
)

In [None]:
# List the unlabelled test images.
# A dedicated name is used so the training `filenames` list, which is paired
# index-by-index with `labels`, is not overwritten and earlier cells stay re-runnable.
test_filenames = os.listdir("./test1")
test_df = pd.DataFrame({'filename': test_filenames})
test_df.head()

In [None]:
# Rescale-only generator over the unlabelled test images.
# shuffle=False is required: flow_from_dataframe shuffles by default, and the
# predictions are later written back to test_df row by row, so the generator
# must yield images in the same order as the rows of test_df.
test_data = keras.preprocessing.image.ImageDataGenerator(rescale=1./255)
test_generator = test_data.flow_from_dataframe(
    dataframe=test_df,
    directory='./test1',
    target_size=(dim_x, dim_y),
    x_col="filename",
    class_mode=None,
    batch_size=32,
    shuffle=False,
    seed=42,
)

Default MobileNetV2


In [5]:
# ImageNet-pretrained MobileNetV2 without its classification top.
# The backbone is left trainable (it is not frozen).
base = tf.keras.applications.MobileNetV2(
    weights='imagenet',
    include_top=False,
    input_shape=(dim_x, dim_y, 3),
)

# Pool the final feature map; no Dense head is attached to this model.
net = tf.keras.layers.GlobalAveragePooling2D()(base.output)

model = tf.keras.Model(inputs=[base.input], outputs=[net])
model.summary()

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_1 (InputLayer)           [(None, 128, 128, 3  0           []                               
                                )]                                                                
                                                                                                  
 Conv1 (Conv2D)                 (None, 64, 64, 32)   864         ['input_1[0][0]']                
                                                                                                  
 bn_Conv1 (BatchNormalization)  (None, 64, 64, 32)   128         ['Conv1[0][0]']                  
                                                                                                  
 Conv1_relu (ReLU)              (None, 64, 64, 32)   0           ['bn_Conv1[0][0]']           

In [6]:
# Write the pooled MobileNetV2 model built above to 'mobilenetv2.h5' in the working directory.
model.save('mobilenetv2.h5')



In [None]:

# `model` ends at the global-average-pooling layer, so on its own it emits a
# feature vector per image rather than a class probability. Attach a single
# sigmoid unit so the binary cross-entropy loss is computed on one probability.
# The backbone layers are shared with `model`, so fitting this classifier also
# updates the weights held by `model`.
classifier_output = tf.keras.layers.Dense(1, activation='sigmoid')(model.output)
default_classifier = tf.keras.Model(inputs=model.input, outputs=classifier_output)

default_classifier.compile(
    optimizer='adam',
    loss=tf.keras.losses.BinaryCrossentropy(),
    metrics=['accuracy']
)
history = default_classifier.fit(
    train_generator,
    epochs=10,
    validation_data=valid_generator,
    verbose=True)

Model Rework

In [None]:
def get_mobilenetV2(shape):
    """Assemble a MobileNetV2-style feature extractor layer by layer.

    The graph is a strided 3x3 stem convolution, a depthwise + 1x1 projection
    stage, sixteen ``bottleneck`` blocks and a final 1280-filter 1x1
    convolution, each convolution followed by batch normalization (and ReLU
    capped at 6 where the original stage has an activation).

    Args:
        shape: Input shape passed to ``tf.keras.layers.Input``.

    Returns:
        Tuple ``(input_node, net)``: the input tensor and the output tensor of
        the last ReLU.
    """
    layers = tf.keras.layers
    input_node = layers.Input(shape=shape)

    # Stem: 3x3 convolution with stride 2.
    net = layers.Conv2D(32, 3, (2, 2), use_bias=False, padding='same')(input_node)
    net = layers.BatchNormalization()(net)
    net = layers.ReLU(max_value=6)(net)

    # First stage: depthwise 3x3 followed by a linear 1x1 projection to 16 channels.
    net = layers.DepthwiseConv2D(3, use_bias=False, padding='same')(net)
    net = layers.BatchNormalization()(net)
    net = layers.ReLU(max_value=6)(net)
    net = layers.Conv2D(16, 1, use_bias=False, padding='same')(net)
    net = layers.BatchNormalization()(net)

    # (filters, out_ch, strides, shortcut, zero_pad) for block_1 .. block_16, in order.
    block_specs = [
        (16, 24, (2, 2), False, True),     # block_1
        (24, 24, (1, 1), True, False),     # block_2
        (24, 32, (2, 2), False, True),     # block_3
        (32, 32, (1, 1), True, False),     # block_4
        (32, 32, (1, 1), True, False),     # block_5
        (32, 64, (2, 2), False, True),     # block_6
        (64, 64, (1, 1), True, False),     # block_7
        (64, 64, (1, 1), True, False),     # block_8
        (64, 64, (1, 1), True, False),     # block_9
        (64, 96, (1, 1), False, False),    # block_10
        (96, 96, (1, 1), True, False),     # block_11
        (96, 96, (1, 1), True, False),     # block_12
        (96, 160, (2, 2), False, True),    # block_13
        (160, 160, (1, 1), True, False),   # block_14
        (160, 160, (1, 1), True, False),   # block_15
        (160, 320, (1, 1), False, False),  # block_16
    ]
    for filters, out_ch, strides, shortcut, zero_pad in block_specs:
        net = bottleneck(net, filters, out_ch, strides, shortcut=shortcut, zero_pad=zero_pad)

    # Head: 1x1 convolution up to 1280 channels.
    net = layers.Conv2D(1280, 1, use_bias=False, padding='same')(net)
    net = layers.BatchNormalization()(net)
    net = layers.ReLU(max_value=6)(net)

    return input_node, net


def bottleneck(net, filters, out_ch, strides, shortcut=True, zero_pad=False):
    """Apply one expand / depthwise / project block to ``net``.

    Args:
        net: Input tensor.
        filters: Channel count used to size the expansion; the 1x1 expansion
            convolution has ``filters * 6`` filters.
        out_ch: Number of filters of the final 1x1 projection convolution.
        strides: Strides of the 3x3 depthwise convolution.
        shortcut: When true, the block input is added to the projection output.
        zero_pad: When true, the tensor is explicitly zero-padded by one pixel
            on every side and the depthwise convolution uses 'valid' padding;
            otherwise the depthwise convolution uses 'same' padding.

    Returns:
        The output tensor of the block.
    """
    layers = tf.keras.layers
    residual = net

    # 1x1 expansion.
    x = layers.Conv2D(filters * 6, 1, use_bias=False, padding='same')(net)
    x = layers.BatchNormalization()(x)
    x = layers.ReLU(max_value=6)(x)

    # NOTE(review): this pads one pixel on every side; confirm it matches the
    # padding of the reference model whose weights are copied into this graph.
    if zero_pad:
        x = layers.ZeroPadding2D(padding=((1, 1), (1, 1)))(x)
        depthwise_padding = 'valid'
    else:
        depthwise_padding = 'same'

    # 3x3 depthwise convolution.
    x = layers.DepthwiseConv2D(3, strides=strides, use_bias=False, padding=depthwise_padding)(x)
    x = layers.BatchNormalization()(x)
    x = layers.ReLU(max_value=6)(x)

    # Linear 1x1 projection (no activation after the batch normalization).
    x = layers.Conv2D(out_ch, 1, use_bias=False, padding='same')(x)
    x = layers.BatchNormalization()(x)

    if not shortcut:
        return x
    return layers.Add()([x, residual])

In [None]:
# Build the hand-written MobileNetV2 graph and attach a single-unit sigmoid
# head for binary classification.
input_node, net = get_mobilenetV2((dim_x, dim_y, 3))
net = tf.keras.layers.GlobalAveragePooling2D()(net)
net = keras.layers.Dense(1, activation='sigmoid')(net)

rework_model = tf.keras.Model(inputs=[input_node], outputs=[net])

# Copy weights from `model` into the rework model, pairing layers by position.
# zip() stops at the shorter layer list, so any trailing rework layers without
# a counterpart in `model` keep their initial weights.
# This assumes both models list their layers in the same order.
for origin_layer, rework_layer in zip(model.layers, rework_model.layers):
    # The layers being fine-tuned are the rework model's, so mark those trainable.
    rework_layer.trainable = True
    rework_layer.set_weights(origin_layer.get_weights())

rework_model.compile(
    optimizer='adam',
    loss=tf.keras.losses.BinaryCrossentropy(),
    metrics=['accuracy']
)

history = rework_model.fit(
    train_generator,
    epochs=20,
    validation_data=valid_generator,
    # Whole batches only; derived from the generator so it tracks its batch size.
    validation_steps=valid_generator.samples // valid_generator.batch_size,
    verbose=1)


In [None]:
def show_train_history(train_history, train, validation):
    """Plot a training metric and its validation counterpart against the epoch.

    Args:
        train_history: Object whose ``history`` mapping holds one list of
            values per metric name.
        train: Key of the training metric; also used as the y-axis label.
        validation: Key of the validation metric.
    """
    for metric_key in (train, validation):
        plt.plot(train_history.history[metric_key])
    plt.title('Train History')
    plt.ylabel(train)
    plt.xlabel('Epoch')
    plt.legend(['train', 'validation'], loc='upper left')
    plt.show()


# Plot the accuracy curves.
show_train_history(history, 'accuracy', 'val_accuracy')
# Plot the loss curves.
show_train_history(history, 'loss', 'val_loss')

In [None]:
# Threshold the model outputs at 0.5 to get a 0/1 value per prediction and
# store them on the test table.
probabilities = rework_model.predict(test_generator)
predict = [1 if p > 0.5 else 0 for p in probabilities.ravel()]
test_df['category'] = predict

In [None]:
# Build the submission table: `id` is the filename text before the first '.',
# `label` is the predicted category; the helper columns are dropped.
submission_df = (
    test_df
    .assign(
        id=test_df['filename'].str.split('.').str[0],
        label=test_df['category'],
    )
    .drop(['filename', 'category'], axis=1)
)
submission_df.to_csv('submission.csv', index=False)