In [1]:
import warnings
warnings.filterwarnings('ignore')
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn import datasets

from keras.applications.resnet50 import ResNet50 # 這是從 resnet_builder.py 中直接 import 撰寫好的 resnet 函數
from keras.models import Model
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Conv2D, MaxPooling2D
from keras.optimizers import Adam
from keras.utils import to_categorical
from keras.preprocessing.image import ImageDataGenerator

Using TensorFlow backend.


目錄tree:"D:\3_learn\9_MachineLearning\2_ML100-Days_Data\Final\image_data"  
├── test/  
└── train/    
　　├── daisy/    
　　├── dandelion/    
　　├── rose/    
　　├── sunflower/    
　　└── tulip/

In [2]:
# Dataset locations. train/ contains one sub-folder per flower class.
train_path = "D:/image_data/train"
test_path = "D:/image_data/test"

# Spatial size every image is resized to before entering the network.
IMAGE_SIZE = (256, 256)
# Number of target classes.
NUM_CLASSES = 5
# Lower the batch size (or freeze more layers) if the GPU runs out of memory.
BATCH_SIZE = 8
# Number of leading model layers whose weights are kept fixed.
FREEZE_LAYERS = 2
# Number of training epochs.
NUM_EPOCHS = 10
# File the trained model is written to.
WEIGHTS_FINAL = 'model-resnet50-final.h5'

# Class folder names; list order defines the label index of each class.
CLASS_NAMES = ['daisy', 'dandelion', 'rose', 'sunflower', 'tulip']
assert len(CLASS_NAMES) == NUM_CLASSES, "CLASS_NAMES and NUM_CLASSES disagree"

# Fraction of train/ held out for validation. Both generators below must use
# the same value so that their 'training' and 'validation' subsets partition
# the same file list.
VALIDATION_SPLIT = 0.2

# Training images: random augmentation + rescale to [0, 1].
train_datagen = ImageDataGenerator(
    rotation_range=40,
    width_shift_range=0.2,
    height_shift_range=0.2,
    rescale=1./255,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    validation_split=VALIDATION_SPLIT)

# Validation images: rescale only. Drawing the validation subset from the
# augmenting generator would randomly distort the held-out images, making
# val_loss / val_acc noisy and not comparable between epochs.
valid_datagen = ImageDataGenerator(
    rescale=1./255,
    validation_split=VALIDATION_SPLIT)

train_batches = train_datagen.flow_from_directory(
    train_path, target_size=IMAGE_SIZE,
    classes=CLASS_NAMES,
    batch_size=BATCH_SIZE,
    shuffle=True,
    subset='training')
print(train_batches.image_shape)

valid_batches = valid_datagen.flow_from_directory(
    train_path, target_size=IMAGE_SIZE,
    classes=CLASS_NAMES,
    batch_size=BATCH_SIZE,
    shuffle=False,
    subset='validation')
print(valid_batches.image_shape)

# Show the label index assigned to each class folder.
for cls, idx in train_batches.class_indices.items():
    print('Class #{} = {}'.format(idx, cls))



Found 2260 images belonging to 5 classes.
(256, 256, 3)
Found 563 images belonging to 5 classes.
(256, 256, 3)
Class #0 = daisy
Class #1 = dandelion
Class #2 = rose
Class #3 = sunflower
Class #4 = tulip


In [3]:
# Transfer learning: start from an ImageNet-pretrained ResNet50 without its
# fully connected top, and attach a small classification head.
net = ResNet50(
    include_top=False,
    weights='imagenet',
    input_tensor=None,
    input_shape=IMAGE_SIZE + (3,),
)

# Head: flatten the convolutional feature map, apply dropout, then a softmax
# layer producing one probability per class.
flattened = Flatten()(net.output)
regularized = Dropout(0.5)(flattened)
output_layer = Dense(NUM_CLASSES, activation='softmax', name='softmax')(regularized)

model = Model(inputs=net.input, outputs=output_layer)

# The first FREEZE_LAYERS layers keep their weights; the rest are trained.
# NOTE(review): with FREEZE_LAYERS = 2 the frozen layers are input_1 and
# conv1_pad, which the printed summary lists with 0 parameters, so in effect
# the whole network is fine-tuned -- confirm this is intended.
layer_groups = (
    (False, model.layers[:FREEZE_LAYERS]),
    (True, model.layers[FREEZE_LAYERS:]),
)
for trainable_flag, group in layer_groups:
    for layer in group:
        layer.trainable = trainable_flag

# Compile after setting the trainable flags.
model.compile(
    loss='categorical_crossentropy',
    optimizer=Adam(lr=1e-5),
    metrics=['accuracy'],
)

model.summary()

# NOTE: a from-scratch CNN baseline (three Conv2D/MaxPooling stages followed by
# Dense(512) -> Dense(128) -> Dense(5) softmax, compiled with Adam()) used to be
# kept here inside a triple-quoted string. It was never executed, referenced
# `Sequential` (which this notebook does not import), and has been removed;
# recover it from version control if the baseline is needed again.


# Steps per training epoch: whole batches only (the generator reshuffles, so
# dropping the remainder is harmless); at least one step so a tiny dataset
# does not yield zero steps.
STEP_SIZE_TRAIN = max(1, train_batches.samples // train_batches.batch_size)
# Validation steps: round UP so the trailing partial batch is evaluated too.
# Floor division silently skipped the last (samples % batch_size) images.
STEP_SIZE_VALID = -(-valid_batches.samples // valid_batches.batch_size)

history = model.fit_generator(generator=train_batches,
                              steps_per_epoch=STEP_SIZE_TRAIN,
                              validation_data=valid_batches,
                              validation_steps=STEP_SIZE_VALID,
                              epochs=NUM_EPOCHS, verbose=1)

# Persist the fine-tuned model; WEIGHTS_FINAL was configured for this purpose
# but the result was never written, so a kernel restart lost the training run.
model.save(WEIGHTS_FINAL)

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, 256, 256, 3)  0                                            
__________________________________________________________________________________________________
conv1_pad (ZeroPadding2D)       (None, 262, 262, 3)  0           input_1[0][0]                    
__________________________________________________________________________________________________
conv1 (Conv2D)                  (None, 128, 128, 64) 9472        conv1_pad[0][0]                  
__________________________________________________________________________________________________
bn_conv1 (BatchNormal

In [5]:
# Run the trained model over the unlabeled test images.
test_path = "D:/image_data/test"

# Only rescaling is applied here (no augmentation). shuffle=False together
# with batch_size=1 and steps == number of images means each test image is
# predicted exactly once, in the iterator's file order.
test_batches = ImageDataGenerator(rescale=1./255).flow_from_directory(
    test_path,
    target_size=IMAGE_SIZE,
    batch_size=1,
    shuffle=False,
)

predictions = model.predict_generator(
    test_batches,
    steps=test_batches.samples,
    verbose=0,
)

Found 2000 images belonging to 1 classes.


In [0]:
import numpy as np

# Most probable class index for each prediction row.
predicted_class_indices = np.argmax(predictions, axis=1)

# File names reported by the test iterator, paired with the predicted labels.
test_filenames = test_batches.filenames
test_labels = predicted_class_indices

def getFileNameWithoutExtension(path):
  """Return the file name of `path` without directories and final extension.

  Both '\\' and '/' are treated as directory separators. Only the text after
  the last '.' of the file name is removed; a name without a '.' is returned
  unchanged.
  """
  # Normalise separators, then keep what follows the last one.
  file_name = path.replace('\\', '/').rsplit('/', 1)[-1]
  last_dot = file_name.rfind('.')
  if last_dot < 0:
    return file_name
  return file_name[:last_dot]

# Submission ids: test file names stripped of directory and extension.
test_filenames_list = [getFileNameWithoutExtension(name) for name in test_filenames]

# Predicted class indices rendered as strings.
test_labels_list = [str(label) for label in test_labels]


In [0]:
import pandas as pd
# Assemble the two-column submission table (image id, predicted class index)
# and write it as CSV without the index column.
submission = pd.DataFrame({
    'id': test_filenames_list,
    'flower_class': test_labels_list,
})

submission.to_csv('D:/image_data/submission_6.csv', index=False)