In [1]:
import warnings
warnings.filterwarnings('ignore')
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn import datasets

from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Conv2D, MaxPooling2D
from keras.optimizers import Adam
from keras.utils import to_categorical
from keras.preprocessing.image import ImageDataGenerator

Using TensorFlow backend.


目錄tree:"D:\3_learn\9_MachineLearning\2_ML100-Days_Data\Final\image_data"  
├── test/  
└── train/    
　　├── daisy/    
　　├── dandelion/    
　　├── rose/    
　　├── sunflower/    
　　└── tulip/

In [2]:
# Dataset locations. train_path holds one sub-folder per flower class.
train_path = "D:/image_data/train" 
test_path = "D:/image_data/test" 

# Settings shared by the training and validation iterators, defined once so
# the two flow_from_directory calls cannot drift apart.
FLOWER_CLASSES = ['daisy', 'dandelion', 'rose', 'sunflower', 'tulip']
IMAGE_SIZE = (32, 32)
BATCH_SIZE = 10
VALIDATION_SPLIT = 0.2

# Training images: scale pixels to [0, 1] and apply random augmentation.
train_datagen = ImageDataGenerator(
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    rescale=1./255,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    validation_split=VALIDATION_SPLIT)

# Validation images: scale only. Random augmentation here would make the
# validation metrics noisy and not comparable between epochs. The same
# validation_split value is used so that the 'training' and 'validation'
# subsets stay complementary (the split is derived from the directory
# listing, not from the generator's augmentation settings).
valid_datagen = ImageDataGenerator(
    rescale=1./255,
    validation_split=VALIDATION_SPLIT)

train_batches = train_datagen.flow_from_directory(
    train_path, target_size = IMAGE_SIZE,
    classes = FLOWER_CLASSES,
    batch_size = BATCH_SIZE,
    subset = 'training')
print(train_batches.image_shape)

valid_batches = valid_datagen.flow_from_directory(
    train_path, target_size = IMAGE_SIZE,
    classes = FLOWER_CLASSES,
    batch_size = BATCH_SIZE,
    subset = 'validation')
print(valid_batches.image_shape)
print(train_batches)



Found 2260 images belonging to 5 classes.
(32, 32, 3)
Found 563 images belonging to 5 classes.
(32, 32, 3)
<keras_preprocessing.image.directory_iterator.DirectoryIterator object at 0x0000020A785DB940>


In [3]:
# Take the network's input and output sizes from the training iterator so the
# model always matches the data configuration instead of repeating literals.
input_shape = train_batches.image_shape
num_classes = len(train_batches.class_indices)

model = Sequential()

# Convolution block 1: two 3x3 convolutions with 32 filters, then 2x2 pooling.
model.add(Conv2D(32, (3, 3), padding='same',
                 input_shape=input_shape))
model.add(Activation('relu'))
model.add(Conv2D(32, (3, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))

# Convolution block 2: same layout with 64 filters.
model.add(Conv2D(64, (3, 3), padding='same'))
model.add(Activation('relu'))
model.add(Conv2D(64, (3, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))

# Classifier head: one hidden dense layer, then a softmax over the classes.
model.add(Flatten())
model.add(Dense(512))
model.add(Activation('relu'))
model.add(Dropout(0.5))
model.add(Dense(num_classes))
model.add(Activation('softmax'))
model.summary()

model.compile(loss='categorical_crossentropy',
              optimizer=Adam(),
              metrics=['accuracy'])


# Ceiling division: floor division would drop the trailing partial batch, so
# some images would be left out of every training / validation pass.
STEP_SIZE_TRAIN = (train_batches.samples + train_batches.batch_size - 1) // train_batches.batch_size
STEP_SIZE_VALID = (valid_batches.samples + valid_batches.batch_size - 1) // valid_batches.batch_size

# NOTE(review): fit_generator is kept because this notebook appears to run on
# standalone Keras with the TensorFlow backend; newer releases accept the
# generator directly in model.fit - confirm before upgrading.
history = model.fit_generator(generator = train_batches, 
                                  steps_per_epoch = STEP_SIZE_TRAIN, 
                                  validation_data = valid_batches,
                                  validation_steps = STEP_SIZE_VALID, 
                                  epochs = 30, verbose = 1)

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_1 (Conv2D)            (None, 32, 32, 32)        896       
_________________________________________________________________
activation_1 (Activation)    (None, 32, 32, 32)        0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 30, 30, 32)        9248      
_________________________________________________________________
activation_2 (Activation)    (None, 30, 30, 32)        0         
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 15, 15, 32)        0         
_________________________________________________________________
dropout_1 (Dropout)  

In [4]:
# Test images are only rescaled. They are read one at a time with shuffling
# turned off so the prediction rows can later be paired with
# test_batches.filenames.
test_path = "D:/image_data/test" 
test_datagen = ImageDataGenerator(rescale=1./255)
test_batches = test_datagen.flow_from_directory(
    test_path,
    target_size=(32,32),
    batch_size=1,
    shuffle=False)

# With batch_size=1, one step per sample covers every test image.
n_test_steps = test_batches.samples
predictions = model.predict_generator(test_batches, steps=n_test_steps, verbose=0)

Found 2000 images belonging to 1 classes.


In [0]:
import numpy as np

# For each prediction row, keep the column index with the highest score.
predicted_class_indices = np.argmax(predictions, axis=1)

# File names come from the test iterator; the labels are the predicted indices.
test_filenames = test_batches.filenames
test_labels = predicted_class_indices

def getFileNameWithoutExtension(path):
  """Return the last component of `path` with its final extension removed.

  Both backslash and forward slash are treated as directory separators.
  Only the text after the last '.' is dropped, so 'a.b.jpg' becomes 'a.b';
  a name without any '.' is returned unchanged.
  """
  base_name = path.rsplit('\\', 1)[-1].rsplit('/', 1)[-1]
  stem, dot, _extension = base_name.rpartition('.')
  # rpartition leaves `dot` empty when the name contains no '.' at all.
  return stem if dot else base_name

# Submission ids: each test file name without its directory and extension.
test_filenames_list = [getFileNameWithoutExtension(name) for name in test_filenames]

# Submission labels: the predicted class indices converted to strings.
test_labels_list = [str(label) for label in test_labels]


In [0]:
import pandas as pd
# Build the submission table in one step. Unlike concatenating two
# single-column frames, the constructor raises if the id and label lists have
# different lengths instead of silently padding the shorter one with NaN.
submission = pd.DataFrame(
    {'id': test_filenames_list, 'flower_class': test_labels_list},
    columns=['id', 'flower_class'])

# Write without the index so the file contains only the two columns above.
submission.to_csv('D:/image_data/submission_3.csv', index = False)