# Step1 : Sorting Data

In [None]:
import tensorflow as tf
from tensorflow import keras
import os
import zipfile

### 1.1 create empty folders

In [None]:
# Build the directory tree dogs-vs-cats/{train,val}/{cats,dogs}.
# os.makedirs(..., exist_ok=True) keeps this cell re-runnable: plain os.mkdir
# raises FileExistsError as soon as a folder is left over from an earlier run.
base_dir = 'dogs-vs-cats'

# top-level train and validation directories
train_dir = os.path.join(base_dir, 'train')
val_dir = os.path.join(base_dir, 'val')

# per-class training directories
train_cats_dir = os.path.join(train_dir, 'cats')
train_dogs_dir = os.path.join(train_dir, 'dogs')

# per-class validation directories
val_cats_dir = os.path.join(val_dir, 'cats')
val_dogs_dir = os.path.join(val_dir, 'dogs')

# Creating the four leaf folders also creates base_dir, train_dir and val_dir.
for class_dir in (train_cats_dir, train_dogs_dir, val_cats_dir, val_dogs_dir):
    os.makedirs(class_dir, exist_ok=True)

### 1.2 divide original training dataset into cats and dogs in train directory

In [None]:
import random
import shutil
from shutil import copyfile
from shutil import move

# Sort the images found in `train/` into the per-class training folders,
# using the part of each filename before the first '.' as the class name.
source_train = 'train'
filenames_train = os.listdir(source_train)
# random.shuffle permutes the list in place. The previous random.sample(...)
# call returned a new list that was thrown away, so nothing was shuffled.
random.shuffle(filenames_train)
print('shuffle done')

# destination folder for each recognised filename prefix
class_dirs = {'cat': train_cats_dir, 'dog': train_dogs_dir}

for filename in filenames_train:
    filesource = os.path.join(source_train, filename)
    # zero-byte files are skipped rather than copied
    if os.path.getsize(filesource) == 0:
        continue

    # files whose prefix is neither 'cat' nor 'dog' are left where they are
    target_dir = class_dirs.get(filename.split('.')[0])
    if target_dir is not None:
        copyfile(filesource, os.path.join(target_dir, filename))

print('Dividing DONE !')

### 1.3 Split the dataset (move some images from train to val)

In [None]:
split_rate = 0.3


def _move_random_subset(src_dir, dst_dir, rate):
    """Move randomly chosen files from src_dir into dst_dir.

    Files are moved until dst_dir holds at least ``rate * n`` entries, where
    ``n`` is the number of files in src_dir when the function is called.
    Files already present in dst_dir count towards that target, so calling
    this again after a completed split moves nothing.

    Args:
        src_dir: directory to take files from.
        dst_dir: directory to move files into.
        rate: fraction of the files in src_dir that dst_dir should hold.

    Returns:
        The number of files moved by this call.
    """
    candidates = os.listdir(src_dir)
    # Shuffle in place so the chosen subset is actually random; the previous
    # random.sample(...) call discarded its result and left the order as-is.
    random.shuffle(candidates)

    target_count = rate * len(candidates)
    # Count the destination once instead of re-listing it on every iteration.
    present = len(os.listdir(dst_dir))

    moved = 0
    # The second condition stops cleanly when the candidates run out instead
    # of indexing past the end of the list.
    while present + moved < target_count and moved < len(candidates):
        move(os.path.join(src_dir, candidates[moved]), dst_dir)
        moved += 1
    return moved


print('begin moving cats')

# move some cats images to val set
_move_random_subset(train_cats_dir, val_cats_dir, split_rate)

print('begin moving dogs')

# move some dogs images to val set
_move_random_subset(train_dogs_dir, val_dogs_dir, split_rate)

print('Spliting DONE !')

In [None]:
# Re-declare the dataset paths so the notebook can be resumed from this cell
# without re-running the cells above.
# os.path.join picks the right separator for the current OS; the previous
# hard-coded backslashes only formed valid paths on Windows.
base_dir = 'dogs-vs-cats'
train_dir = os.path.join(base_dir, 'train')
val_dir = os.path.join(base_dir, 'val')
train_cats_dir = os.path.join(train_dir, 'cats')
train_dogs_dir = os.path.join(train_dir, 'dogs')
val_cats_dir = os.path.join(val_dir, 'cats')
val_dogs_dir = os.path.join(val_dir, 'dogs')

In [None]:
# Report how many files sit in each split/class folder.
for description, folder in (
    ('total training cat images:', train_cats_dir),
    ('total training dog images:', train_dogs_dir),
    ('total validation cat images:', val_cats_dir),
    ('total validation dog images:', val_dogs_dir),
):
    print(description, len(os.listdir(folder)))

# Step2 : Data preprocessing

In [None]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

In [None]:
# Training images are rescaled by 1/255 and randomly augmented
# (rotation, shifts, shear, zoom, horizontal flip).
augmentation_options = dict(
    rescale=1.0 / 255,
    rotation_range=40,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest',
)
train_datagen = ImageDataGenerator(**augmentation_options)

In [None]:
# Validation images are only rescaled by 1/255; no augmentation is applied.
val_datagen = ImageDataGenerator(rescale=1.0 / 255)

In [None]:
# Stream augmented training batches from train_dir:
# 50 images per batch, resized to 150x150, with binary labels.
train_generator = train_datagen.flow_from_directory(
    directory=train_dir,
    target_size=(150, 150),
    class_mode='binary',
    batch_size=50,
)

In [None]:
# Stream validation batches from val_dir:
# 50 images per batch, resized to 150x150, with binary labels.
val_generator = val_datagen.flow_from_directory(
    directory=val_dir,
    target_size=(150, 150),
    class_mode='binary',
    batch_size=50,
)

# Step3 : Modeling and Compile

In [None]:
from tensorflow.keras.optimizers import RMSprop

In [None]:
# CNN binary classifier: four Conv2D + MaxPooling2D stages on 150x150 RGB
# input, followed by a dense head with dropout and one sigmoid output unit.
model = tf.keras.models.Sequential([
    tf.keras.layers.Conv2D(64,(3,3),activation='relu',input_shape=(150,150,3)),
    tf.keras.layers.MaxPooling2D(2,2),
    tf.keras.layers.Conv2D(32,(3,3),activation='relu'),
    tf.keras.layers.MaxPooling2D(2,2),
    tf.keras.layers.Conv2D(32,(3,3),activation='relu'),
    tf.keras.layers.MaxPooling2D(2,2),
    tf.keras.layers.Conv2D(64,(3,3),activation='relu'),
    tf.keras.layers.MaxPooling2D(2,2),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dropout(0.1),
    tf.keras.layers.Dense(1024,activation='relu'),
    tf.keras.layers.Dropout(0.4),
    tf.keras.layers.Dense(512,activation='relu'),
    tf.keras.layers.Dense(1, activation='sigmoid')
])

# `learning_rate` is the supported argument name; `lr` is a deprecated alias
# that newer Keras releases reject.
model.compile(optimizer = RMSprop(learning_rate=0.0001),
          loss = 'binary_crossentropy',
          metrics = ['accuracy'])
model.summary()

# Step4 : Training

In [None]:
# Train for 10 epochs. Step counts come from the generators (their length is
# the number of batches per pass) instead of the hard-coded 350 / 150, which
# were only right for one exact dataset size and batch size.
history = model.fit(train_generator,
                   steps_per_epoch = len(train_generator),
                   epochs = 10,
                   validation_data = val_generator,
                   validation_steps = len(val_generator),
                   verbose = 1)

In [None]:
# Persist the trained weights (weights only, not the full model).
# NOTE(review): Keras 3 expects a ".weights.h5" suffix for save_weights;
# confirm against the TensorFlow/Keras version in use.
weights_file = "model.h5"
model.save_weights(weights_file)

# Step5 : Visualize the accuracy and loss

In [None]:
# Pull the per-epoch metric curves out of the training history.
training_log = history.history
acc, val_acc = training_log["accuracy"], training_log["val_accuracy"]
loss, val_loss = training_log["loss"], training_log["val_loss"]
# x-axis for the plots: one point per recorded epoch, starting at 0
epochs = range(len(acc))

In [None]:
import matplotlib.pyplot as plt
# Training vs. validation accuracy per epoch.
for curve, curve_label in ((acc, 'train_accuracy'),
                           (val_acc, 'validation_accuracy')):
    plt.plot(epochs, curve, label=curve_label)
plt.title('accuracy')
plt.legend()

In [None]:
# Training vs. validation loss per epoch.
for curve, curve_label in ((loss, 'train_loss'),
                           (val_loss, 'validation_loss')):
    plt.plot(epochs, curve, label=curve_label)
plt.title('loss')
plt.legend()

# Step6 : Predict

In [None]:
# List the test images. os.path.join replaces the hard-coded "test\\test1",
# which is only a valid nested path on Windows.
test_filenames = os.listdir(os.path.join("test", "test1"))
print("number of test images:",len(test_filenames))

In [None]:
# Test images are only rescaled by 1/255.
test_datagen = ImageDataGenerator(rescale=1.0 / 255)

# class_mode=None yields images without labels; shuffle=False keeps the
# iteration order fixed so predictions can be matched back to
# test_generator.filenames later.
test_generator = test_datagen.flow_from_directory(
    directory="test",
    target_size=(150, 150),
    batch_size=1,
    class_mode=None,
    shuffle=False,
)

In [None]:
# Rewind the iterator to its first batch before predicting, so the output
# order lines up with test_generator.filenames.
test_generator.reset()
predict = model.predict(x=test_generator, verbose=1)

# Step 7 : Output prediction results

In [None]:
import numpy as np

# Threshold the model outputs at 0.5 to get hard class indices.
# The comparison builds a new array, so the raw probabilities in `predict`
# stay intact. The previous code thresholded through an alias and overwrote
# them in place. The indices also come out as plain ints (0 / 1) instead of
# floats (0.0 / 1.0).
predicted_class_indices = (predict.ravel() >= 0.5).astype(int).tolist()

# class_indices maps class name -> index; invert it to index -> class name.
labels = train_generator.class_indices
label = {index: name for name, index in labels.items()}

In [None]:
# Map each numeric class index to its class name.
predictions = [label[class_index] for class_index in predicted_class_indices]

In [None]:
# Associate the prediction results with their file names: take the file list
# from the test generator so it can be paired with the predictions.
filenames = test_generator.filenames

In [None]:
# Export the prediction results to a .csv file
import pandas as pd

# one column each for file name, numeric class index and class name
results_by_column = {
    "filenames": filenames,
    "predicted_class_indices": predicted_class_indices,
    "predictions": predictions,
}
test = pd.DataFrame(results_by_column)
# index=None leaves the DataFrame index out of the file
test.to_csv('test.csv', index=None, encoding='utf8')