# 一、查看Kaggle环境
## 1、查看操作系统信息

查看Linux内核版本

In [None]:
! uname -r

查看Linux发行版信息

In [None]:
! lsb_release -a

## 2、查看Python信息

In [None]:
! python --version

## 3、查看TensorFlow工具包信息

In [None]:
! pip show TensorFlow

# 二、导入必要的工具包

In [None]:
import os  # 文件/目录方模块
import cv2  # OpenCV模块
import random  # 随机模块
import numpy as np  # 数值计算模块
import pandas as pd  # 结构化数据分析模块
import seaborn as sns  # 统计绘图模块
import matplotlib.pyplot as plt  # 绘图模块
import zipfile  # zip文件处理模块
from sklearn.model_selection import train_test_split  # 样本分割模块
from sklearn.metrics import classification_report  # 分类评价模块
from tqdm import tqdm  # 进度条模块
from random import shuffle # 混洗模块
from IPython.display import SVG
from keras.utils.vis_utils import model_to_dot
from keras.utils import plot_model
from keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.optimizers import RMSprop
%matplotlib inline 

# 三、参数设置
- AlexNet网络模型的输入图像高度和宽度为227像素.
- 样本文件位于/kaggle/input/dogs-vs-cats/train.zip压缩包之中
- 我们有两个类别需要识别：狗、猫
- 训练趟数设定为100趟
- 每批训练样本数量为128

In [None]:
# Side length in pixels of the square input image; used for both height and width.
IMG_SIZE = 227
# Zip archive holding the training images. The identifier's lowercase "l"
# (SAMPlE) is kept as-is because the zip-preview cell refers to this exact name.
SAMPlE_ZIP_FILE = '/kaggle/input/dogs-vs-cats/train.zip'
# Number of target classes (dog, cat).
NUM_CLASSES = 2
# Number of training epochs passed to model.fit.
EPOCHS = 100
# Number of samples per batch produced by the data generators.
BATCH_SIZE = 128
# Directory whose files are listed to build the sample table and that the
# generators read images from.
SAMPLE_PATH = '/kaggle/working/train/'

# 四、了解Dogs vs. Cats数据集
## 1、查看Kaggle数据集目录结构
　　Kaggle数据集保存在/kaggle/input目录中，本例的Dogs vs. Cats数据集保存在/kaggle/input/dogs-vs-cats目录中，其中train.zip为训练样本图像压缩文件，test1.zip为测试样本图像压缩文件，sampleSubmission.csv为提交文件的样例。

In [None]:
# Lazily enumerate the full path of every file below the Kaggle input
# directory, then print them one per line.
input_file_paths = (
    os.path.join(root, name)
    for root, _, names in os.walk('/kaggle/input')
    for name in names
)
for path in input_file_paths:
    print(path)

## 2、查看train.zip内容

In [None]:
# Preview the training archive: count its image files and display the first one.
# The archive is opened in a context manager so the handle is always released.
with zipfile.ZipFile(SAMPlE_ZIP_FILE, 'r') as sample_zipfile:
    # Keep only real files. Directory entries are filtered explicitly instead of
    # assuming that exactly the first entry of the listing is a directory.
    sample_image_file_list = [
        info.filename
        for info in sample_zipfile.infolist()
        if not info.is_dir()
    ]
    print('样本图像文件总数：', len(sample_image_file_list))
    # Read the raw bytes of the first image into memory.
    data = sample_zipfile.read(sample_image_file_list[0])

# Decode the in-memory buffer as a colour image (OpenCV yields BGR order) ...
image = cv2.imdecode(np.frombuffer(data, np.uint8), cv2.IMREAD_COLOR)
# ... and convert to RGB, the channel order matplotlib expects.
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
plt.imshow(image)

## 3、解压缩样本文件

In [None]:
# Extract the training archive next to SAMPLE_PATH so that the directory read
# by the following cells is the one that gets populated, independent of the
# current working directory.
# NOTE(review): assumes the archive's top-level folder has the same name as the
# last component of SAMPLE_PATH ("train") -- confirm against the archive listing.
extract_root = os.path.dirname(SAMPLE_PATH.rstrip('/')) or "."
with zipfile.ZipFile(SAMPlE_ZIP_FILE, "r") as z:
    z.extractall(extract_root)

In [None]:
! ls

## 4、训练集、校验集、测试集准备
### a. 生成样本的pandas数据表（DataFrame）

In [None]:
# Build a table with one row per extracted sample file.
# The label is taken from the part of the file name before the first dot:
# "dog" maps to "dog", every other prefix is labelled "cat".
sample_image_file_list = os.listdir(SAMPLE_PATH)
categories = [
    "dog" if name.split(".")[0] == "dog" else "cat"
    for name in sample_image_file_list
]
df = pd.DataFrame(
    {
        "filename": sample_image_file_list,
        "category": categories,
    }
)

### b. 输出前10个样本信息

In [None]:
df.head(10)

### c. 绘制各分类样本数量柱形图

In [None]:
# Bar chart of the number of samples per category.
category_counts = df["category"].value_counts()
category_counts.plot.bar()

# Per-class totals shown in the title; the y-axis label is the total row count.
cats = int((df["category"] == "cat").sum())
dogs = int((df["category"] == "dog").sum())
plt.title(f"{cats} cats | {dogs} dogs")
plt.ylabel(df.shape[0])
plt.show()

### d. 输出后10个样本信息

In [None]:
df.tail(10)

### e. 分割训练集、校验集

In [None]:
# Hold out 20% of the samples for validation (fixed seed for reproducibility)
# and renumber both partitions from zero, discarding the original index.
train_df, validate_df = (
    part.reset_index(drop=True)
    for part in train_test_split(df, test_size=0.2, random_state=42)
)

### f. 创建训练集生成器

In [None]:
# 数据增强
train_datagen = ImageDataGenerator(rotation_range=20,
                                   rescale=1./255,
                                   shear_range=0.15,
                                   zoom_range=0.15,
                                   horizontal_flip=True,
                                   width_shift_range=0.2,
                                   height_shift_range=0.2,
                                   fill_mode="nearest")

# 数据集生成器
train_generator = train_datagen.flow_from_dataframe(
    train_df, 
    SAMPLE_PATH, 
    x_col='filename',
    y_col='category',
    target_size=(IMG_SIZE,IMG_SIZE),  # (227, 227)
    class_mode='binary',
    batch_size=BATCH_SIZE,
    color_mode="rgb",
    shuffle=True
)

### g. 创建校验集生成器

In [None]:
# Validation images are only rescaled by 1/255; no augmentation is configured.
validation_datagen = ImageDataGenerator(rescale=1./255)

# Batch generator over the validation table, configured with the same image
# size, colour mode, label mode and batch size as the training generator.
validation_flow_options = dict(
    x_col='filename',
    y_col='category',
    target_size=(IMG_SIZE, IMG_SIZE),
    color_mode="rgb",
    class_mode='binary',
    batch_size=BATCH_SIZE,
    shuffle=True,
)
validation_generator = validation_datagen.flow_from_dataframe(
    validate_df,
    SAMPLE_PATH,
    **validation_flow_options,
)

# 五、定义AlexNet网络模型

In [None]:
# 导入必须的包
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input
from tensorflow.keras.layers import Conv2D
from tensorflow.keras.layers import MaxPooling2D
from tensorflow.keras.layers import Activation
from tensorflow.keras.layers import Flatten
from tensorflow.keras.layers import Dropout
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import BatchNormalization


# 定义AlexNet类
class AlexNet:
    """Factory for an AlexNet-style binary classifier built with the Keras functional API."""

    @staticmethod
    def build(height, width, channel):
        """
        Create the network for inputs of shape (height, width, channel).

        The model ends in a single sigmoid unit, i.e. it produces one value
        per sample for binary classification.

        Args:
            height:  height of the input samples
            width:   width of the input samples
            channel: number of channels of the input samples
        Returns:
            The (uncompiled) Keras Model object.
        """

        # Input tensor. The shape is declared here only; the first convolution
        # does not need a separate `input_shape` argument in the functional API.
        shape = (height, width, channel)
        inputs = Input(shape, name="input")

        # Block 1: CONV => LRN => RELU => POOL
        x = Conv2D(filters=96,
                   kernel_size=(11, 11),
                   strides=(4, 4),
                   padding="valid",
                   name="conv_1")(inputs)
        # Local response normalization.
        # NOTE(review): TensorFlow documents depth_radius as the half-width of
        # the normalization window -- confirm that 5 is the intended value.
        x = tf.nn.local_response_normalization(x, depth_radius=5, bias=2, alpha=0.0001, beta=0.75)
        x = Activation("relu", name="relu_1")(x)
        x = MaxPooling2D(pool_size=(3, 3), strides=(2, 2), name="max_pooling_1")(x)

        # Block 2: CONV => LRN => RELU => POOL
        x = Conv2D(filters=256,
                   kernel_size=(5, 5),
                   padding="same",
                   name="conv_2")(x)
        # Local response normalization (same settings as block 1).
        x = tf.nn.local_response_normalization(x, depth_radius=5, bias=2, alpha=0.0001, beta=0.75)
        x = Activation("relu", name="relu_2")(x)
        x = MaxPooling2D(pool_size=(3, 3), strides=(2, 2), name="max_pooling_2")(x)

        # Block 3: CONV => RELU
        x = Conv2D(filters=384,
                   kernel_size=(3, 3),
                   padding="same",
                   name="conv_3")(x)
        x = Activation("relu", name="relu_3")(x)

        # Block 4: CONV => RELU
        x = Conv2D(filters=384,
                   kernel_size=(3, 3),
                   padding="same",
                   name="conv_4")(x)
        x = Activation("relu", name="relu_4")(x)

        # Block 5: CONV => RELU => POOL
        x = Conv2D(filters=256,
                   kernel_size=(3, 3),
                   padding="same",
                   name="conv_5")(x)
        x = Activation("relu", name="relu_5")(x)
        x = MaxPooling2D(pool_size=(3, 3), strides=(2, 2), name="max_pooling_3")(x)

        # Block 6: FLATTEN => DENSE => RELU => DROPOUT
        x = Flatten()(x)
        x = Dense(4096, name="fully_connect_1")(x)
        x = Activation("relu", name="relu_6")(x)
        x = Dropout(0.5, name="drop_1")(x)

        # Block 7: DENSE => RELU => DROPOUT
        x = Dense(4096, name="fully_connect_2")(x)
        x = Activation("relu", name="relu_7")(x)
        x = Dropout(0.5, name="drop_2")(x)

        # Block 8: DENSE(1) => SIGMOID (single-unit binary output, not softmax)
        x = Dense(1, name="fully_connect_3")(x)
        outputs = Activation("sigmoid", name="sigmoid")(x)

        return Model(inputs, outputs)

# 六、训练AlexNet模型

In [None]:
import json
import math

from tensorflow.keras.optimizers import SGD
from tensorflow.keras.callbacks import ReduceLROnPlateau

# Build and compile the network for 3-channel IMG_SIZE x IMG_SIZE inputs.
model = AlexNet.build(IMG_SIZE, IMG_SIZE, 3)
opt = SGD(learning_rate=0.01, momentum=0.9, decay=0.005)
model.compile(optimizer=opt, loss='binary_crossentropy', metrics = ['accuracy'])

# Multiply the learning rate by 0.1 when the validation loss has not improved
# for 3 epochs, never going below 0.0001.
lr_reducer = ReduceLROnPlateau(monitor='val_loss', 
                               factor=0.1, 
                               patience=3, 
                               min_lr=0.0001, 
                               cooldown=1,
                               verbose=1)

# Step counts must be whole numbers: round up so the final, partially filled
# batch of each pass is still included.
history = model.fit(
    train_generator,
    steps_per_epoch=math.ceil(len(train_df) / BATCH_SIZE),
    epochs=EPOCHS,
    validation_data=validation_generator,
    validation_steps=math.ceil(len(validate_df) / BATCH_SIZE),
    callbacks=[lr_reducer]
)

# Save the trained model to a file.
print("[信息] 保存模型...")
MODEL_FILE = 'AlexNet.h5'
model.save(MODEL_FILE, overwrite=True)

# Save the training history as a real JSON object (metric name -> list of
# per-epoch values). Values are converted to plain Python floats first so that
# non-builtin numeric types in the history do not break serialization.
json_path = "AlexNet_Dogs_vs_Cat_Training_History.json"
history_for_json = {
    metric: [float(value) for value in values]
    for metric, values in history.history.items()
}
with open(json_path, "w", encoding="utf-8") as f:
    json.dump(history_for_json, f)


# 七、绘制AlexNet模型训练性能图

In [None]:
import matplotlib.pyplot as plt


# Two side-by-side panels: accuracy on the left, loss on the right.
figure, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 4))
figure.suptitle('AlexNet Training Performance', fontsize=12)
figure.subplots_adjust(top=0.85, wspace=0.3)

# Use the number of epochs actually recorded rather than the configured EPOCHS,
# so the x values always match the history length (e.g. after an interrupted run).
trained_epochs = len(history.history['loss'])
epoch_list = list(range(1, trained_epochs + 1))
epoch_ticks = np.arange(0, trained_epochs + 1, 5)

# (axis, training-metric key, validation-metric key, panel title)
panels = (
    (ax1, 'accuracy', 'val_accuracy', 'Accuracy'),
    (ax2, 'loss', 'val_loss', 'Loss'),
)
for axis, train_key, val_key, title in panels:
    axis.plot(epoch_list, history.history[train_key], label=f'Train {title}')
    axis.plot(epoch_list, history.history[val_key], label=f'Validation {title}')
    axis.set_xticks(epoch_ticks)
    axis.set_ylabel(f'{title} Value')
    axis.set_xlabel('Epoch #')
    axis.set_title(title)
    axis.legend(loc="best")

# Write the figure to disk before displaying it.
GRAPH_FILE = 'AlexNet_Training_Performance.pdf'
figure.savefig(GRAPH_FILE)
plt.show()