# 调用环境

In [None]:
import matplotlib as mpl
import matplotlib.pyplot as plt
%matplotlib inline
import numpy as np
import sklearn
import pandas as pd
import os
import sys
import time
import tensorflow as tf

from tensorflow import keras
# import keras 可以引用原生keras

## 打印版本

In [None]:
# Report the TensorFlow and interpreter versions, then the name and version
# of every library this notebook relies on.
print(tf.__version__)
print(sys.version_info)
for lib in (mpl, np, pd, sklearn, tf, keras):
    print(lib.__name__, lib.__version__)

# 引入数据集

In [None]:
# Keras ships with a number of ready-made datasets; Fashion-MNIST is one of them.
fashion_mnist = keras.datasets.fashion_mnist
(x_train_all, y_train_all), (x_test, y_test) = fashion_mnist.load_data()

# Hold out the first 5000 samples for validation; the remainder is used for training.
x_valid, y_valid = x_train_all[:5000], y_train_all[:5000]
x_train, y_train = x_train_all[5000:], y_train_all[5000:]

# Print the (features, labels) shapes of the validation, training and test subsets.
for features, labels in ((x_valid, y_valid), (x_train, y_train), (x_test, y_test)):
    print(features.shape, labels.shape)

# 可视化一个样本

In [None]:
def show_single_image(img_arr):
    """Render one image array with the 'binary' colormap and display the figure.

    Args:
        img_arr: Image data accepted by ``plt.imshow``.
    """
    render_options = {'cmap': 'binary'}
    plt.imshow(img_arr, **render_options)
    plt.show()
    
# Preview the first sample of the training set.
show_single_image(x_train[0])

# 可视化一组样本，并且标注类别

In [None]:
def show_imgs(n_rows, n_cols, x_data, y_data, class_names):
    """Plot the first ``n_rows * n_cols`` samples in a grid, titled with their class names.

    Args:
        n_rows: Number of rows in the image grid.
        n_cols: Number of columns in the image grid.
        x_data: Indexable collection of images; item ``i`` is drawn in grid cell ``i``.
        y_data: Indexable collection of labels, same length as ``x_data``; each
            label is used as an index into ``class_names``.
        class_names: Sequence mapping a label to the text shown above the image.

    Raises:
        AssertionError: If ``x_data`` and ``y_data`` differ in length, or the
            grid has more cells than there are samples.
    """
    assert len(x_data) == len(y_data), "x_data and y_data must have the same length"
    # Indices run from 0 to n_rows * n_cols - 1, so a grid that exactly covers
    # the data is valid.
    assert n_rows * n_cols <= len(x_data), "grid has more cells than samples"
    plt.figure(figsize=(n_cols * 1.4, n_rows * 1.6))
    for index in range(n_rows * n_cols):
        # Subplot positions are 1-based and filled row by row.
        plt.subplot(n_rows, n_cols, index + 1)
        plt.imshow(x_data[index], cmap='binary', interpolation='nearest')
        plt.axis('off')
        plt.title(class_names[y_data[index]])
    plt.show()
    
# Display name for each class id; the list index is the label value used to look it up.
class_names = ['T_shirt', 'Trouser', 'Pullover', 'Dress', 'Coat', 'Sandal', 'Shirt', 'Sneaker', 'Bag', 'Ankle boot']

# Show the first 15 training samples in a 3 x 5 grid.
show_imgs(3, 5, x_train, y_train, class_names)

In [None]:
# Build the classifier with tf.keras.models.Sequential.
#
# Activations used below:
#   relu:    y = max(0, x)
#   softmax: turns a vector into a probability distribution; for x = [x1, x2, x3]
#            y = [e^x1/sum, e^x2/sum, e^x3/sum], where sum = e^x1 + e^x2 + e^x3
model = keras.models.Sequential([
    # Flatten each 28x28 matrix into a one-dimensional vector.
    keras.layers.Flatten(input_shape=[28, 28]),
    # First fully connected layer: 300 units with relu activation.
    keras.layers.Dense(300, activation='relu'),
    # Second fully connected layer: 100 units fed by the previous layer.
    keras.layers.Dense(100, activation='relu'),
    # Output layer: a probability distribution over the 10 classes.
    keras.layers.Dense(10, activation='softmax'),
])

# Why the "sparse" loss:
#   y is a class index (a single number) -> sparse_categorical_crossentropy
#   y is a one-hot vector                -> categorical_crossentropy
model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer='Adam',
    metrics=['accuracy'],
)

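# 使用随机梯度下降（optimizer = 'sgd'）的时候，模型的初始损失值会特别大，而且模型几乎没有起到训练的效果；但是在去掉 optimizer = 'sgd'（改用 'Adam'）  
# 之后，模型就可以正常训练，也没有再出现最开始损失值过大的问题。
# 我最初估计是随机梯度下降法和我选择的损失函数之间产生了问题；不过更可能的原因是输入像素值（0~255）没有做归一化，导致 SGD 在默认学习率下梯度过大、训练不稳定——可以先把输入缩放到 [0, 1]（或做标准化）之后再用 'sgd' 验证。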

In [None]:
# List the layer objects of the model; four layers were added when it was built.
model.layers

In [None]:
# Show the model architecture, including how many parameters there are and
# which of them are trainable.
model.summary()

# 参数量的计算
# [None, 784] * W + b -> [None, 300]，即全连接层把输入乘上权重矩阵 W 再加上偏置 b，从而改变它的维度  
# W.shape = [784, 300], b.shape = [300]，所以参数量 = 784 * 300 + 300 = 785 * 300  

In [None]:
# Parameter count of the first Dense layer: (784 weights + 1 bias) per unit, times 300 units.
785 * 300

In [None]:
# Train for 10 passes over the training set, evaluating on the validation
# set after every pass.
history = model.fit(
    x=x_train,
    y=y_train,
    epochs=10,
    validation_data=(x_valid, y_valid),
)

In [None]:
# Inspect the type of the object returned by model.fit().
type(history)

In [None]:
# Show the values recorded during training; this is the data that
# plot_learning_curves turns into a DataFrame.
history.history

In [None]:
def plot_learning_curves(history):
    """Plot every series stored in ``history.history`` on one chart.

    The chart is 8 x 5 inches, has a grid, and its y-axis is limited to [0, 1].

    Args:
        history: Object whose ``history`` attribute can be converted to a
            ``pandas.DataFrame`` (for example the value returned by ``model.fit``).
    """
    curves = pd.DataFrame(history.history)
    axes = curves.plot(figsize=(8, 5))
    axes.grid(True)
    axes.set_ylim(0, 1)
    plt.show()

# Plot the curves for the training run stored in `history`.
plot_learning_curves(history)