In [1]:
# 图表库
%matplotlib inline
import matplotlib.pyplot as plt
# 深度学习
import tensorflow as tf
# 矩阵
import numpy as np
# sklearn的各种辅助小工具
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import OneHotEncoder

In [2]:
# Fix TensorFlow's random seed so that the randomly initialised variables
# created further down get the same values on every run.
tf.set_random_seed(0)

In [3]:
# Load the iris data set as a (features, labels) pair.
# `return_X_y` is passed by keyword: recent scikit-learn releases made it
# keyword-only, so the positional form `load_iris(True)` raises a TypeError.
X, y = load_iris(return_X_y=True)

In [4]:
# Turn the 1-D label vector into an (n_samples, 1) column (n rows, one column),
# which is the 2-D layout the one-hot encoder is fitted on below.
n_samples = len(y)
y = y.reshape(n_samples, 1)

In [5]:
# One-hot encoder: with 3 classes, label 0 is encoded as [1 0 0],
# label 1 as [0 1 0] and label 2 as [0 0 1].
enc = OneHotEncoder()

In [6]:
# Fit the encoder on the label column, then convert its result to a dense array.
# NOTE: this rebinds `y`, so running the cell a second time would encode the
# already one-hot matrix again.
y_encoded = enc.fit_transform(y)
y = y_encoded.toarray()

In [7]:
# Hold out 20% of the samples as a test set; the fixed random_state keeps
# the split repeatable.
split_parts = train_test_split(X, y, test_size=0.2, random_state=0)
X_train, X_test, y_train, y_test = split_parts

In [8]:
# Shapes of the four splits: 120 training rows and 30 test rows, with 4 input
# features and 3 one-hot output columns (multi-class classification).
print(*(part.shape for part in (X_train, X_test, y_train, y_test)))

(120, 4) (30, 4) (120, 3) (30, 3)


In [9]:
# Number of input features, read from the training data instead of being
# hard-coded (4 for iris).
input_size = X_train.shape[1]
# Number of output classes, i.e. the width of the one-hot targets (3 for iris).
target_size = y_train.shape[1]

In [10]:
# Placeholders fed at run time: a batch of feature rows and the matching
# one-hot targets. The leading None leaves the batch size open.
# NOTE: these rebind `X` and `y`, which held the raw data arrays in the cells above.
X = tf.placeholder(dtype=tf.float32, shape=[None, input_size])
y = tf.placeholder(dtype=tf.float32, shape=[None, target_size])

In [11]:
# Trainable parameters, both initialised from a normal distribution:
# a weight matrix of shape (input_size, target_size) and a bias vector of
# length target_size.
W = tf.Variable(initial_value=tf.random_normal(shape=[input_size, target_size]))
b = tf.Variable(initial_value=tf.random_normal(shape=[target_size]))

$pred = softmax(XW + b)$

X和W都是矩阵，b是向量

$pred$就是一般公式的$\hat{y}$

In [12]:
# Model output: the linear scores XW + b, turned into class probabilities by softmax.
logits = tf.matmul(X, W) + b
pred = tf.nn.softmax(logits)

In [13]:
# Cross-entropy loss: for each sample, minus the sum over classes of
# y * log(pred); the per-sample losses are then averaged over the batch.
# The probabilities are clipped away from 0 before taking the log, so a
# saturated softmax cannot produce log(0) = -inf and turn the loss into NaN.
# `axis` is used in place of its deprecated alias `reduction_indices`.
cost = tf.reduce_mean(
    -tf.reduce_sum(
        y * tf.log(tf.clip_by_value(pred, 1e-10, 1.0)),
        axis=1)
)

In [14]:
# Learning rate handed to the gradient-descent optimizer
learning_rate = 0.01
# Index of the last training epoch (the training loop runs n_epoch + 1 iterations)
n_epoch = 1200

In [15]:
# Plain gradient descent; `opt` is the training op that performs one update
# step minimising `cost`.
optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
opt = optimizer.minimize(cost)

In [16]:
# Op that initialises every variable in the graph (here W and b).
# tf.initialize_all_variables() is deprecated; tf.global_variables_initializer()
# is its direct replacement.
init = tf.global_variables_initializer()

In [17]:
# Session configuration with the GPU device count set to 0, i.e. GPU support is disabled.
config = tf.ConfigProto(device_count={'GPU': 0})

In [18]:
# Loss value recorded after every epoch
costs = []
# Feed dictionaries built once and reused for every run call
train_feed = {X: X_train, y: y_train}
test_feed = {X: X_test, y: y_test}
with tf.Session(config=config) as sess:
    sess.run(init)
    # Training loop: n_epoch + 1 full-batch iterations (epochs 0..n_epoch)
    for epoch in range(n_epoch + 1):
        # One gradient-descent step on the whole training set
        sess.run(opt, feed_dict=train_feed)
        # Training loss, evaluated in a separate run after the step above
        c = sess.run(cost, feed_dict=train_feed)
        costs.append(c)
        # Report progress every 50 epochs
        if epoch % 50 == 0:
            print('Epoch: {}, cost: {}'.format(epoch, c))
    # Predicted class probabilities for both splits, computed while the
    # session (and therefore the trained variables) is still alive
    pred_train = sess.run(pred, feed_dict=train_feed)
    pred_test = sess.run(pred, feed_dict=test_feed)

Epoch: 0, cost: 2.81131911277771
Epoch: 50, cost: 0.8226200342178345
Epoch: 100, cost: 0.7223557829856873
Epoch: 150, cost: 0.6575696468353271
Epoch: 200, cost: 0.6116611361503601
Epoch: 250, cost: 0.5769749879837036
Epoch: 300, cost: 0.5495023131370544
Epoch: 350, cost: 0.5269352197647095
Epoch: 400, cost: 0.5078521370887756
Epoch: 450, cost: 0.49133339524269104
Epoch: 500, cost: 0.47676119208335876
Epoch: 550, cost: 0.4637070298194885
Epoch: 600, cost: 0.4518659710884094
Epoch: 650, cost: 0.44101566076278687
Epoch: 700, cost: 0.4309900403022766
Epoch: 750, cost: 0.42166298627853394
Epoch: 800, cost: 0.41293659806251526
Epoch: 850, cost: 0.4047338366508484
Epoch: 900, cost: 0.396992564201355
Epoch: 950, cost: 0.38966235518455505
Epoch: 1000, cost: 0.3827018141746521
Epoch: 1050, cost: 0.37607595324516296
Epoch: 1100, cost: 0.369755357503891
Epoch: 1150, cost: 0.3637147545814514
Epoch: 1200, cost: 0.35793235898017883


In [19]:
# Training accuracy: compare the arg-max class of the one-hot targets with
# the arg-max class of the predicted probabilities.
true_train = np.argmax(y_train, axis=1)
guess_train = np.argmax(pred_train, axis=1)
acc = accuracy_score(true_train, guess_train)
print('train accuracy: {}'.format(acc))

train accuracy: 0.925


In [20]:
# Test accuracy: compare the arg-max class of the one-hot targets with
# the arg-max class of the predicted probabilities.
true_test = np.argmax(y_test, axis=1)
guess_test = np.argmax(pred_test, axis=1)
acc = accuracy_score(true_test, guess_test)
print('test accuracy: {}'.format(acc))

test accuracy: 0.8666666666666667
