# 梯度下降法(gradient descent，GD)

In [None]:
import tensorflow as tf
import matplotlib.pyplot as plt
import numpy as np

# Hyperparameters for the gradient-descent demo
x_init = -10   # starting value of x (the weight being optimized)
epochs = 10    # number of update iterations to run
lr = 0.3       # learning rate (step size multiplier)

# Loss function: y = x^2 - 10x + 1
def Loss(x):
    """Evaluate the quadratic loss x^2 - 10x + 1 at x.

    The expression uses only arithmetic operators, so x may be any object
    supporting them (Python number, NumPy array, TensorFlow tensor, ...).
    """
    return x ** 2 - 10 * x + 1

# Gradient of the loss, obtained with TensorFlow automatic differentiation
def dLoss(x_value):
    """Return d(Loss)/dx evaluated at x_value.

    Args:
        x_value: point at which to evaluate the derivative; it is wrapped
            in a float32 tf.Variable.

    Returns:
        The gradient converted to a NumPy value via ``.numpy()``.
    """
    # A tf.Variable is needed so the tape can differentiate with respect to it.
    x = tf.Variable(x_value, dtype=tf.float32)
    with tf.GradientTape() as g:
        y = Loss(x)
    # Ask for the gradient after leaving the tape context so the gradient
    # computation itself is not recorded on the tape.
    dy_dx = g.gradient(y, x)
    return dy_dx.numpy()

# Gradient descent
def GD(x_init, df, epochs, lr):
    """Run gradient descent and return every visited point.

    Args:
        x_init: starting value of x.
        df: callable returning the gradient at a given x.
        epochs: number of update steps to perform.
        lr: learning rate.

    Returns:
        NumPy array of length ``epochs + 1``; element 0 is ``x_init`` and
        element i is the value of x after the i-th update.
    """
    trajectory = np.zeros(epochs + 1)
    current = x_init
    trajectory[0] = current
    for step in range(1, epochs + 1):
        grad = df(current)
        # Update rule: x_new = x - learning_rate * gradient
        current -= lr * grad
        trajectory[step] = current
    return trajectory

# Run gradient descent using dLoss as the gradient function
w = GD(x_init, dLoss, epochs, lr=lr)
print(np.around(w, 2))

# Font setup for the Chinese labels. It is done before plotting so that all
# text uses it. The rcParams key is 'font.sans-serif'; a misspelled key
# raises KeyError.
plt.rcParams['font.sans-serif'] = ['Microsoft JhengHei']  # Microsoft JhengHei font
plt.rcParams['axes.unicode_minus'] = False                # render minus signs correctly with this font

# Blue curve: the loss function; red markers: the points visited by GD
t = np.arange(-10.0, 20.0, 0.001)
plt.plot(t, Loss(t), c='b')
plt.plot(w, Loss(w), c='r', marker = 'o', markersize = 5)

plt.title('梯度下降法', fontsize=18)
plt.xlabel('X 參數值', fontsize=18)
plt.ylabel('損失函數值', fontsize=18)
plt.show()

# 隨機梯度下降法(Stochastic Gradient Descent，SGD)

In [None]:
import tensorflow as tf
import matplotlib.pyplot as plt
import numpy as np

# Starting point of the optimization
x = tf.Variable(-8.0)
y = tf.Variable(5.0)


# Objective function to minimize
def ObjFun():
    """Return 0.5*x^2 + 2.5*y^2 evaluated at the module-level x and y."""
    x_term = 0.5 * (x ** 2)
    y_term = 2.5 * (y ** 2)
    return x_term + y_term

# Function used for the contour plot; same formula as the objective function
def Draw_fun(x, y):
    """Return 0.5*x^2 + 2.5*y^2 for the given x and y."""
    x_term = 0.5 * (x ** 2)
    y_term = 2.5 * (y ** 2)
    return x_term + y_term

# Optimizer used for the iterations: stochastic gradient descent
opt = tf.keras.optimizers.SGD(learning_rate = 0.3)

epochs = 10        # number of iterations
xValueArr = [-8]   # visited x values, seeded with the initial x
yValueArr = [5]    # visited y values, seeded with the initial y
for epoch in range(epochs):
    # One optimizer step on ObjFun with respect to x and y
    opt.minimize(ObjFun, var_list=[x,y])
    xValueArr.append(x.numpy())
    yValueArr.append(y.numpy())

# From here on x and y are rebound to the plotting grid axes
x = np.arange(-10.0, 10.0, 0.01)
y = np.arange(-10.0, 10.0, 0.01)
X, Y = np.meshgrid(x, y)
Z = Draw_fun(X,Y)
plt.figure(figsize = (10,5))
CS = plt.contour(X,Y,Z, colors = 'gray')
# The original title had mismatched quotes (a SyntaxError) and named the
# wrong optimizer for this cell.
plt.title('SGD Optimizer')
plt.xlabel('x')
plt.ylabel('y')
# Red line plus red markers show the path taken by the optimizer
plt.plot(xValueArr, yValueArr, c='r')
plt.scatter(xValueArr, yValueArr, c='r')
plt.show()

# RMSprop優化器

In [None]:
import tensorflow as tf
import matplotlib.pyplot as plt
import numpy as np

# Starting point of the optimization
x = tf.Variable(-8.0)
y = tf.Variable(5.0)


# Objective function to minimize
def ObjFun():
    """Return 0.5*x^2 + 2.5*y^2 evaluated at the module-level x and y."""
    x_part = 0.5 * (x ** 2)
    y_part = 2.5 * (y ** 2)
    return x_part + y_part

# Function used for the contour plot; same formula as the objective function
def Draw_fun(x, y):
    """Return 0.5*x^2 + 2.5*y^2 for the given x and y."""
    return 0.5 * (x ** 2) + 2.5 * (y ** 2)

# RMSprop optimizer; rho is the decay factor passed to the optimizer
opt = tf.keras.optimizers.RMSprop(learning_rate=0.3,rho=0.9)

epochs=50        # number of iterations
xValueArr=[-8]   # visited x values, seeded with the initial x
yValueArr=[5]    # visited y values, seeded with the initial y
for epoch in range(epochs):
    # One optimizer step on ObjFun with respect to x and y
    opt.minimize(ObjFun, var_list=[x,y])
    xValueArr.append(x.numpy())
    yValueArr.append(y.numpy())

# From here on x and y are rebound to the plotting grid axes
x = np.arange(-10.0, 10.0, 0.01)
y = np.arange(-10.0, 10.0, 0.01)
X, Y = np.meshgrid(x, y)
Z = Draw_fun(X,Y)
plt.figure(figsize = (10,5))
CS = plt.contour(X,Y,Z, colors = 'gray')
# The original title had mismatched quotes, which is a SyntaxError
plt.title('RMSprop Optimizer')
plt.xlabel('x')
plt.ylabel('y')
# Red line plus red markers show the path taken by the optimizer
plt.plot(xValueArr, yValueArr, c='r')
plt.scatter(xValueArr, yValueArr, c='r')
plt.show()

# Adam 優化器

In [None]:
import tensorflow as tf
import matplotlib.pyplot as plt
import numpy as np

# Starting point of the optimization
x = tf.Variable(-8.0)
y = tf.Variable(5.0)


# Objective function to minimize
def ObjFun():
    """Return 0.5*x^2 + 2.5*y^2 evaluated at the module-level x and y."""
    value = 0.5 * (x ** 2) + 2.5 * (y ** 2)
    return value

# Function used for the contour plot; same formula as the objective function
def Draw_fun(x, y):
    """Return 0.5*x^2 + 2.5*y^2 for the given x and y."""
    squared_x, squared_y = x ** 2, y ** 2
    return 0.5 * squared_x + 2.5 * squared_y

# Optimizer used for the iterations: Adam
# beta_1: exponential decay rate of the first-moment estimate
# beta_2: exponential decay rate of the second-moment estimate
opt = tf.keras.optimizers.Adam(learning_rate=0.3, beta_1=0.9, beta_2=0.999)

epochs=50        # number of iterations
xValueArr=[-8]   # visited x values, seeded with the initial x
yValueArr=[5]    # visited y values, seeded with the initial y
for epoch in range(epochs):
    # One optimizer step on ObjFun with respect to x and y
    opt.minimize(ObjFun, var_list=[x,y])
    xValueArr.append(x.numpy())
    yValueArr.append(y.numpy())

# From here on x and y are rebound to the plotting grid axes
x = np.arange(-10.0, 10.0, 0.01)
y = np.arange(-10.0, 10.0, 0.01)
X, Y = np.meshgrid(x, y)
Z = Draw_fun(X,Y)
plt.figure(figsize = (10,5))
CS = plt.contour(X,Y,Z, colors = 'gray')
# The original title had mismatched quotes, which is a SyntaxError
plt.title('Adam Optimizer')
plt.xlabel('x')
plt.ylabel('y')
# Red line plus red markers show the path taken by the optimizer
plt.plot(xValueArr, yValueArr, c='r')
plt.scatter(xValueArr, yValueArr, c='r')
plt.show()

# 神經網路訓練實例(MNIST 手寫數字辨識)

訓練步驟：下載MNIST資料：這邊利用TF.Keras dataset抓取MNIST手寫辨識資料集

In [None]:
from tensorflow.keras.datasets import mnist
# load_data() yields two pairs: (training images, training labels) and
# (test images, test labels)
train_split, test_split = mnist.load_data()
train_Data, train_Label = train_split
test_Data, test_Label = test_split
# Number of samples in each split
print('train_data =', len(train_Data))
print('test_data =', len(test_Data))
# Array shape of each split
print('train_data_dim =', train_Data.shape)
print('test_data_dim =', test_Data.shape)

這邊可以使用matplotlib輸出images數字影像，代碼如下：

In [None]:
import matplotlib.pyplot as plt

def plot_image(data):
    """Show `data` as a 4x4-inch image using the 'binary' colormap."""
    plt.gcf().set_size_inches(4, 4)
    plt.imshow(data, cmap='binary')
    plt.show()


# Display the first training image (each image has a corresponding label)
plot_image(train_Data[0])

這裡也可以印出訓練集第一筆的資料標籤值來驗證。

In [None]:
# Print the label of the first training sample. The variable is train_Label
# (capital L); the lowercase spelling used before raised NameError.
print('train_Label[0] =', train_Label[0])

設置超參數與資料訓練前處理：接下來要設定訓練網路所需要的超參數跟資料大小轉換，代碼如下：

In [None]:
import tensorflow as tf

# Training hyperparameters
learning_rate = 0.01     # learning rate
training_epoch = 1000    # number of training epochs
batch_size = 2000        # samples per mini-batch

# MNIST preprocessing: flatten every 28x28 image into a 784-element float32
# vector that serves as the network's input features.
train_Data_R = train_Data.reshape([-1, 784]).astype('float32')
test_Data_R = test_Data.reshape([-1, 784]).astype('float32')
# Normalize by the maximum pixel value (8-bit pixels, 0..255); this helps
# training converge faster.
train_Data_R, test_Data_R = train_Data_R / 255., test_Data_R / 255.

# Shuffle and batch the training data. The shuffle buffer covers the whole
# dataset so the order is fully randomized; a smaller buffer only shuffles
# locally.
train_Data_M = tf.data.Dataset.from_tensor_slices((train_Data_R, train_Label))
train_Data_M = train_Data_M.shuffle(len(train_Data_R)).batch(batch_size)

設計網路

這裡我們利用四層全連接層網路作為MNIST手寫數字辨識的網路架構：輸入層有784個特徵(28×28影像攤平後的像素值)，第一層隱藏層有256個節點，第二層隱藏層有128個節點，第三層隱藏層有64個節點，最後的輸出層有十個節點，分別代表十個數字的機率大小。

In [None]:
from tensorflow.keras.datasets import mnist
from tensorflow.keras import layers
from tensorflow.keras.models import Sequential
import tensorflow as tf

# Load MNIST again; this rebinds train_Data, train_Label, test_Data and
# test_Label to the arrays returned by load_data().
(train_Data, train_Label), (test_Data, test_Label) = mnist.load_data()

# Three ReLU hidden layers (256, 128, 64 units) followed by a 10-unit softmax
# layer whose outputs are the probabilities of the ten classes (digits 0-9).
hidden_layers = [layers.Dense(units, activation='relu') for units in (256, 128, 64)]
model = Sequential(hidden_layers + [layers.Dense(10, activation='softmax')])

選擇優化器、損失函數

In [None]:
# Stochastic gradient descent optimizer, configured with learning_rate
optimizer = tf.keras.optimizers.SGD(learning_rate)

# Cross-entropy loss
def cross_entropy_loss(x, y):
    """Return the mean sparse categorical cross-entropy.

    Args:
        x: predicted values (model output).
        y: ground-truth labels.
    """
    # The Keras loss object takes (y_true, y_pred), i.e. the reverse of this
    # function's argument order.
    loss_fn = tf.keras.losses.SparseCategoricalCrossentropy()
    return tf.reduce_mean(loss_fn(y, x))

定義正確率函數，判斷測試後的正確程度

In [None]:
# Accuracy of a batch of predictions
# (per the original notes: axis 0 indexes the images, axis 1 holds the ten
# class probabilities for digits 0-9)
def accuracy(y_pred, y_true):
    """Return the fraction of predictions whose arg-max matches y_true."""
    # Index of the highest value along axis 1 is the predicted class
    predicted_class = tf.argmax(y_pred, 1)
    true_class = tf.cast(y_true, tf.int64)
    hits = tf.cast(tf.equal(predicted_class, true_class), tf.float32)
    # Mean of the 0/1 hit indicators
    return tf.reduce_mean(hits, axis=-1)

定義訓練與測試方法

In [None]:
# Training loop
for epoch in range(training_epoch):
    # Iterate over the shuffled mini-batches of (images, labels)
    for step, (batch_data, batch_label) in enumerate(train_Data_M):
        # Only the forward pass and the loss are recorded on the tape.
        with tf.GradientTape() as tape:
            pre_data = model(batch_data)                      # model predictions
            loss = cross_entropy_loss(pre_data, batch_label)  # loss for this batch
        # Accuracy and the gradient are computed after leaving the tape
        # context, so they are not recorded on the tape.
        acc = accuracy(pre_data, batch_label)
        trainable_variables = model.trainable_variables
        gradients = tape.gradient(loss, trainable_variables)  # d(loss)/d(variables)
        optimizer.apply_gradients(zip(gradients, trainable_variables))

    # After every epoch, evaluate loss and accuracy on the test set
    Testprec = model(test_Data_R)
    Testloss = cross_entropy_loss(Testprec, test_Label)
    Testacc = accuracy(Testprec, test_Label)
    print("Testloss: %f, Testaccuracy: %f" % (Testloss, Testacc))