## 模型 1. 用RNN學習一個字符到一個字符映射

### STEP1. 匯入 Keras 及相關模組

In [1]:
import numpy
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import SimpleRNN
from tensorflow.keras import utils
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Fix the random seeds so that "Restart Kernel -> Run All" gives repeatable results.
# Seeding NumPy alone is not enough: Keras draws its initial weights from
# TensorFlow's random generator, which has to be seeded separately.
numpy.random.seed(7)
tf.random.set_seed(7)

### STEP2. 準備資料

我們現在可以定義我們的數據集，字母表(alphabet)。為了便於閱讀，我們使用大寫字母來定義字母表。

我們需要將字母表的每個字母映射到數字以便使用人工網絡來進行訓練。我們可以通過為字符創建字母索引的字典來輕鬆完成此操作。
我們還可以創建一個反向查找，將預測轉換回字符以供以後使用。

In [2]:
# Sequence dataset: the uppercase Latin alphabet.
alphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"

# Forward lookup (character -> integer 0..25) and the reverse lookup
# (integer -> character) used later to decode predictions.
char_to_int = {letter: code for code, letter in enumerate(alphabet)}
int_to_char = dict(enumerate(alphabet))

In [3]:
# Take a look at both lookup tables (character -> integer, then integer -> character).
print("字母對應到數字編號: \n", char_to_int)
print("\n")

print("數字編號對應到字母: \n", int_to_char)

字母對應到數字編號: 
 {'A': 0, 'B': 1, 'C': 2, 'D': 3, 'E': 4, 'F': 5, 'G': 6, 'H': 7, 'I': 8, 'J': 9, 'K': 10, 'L': 11, 'M': 12, 'N': 13, 'O': 14, 'P': 15, 'Q': 16, 'R': 17, 'S': 18, 'T': 19, 'U': 20, 'V': 21, 'W': 22, 'X': 23, 'Y': 24, 'Z': 25}


數字編號對應到字母: 
 {0: 'A', 1: 'B', 2: 'C', 3: 'D', 4: 'E', 5: 'F', 6: 'G', 7: 'H', 8: 'I', 9: 'J', 10: 'K', 11: 'L', 12: 'M', 13: 'N', 14: 'O', 15: 'P', 16: 'Q', 17: 'R', 18: 'S', 19: 'T', 20: 'U', 21: 'V', 22: 'W', 23: 'X', 24: 'Y', 25: 'Z'}


### STEP3. 準備訓練用資料

現在我們需要創建我們的輸入(X)和輸出(y)來訓練我們的神經網絡。我們可以通過定義一個輸入序列長度，然後從輸入字母序列中讀取序列。
例如，我們使用輸入長度1。從原始輸入數據的開頭開始，我們可以讀取第一個字母“A”，並以下一個字母“B”作為預測目標。我們每次沿著字母表移動一個字符並重複此過程，直到預測目標為“Z”為止。

In [4]:
# Build the training pairs: each window of `seq_length` letters is the input and
# the letter immediately after the window is the target.
seq_length = 1  # one time step per sample, i.e. a one-to-one mapping
dataX, dataY = [], []
for start in range(len(alphabet) - seq_length):
    stop = start + seq_length
    seq_in, seq_out = alphabet[start:stop], alphabet[stop]
    dataX.append([char_to_int[ch] for ch in seq_in])  # integer-encode the input window
    dataY.append(char_to_int[seq_out])  # integer-encode the target letter
    print(seq_in, '->', seq_out)

A -> B
B -> C
C -> D
D -> E
E -> F
F -> G
G -> H
H -> I
I -> J
J -> K
K -> L
L -> M
M -> N
N -> O
O -> P
P -> Q
Q -> R
R -> S
S -> T
T -> U
U -> V
V -> W
W -> X
X -> Y
Y -> Z


In [5]:
print(dataX) # 2-D list: one inner list of integer-encoded input characters per sample
print(dataY) # 1-D list: the integer-encoded target character of each sample

[[0], [1], [2], [3], [4], [5], [6], [7], [8], [9], [10], [11], [12], [13], [14], [15], [16], [17], [18], [19], [20], [21], [22], [23], [24]]
[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25]


### STEP4. 資料預處理
我們需要將NumPy數組重塑為RNN網絡所期望的格式，也就是: (samples, time_steps, features)。
同時我們將進行資料的歸一化(normalize)來讓資料的值落於0到1之間。並對標籤值進行one-hot的編碼。


> ABCDEFGHIJKLMNOPQRSTUVWXYZ

> 例如:

> 給 J -> 預測 K

> 給 X -> 預測 Y


目標訓練張量結構: (samples, time_steps, features) -> (n , **1**, **1** )

請特別注意, 這裡的1個字符會變成1個時間步裡頭的1個element的"feature"向量。

In [6]:
# Reshape X into the (samples, time_steps, features) layout expected by the RNN.
X = numpy.reshape(dataX, (len(dataX), seq_length, 1))

# Normalize the integer codes into [0, 1). The input is deliberately NOT one-hot
# encoded: the codes are ordinal (A and C are two letters apart) and scaling keeps
# that relationship intact.
X = X / float(len(alphabet))

# One-hot encode the targets. num_classes is pinned to the alphabet size so the label
# width always matches int_to_char; without it to_categorical infers the width from
# max(dataY) + 1, which equals 26 here only because 'Z' happens to be a target.
y = utils.to_categorical(dataY, num_classes=len(alphabet))

print("X shape: ", X.shape)  # (25 samples, 1 time step, 1 feature)
print("y shape: ", y.shape)

X shape:  (25, 1, 1)
y shape:  (25, 26)


In [7]:
# Display the normalized input tensor built in the preprocessing cell.
X

array([[[0.        ]],

       [[0.03846154]],

       [[0.07692308]],

       [[0.11538462]],

       [[0.15384615]],

       [[0.19230769]],

       [[0.23076923]],

       [[0.26923077]],

       [[0.30769231]],

       [[0.34615385]],

       [[0.38461538]],

       [[0.42307692]],

       [[0.46153846]],

       [[0.5       ]],

       [[0.53846154]],

       [[0.57692308]],

       [[0.61538462]],

       [[0.65384615]],

       [[0.69230769]],

       [[0.73076923]],

       [[0.76923077]],

       [[0.80769231]],

       [[0.84615385]],

       [[0.88461538]],

       [[0.92307692]]])

In [8]:
# Display the one-hot encoded targets built in the preprocessing cell.
y

array([[0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 

### STEP5. 建立模型

In [9]:
# Build the network: a SimpleRNN with 32 units that reads inputs of shape
# (time_steps, features), followed by a softmax layer with one output per class.
model = Sequential([
    SimpleRNN(32, input_shape=X.shape[1:]),
    Dense(y.shape[1], activation='softmax'),
])
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 simple_rnn (SimpleRNN)      (None, 32)                1088      
                                                                 
 dense (Dense)               (None, 26)                858       
                                                                 
Total params: 1946 (7.60 KB)
Trainable params: 1946 (7.60 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


### STEP6. 定義訓練並進行訓練

In [10]:
# Configure the optimizer and loss, then train.
# batch_size=1 updates the weights after every single sample;
# verbose=2 prints one summary line per epoch.
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)
model.fit(x=X, y=y, batch_size=1, epochs=500, verbose=2)

Epoch 1/500
25/25 - 2s - loss: 3.2629 - accuracy: 0.0400 - 2s/epoch - 84ms/step
Epoch 2/500
25/25 - 0s - loss: 3.2364 - accuracy: 0.0400 - 113ms/epoch - 5ms/step
Epoch 3/500
25/25 - 0s - loss: 3.2227 - accuracy: 0.0400 - 105ms/epoch - 4ms/step
Epoch 4/500
25/25 - 0s - loss: 3.2107 - accuracy: 0.0400 - 97ms/epoch - 4ms/step
Epoch 5/500
25/25 - 0s - loss: 3.1991 - accuracy: 0.0400 - 106ms/epoch - 4ms/step
Epoch 6/500
25/25 - 0s - loss: 3.1903 - accuracy: 0.0400 - 100ms/epoch - 4ms/step
Epoch 7/500
25/25 - 0s - loss: 3.1796 - accuracy: 0.0400 - 100ms/epoch - 4ms/step
Epoch 8/500
25/25 - 0s - loss: 3.1697 - accuracy: 0.0400 - 96ms/epoch - 4ms/step
Epoch 9/500
25/25 - 0s - loss: 3.1600 - accuracy: 0.0400 - 103ms/epoch - 4ms/step
Epoch 10/500
25/25 - 0s - loss: 3.1521 - accuracy: 0.0800 - 93ms/epoch - 4ms/step
Epoch 11/500
25/25 - 0s - loss: 3.1441 - accuracy: 0.0400 - 97ms/epoch - 4ms/step
Epoch 12/500
25/25 - 0s - loss: 3.1356 - accuracy: 0.0800 - 141ms/epoch - 6ms/step
Epoch 13/500
25/25 

<keras.src.callbacks.History at 0x7b033a1478b0>

### STEP7. 評估模型準確率

In [11]:
# Evaluate the model on the same data it was trained on (no held-out set is used).
# scores[0] is the loss and scores[1] is the accuracy metric.
scores = model.evaluate(X, y, verbose=0)
accuracy_pct = scores[1] * 100
print("Model Accuracy: %.2f%%" % accuracy_pct)

Model Accuracy: 76.00%


### STEP8. 預測結果

In [12]:
# Display the integer-encoded input patterns that will be fed to the model below.
dataX

[[0],
 [1],
 [2],
 [3],
 [4],
 [5],
 [6],
 [7],
 [8],
 [9],
 [10],
 [11],
 [12],
 [13],
 [14],
 [15],
 [16],
 [17],
 [18],
 [19],
 [20],
 [21],
 [22],
 [23],
 [24]]

In [13]:
# Demonstrate the model's predictions on every training pattern.
# The patterns are encoded exactly like the training data and scored with a single
# predict() call: Keras documents predict() as a batch API that is not meant to be
# invoked once per sample inside a Python loop.
x = numpy.reshape(dataX, (len(dataX), seq_length, 1))  # (batch, time_steps, features)
x = x / float(len(alphabet))
predictions = model.predict(x, verbose=0)  # one row of class probabilities per pattern

for pattern, prediction in zip(dataX, predictions):
    index = numpy.argmax(prediction)  # index of the most probable class
    result = int_to_char[index]  # decode the class index back into a letter
    seq_in = [int_to_char[value] for value in pattern]
    print(seq_in, "->", result)

['A'] -> B
['B'] -> B
['C'] -> D
['D'] -> E
['E'] -> F
['F'] -> G
['G'] -> H
['H'] -> I
['I'] -> J
['J'] -> K
['K'] -> L
['L'] -> M
['M'] -> N
['N'] -> O
['O'] -> P
['P'] -> Q
['Q'] -> R
['R'] -> S
['S'] -> T
['T'] -> V
['U'] -> W
['V'] -> X
['W'] -> Z
['X'] -> Z
['Y'] -> Z


我們可以看到，"序列資料的預測"這個問題對於網絡學習確實是困難的。
原因是，在以上的範例中的RNN單元沒有任何上下文的知識(時間步長只有"1")。每個輸入輸出模式以隨機順序(shuffle)送入神經網絡，而且在預設(非 stateful)的設定下，Keras的RNN網絡內部狀態(state)會在每個批次(batch)之後被重置(reset)，因此網絡無法跨樣本累積序列資訊。

接下來，讓我們嘗試提供更多的順序資訊來讓RNN學習。

## 模型 2. RNN 學習三個字符特徵窗口(Three-Char Feature Window)到一個字符映射


### STEP1. 準備訓練用資料

In [14]:
# Build the training pairs: each window of three consecutive letters is the input
# and the letter immediately after the window is the target.
seq_length = 3
dataX, dataY = [], []
for start in range(len(alphabet) - seq_length):
    stop = start + seq_length
    seq_in, seq_out = alphabet[start:stop], alphabet[stop]  # three-letter window, next letter
    dataX.append([char_to_int[ch] for ch in seq_in])
    dataY.append(char_to_int[seq_out])
    print(seq_in, '->', seq_out)

ABC -> D
BCD -> E
CDE -> F
DEF -> G
EFG -> H
FGH -> I
GHI -> J
HIJ -> K
IJK -> L
JKL -> M
KLM -> N
LMN -> O
MNO -> P
NOP -> Q
OPQ -> R
PQR -> S
QRS -> T
RST -> U
STU -> V
TUV -> W
UVW -> X
VWX -> Y
WXY -> Z


In [15]:
# Display the integer-encoded three-letter input windows.
dataX

[[0, 1, 2],
 [1, 2, 3],
 [2, 3, 4],
 [3, 4, 5],
 [4, 5, 6],
 [5, 6, 7],
 [6, 7, 8],
 [7, 8, 9],
 [8, 9, 10],
 [9, 10, 11],
 [10, 11, 12],
 [11, 12, 13],
 [12, 13, 14],
 [13, 14, 15],
 [14, 15, 16],
 [15, 16, 17],
 [16, 17, 18],
 [17, 18, 19],
 [18, 19, 20],
 [19, 20, 21],
 [20, 21, 22],
 [21, 22, 23],
 [22, 23, 24]]

### STEP2. 資料預處理


> ABCDEFGHIJKLMNOPQRSTUVWXYZ

> 例如:

> 給 HIJ -> 預測 K

> 給 EFG -> 預測 H

目標訓練張量結構: (samples, time_steps, features) -> (n , **1**, **3** )

請特別注意, 這裡的三個字符會變成一個有3個element的"feature" vector。因此在準備訓練資料集的時候, 1筆訓練資料只有"1"個時間步, 裡頭存放著"3"個字符的資料"features"向量。

In [16]:
# Reshape X into (samples, time_steps, features). Note the layout used here:
# ONE time step that carries all `seq_length` letters as separate features.
X = numpy.reshape(dataX, (len(dataX), 1, seq_length))

# Normalize the integer codes into [0, 1).
X = X / float(len(alphabet))

# One-hot encode the targets. num_classes is pinned to the alphabet size so the label
# width always matches int_to_char; without it to_categorical infers the width from
# max(dataY) + 1, which equals 26 here only because 'Z' happens to be a target.
y = utils.to_categorical(dataY, num_classes=len(alphabet))

print("X shape: ", X.shape)
print("y shape: ", y.shape)

X shape:  (23, 1, 3)
y shape:  (23, 26)


In [17]:
# Display the normalized input tensor (one time step holding three features per sample).
X

array([[[0.        , 0.03846154, 0.07692308]],

       [[0.03846154, 0.07692308, 0.11538462]],

       [[0.07692308, 0.11538462, 0.15384615]],

       [[0.11538462, 0.15384615, 0.19230769]],

       [[0.15384615, 0.19230769, 0.23076923]],

       [[0.19230769, 0.23076923, 0.26923077]],

       [[0.23076923, 0.26923077, 0.30769231]],

       [[0.26923077, 0.30769231, 0.34615385]],

       [[0.30769231, 0.34615385, 0.38461538]],

       [[0.34615385, 0.38461538, 0.42307692]],

       [[0.38461538, 0.42307692, 0.46153846]],

       [[0.42307692, 0.46153846, 0.5       ]],

       [[0.46153846, 0.5       , 0.53846154]],

       [[0.5       , 0.53846154, 0.57692308]],

       [[0.53846154, 0.57692308, 0.61538462]],

       [[0.57692308, 0.61538462, 0.65384615]],

       [[0.61538462, 0.65384615, 0.69230769]],

       [[0.65384615, 0.69230769, 0.73076923]],

       [[0.69230769, 0.73076923, 0.76923077]],

       [[0.73076923, 0.76923077, 0.80769231]],

       [[0.76923077, 0.80769231, 0.84615

### STEP3. 建立模型

In [18]:
# Build the network. The input shape comes straight from X, so here the RNN sees
# (time_steps, features) = (1, 3): a single step with three features.
model = Sequential([
    SimpleRNN(32, input_shape=X.shape[1:]),
    Dense(y.shape[1], activation='softmax'),
])
model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 simple_rnn_1 (SimpleRNN)    (None, 32)                1152      
                                                                 
 dense_1 (Dense)             (None, 26)                858       
                                                                 
Total params: 2010 (7.85 KB)
Trainable params: 2010 (7.85 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


### STEP4. 定義訓練並進行訓練

In [19]:
# Configure the optimizer and loss, then train.
# batch_size=1 updates the weights after every single sample;
# verbose=2 prints one summary line per epoch.
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)
model.fit(x=X, y=y, batch_size=1, epochs=500, verbose=2)

Epoch 1/500
23/23 - 2s - loss: 3.3222 - accuracy: 0.0000e+00 - 2s/epoch - 79ms/step
Epoch 2/500
23/23 - 0s - loss: 3.2759 - accuracy: 0.0435 - 103ms/epoch - 4ms/step
Epoch 3/500
23/23 - 0s - loss: 3.2526 - accuracy: 0.0000e+00 - 111ms/epoch - 5ms/step
Epoch 4/500
23/23 - 0s - loss: 3.2348 - accuracy: 0.0435 - 83ms/epoch - 4ms/step
Epoch 5/500
23/23 - 0s - loss: 3.2159 - accuracy: 0.0000e+00 - 79ms/epoch - 3ms/step
Epoch 6/500
23/23 - 0s - loss: 3.1970 - accuracy: 0.0000e+00 - 62ms/epoch - 3ms/step
Epoch 7/500
23/23 - 0s - loss: 3.1812 - accuracy: 0.0435 - 69ms/epoch - 3ms/step
Epoch 8/500
23/23 - 0s - loss: 3.1688 - accuracy: 0.0000e+00 - 70ms/epoch - 3ms/step
Epoch 9/500
23/23 - 0s - loss: 3.1562 - accuracy: 0.0000e+00 - 78ms/epoch - 3ms/step
Epoch 10/500
23/23 - 0s - loss: 3.1435 - accuracy: 0.0000e+00 - 72ms/epoch - 3ms/step
Epoch 11/500
23/23 - 0s - loss: 3.1334 - accuracy: 0.0000e+00 - 64ms/epoch - 3ms/step
Epoch 12/500
23/23 - 0s - loss: 3.1234 - accuracy: 0.0000e+00 - 71ms/epoch

<keras.src.callbacks.History at 0x7b02c0995600>

### STEP5. 評估模型準確率

In [20]:
# Evaluate the model on the same data it was trained on (no held-out set is used).
# scores[0] is the loss and scores[1] is the accuracy metric.
scores = model.evaluate(X, y, verbose=0)
accuracy_pct = scores[1] * 100
print("Model Accuracy: %.2f%%" % accuracy_pct)

Model Accuracy: 82.61%


### STEP6. 預測結果

In [21]:
# Show the model's prediction for every training pattern.
# The patterns are encoded exactly like the training data (one time step holding
# `seq_length` features) and scored with a single predict() call: Keras documents
# predict() as a batch API that is not meant to be invoked once per sample in a loop.
x = numpy.reshape(dataX, (len(dataX), 1, seq_length))  # (batch, time_steps, features)
x = x / float(len(alphabet))
predictions = model.predict(x, verbose=0)  # one row of class probabilities per pattern

for pattern, prediction in zip(dataX, predictions):
    index = numpy.argmax(prediction)  # index of the most probable class
    result = int_to_char[index]  # decode the class index back into a letter
    seq_in = [int_to_char[value] for value in pattern]
    print(seq_in, "->", result)

['A', 'B', 'C'] -> D
['B', 'C', 'D'] -> E
['C', 'D', 'E'] -> F
['D', 'E', 'F'] -> G
['E', 'F', 'G'] -> H
['F', 'G', 'H'] -> I
['G', 'H', 'I'] -> J
['H', 'I', 'J'] -> K
['I', 'J', 'K'] -> L
['J', 'K', 'L'] -> M
['K', 'L', 'M'] -> N
['L', 'M', 'N'] -> O
['M', 'N', 'O'] -> P
['N', 'O', 'P'] -> Q
['O', 'P', 'Q'] -> R
['P', 'Q', 'R'] -> S
['Q', 'R', 'S'] -> T
['R', 'S', 'T'] -> U
['S', 'T', 'U'] -> W
['T', 'U', 'V'] -> X
['U', 'V', 'W'] -> Z
['V', 'W', 'X'] -> Z
['W', 'X', 'Y'] -> Z


我們可以看到，"模型#2"相比於"模型#1"在預測的表現上只有小幅提升。這個簡單的問題，即使使用window方法，我們仍然無法讓RNN學習到預測正確的字母出現的順序。

以上也是一個誤用RNN網絡的糟糕的張量結構。事實上，字母序列應該表示為單一特徵的多個"時間步驟(timesteps)"，而不是單一時間步驟中的多個獨立特徵。我們雖然給了網絡更多的上下文，卻沒有提供更多順序上的上下文(context)。

下一範例中，我們將以"時間步驟(timesteps)"的形式給出更多的上下文(context)。

## 模型 3. RNN 學習三個字符的時間步驟窗口(Three-Char Time Step Window)到一個字符的映射

### STEP1. 準備訓練用資料

In [22]:
# Build the training pairs: each window of three consecutive letters is the input
# and the letter immediately after the window is the target.
seq_length = 3
dataX, dataY = [], []
for start in range(len(alphabet) - seq_length):
    stop = start + seq_length
    seq_in, seq_out = alphabet[start:stop], alphabet[stop]
    dataX.append([char_to_int[ch] for ch in seq_in])
    dataY.append(char_to_int[seq_out])
    print(seq_in, '->', seq_out)

ABC -> D
BCD -> E
CDE -> F
DEF -> G
EFG -> H
FGH -> I
GHI -> J
HIJ -> K
IJK -> L
JKL -> M
KLM -> N
LMN -> O
MNO -> P
NOP -> Q
OPQ -> R
PQR -> S
QRS -> T
RST -> U
STU -> V
TUV -> W
UVW -> X
VWX -> Y
WXY -> Z


In [23]:
# Display the integer-encoded three-letter input windows.
dataX

[[0, 1, 2],
 [1, 2, 3],
 [2, 3, 4],
 [3, 4, 5],
 [4, 5, 6],
 [5, 6, 7],
 [6, 7, 8],
 [7, 8, 9],
 [8, 9, 10],
 [9, 10, 11],
 [10, 11, 12],
 [11, 12, 13],
 [12, 13, 14],
 [13, 14, 15],
 [14, 15, 16],
 [15, 16, 17],
 [16, 17, 18],
 [17, 18, 19],
 [18, 19, 20],
 [19, 20, 21],
 [20, 21, 22],
 [21, 22, 23],
 [22, 23, 24]]

### STEP2. 資料預處理


> ABCDEFGHIJKLMNOPQRSTUVWXYZ

> 例如:

> 給 HIJ -> 預測 K

> 給 EFG -> 預測 H

目標訓練張量結構: (samples, time_steps, features) -> (n , **3**, **1** )

準備訓練資料集的時候要把資料的張量結構轉換成, 1筆訓練資料有"3"個時間步, 裡頭存放著"1"個字符的資料"features"向量。

In [24]:
# Reshape X into (samples, time_steps, features). Note the layout used here:
# `seq_length` time steps, each carrying ONE letter as its single feature.
X = numpy.reshape(dataX, (len(dataX), seq_length, 1))

# Normalize the integer codes into [0, 1).
X = X / float(len(alphabet))

# One-hot encode the targets. num_classes is pinned to the alphabet size so the label
# width always matches int_to_char; without it to_categorical infers the width from
# max(dataY) + 1, which equals 26 here only because 'Z' happens to be a target.
y = utils.to_categorical(dataY, num_classes=len(alphabet))

print("X shape: ", X.shape)
print("y shape: ", y.shape)

X shape:  (23, 3, 1)
y shape:  (23, 26)


In [25]:
# Display the normalized input tensor (three time steps of one feature per sample).
X

array([[[0.        ],
        [0.03846154],
        [0.07692308]],

       [[0.03846154],
        [0.07692308],
        [0.11538462]],

       [[0.07692308],
        [0.11538462],
        [0.15384615]],

       [[0.11538462],
        [0.15384615],
        [0.19230769]],

       [[0.15384615],
        [0.19230769],
        [0.23076923]],

       [[0.19230769],
        [0.23076923],
        [0.26923077]],

       [[0.23076923],
        [0.26923077],
        [0.30769231]],

       [[0.26923077],
        [0.30769231],
        [0.34615385]],

       [[0.30769231],
        [0.34615385],
        [0.38461538]],

       [[0.34615385],
        [0.38461538],
        [0.42307692]],

       [[0.38461538],
        [0.42307692],
        [0.46153846]],

       [[0.42307692],
        [0.46153846],
        [0.5       ]],

       [[0.46153846],
        [0.5       ],
        [0.53846154]],

       [[0.5       ],
        [0.53846154],
        [0.57692308]],

       [[0.53846154],
        [0.57692308],
    

### STEP3. 建立模型

In [26]:
# Build the network. The input shape comes straight from X, so here the RNN sees
# (time_steps, features) = (3, 1): three steps of one feature each.
model = Sequential([
    SimpleRNN(32, input_shape=X.shape[1:]),
    Dense(y.shape[1], activation='softmax'),
])

model.summary()

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 simple_rnn_2 (SimpleRNN)    (None, 32)                1088      
                                                                 
 dense_2 (Dense)             (None, 26)                858       
                                                                 
Total params: 1946 (7.60 KB)
Trainable params: 1946 (7.60 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


### STEP4. 定義訓練並進行訓練

In [27]:
# Configure the optimizer and loss, then train.
# batch_size=1 updates the weights after every single sample;
# verbose=2 prints one summary line per epoch.
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)
model.fit(x=X, y=y, batch_size=1, epochs=500, verbose=2)

Epoch 1/500
23/23 - 1s - loss: 3.3139 - accuracy: 0.0435 - 1s/epoch - 57ms/step
Epoch 2/500
23/23 - 0s - loss: 3.2514 - accuracy: 0.0435 - 113ms/epoch - 5ms/step
Epoch 3/500
23/23 - 0s - loss: 3.2177 - accuracy: 0.0435 - 113ms/epoch - 5ms/step
Epoch 4/500
23/23 - 0s - loss: 3.1937 - accuracy: 0.0435 - 118ms/epoch - 5ms/step
Epoch 5/500
23/23 - 0s - loss: 3.1719 - accuracy: 0.0435 - 113ms/epoch - 5ms/step
Epoch 6/500
23/23 - 0s - loss: 3.1607 - accuracy: 0.0435 - 114ms/epoch - 5ms/step
Epoch 7/500
23/23 - 0s - loss: 3.1376 - accuracy: 0.0000e+00 - 102ms/epoch - 4ms/step
Epoch 8/500
23/23 - 0s - loss: 3.1253 - accuracy: 0.0000e+00 - 100ms/epoch - 4ms/step
Epoch 9/500
23/23 - 0s - loss: 3.1112 - accuracy: 0.0435 - 112ms/epoch - 5ms/step
Epoch 10/500
23/23 - 0s - loss: 3.0947 - accuracy: 0.0000e+00 - 104ms/epoch - 5ms/step
Epoch 11/500
23/23 - 0s - loss: 3.0766 - accuracy: 0.0000e+00 - 107ms/epoch - 5ms/step
Epoch 12/500
23/23 - 0s - loss: 3.0573 - accuracy: 0.0435 - 128ms/epoch - 6ms/step

<keras.src.callbacks.History at 0x7b02c16f9630>

### STEP5. 評估模型準確率

In [28]:
# Evaluate the model on the same data it was trained on (no held-out set is used).
# scores[0] is the loss and scores[1] is the accuracy metric.
scores = model.evaluate(X, y, verbose=0)
accuracy_pct = scores[1] * 100
print("Model Accuracy: %.2f%%" % accuracy_pct)

Model Accuracy: 100.00%


### STEP6. 預測結果

In [29]:
# Run inference on every three-letter pattern.
# The patterns are encoded exactly like the training data (three time steps of one
# feature each) and scored with a single predict() call: Keras documents predict()
# as a batch API that is not meant to be invoked once per sample inside a loop.
x = numpy.reshape(dataX, (len(dataX), seq_length, 1))  # (batch, time_steps, features)
x = x / float(len(alphabet))
predictions = model.predict(x, verbose=0)  # one row of class probabilities per pattern

for pattern, prediction in zip(dataX, predictions):
    index = numpy.argmax(prediction)  # index of the most probable class
    result = int_to_char[index]  # decode the class index back into a letter
    seq_in = [int_to_char[value] for value in pattern]
    print(seq_in, "->", result)

['A', 'B', 'C'] -> D
['B', 'C', 'D'] -> E
['C', 'D', 'E'] -> F
['D', 'E', 'F'] -> G
['E', 'F', 'G'] -> H
['F', 'G', 'H'] -> I
['G', 'H', 'I'] -> J
['H', 'I', 'J'] -> K
['I', 'J', 'K'] -> L
['J', 'K', 'L'] -> M
['K', 'L', 'M'] -> N
['L', 'M', 'N'] -> O
['M', 'N', 'O'] -> P
['N', 'O', 'P'] -> Q
['O', 'P', 'Q'] -> R
['P', 'Q', 'R'] -> S
['Q', 'R', 'S'] -> T
['R', 'S', 'T'] -> U
['S', 'T', 'U'] -> V
['T', 'U', 'V'] -> W
['U', 'V', 'W'] -> X
['V', 'W', 'X'] -> Y
['W', 'X', 'Y'] -> Z


由"模型#3"的表現來看, 當我們以"時間步驟(timesteps)"的形式給出更多的上下文(context)來訓練RNN模型時, 這時候循環神經網絡在序列資料的學習的效果就可以發揮出它的效用。

"模型#3"在驗證的結果可達到100%的預測準確度(在這個很簡單的26個字母的順序預測的任務上)!

## 連續2字符預測後面第2字符

In [35]:
# Build the training pairs: each window of two consecutive letters is the input and
# the target is the SECOND letter after the window (one letter is skipped).
seq_length = 2
dataX, dataY = [], []
for start in range(len(alphabet) - seq_length - 1):
    seq_in = alphabet[start:start + seq_length]
    seq_out = alphabet[start + seq_length + 1]  # skip the letter right after the window
    dataX.append([char_to_int[ch] for ch in seq_in])  # integer-encode the input window
    dataY.append(char_to_int[seq_out])  # integer-encode the target letter
    print(seq_in, '->', seq_out)

AB -> D
BC -> E
CD -> F
DE -> G
EF -> H
FG -> I
GH -> J
HI -> K
IJ -> L
JK -> M
KL -> N
LM -> O
MN -> P
NO -> Q
OP -> R
PQ -> S
QR -> T
RS -> U
ST -> V
TU -> W
UV -> X
VW -> Y
WX -> Z


In [36]:
# Display the integer-encoded two-letter input windows.
dataX

[[0, 1],
 [1, 2],
 [2, 3],
 [3, 4],
 [4, 5],
 [5, 6],
 [6, 7],
 [7, 8],
 [8, 9],
 [9, 10],
 [10, 11],
 [11, 12],
 [12, 13],
 [13, 14],
 [14, 15],
 [15, 16],
 [16, 17],
 [17, 18],
 [18, 19],
 [19, 20],
 [20, 21],
 [21, 22],
 [22, 23]]

In [37]:
# Reshape X into (samples, time_steps, features): `seq_length` time steps, each
# carrying one letter as its single feature.
X = numpy.reshape(dataX, (len(dataX), seq_length, 1))

# Normalize the integer codes into [0, 1).
X = X / float(len(alphabet))

# One-hot encode the targets. num_classes is pinned to the alphabet size so the label
# width always matches int_to_char; without it to_categorical infers the width from
# max(dataY) + 1, which equals 26 here only because 'Z' happens to be a target.
y = utils.to_categorical(dataY, num_classes=len(alphabet))

print("X shape: ", X.shape)
print("y shape: ", y.shape)

X shape:  (23, 2, 1)
y shape:  (23, 26)


In [38]:
# Display the normalized input tensor (two time steps of one feature per sample).
X

array([[[0.        ],
        [0.03846154]],

       [[0.03846154],
        [0.07692308]],

       [[0.07692308],
        [0.11538462]],

       [[0.11538462],
        [0.15384615]],

       [[0.15384615],
        [0.19230769]],

       [[0.19230769],
        [0.23076923]],

       [[0.23076923],
        [0.26923077]],

       [[0.26923077],
        [0.30769231]],

       [[0.30769231],
        [0.34615385]],

       [[0.34615385],
        [0.38461538]],

       [[0.38461538],
        [0.42307692]],

       [[0.42307692],
        [0.46153846]],

       [[0.46153846],
        [0.5       ]],

       [[0.5       ],
        [0.53846154]],

       [[0.53846154],
        [0.57692308]],

       [[0.57692308],
        [0.61538462]],

       [[0.61538462],
        [0.65384615]],

       [[0.65384615],
        [0.69230769]],

       [[0.69230769],
        [0.73076923]],

       [[0.73076923],
        [0.76923077]],

       [[0.76923077],
        [0.80769231]],

       [[0.80769231],
        [0.8

In [39]:
# Build the network. The input shape comes straight from X, so here the RNN sees
# (time_steps, features) = (2, 1): two steps of one feature each.
model = Sequential([
    SimpleRNN(32, input_shape=X.shape[1:]),
    Dense(y.shape[1], activation='softmax'),
])

model.summary()

Model: "sequential_4"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 simple_rnn_4 (SimpleRNN)    (None, 32)                1088      
                                                                 
 dense_4 (Dense)             (None, 26)                858       
                                                                 
Total params: 1946 (7.60 KB)
Trainable params: 1946 (7.60 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [40]:
# Configure the optimizer and loss, then train.
# batch_size=1 updates the weights after every single sample;
# verbose=2 prints one summary line per epoch.
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)
model.fit(x=X, y=y, batch_size=1, epochs=500, verbose=2)

Epoch 1/500
23/23 - 1s - loss: 3.3055 - accuracy: 0.0000e+00 - 1s/epoch - 53ms/step
Epoch 2/500
23/23 - 0s - loss: 3.2572 - accuracy: 0.0435 - 70ms/epoch - 3ms/step
Epoch 3/500
23/23 - 0s - loss: 3.2334 - accuracy: 0.0435 - 69ms/epoch - 3ms/step
Epoch 4/500
23/23 - 0s - loss: 3.2116 - accuracy: 0.0435 - 68ms/epoch - 3ms/step
Epoch 5/500
23/23 - 0s - loss: 3.1915 - accuracy: 0.0000e+00 - 70ms/epoch - 3ms/step
Epoch 6/500
23/23 - 0s - loss: 3.1735 - accuracy: 0.0435 - 67ms/epoch - 3ms/step
Epoch 7/500
23/23 - 0s - loss: 3.1548 - accuracy: 0.0000e+00 - 72ms/epoch - 3ms/step
Epoch 8/500
23/23 - 0s - loss: 3.1381 - accuracy: 0.0435 - 71ms/epoch - 3ms/step
Epoch 9/500
23/23 - 0s - loss: 3.1262 - accuracy: 0.0435 - 83ms/epoch - 4ms/step
Epoch 10/500
23/23 - 0s - loss: 3.1083 - accuracy: 0.0435 - 75ms/epoch - 3ms/step
Epoch 11/500
23/23 - 0s - loss: 3.0976 - accuracy: 0.0435 - 72ms/epoch - 3ms/step
Epoch 12/500
23/23 - 0s - loss: 3.0824 - accuracy: 0.0435 - 73ms/epoch - 3ms/step
Epoch 13/500
2

<keras.src.callbacks.History at 0x7b02c118e080>

In [41]:
# Evaluate the model on the same data it was trained on (no held-out set is used).
# scores[0] is the loss and scores[1] is the accuracy metric.
scores = model.evaluate(X, y, verbose=0)
accuracy_pct = scores[1] * 100
print("Model Accuracy: %.2f%%" % accuracy_pct)



Model Accuracy: 95.65%


In [42]:
# Run inference on every two-letter pattern.
# The patterns are encoded exactly like the training data (two time steps of one
# feature each) and scored with a single predict() call: Keras documents predict()
# as a batch API that is not meant to be invoked once per sample inside a loop.
x = numpy.reshape(dataX, (len(dataX), seq_length, 1))  # (batch, time_steps, features)
x = x / float(len(alphabet))
predictions = model.predict(x, verbose=0)  # one row of class probabilities per pattern

for pattern, prediction in zip(dataX, predictions):
    index = numpy.argmax(prediction)  # index of the most probable class
    result = int_to_char[index]  # decode the class index back into a letter
    seq_in = [int_to_char[value] for value in pattern]
    print(seq_in, "->", result)

['A', 'B'] -> D
['B', 'C'] -> E
['C', 'D'] -> F
['D', 'E'] -> G
['E', 'F'] -> H
['F', 'G'] -> I
['G', 'H'] -> J
['H', 'I'] -> K
['I', 'J'] -> L
['J', 'K'] -> M
['K', 'L'] -> N
['L', 'M'] -> O
['M', 'N'] -> P
['N', 'O'] -> Q
['O', 'P'] -> R
['P', 'Q'] -> S
['Q', 'R'] -> T
['R', 'S'] -> U
['S', 'T'] -> V
['T', 'U'] -> W
['U', 'V'] -> X
['V', 'W'] -> Z
['W', 'X'] -> Z
