# 循環神經網絡 LSTM (長短期記憶)+ embedding 來學習字母表順序

## 模型 1. 用LSTM學習一個字符到一個字符映射

### STEP1. 匯入 Keras 及相關模組

In [1]:
import numpy
import tensorflow
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Embedding
from tensorflow.keras.layers import LSTM
from tensorflow.keras import utils
from tensorflow.keras.preprocessing.sequence import pad_sequences

# 給定隨機的種子, 以便讓大家跑起來的結果是相同的
numpy.random.seed(7)

### STEP2. 準備資料

我們現在可以定義我們的數據集，字母表(alphabet)。為了便於閱讀，我們使用大寫字母來定義字母表。

我們需要將字母表的每個字母映射到數字以便使用人工網絡來進行訓練。我們可以通過為字符創建字母索引的字典來輕鬆完成此操作。
我們還可以創建一個反向查找，將預測轉換回字符以供以後使用。

In [2]:
# 定義序列數據集
alphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"

# 創建字符映射到整數（0 - 25)和反相的查詢字典物件
char_to_int = dict((c, i) for i, c in enumerate(alphabet))
int_to_char = dict((i, c) for i, c in enumerate(alphabet))

In [3]:
# 打印看一下
print("字母對應到數字編號: \n", char_to_int)
print("\n")

print("數字編號對應到字母: \n", int_to_char)

字母對應到數字編號: 
 {'A': 0, 'B': 1, 'C': 2, 'D': 3, 'E': 4, 'F': 5, 'G': 6, 'H': 7, 'I': 8, 'J': 9, 'K': 10, 'L': 11, 'M': 12, 'N': 13, 'O': 14, 'P': 15, 'Q': 16, 'R': 17, 'S': 18, 'T': 19, 'U': 20, 'V': 21, 'W': 22, 'X': 23, 'Y': 24, 'Z': 25}


數字編號對應到字母: 
 {0: 'A', 1: 'B', 2: 'C', 3: 'D', 4: 'E', 5: 'F', 6: 'G', 7: 'H', 8: 'I', 9: 'J', 10: 'K', 11: 'L', 12: 'M', 13: 'N', 14: 'O', 15: 'P', 16: 'Q', 17: 'R', 18: 'S', 19: 'T', 20: 'U', 21: 'V', 22: 'W', 23: 'X', 24: 'Y', 25: 'Z'}


### STEP3. 準備訓練用資料

現在我們需要創建我們的輸入(X)和輸出(y)來訓練我們的神經網絡。我們可以通過定義一個輸入序列長度，然後從輸入字母序列中讀取序列。
例如，我們使用輸入長度1.從原始輸入數據的開頭開始，我們可以讀取第一個字母“A”，下一個字母作為預測“B”。我們沿著一個字符移動並重複，直到達到“Z”的預測。

In [4]:
# 準備輸入數據集
seq_length = 1
dataX = []
dataY = []
for i in range(0, len(alphabet) - seq_length, 1):
    seq_in = alphabet[i:i + seq_length]
    seq_out = alphabet[i + seq_length]
    dataX.append([char_to_int[char] for char in seq_in])
    dataY.append(char_to_int[seq_out])
    print(seq_in, '->', seq_out)

A -> B
B -> C
C -> D
D -> E
E -> F
F -> G
G -> H
H -> I
I -> J
J -> K
K -> L
L -> M
M -> N
N -> O
O -> P
P -> Q
Q -> R
R -> S
S -> T
T -> U
U -> V
V -> W
W -> X
X -> Y
Y -> Z


In [5]:
print(dataX)
print(dataY)

[[0], [1], [2], [3], [4], [5], [6], [7], [8], [9], [10], [11], [12], [13], [14], [15], [16], [17], [18], [19], [20], [21], [22], [23], [24]]
[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25]


### STEP4. 資料預處理

> ABCDEFGHIJKLMNOPQRSTUVWXYZ

> 例如:

> 給 J -> 預測 K

> 給 X -> 預測 Y


In [6]:
X = numpy.array(dataX)

# 歸一化
# X = X / float(len(alphabet)) # Let Embedding do the job!

# one-hot 編碼輸出變量
y = utils.to_categorical(dataY)

print("X shape: ", X.shape) # (25筆samples, "1"個時間步長) (batch, timesteps)
print("y shape: ", y.shape)

X shape:  (25, 1)
y shape:  (25, 26)


### STEP5. 建立模型

In [18]:
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense, SimpleRNN, Dropout, Embedding

# 創建模型
model = Sequential()   # 輸入 （sample size，time steps ） with label encoding!!!
model.add(Embedding(26, 8, input_length=X.shape[1])) # => Embedding參數(input features, output features (word2vec's size), time steps)
model.add(LSTM(32))
model.add(Dense(26, activation='softmax'))

model.summary()

Model: "sequential_5"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_5 (Embedding)     (None, 1, 8)              208       
                                                                 
 lstm_5 (LSTM)               (None, 32)                5248      
                                                                 
 dense_5 (Dense)             (None, 26)                858       
                                                                 
Total params: 6314 (24.66 KB)
Trainable params: 6314 (24.66 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


### STEP6. 定義訓練並進行訓練

In [19]:
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['categorical_accuracy'])
model.fit(X, y, epochs=100, batch_size=1, verbose=2)

Epoch 1/100
25/25 - 2s - loss: 3.2636 - categorical_accuracy: 0.0000e+00 - 2s/epoch - 77ms/step
Epoch 2/100
25/25 - 0s - loss: 3.2570 - categorical_accuracy: 0.0400 - 82ms/epoch - 3ms/step
Epoch 3/100
25/25 - 0s - loss: 3.2542 - categorical_accuracy: 0.0800 - 87ms/epoch - 3ms/step
Epoch 4/100
25/25 - 0s - loss: 3.2512 - categorical_accuracy: 0.0800 - 84ms/epoch - 3ms/step
Epoch 5/100
25/25 - 0s - loss: 3.2479 - categorical_accuracy: 0.1200 - 85ms/epoch - 3ms/step
Epoch 6/100
25/25 - 0s - loss: 3.2439 - categorical_accuracy: 0.2000 - 86ms/epoch - 3ms/step
Epoch 7/100
25/25 - 0s - loss: 3.2394 - categorical_accuracy: 0.3200 - 85ms/epoch - 3ms/step
Epoch 8/100
25/25 - 0s - loss: 3.2336 - categorical_accuracy: 0.3200 - 86ms/epoch - 3ms/step
Epoch 9/100
25/25 - 0s - loss: 3.2258 - categorical_accuracy: 0.4400 - 81ms/epoch - 3ms/step
Epoch 10/100
25/25 - 0s - loss: 3.2168 - categorical_accuracy: 0.6400 - 85ms/epoch - 3ms/step
Epoch 11/100
25/25 - 0s - loss: 3.2051 - categorical_accuracy: 0.6

<keras.src.callbacks.History at 0x7a91354d1060>

### STEP7. 評估模型準確率

In [20]:
# 評估模型的性能
scores = model.evaluate(X, y, verbose=0)
print("Model Accuracy: %.2f%%" % (scores[1]*100))

Model Accuracy: 100.00%


### STEP8. 預測結果

In [21]:
# 展示模型預測能力
for pattern in dataX:
    # 把26個字母一個個拿進模型來預測會出現的字母
    x = numpy.reshape(pattern, (1, len(pattern)))
    # x = x / float(len(alphabet))!!! 不用作!!

    prediction = model.predict(x, verbose=0)
    index = numpy.argmax(prediction) # 機率最大的idx
    result = int_to_char[index] # 看看預測出來的是那一個字母
    seq_in = [int_to_char[value] for value in pattern]
    print(seq_in, "->", result) # 打印結果

['A'] -> B
['B'] -> C
['C'] -> D
['D'] -> E
['E'] -> F
['F'] -> G
['G'] -> H
['H'] -> I
['I'] -> J
['J'] -> K
['K'] -> L
['L'] -> M
['M'] -> N
['N'] -> O
['O'] -> P
['P'] -> Q
['Q'] -> R
['R'] -> S
['S'] -> T
['T'] -> U
['U'] -> V
['V'] -> W
['W'] -> X
['X'] -> Y
['Y'] -> Z




---



---



## 模型 3. LSTM 學習三個字符的時間步驟窗口(Three-Char Time Step Window)到一個字符的映射

### STEP1. 準備訓練用資料

In [22]:
seq_length = 3
dataX = []
dataY = []
for i in range(0, len(alphabet) - seq_length, 1):
    seq_in = alphabet[i:i + seq_length]
    seq_out = alphabet[i + seq_length]
    dataX.append([char_to_int[char] for char in seq_in])
    dataY.append(char_to_int[seq_out])
    print(seq_in, '->', seq_out)

ABC -> D
BCD -> E
CDE -> F
DEF -> G
EFG -> H
FGH -> I
GHI -> J
HIJ -> K
IJK -> L
JKL -> M
KLM -> N
LMN -> O
MNO -> P
NOP -> Q
OPQ -> R
PQR -> S
QRS -> T
RST -> U
STU -> V
TUV -> W
UVW -> X
VWX -> Y
WXY -> Z


### STEP2. 資料預處理


> ABCDEFGHIJKLMNOPQRSTUVWXYZ

> 例如:

> 給 HIJ -> 預測 K

> 給 EFG -> 預測 H

目標訓練張量結構: (samples, time_steps) -> (n , **3**)


In [23]:
X = numpy.array(dataX)

# one-hot 編碼輸出變量
y = utils.to_categorical(dataY)


print("X shape: ", X.shape) # (25筆samples, "3"個時間步長) (batch, timesteps)
print("y shape: ", y.shape)

X shape:  (23, 3)
y shape:  (23, 26)


### STEP3. 建立模型

In [24]:
# 創建模型 (your work)
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense, SimpleRNN, Dropout, Embedding

# 創建模型
model = Sequential()   # 輸入 （sample size，time steps ） with label encoding!!!
model.add(Embedding(len(X)+1, 4, input_length=3)) # => Embedding參數(input features, output features (word2vec's size), time steps)
model.add(LSTM(32)) # 輸入 （sample size，time steps, input features）
model.add(Dense(26, activation='softmax'))

model.summary()

Model: "sequential_6"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_6 (Embedding)     (None, 3, 4)              96        
                                                                 
 lstm_6 (LSTM)               (None, 32)                4736      
                                                                 
 dense_6 (Dense)             (None, 26)                858       
                                                                 
Total params: 5690 (22.23 KB)
Trainable params: 5690 (22.23 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


### STEP4. 定義訓練並進行訓練

In [25]:
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['categorical_accuracy'])
model.fit(X, y, epochs=100, batch_size=1, verbose=2)

Epoch 1/100
23/23 - 3s - loss: 3.2647 - categorical_accuracy: 0.0000e+00 - 3s/epoch - 114ms/step
Epoch 2/100
23/23 - 0s - loss: 3.2560 - categorical_accuracy: 0.0435 - 122ms/epoch - 5ms/step
Epoch 3/100
23/23 - 0s - loss: 3.2519 - categorical_accuracy: 0.0435 - 129ms/epoch - 6ms/step
Epoch 4/100
23/23 - 0s - loss: 3.2475 - categorical_accuracy: 0.0435 - 114ms/epoch - 5ms/step
Epoch 5/100
23/23 - 0s - loss: 3.2421 - categorical_accuracy: 0.1304 - 112ms/epoch - 5ms/step
Epoch 6/100
23/23 - 0s - loss: 3.2360 - categorical_accuracy: 0.0870 - 112ms/epoch - 5ms/step
Epoch 7/100
23/23 - 0s - loss: 3.2281 - categorical_accuracy: 0.1739 - 117ms/epoch - 5ms/step
Epoch 8/100
23/23 - 0s - loss: 3.2182 - categorical_accuracy: 0.1739 - 90ms/epoch - 4ms/step
Epoch 9/100
23/23 - 0s - loss: 3.2034 - categorical_accuracy: 0.2609 - 78ms/epoch - 3ms/step
Epoch 10/100
23/23 - 0s - loss: 3.1820 - categorical_accuracy: 0.2609 - 82ms/epoch - 4ms/step
Epoch 11/100
23/23 - 0s - loss: 3.1496 - categorical_accura

<keras.src.callbacks.History at 0x7a91351c2e00>

### STEP5. 評估模型準確率

In [26]:
# 評估模型的性能
scores = model.evaluate(X, y, verbose=0)
print("Model Accuracy: %.2f%%" % (scores[1]*100))

Model Accuracy: 100.00%


### STEP6. 預測結果

In [27]:
# 讓我們擷取3個字符轉成張量結構 shape:(1,3,1)來進行infer
for pattern in dataX:
    x = numpy.reshape(pattern, (1, len(pattern), 1))
    # x = x / float(len(alphabet)) !!! 不做!
    prediction = model.predict(x, verbose=0)
    index = numpy.argmax(prediction)
    result = int_to_char[index]
    seq_in = [int_to_char[value] for value in pattern]
    print(seq_in, "->", result)

['A', 'B', 'C'] -> D
['B', 'C', 'D'] -> E
['C', 'D', 'E'] -> F
['D', 'E', 'F'] -> G
['E', 'F', 'G'] -> H
['F', 'G', 'H'] -> I
['G', 'H', 'I'] -> J
['H', 'I', 'J'] -> K
['I', 'J', 'K'] -> L
['J', 'K', 'L'] -> M
['K', 'L', 'M'] -> N
['L', 'M', 'N'] -> O
['M', 'N', 'O'] -> P
['N', 'O', 'P'] -> Q
['O', 'P', 'Q'] -> R
['P', 'Q', 'R'] -> S
['Q', 'R', 'S'] -> T
['R', 'S', 'T'] -> U
['S', 'T', 'U'] -> V
['T', 'U', 'V'] -> W
['U', 'V', 'W'] -> X
['V', 'W', 'X'] -> Y
['W', 'X', 'Y'] -> Z
