In [None]:
import numpy as np
from tensorflow.keras.layers import Dense, Embedding, Input, LSTM
from tensorflow.keras.models import Sequential
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.utils import to_categorical

# Training data: a short passage of plain sentences.
text = "I’m 'too excited' — a concept that appears in this piece — to share this exploration of microchips, democracy, and the face of God. What I saw in Taiwan, and especially in the mysterious wonder-of-the-world fabs at TSMC, changed my thinking about everything. I found pure awe, and durable hope for the future. Democracy will survive. "

# Data preparation.
# Lowercase the text and split on whitespace; punctuation stays attached to
# its word, so "democracy," and "democracy" are distinct vocabulary entries.
words = text.lower().split()
vocab = sorted(set(words))
word_to_index = dict(zip(vocab, range(len(vocab))))
index_to_word = dict(enumerate(vocab))

# Build the inputs and targets: every window of `sequence_length` consecutive
# words is an input, and the word right after the window is its target.
sequence_length = 3  # number of context words used to predict the next word
token_ids = [word_to_index[token] for token in words]
sequences = [
    token_ids[start:start + sequence_length]
    for start in range(len(token_ids) - sequence_length)
]
# The target of window k is the word at position k + sequence_length.
next_words = token_ids[sequence_length:]

# Convert the training data to NumPy arrays: X holds the integer-encoded
# context windows, y the one-hot encoded next-word targets.
X = np.array(sequences)
y = to_categorical(next_words, num_classes=len(vocab))

# Build the LSTM model: embedding -> LSTM -> softmax over the vocabulary.
# The input shape is declared with an explicit Input layer because the
# `input_length` argument of Embedding is deprecated in Keras 3 (it is
# ignored and only triggers a warning).
model = Sequential([
    Input(shape=(sequence_length,), dtype='int32'),
    Embedding(input_dim=len(vocab), output_dim=50),
    LSTM(128, return_sequences=False),
    Dense(len(vocab), activation='softmax'),
])

# Compile the model; categorical cross-entropy matches the one-hot targets in y.
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Train the model (verbose=2 prints one summary line per epoch).
model.fit(X, y, epochs=500, verbose=2, batch_size=16)

# 生成句子
def generate_text(seed_text, num_words):
    """Extend ``seed_text`` with ``num_words`` greedily predicted words.

    At every step the last ``sequence_length`` words of the running text are
    encoded with ``word_to_index``, fed to ``model``, and the highest-scoring
    vocabulary entry is appended.

    Args:
        seed_text: Starting text; it is split on whitespace.
        num_words: Number of words to append. Values <= 0 append nothing.

    Returns:
        The seed words (original casing preserved) followed by the predicted
        words, joined with single spaces.
    """
    result = seed_text.split()
    for _ in range(num_words):
        # Encode the current context window as vocabulary indices. The
        # vocabulary is built from lowercased text, so the lookup must be
        # lowercased too; otherwise capitalised seed words such as "In"
        # miss the vocabulary and silently collapse to index 0.
        context = result[-sequence_length:]
        input_sequence = [word_to_index.get(w.lower(), 0) for w in context]
        # NOTE: unknown words and left-padding both use index 0, which is
        # also the index of a real vocabulary word.
        input_sequence = pad_sequences([input_sequence], maxlen=sequence_length, padding='pre')
        # Predict the next word (greedy: take the most probable index).
        predicted_probs = model.predict(input_sequence, verbose=0)
        predicted_index = int(np.argmax(predicted_probs))
        # Append the predicted word so it becomes part of the next context.
        result.append(index_to_word[predicted_index])
    return ' '.join(result)

# Try out text generation: extend a short seed phrase by ten predicted words.
seed = "In Taiwan, "
generated = generate_text(seed, num_words=10)
print(generated)



Epoch 1/500
4/4 - 6s - 1s/step - accuracy: 0.0566 - loss: 3.8303
Epoch 2/500
4/4 - 0s - 45ms/step - accuracy: 0.1132 - loss: 3.8219
Epoch 3/500
4/4 - 0s - 14ms/step - accuracy: 0.1132 - loss: 3.8156
Epoch 4/500
4/4 - 0s - 14ms/step - accuracy: 0.1698 - loss: 3.8088
Epoch 5/500
4/4 - 0s - 15ms/step - accuracy: 0.2075 - loss: 3.8018
Epoch 6/500
4/4 - 0s - 9ms/step - accuracy: 0.2264 - loss: 3.7941
Epoch 7/500
4/4 - 0s - 8ms/step - accuracy: 0.2075 - loss: 3.7852
Epoch 8/500
4/4 - 0s - 15ms/step - accuracy: 0.2264 - loss: 3.7744
Epoch 9/500
4/4 - 0s - 9ms/step - accuracy: 0.2264 - loss: 3.7628
Epoch 10/500
4/4 - 0s - 9ms/step - accuracy: 0.2264 - loss: 3.7473
Epoch 11/500
4/4 - 0s - 17ms/step - accuracy: 0.1887 - loss: 3.7294
Epoch 12/500
4/4 - 0s - 12ms/step - accuracy: 0.1887 - loss: 3.7078
Epoch 13/500
4/4 - 0s - 9ms/step - accuracy: 0.1698 - loss: 3.6800
Epoch 14/500
4/4 - 0s - 9ms/step - accuracy: 0.1698 - loss: 3.6476
Epoch 15/500
4/4 - 0s - 9ms/step - accuracy: 0.1698 - loss: 3.604