In [54]:
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, Bidirectional, LSTM, Dense, Dropout, Activation
from tensorflow.keras.callbacks import EarlyStopping

# Length of every generated character sequence.
# Original author's note: raising this to 10 (Seq2Seq-style setup) lowers
# accuracy because the model is an LSTM.
cal_len = 5

# Vocabulary: the upper-case letters followed by the lower-case letters.
chars = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz'
num_classes = len(chars)

# Two-way lookup tables between a character and its position in `chars`.
char_to_index = dict(zip(chars, range(num_classes)))
index_to_char = dict(enumerate(chars))

# 10000 random sequences of cal_len characters; each label row is the
# corresponding input row read right-to-left.
train_data = np.random.choice(list(chars), size=(10000, cal_len))
label_data = train_data[:, ::-1]

from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

def _encode_rows(rows):
    """Return a 2-D integer array with each character replaced by its index in the vocabulary."""
    return np.array([[char_to_index[symbol] for symbol in row] for row in rows])


# Integer-encode the inputs and their reversed targets.
train_data_indices = _encode_rows(train_data)
label_data_indices = _encode_rows(label_data)

# Show the first few raw input rows, then the matching target rows.
print(train_data[:4])
print(label_data[:4])

# Bring both arrays to exactly cal_len columns. The rows are generated with
# cal_len entries each, so no padding or truncation is expected to occur here.
padded_train = pad_sequences(train_data_indices, maxlen=cal_len)
padded_labels = pad_sequences(label_data_indices, maxlen=cal_len)
print(padded_train[0])

# Per-position character classifier: the network emits one softmax over
# num_classes for every input time step, so output length equals input length.
model = Sequential()
# No `input_length` here: Keras 3 deprecates that argument (it only triggers a
# warning); the input shape is supplied through model.build() below.
model.add(Embedding(num_classes, 100))
# return_sequences=True keeps one output vector per time step; the
# bidirectional wrapper concatenates the forward and backward passes.
model.add(Bidirectional(LSTM(64, return_sequences=True)))
model.add(Dense(num_classes, activation='softmax'))
# Inputs are (batch, cal_len) integer indices: the second axis is the sequence
# length, not the vocabulary size.
model.build(input_shape=(None, cal_len))
model.summary()

# Targets are integer class ids (not one-hot vectors), hence the sparse loss.
model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# Stop once validation loss has not improved for 7 epochs and roll back to the
# best weights seen so far.
early_stop = EarlyStopping(monitor='val_loss', patience=7, restore_best_weights=True)

# The last 20% of the samples are held out as the validation set that drives
# early stopping; 500 is only an upper bound on the number of epochs.
history = model.fit(
    padded_train,
    padded_labels,
    epochs=500,
    batch_size=32,
    validation_split=0.2,
    callbacks=[early_stop],
)

# NOTE: this scores the same arrays that were passed to fit() (training plus
# validation split), so it measures fit on seen data, not generalisation.
loss, acc = model.evaluate(padded_train, padded_labels)
print(f"loss: {loss:.4f}, acc={acc:.4f}")

# 20 fresh random strings and the reversed strings the model should produce.
test_data = ["".join(np.random.choice(list(chars), cal_len)) for _ in range(20)]
expected_outputs = [s[::-1] for s in test_data]

# Encode every test string and run ONE batched predict() call instead of one
# call per string: predict() has a fixed per-call overhead and prints its own
# progress bar each time it is invoked.
test_indices = np.array([[char_to_index[char] for char in s] for s in test_data])
padded_tests = pad_sequences(test_indices, maxlen=cal_len)
predictions = model.predict(padded_tests)
# Most likely class per position; one row of cal_len indices per test string.
predicted_index_rows = np.argmax(predictions, axis=-1)

correct_predictions = 0
total_predictions = len(test_data)

for test_string, expected_output, predicted_indices in zip(
    test_data, expected_outputs, predicted_index_rows
):
    predicted_output = "".join(index_to_char[idx] for idx in predicted_indices)

    # A prediction only counts when the whole string matches.
    is_correct = predicted_output == expected_output
    if is_correct:
        correct_predictions += 1
    print(f" Input: {test_string}")
    print(f" Est. Output: {predicted_output}")
    print(f" expected Output: {expected_output}")
    print(f" Correct: {'Yes' if is_correct else 'Wrong'}\n")

# Fraction of test strings reproduced exactly (whole-string accuracy).
accuracy = correct_predictions / total_predictions
print(f" Total Acc: {accuracy*100:.2f}%")

[['y' 'g' 'J' 'S' 'G']
 ['F' 'K' 'c' 'K' 'o']
 ['M' 'Y' 'A' 's' 'C']
 ['h' 'C' 'z' 'C' 'e']]
[['G' 'S' 'J' 'g' 'y']
 ['o' 'K' 'c' 'K' 'F']
 ['C' 's' 'A' 'Y' 'M']
 ['e' 'C' 'z' 'C' 'h']]
[50 32  9 18  6]




Epoch 1/500
250/250 ━━━━━━━━━━━━━━━━━━━━ 6s 8ms/step - accuracy: 0.1720 - loss: 3.6180 - val_accuracy: 0.4303 - val_loss: 1.9112
Epoch 2/500
250/250 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - accuracy: 0.4527 - loss: 1.7127 - val_accuracy: 0.4795 - val_loss: 1.4533
Epoch 3/500
250/250 ━━━━━━━━━━━━━━━━━━━━ 1s 6ms/step - accuracy: 0.5002 - loss: 1.3536 - val_accuracy: 0.5060 - val_loss: 1.3004
Epoch 4/500
250/250 ━━━━━━━━━━━━━━━━━━━━ 1s 6ms/step - accuracy: 0.5319 - loss: 1.2055 - val_accuracy: 0.5296 - val_loss: 1.2137
Epoch 5/500
250/250 ━━━━━━━━━━━━━━━━━━━━ 2s 7ms/step - accuracy: 0.5636 - loss: 1.1102 - val_accuracy: 0.5514 - val_loss: 1.1389
Epoch 6/500
250/250 ━━━━━━━━━━━━━━━━━━━━ 2s 7ms/step - accuracy: 0.5894 - loss: 1.0255 - val_accuracy: 0.5699 - val_loss: 1.0881
Epoch 7/500
[1m250/25