In [4]:
import numpy as np

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding
from tensorflow.keras.layers import Bidirectional
from tensorflow.keras.layers import LSTM
from tensorflow.keras.layers import Dense, Dropout, Activation
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

In [1]:
# Number of characters in every generated sequence; also used as the
# `maxlen` for pad_sequences and the Embedding `input_length`.
cal_len = 5

In [2]:
# Alphabet used throughout: 26 uppercase followed by 26 lowercase ASCII letters.
chars = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz'
num_classes = len(chars)

# Forward lookup: character -> integer class id (its position in `chars`).
char_to_index = dict(zip(chars, range(num_classes)))
# Reverse lookup: integer class id -> character.
# NOTE: despite the name, this maps index -> char (not index -> index).
index_to_index = dict(enumerate(chars))

In [5]:
# Generate the training data: 10,000 random sequences of `cal_len` letters.
# A dedicated, seeded Generator makes the dataset identical on every
# "Restart Kernel -> Run All" without touching NumPy's global random state.
train_rng = np.random.default_rng(42)
train_data = train_rng.choice(list(chars), size = (10000, cal_len))
# The target for each sequence is the same sequence read right-to-left.
label_data = np.flip(train_data, axis = 1)

In [6]:
# Convert character data to integer indices.
# Each character is looked up in `char_to_index`; an unknown character
# raises KeyError.
train_data_indices = np.array(
    [list(map(char_to_index.__getitem__, seq)) for seq in train_data]
)
label_data_indices = np.array(
    [list(map(char_to_index.__getitem__, seq)) for seq in label_data]
)

# Preview the first four inputs, then the first four (reversed) targets.
for preview_source in (train_data, label_data):
    print(preview_source[:4])

[['M' 'y' 'r' 'R' 'O']
 ['T' 'h' 'f' 'V' 'F']
 ['G' 'l' 'J' 'R' 'l']
 ['R' 'R' 'K' 'f' 'D']]
[['O' 'R' 'r' 'y' 'M']
 ['F' 'V' 'f' 'h' 'T']
 ['l' 'R' 'J' 'l' 'G']
 ['D' 'f' 'K' 'R' 'R']]


In [7]:
# pad_sequences preprocessing: bring inputs and labels to length `cal_len`.
# The same call (same `maxlen`, default padding options) is applied to both arrays.
padded_train, padded_labels = (
    pad_sequences(index_array, maxlen = cal_len)
    for index_array in (train_data_indices, label_data_indices)
)

In [8]:
# Model definition: embedding -> bidirectional LSTM -> per-position softmax.
model = Sequential([
    # Map each of the `num_classes` character ids to a 100-dimensional vector.
    Embedding(num_classes, 100, input_length = cal_len),
    # return_sequences = True keeps one output per input position, so the
    # model emits a prediction for every character of the sequence.
    Bidirectional(LSTM(64, return_sequences = True)),
    # Probability distribution over the alphabet at every position.
    Dense(num_classes, activation = 'softmax'),
])
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (None, 5, 100)            5200      
                                                                 
 bidirectional (Bidirectiona  (None, 5, 128)           84480     
 l)                                                              
                                                                 
 dense (Dense)               (None, 5, 52)             6708      
                                                                 
Total params: 96,388
Trainable params: 96,388
Non-trainable params: 0
_________________________________________________________________


In [9]:
# Compile the model. The sparse loss takes integer class ids as labels,
# which is what the index arrays contain (no one-hot encoding needed).
model.compile(
    optimizer = 'adam',
    loss = 'sparse_categorical_crossentropy',
    metrics = ['accuracy'],
)

In [10]:
# Train the model; 20% of the samples are held out as a validation split.
history = model.fit(
    x = padded_train,
    y = padded_labels,
    batch_size = 32,
    epochs = 50,
    validation_split = 0.2,
)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [None]:
# Loss and accuracy, measured on the same arrays that were passed to fit()
# (so this is not a held-out score).
loss, acc = model.evaluate(x = padded_train, y = padded_labels)
print(f"loss : {loss: .4f}, acc = {acc: .4f}")

In [None]:
# Build a list of 20 random strings, each made of `cal_len` upper/lowercase
# English letters.
def make_random_string():
    """Return one random string of `cal_len` letters drawn from `chars`."""
    letters = np.random.choice(list(chars), cal_len)
    return "".join(letters)

test_data = [make_random_string() for _ in range(20)]

In [None]:
# Build the list of reversed strings (the expected model outputs).
expected_outputs = ["".join(reversed(text)) for text in test_data]

In [None]:
# Score the model on the generated test strings and print a per-string report.
#
# All strings are encoded and sent through the model in ONE predict() call.
# Calling predict() once per string inside the loop is slow (predict() is
# meant for batches) and interleaves a progress bar with every report entry.
correct_predictions = 0
total_predictons = len(test_data)

# One row of class ids per test string.
encoded_inputs = np.array([[char_to_index[char] for char in test_string] for test_string in test_data])
padded_inputs = pad_sequences(encoded_inputs, maxlen = cal_len)
# verbose = 0 keeps the Keras progress bar out of the report printed below.
predictions = model.predict(padded_inputs, verbose = 0)
# Most probable class at every position of every string.
predicted_index_rows = np.argmax(predictions, axis = -1)

for test_string, expected_output, predicted_indices in zip(test_data, expected_outputs, predicted_index_rows):
    # `index_to_index` maps a class id back to its character.
    predicted_output = "".join(index_to_index[idx] for idx in predicted_indices)

    is_correct = predicted_output == expected_output
    if is_correct:
        correct_predictions += 1
    print(f"입력: {test_string}")
    print(f"예측된 출력:  {predicted_output}")
    print(f"실제 출력: {expected_output}")
    print(f"정확 여부: {'맞음' if is_correct else '틀림'}\n")

# Fraction of strings whose whole predicted sequence matched the reversed input.
accuracy = correct_predictions / total_predictons
print(f"총 정확도:  {accuracy * 100:.2f}%")