### LSTM 모델 설계

In [9]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, LSTM, Dense


In [11]:
print(tf.__version__)
### GPU 사용 가능 여부 확인
gpus = tf.config.list_physical_devices('GPU')
if gpus:
    print("사용 가능한 GPU:")
    for gpu in gpus:
        print(gpu)
else:
    print("GPU가 인식되지 않았습니다.")

2.17.0
사용 가능한 GPU:
PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')


- 모델설계

In [12]:
# 인코더
encoder_inputs = Input(shape=(None, 50))
encoder_lstm = LSTM(256, return_state=True) # LSTM 레이어 사용

encoder_outputs, state_h, state_c = encoder_lstm(encoder_inputs)
# encoder_outputs: 층의 타임 스텝에서의 출력 정보(출력 상태)
# state_h, state_c: hidden state, cell state

encoder_states = [state_h, state_c]
 # 디코더에 전달될 인코더 정보(최종 상태) state_h , state_c



# 인코더
# 입력 시퀀스 -> hidden state (고정된 크기의 컨텍스트 벡터)  변환
# hidden state 벡터 : 입력 데이터의 전체 정보 압축 표현(일정 크기)
# RNN, LSTM, GRU 구조 사용

I0000 00:00:1722833049.372281    1550 cuda_executor.cc:1001] could not open file to read NUMA node: /sys/bus/pci/devices/0000:07:00.0/numa_node
Your kernel may have been built without NUMA support.
I0000 00:00:1722833049.372414    1550 cuda_executor.cc:1001] could not open file to read NUMA node: /sys/bus/pci/devices/0000:07:00.0/numa_node
Your kernel may have been built without NUMA support.
I0000 00:00:1722833049.372464    1550 cuda_executor.cc:1001] could not open file to read NUMA node: /sys/bus/pci/devices/0000:07:00.0/numa_node
Your kernel may have been built without NUMA support.
I0000 00:00:1722833049.655631    1550 cuda_executor.cc:1001] could not open file to read NUMA node: /sys/bus/pci/devices/0000:07:00.0/numa_node
Your kernel may have been built without NUMA support.
I0000 00:00:1722833049.655737    1550 cuda_executor.cc:1001] could not open file to read NUMA node: /sys/bus/pci/devices/0000:07:00.0/numa_node
Your kernel may have been built without NUMA support.
2024-08-05

In [13]:
# 디코더
decoder_inputs = Input(shape=(None, 50))
decoder_lstm = LSTM(256, return_sequences=True, return_state=True)

decoder_outputs, _, _ = decoder_lstm(decoder_inputs, initial_state=encoder_states)
   # 인코더로부터 전달받은 정보를 (encoder_states : [state_h, state_c])를 초기 state로 사용
decoder_dense = Dense(50, activation='softmax')
   # 출력 클래스(50개 ex, 다음 단어)에 대한 확률 분포 생성
decoder_outputs = decoder_dense(decoder_outputs)

In [14]:
# 모델 컴파일
model = Model([encoder_inputs, decoder_inputs], decoder_outputs)
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

In [15]:
# 모델 요약
model.summary()

모델학습

In [16]:
# 입력, 출력 데이터 생성 (예시 데이터)
encoder_input_data = np.random.rand(10000, 10, 50)  # 10000개 데이터(길이 10의 시퀀스, 각 시퀀스는 50차원 벡터)
decoder_input_data = np.random.rand(10000, 10, 50)
decoder_target_data = np.random.rand(10000, 10, 50)


In [17]:
encoder_input_data[0]

array([[2.51962024e-01, 1.25604244e-01, 4.14824071e-01, 1.42420975e-01,
        8.57392738e-01, 5.55390520e-01, 2.77898415e-02, 6.53554872e-02,
        2.53479479e-01, 7.24342871e-01, 3.90843485e-01, 1.12741140e-01,
        5.50275526e-02, 7.88168870e-01, 6.32535958e-01, 3.29046181e-01,
        6.72676752e-01, 1.17457140e-01, 7.39902385e-01, 3.76611729e-01,
        7.45029361e-01, 7.01010253e-01, 3.30519438e-01, 9.70808202e-01,
        8.92432758e-01, 7.94259650e-01, 4.35205055e-01, 5.31310240e-01,
        2.18669236e-01, 1.13594047e-01, 1.60185108e-01, 7.80288076e-01,
        1.29118850e-01, 6.03275966e-02, 5.24032488e-01, 2.75181068e-01,
        3.89346725e-01, 9.90357220e-01, 8.19072500e-01, 1.87111157e-01,
        6.89753104e-01, 6.25656144e-01, 6.58256333e-02, 2.11670653e-01,
        2.44329407e-01, 8.56518599e-01, 2.04192213e-01, 9.63982572e-01,
        1.71592442e-01, 5.86103393e-01],
       [9.38521108e-01, 7.46432847e-01, 8.57412827e-01, 8.78090822e-01,
        9.64966560e-01,

In [18]:
# 모델 학습
model.fit([encoder_input_data, decoder_input_data], decoder_target_data, batch_size=64, epochs=10, validation_split=0.2)

Epoch 1/10


2024-08-05 13:44:12.210204: I external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:531] Loaded cuDNN version 8907


[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 19ms/step - accuracy: 0.0206 - loss: 107.8989 - val_accuracy: 0.0191 - val_loss: 111.4423
Epoch 2/10
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 18ms/step - accuracy: 0.0200 - loss: 111.3419 - val_accuracy: 0.0191 - val_loss: 111.3463
Epoch 3/10
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 17ms/step - accuracy: 0.0200 - loss: 111.3022 - val_accuracy: 0.0191 - val_loss: 111.3417
Epoch 4/10
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 17ms/step - accuracy: 0.0206 - loss: 111.2684 - val_accuracy: 0.0191 - val_loss: 111.2410
Epoch 5/10
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 16ms/step - accuracy: 0.0196 - loss: 111.2197 - val_accuracy: 0.0191 - val_loss: 111.3790
Epoch 6/10
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 16ms/step - accuracy: 0.0197 - loss: 111.3246 - val_accuracy: 0.0191 - val_loss: 111.2716
Epoch 7/1

<keras.src.callbacks.history.History at 0x714b20ff6a50>

In [19]:
# 모델 평가
# 학습된 모델을 평가하기 위해 테스트 데이터를 사용
encoder_input_test = np.random.rand(2000, 10, 50)
decoder_input_test = np.random.rand(2000, 10, 50)
decoder_target_test = np.random.rand(2000, 10, 50)

loss, accuracy = model.evaluate([encoder_input_test, decoder_input_test], decoder_target_test)
print(f'Test Loss: {loss}, Test Accuracy: {accuracy}')


[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 0.0201 - loss: 111.2100
Test Loss: 111.2199478149414, Test Accuracy: 0.01994999684393406


예측 수행(모델 적용)

In [20]:
# 인코더 - 새로운 입력 시퀀스를 받아 hidden state 계산
encoder_model = Model(encoder_inputs, encoder_states)
 # encoder_inputs: 이전에 정의된, 훈련된 모델 (변수명 model)의 입력층
 # encoder_states: hidden state, cell state (LSTM)

# 원본 model에서 LSTM 등의 레이어를 통해 학습된 가중치를 새로운 모델의 구성 요소로 재사용
# 학습 단계에서 얻은 데이터의 표현(특징)을 추론 단계에서 활용

In [21]:
# 디코더 입력
decoder_state_input_h = Input(shape=(256,))
decoder_state_input_c = Input(shape=(256,))
decoder_states_inputs = [decoder_state_input_h, decoder_state_input_c]

In [22]:
# 디코더 출력
decoder_outputs, state_h, state_c = decoder_lstm(
    decoder_inputs, initial_state=decoder_states_inputs)
decoder_states = [state_h, state_c]
decoder_outputs = decoder_dense(decoder_outputs)

In [23]:


# 디코더 모델 정의
decoder_model = Model(
    [decoder_inputs] + decoder_states_inputs,
    [decoder_outputs] + decoder_states)

# 시퀀스 종료 토큰과 최대 디코딩 시퀀스 길이 정의
eos_token_index = 0  # 예시로 0번 인덱스를 종료 토큰으로 사용
max_decoder_seq_length = 10

In [24]:

def decode_sequence(input_seq):
    # 인코더에서 상태 벡터 얻기
    states_value = encoder_model.predict(input_seq)

    # 디코더 입력 준비 (시퀀스 시작 토큰)
    target_seq = np.zeros((1, 1, 50))

    # 시퀀스 종료 조건
    stop_condition = False
    decoded_sentence = []

    while not stop_condition:
        output_tokens, h, c = decoder_model.predict(
            [target_seq] + states_value)
            # 타겟 시퀀스 + 히든스테이트 와 셀스테이트

        # 예측된 토큰을 디코딩하여 출력 시퀀스에 추가
        sampled_token_index = np.argmax(output_tokens[0, -1, :])
        sampled_token = sampled_token_index  # 예시에서는 인덱스 자체를 토큰으로 사용

        decoded_sentence.append(sampled_token)

        # 시퀀스 종료 조건
        if (sampled_token == eos_token_index or
           len(decoded_sentence) > max_decoder_seq_length):
            stop_condition = True

        # 디코더 입력 업데이트 (다음 입력 토큰)
        target_seq = np.zeros((1, 1, 50))
        target_seq[0, 0, sampled_token] = 1.0

        # 상태 업데이트
        states_value = [h, c]

    return decoded_sentence



In [25]:
# 새로운 입력 시퀀스 (예시 데이터)
new_encoder_input = np.random.rand(1, 10, 50)




In [26]:
# 예측된 출력 시퀀스
decoded_sentence = decode_sequence(new_encoder_input)
print('Predicted sequence:', decoded_sentence)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 87ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 90ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
Predicted sequence: [34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34]
