# LSTM

In [1]:
# LSTM Encoder
import tensorflow as tf

class Encoder(tf.keras.Model):
  # LSTM encoder
  def __init__(self, vocab_size, embedding_dim, enc_units):
    super(Encoder, self).__init__()
    # Embedding Layer size : {vocab size, embedding dimension}
    self.embedding = tf.keras.layers.Embedding(vocab_size, embedding_dim)
    # Encoding unit as LSTM
    self.lstm = tf.keras.layers.LSTM(enc_units) # return_sequences 매개변수를 기본값 False로 전달

  def call(self, x):
    print("입력 Shape:", x.shape)

    x = self.embedding(x)
    print("Embedding Layer를 거친 Shape:", x.shape)

    output = self.lstm(x)
    print("LSTM Layer의 Output Shape:", output.shape)

    return output

print("슝~")

슝~


# Choose the hyperparameters

In [2]:
vocab_size = 30000
emb_size = 256
lstm_size = 512
batch_size = 1
sample_seq_len = 3

print("Vocab Size: {0}".format(vocab_size))
print("Embedidng Size: {0}".format(emb_size))
print("LSTM Size: {0}".format(lstm_size))
print("Batch Size: {0}".format(batch_size))
print("Sample Sequence Length: {0}\n".format(sample_seq_len))

Vocab Size: 30000
Embedidng Size: 256
LSTM Size: 512
Batch Size: 1
Sample Sequence Length: 3



In [4]:
encoder = Encoder(vocab_size, emb_size, lstm_size) # (30000, 256, 512)
sample_input = tf.zeros((batch_size, sample_seq_len)) # (1, 3)

sample_output = encoder(sample_input)    # 컨텍스트 벡터로 사용할 인코더 LSTM의 최종 State값

입력 Shape: (1, 3)
Embedding Layer를 거친 Shape: (1, 3, 256)
LSTM Layer의 Output Shape: (1, 512)


In [5]:
sample_input

<tf.Tensor: shape=(1, 3), dtype=float32, numpy=array([[0., 0., 0.]], dtype=float32)>

In [8]:
# Encoder 구현에 사용된 변수들을 이어 사용함에 유의!

class Decoder(tf.keras.Model):
  def __init__(self, vocab_size, embedding_dim, dec_units):
    # Inherited by super
    super(Decoder, self).__init__()
    # Embedding Layer size : {vocab size, embedding dimension} - same order as Encoder
    self.embedding = tf.keras.layers.Embedding(vocab_size, embedding_dim)
    # Decoding Unit as LSTM layer
    self.lstm = tf.keras.layers.LSTM(dec_units,
                                     return_sequences=True) # return_sequences 매개변수를 True로 설정
    # Fully connected Layer of size of vocabularies 
    self.fc = tf.keras.layers.Dense(vocab_size)
    # Get the softmax
    self.softmax = tf.keras.layers.Softmax(axis=-1)

  def call(self, x, context_v):  # 디코더의 입력 x와 인코더의 컨텍스트 벡터를 인자로 받는다. 
    print("입력 Shape:", x.shape)

    x = self.embedding(x)
    print("Embedding Layer를 거친 Shape:", x.shape)

    context_v = tf.repeat(tf.expand_dims(context_v, axis=1),
                          repeats=x.shape[1], axis=1)
    x = tf.concat([x, context_v], axis=-1)  # 컨텍스트 벡터를 concat 해준다
    print("Context Vector가 더해진 Shape:", x.shape)

    x = self.lstm(x)
    print("LSTM Layer의 Output Shape:", x.shape)

    output = self.fc(x)
    print("Decoder 최종 Output Shape:", output.shape)

    return self.softmax(output)

print("슝~")

슝~


In [10]:
decoder = Decoder(vocab_size, emb_size, lstm_size)
sample_input = tf.zeros((batch_size, sample_seq_len))

dec_output = decoder(sample_input, sample_output)  # Decoder.call(x, context_v) 을 호출

입력 Shape: (1, 3)
Embedding Layer를 거친 Shape: (1, 3, 256)
Context Vector가 더해진 Shape: (1, 3, 768)
LSTM Layer의 Output Shape: (1, 3, 512)
Decoder 최종 Output Shape: (1, 3, 30000)


In [9]:
print("Vocab Size: {0}".format(vocab_size))
print("Embedidng Size: {0}".format(emb_size))
print("LSTM Size: {0}".format(lstm_size))
print("Batch Size: {0}".format(batch_size))
print("Sample Sequence Length: {0}\n".format(sample_seq_len))

Vocab Size: 30000
Embedidng Size: 256
LSTM Size: 512
Batch Size: 1
Sample Sequence Length: 3



```python
tf.keras.layers.Dense(1) 
``` 
performs a linear transformation followed by an optional activation function. It can be used to create output layers for regression problems, or hidden layers for classification or other problems.

In [11]:
class BahdanauAttention(tf.keras.layers.Layer):
  def __init__(self, units):
    super(BahdanauAttention, self).__init__()
    # Decoder, Encoder. All initiated with Dense
    self.W_decoder = tf.keras.layers.Dense(units)
    self.W_encoder = tf.keras.layers.Dense(units)
    # Dense into one value output when combine
    self.W_combine = tf.keras.layers.Dense(1)

  def call(self, H_encoder, H_decoder):
    print("[ H_encoder ] Shape:", H_encoder.shape)
    # Encoder out with Dense layer
    H_encoder = self.W_encoder(H_encoder)
    print("[ W_encoder X H_encoder ] Shape:", H_encoder.shape)

    print("\n[ H_decoder ] Shape:", H_decoder.shape)
    # expand_dims makes H_decoder to have one-length long in 1-position
    H_decoder = tf.expand_dims(H_decoder, 1)
    # Decoder out with Dense layer
    H_decoder = self.W_decoder(H_decoder)
    
    print("[ W_decoder X H_decoder ] Shape:", H_decoder.shape)
    # Combined Layer of tanh operation with sum of decoder and encoder 
    score = self.W_combine(tf.nn.tanh(H_decoder + H_encoder))
    print("[ Score_alignment ] Shape:", score.shape)
    
    attention_weights = tf.nn.softmax(score, axis=1)
    print("\n최종 Weight:\n", attention_weights.numpy())

    context_vector = attention_weights * H_decoder
    context_vector = tf.reduce_sum(context_vector, axis=1)

    return context_vector, attention_weights

W_size = 100

print("Hidden State를 {0}차원으로 Mapping\n".format(W_size))

attention = BahdanauAttention(W_size)

enc_state = tf.random.uniform((1, 10, 512))
dec_state = tf.random.uniform((1, 512))

_ = attention(enc_state, dec_state)

Hidden State를 100차원으로 Mapping

[ H_encoder ] Shape: (1, 10, 512)
[ W_encoder X H_encoder ] Shape: (1, 10, 100)

[ H_decoder ] Shape: (1, 512)
[ W_decoder X H_decoder ] Shape: (1, 1, 100)
[ Score_alignment ] Shape: (1, 10, 1)

최종 Weight:
 [[[0.11343235]
  [0.08114405]
  [0.0456247 ]
  [0.06608689]
  [0.0778655 ]
  [0.07094301]
  [0.1841287 ]
  [0.15177865]
  [0.11997903]
  [0.08901719]]]
