<a href="https://colab.research.google.com/github/kimdonggyu2008/music_generation/blob/main/transformer.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import tensorflow as tf
from keras.layers import(
    Dense,
    Dropout,
    Embedding,
    LayerNormalization,
    MultiHeadAttention,
)


In [None]:

def sinusoidal_position_encoding(num_positions, d_model):
  """Build a fixed sinusoidal positional-encoding table.

  Args:
    num_positions: Number of positions (rows) to encode.
    d_model: Width of each encoding vector.

  Returns:
    A float32 tensor of shape (1, num_positions, d_model). Along the last
    axis the sine terms (computed from the even-indexed angle columns) come
    first, followed by the cosine terms (odd-indexed angle columns); the two
    halves are concatenated, not interleaved.
  """
  position_column = np.arange(num_positions)[:, np.newaxis]
  dimension_row = np.arange(d_model)[np.newaxis, :]
  angle_table = _get_angles(position_column, dimension_row, d_model)

  sine_half = np.sin(angle_table[:, 0::2])
  cosine_half = np.cos(angle_table[:, 1::2])

  # Prepend an axis of size 1 so the table has a leading (batch-like) axis.
  encoding = np.concatenate([sine_half, cosine_half], axis=-1)[np.newaxis, ...]
  return tf.cast(encoding, dtype=tf.float32)

def _get_angles(pos, i, d_model):
  """Return the angle arguments used by the sinusoidal positional encoding.

  Args:
    pos: Position indices (NumPy array or scalar).
    i: Embedding-dimension indices (NumPy array or scalar); paired
      dimensions 2k and 2k+1 share the same frequency because of `i // 2`.
    d_model: Width of the embedding; used to normalize the exponent.

  Returns:
    `pos * 1 / 10000 ** (2 * (i // 2) / d_model)`, broadcast over the
    shapes of `pos` and `i`.
  """
  exponents = (2 * (i // 2)) / np.float32(d_model)
  inverse_frequencies = 1 / np.power(10000, exponents)
  return pos * inverse_frequencies


class Transformer(tf.keras.Model):
  """Encoder-decoder Transformer that emits per-token vocabulary logits.

  The model chains an `Encoder`, a `Decoder`, and a final `Dense` projection
  onto the target vocabulary. No softmax is applied to the output.
  """

  def __init__(
      self,
      num_layers,
      d_model,
      num_heads,
      d_feedforward,
      input_vocab_size,
      target_vocab_size,
      max_num_positions_in_pe_encoder,
      max_num_positions_in_pe_decoder,
      dropout_rate=0.1,
  ):
    """Create the encoder, decoder, and output projection.

    Args:
      num_layers: Number of stacked layers in both the encoder and decoder.
      d_model: Embedding / model width.
      num_heads: Number of attention heads.
      d_feedforward: Hidden width of the position-wise feed-forward network.
      input_vocab_size: Vocabulary size for the encoder embedding.
      target_vocab_size: Vocabulary size for the decoder embedding and for
        the final projection.
      max_num_positions_in_pe_encoder: Number of positions in the encoder's
        positional-encoding table.
      max_num_positions_in_pe_decoder: Number of positions in the decoder's
        positional-encoding table.
      dropout_rate: Dropout rate forwarded to the encoder and decoder.
    """
    super().__init__()
    self.encoder = Encoder(
        num_layers=num_layers,
        d_model=d_model,
        num_heads=num_heads,
        d_feedforward=d_feedforward,
        input_vocab_size=input_vocab_size,
        maximum_positions_in_pe=max_num_positions_in_pe_encoder,
        dropout_rate=dropout_rate,
    )
    self.decoder = Decoder(
        num_layers=num_layers,
        d_model=d_model,
        num_heads=num_heads,
        d_feedforward=d_feedforward,
        target_vocab_size=target_vocab_size,
        maximum_positions_in_pe=max_num_positions_in_pe_decoder,
        dropout_rate=dropout_rate,
    )
    self.final_layer = Dense(target_vocab_size)

  def call(
      self,
      input,
      target,
      training,
      enc_padding_mask,
      look_ahead_mask,
      dec_padding_mask,
  ):
    """Run the full encoder-decoder forward pass.

    Args:
      input: Source token ids fed to the encoder. (The name shadows the
        builtin but is kept because it is part of the call interface.)
      target: Target token ids fed to the decoder.
      training: Flag forwarded to the encoder and decoder (controls dropout).
      enc_padding_mask: Attention mask for encoder self-attention, or None.
      look_ahead_mask: Attention mask for decoder self-attention, or None.
      dec_padding_mask: Attention mask for decoder attention over the
        encoder output, or None.

    Returns:
      Unnormalized logits over the target vocabulary (no softmax applied).
    """
    encoded = self.encoder(input, training, enc_padding_mask)
    decoded = self.decoder(
        target, encoded, training, look_ahead_mask, dec_padding_mask
    )
    # Project decoder features onto the target vocabulary.
    return self.final_layer(decoded)


class Encoder(tf.keras.layers.Layer):
  """Transformer encoder: embedding + positional encoding + encoder layers."""

  def __init__(
      self,
      num_layers,
      d_model,
      num_heads,
      d_feedforward,
      input_vocab_size,
      maximum_positions_in_pe,
      dropout_rate=0.1,
  ):
    """Create the embedding, positional table, and the encoder-layer stack.

    Args:
      num_layers: Number of `EncoderLayer` instances to stack.
      d_model: Embedding / model width.
      num_heads: Number of attention heads per layer.
      d_feedforward: Hidden width of each layer's feed-forward network.
      input_vocab_size: Vocabulary size of the token embedding.
      maximum_positions_in_pe: Number of positions precomputed in the
        positional-encoding table.
      dropout_rate: Dropout rate used after the embedding and in each layer.
    """
    super().__init__()
    self.d_model = d_model
    self.num_layers = num_layers

    self.embedding = Embedding(input_vocab_size, d_model)
    self.pos_encoding = sinusoidal_position_encoding(
        maximum_positions_in_pe, d_model
    )
    self.enc_layers = [
        EncoderLayer(d_model, num_heads, d_feedforward, dropout_rate)
        for _ in range(num_layers)
    ]
    self.dropout = Dropout(dropout_rate)

  def call(self, x, training, mask):
    """Encode a batch of token ids.

    Args:
      x: Token ids; axis 1 is treated as the sequence axis.
      training: Flag controlling dropout.
      mask: Attention mask passed to every encoder layer, or None.

    Returns:
      The output of the last encoder layer.
    """
    x = self.embedding(x)
    # Scale embeddings by sqrt(d_model) before adding positional encodings.
    x *= tf.math.sqrt(tf.cast(self.d_model, tf.float32))
    x += self._get_sliced_positional_encoding(x)

    x = self.dropout(x, training=training)

    for enc_layer in self.enc_layers:
      x = enc_layer(x, training, mask)

    return x

  def _get_sliced_positional_encoding(self, x):
    """Return the positional-encoding rows matching the length of `x`.

    The static length is used when it is known. When the sequence axis is
    dynamic (static shape is None), fall back to the runtime shape;
    slicing with `None` would otherwise return the entire table instead of
    the first `number_of_tokens` rows.
    """
    number_of_tokens = x.shape[1]
    if number_of_tokens is None:
      number_of_tokens = tf.shape(x)[1]
    return self.pos_encoding[:, :number_of_tokens, :]


class Decoder(tf.keras.layers.Layer):
  """Transformer decoder: embedding + positional encoding + decoder layers."""

  def __init__(
      self,
      num_layers,
      d_model,
      num_heads,
      d_feedforward,
      target_vocab_size,
      maximum_positions_in_pe,
      dropout_rate=0.1,
  ):
    """Create the embedding, positional table, and the decoder-layer stack.

    Args:
      num_layers: Number of `DecoderLayer` instances to stack.
      d_model: Embedding / model width.
      num_heads: Number of attention heads per layer.
      d_feedforward: Hidden width of each layer's feed-forward network.
      target_vocab_size: Vocabulary size of the token embedding.
      maximum_positions_in_pe: Number of positions precomputed in the
        positional-encoding table.
      dropout_rate: Dropout rate used after the embedding and in each layer.
    """
    super().__init__()
    self.d_model = d_model
    self.num_layers = num_layers

    self.embedding = Embedding(target_vocab_size, d_model)
    self.pos_encoding = sinusoidal_position_encoding(
        maximum_positions_in_pe, d_model
    )

    self.dec_layers = [
        DecoderLayer(d_model, num_heads, d_feedforward, dropout_rate)
        for _ in range(num_layers)
    ]
    # Must be named `dropout`: `call` reads `self.dropout`. The previous
    # misspelled attribute name raised AttributeError on the first call.
    self.dropout = Dropout(dropout_rate)

  def call(self, x, enc_output, training, look_ahead_mask, padding_mask):
    """Decode a batch of target token ids against the encoder output.

    Args:
      x: Target token ids; axis 1 is treated as the sequence axis.
      enc_output: Encoder output attended to by every decoder layer.
      training: Flag controlling dropout.
      look_ahead_mask: Attention mask for decoder self-attention, or None.
      padding_mask: Attention mask for attention over `enc_output`, or None.

    Returns:
      The output of the last decoder layer.
    """
    x = self.embedding(x)
    # Scale embeddings by sqrt(d_model) before adding positional encodings.
    x *= tf.math.sqrt(tf.cast(self.d_model, tf.float32))
    x += self._get_sliced_positional_encoding(x)

    x = self.dropout(x, training=training)

    for dec_layer in self.dec_layers:
      x = dec_layer(x, enc_output, training, look_ahead_mask, padding_mask)
    return x

  def _get_sliced_positional_encoding(self, x):
    """Return the positional-encoding rows matching the length of `x`.

    The static length is used when it is known. When the sequence axis is
    dynamic (static shape is None), fall back to the runtime shape;
    slicing with `None` would otherwise return the entire table instead of
    the first `number_of_tokens` rows.
    """
    number_of_tokens = x.shape[1]
    if number_of_tokens is None:
      number_of_tokens = tf.shape(x)[1]
    return self.pos_encoding[:, :number_of_tokens, :]


class EncoderLayer(tf.keras.layers.Layer):
  """One encoder block: self-attention followed by a feed-forward network.

  Each sub-block applies dropout to its output, adds the residual input, and
  then layer-normalizes the sum.
  """

  def __init__(self, d_model, num_heads, d_feedforward, dropout_rate=0.1):
    """Create the attention, feed-forward, normalization and dropout layers.

    Args:
      d_model: Model width; also passed as `key_dim` to the attention layer
        and used as the feed-forward output width.
      num_heads: Number of attention heads.
      d_feedforward: Hidden width of the feed-forward network.
      dropout_rate: Dropout rate applied to each sub-block's output.
    """
    super().__init__()
    self.mha = MultiHeadAttention(key_dim=d_model, num_heads=num_heads)
    # Position-wise feed-forward network: ReLU expansion, then projection
    # back to d_model.
    self.ffn = tf.keras.Sequential(
        [
            Dense(d_feedforward, activation="relu"),
            Dense(d_model),
        ]
    )
    self.layernorm1 = LayerNormalization(epsilon=1e-6)
    self.layernorm2 = LayerNormalization(epsilon=1e-6)
    self.dropout1 = Dropout(dropout_rate)
    self.dropout2 = Dropout(dropout_rate)

  def call(self, x, training, mask):
    """Apply self-attention and the feed-forward network to `x`.

    Args:
      x: Input sequence features.
      training: Flag controlling the two dropout layers.
      mask: Attention mask for the self-attention call, or None.

    Returns:
      The layer-normalized output of the feed-forward sub-block.
    """
    # Self-attention sub-block: query, value and key are all `x`.
    attended = self.dropout1(
        self.mha(x, x, x, attention_mask=mask), training=training
    )
    attention_block_out = self.layernorm1(x + attended)

    # Feed-forward sub-block.
    transformed = self.dropout2(
        self.ffn(attention_block_out), training=training
    )
    return self.layernorm2(attention_block_out + transformed)


class DecoderLayer(tf.keras.layers.Layer):
  """One decoder block: self-attention, encoder attention, feed-forward.

  Each of the three sub-blocks applies dropout to its output, adds the
  residual input, and then layer-normalizes the sum.
  """

  def __init__(self, d_model, num_heads, d_feedforward, dropout_rate=0.1):
    """Create the attention, feed-forward, normalization and dropout layers.

    Args:
      d_model: Model width; also passed as `key_dim` to both attention
        layers and used as the feed-forward output width.
      num_heads: Number of attention heads.
      d_feedforward: Hidden width of the feed-forward network.
      dropout_rate: Dropout rate applied to each sub-block's output.
    """
    super().__init__()
    self.mha1 = MultiHeadAttention(key_dim=d_model, num_heads=num_heads)
    self.mha2 = MultiHeadAttention(key_dim=d_model, num_heads=num_heads)

    # Position-wise feed-forward network: ReLU expansion, then projection
    # back to d_model.
    self.ffn = tf.keras.Sequential(
        [
            Dense(d_feedforward, activation="relu"),
            Dense(d_model),
        ]
    )
    self.layernorm1 = LayerNormalization(epsilon=1e-6)
    self.layernorm2 = LayerNormalization(epsilon=1e-6)
    self.layernorm3 = LayerNormalization(epsilon=1e-6)
    self.dropout1 = Dropout(dropout_rate)
    self.dropout2 = Dropout(dropout_rate)
    self.dropout3 = Dropout(dropout_rate)

  def call(self, x, enc_output, training, look_ahead_mask, padding_mask):
    """Apply the three decoder sub-blocks to `x`.

    Args:
      x: Decoder input sequence features.
      enc_output: Encoder output used as value and key in the second
        attention call.
      training: Flag controlling the three dropout layers.
      look_ahead_mask: Attention mask for the self-attention call, or None.
      padding_mask: Attention mask for the attention over `enc_output`,
        or None.

    Returns:
      The layer-normalized output of the feed-forward sub-block.
    """
    # Sub-block 1: self-attention (query, value and key are all `x`).
    self_attended = self.dropout1(
        self.mha1(x, x, x, attention_mask=look_ahead_mask),
        training=training,
    )
    self_block_out = self.layernorm1(self_attended + x)

    # Sub-block 2: attention with the decoder state as query and the
    # encoder output as value and key.
    cross_attended = self.dropout2(
        self.mha2(
            self_block_out, enc_output, enc_output, attention_mask=padding_mask
        ),
        training=training,
    )
    cross_block_out = self.layernorm2(cross_attended + self_block_out)

    # Sub-block 3: feed-forward network.
    transformed = self.dropout3(self.ffn(cross_block_out), training=training)
    return self.layernorm3(transformed + cross_block_out)


if __name__ == "__main__":
  # Smoke test: build a small Transformer, run one forward pass on random
  # token ids with no masks, and print the model summary.
  num_layers = 2
  d_model = 64
  num_heads = 2
  d_feedforward = 128
  input_vocab_size = 100
  target_vocab_size = 100
  dropout_dropout_rate = 0.1
  pe_input = 10
  pe_target = 10

  transformer_model = Transformer(
      num_layers,
      d_model,
      num_heads,
      d_feedforward,
      input_vocab_size,
      target_vocab_size,
      pe_input,
      pe_target,
      dropout_dropout_rate,
  )

  # Encoder ids must be drawn from the *input* vocabulary; sequence lengths
  # are tied to the positional-encoding table sizes so they cannot exceed
  # the number of precomputed positions.
  dummy_inp = tf.random.uniform(
      (1, pe_input), dtype=tf.int64, minval=0, maxval=input_vocab_size
  )
  dummy_tar = tf.random.uniform(
      (1, pe_target), dtype=tf.int64, minval=0, maxval=target_vocab_size
  )

  transformer_model(
      dummy_inp,
      dummy_tar,
      training=False,
      enc_padding_mask=None,
      look_ahead_mask=None,
      dec_padding_mask=None,
  )

  transformer_model.summary()

AttributeError: Exception encountered when calling layer 'decoder' (type Decoder).

'Decoder' object has no attribute 'dropout'

Call arguments received by layer 'decoder' (type Decoder):
  • x=tf.Tensor(shape=(1, 10), dtype=int64)
  • enc_output=tf.Tensor(shape=(1, 10, 64), dtype=float32)
  • training=False
  • look_ahead_mask=None
  • padding_mask=None