# 03-2 트랜스포머를 사용한 텍스트 생성

<table align="left"><tr><td>
<a href="https://colab.research.google.com/github/rickiepark/hm-dl/blob/main/03-2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="코랩에서 실행하기"/></a>
</td></tr></table>

코랩에서 이 노트북을 실행하려면 High-RAM CPU 런타임을 사용해야 합니다.

이 절의 코드를 실행하려면 `keras-nlp` 패키지와 허깅페이스 `transformers` 패키지를 위한 `tf-keras`를 설치해야 합니다.

In [None]:
# Install, or upgrade to the newest available release of, tensorflow, keras-nlp and tf-keras.
pip install -U tensorflow keras-nlp tf-keras

Collecting tensorflow
  Downloading tensorflow-2.16.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (589.8 MB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 589.8/589.8 MB 3.0 MB/s eta 0:00:00
Collecting keras-nlp
  Downloading keras_nlp-0.12.1-py3-none-any.whl (570 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 570.5/570.5 kB 43.5 MB/s eta 0:00:00
Collecting tf-keras
  Downloading tf_keras-2.16.0-py3-none-any.whl (1.7 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.7/1.7 MB[0m [31m87.3 MB/s[0m eta [36m0:00:00[0m
Collecting h5py>=3.10.0 (from tensorflow)
  Downloading h5py-3.11.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (5.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m5.3/5.3 MB[0m [31m64.0 MB/s[0m eta [36m0:00:00[0m
Collecting ml-dtypes~=0.3.1 (from tensorflow)
  Downloading ml_dtypes-0.3.2-cp310-cp310-manylinux

In [None]:
import keras
import keras_nlp

# Show the installed keras and keras_nlp versions as a tuple.
keras.__version__, keras_nlp.__version__

('3.3.3', '0.12.1')

In [None]:
import keras
from keras import layers
import keras_nlp

In [None]:
def make_causal_mask(seq_len):
    """Build a square causal (lower-triangular) mask.

    Args:
        seq_len: Number of positions in the sequence.

    Returns:
        A (seq_len, seq_len) boolean tensor whose entry [i, j] is True
        when i >= j, i.e. a position may look at itself and at earlier
        positions only.
    """
    # Column indices 0..seq_len-1, shape (seq_len,).
    positions = keras.ops.arange(seq_len)
    # Row indices as a column vector, shape (seq_len, 1), so the comparison
    # below broadcasts to a full (seq_len, seq_len) grid.
    rows = keras.ops.expand_dims(positions, axis=-1)
    return rows >= positions

In [None]:
def make_attention_mask(padding_mask):
    """Combine a causal mask with a padding mask.

    The shape comments below use a padding mask of shape (2, 5) as a
    running example.

    Args:
        padding_mask: Rank-2 tensor whose shape is unpacked as
            (batch_size, seq_len).

    Returns:
        The element-wise minimum of the causal mask, broadcast to
        (batch_size, seq_len, seq_len), and the padding mask reshaped to
        (batch_size, 1, seq_len).
    """
    n_batch, n_tokens = keras.ops.shape(padding_mask)
    # The causal mask starts as (5, 5) and is given a batch axis: (2, 5, 5).
    causal = keras.ops.broadcast_to(
        make_causal_mask(n_tokens), (n_batch, n_tokens, n_tokens)
    )
    # Insert a middle axis -> (2, 1, 5) so the padding mask broadcasts
    # against every row of the causal mask.
    padding = keras.ops.expand_dims(padding_mask, axis=1)
    return keras.ops.minimum(causal, padding)

In [None]:
class AttentionMask(keras.Layer):
    """Keras layer that wraps ``make_attention_mask``."""

    def call(self, padding_mask):
        """Return the attention mask computed from ``padding_mask``."""
        attention_mask = make_attention_mask(padding_mask)
        return attention_mask

## Gemma

### 젬마 구현하기

In [None]:
from keras_nlp.src.models.gemma.gemma_attention import CachedGemmaAttention
from keras_nlp.src.models.gemma.rms_normalization import RMSNormalization

def gemma_decoder(x, padding_mask, num_query_heads, num_key_value_heads,
                  interm_dim, hidden_dim, head_dim):
    """Apply one Gemma-style decoder block to ``x``.

    The block is two sub-blocks, each followed by a skip connection:
    RMS normalization + attention, then RMS normalization + a gated
    feed-forward network.

    Args:
        x: Input to the block. Its last dimension must match ``hidden_dim``
            for the final skip connection to add up.
        padding_mask: Padding mask passed to ``AttentionMask``.
        num_query_heads: Forwarded to ``CachedGemmaAttention``.
        num_key_value_heads: Forwarded to ``CachedGemmaAttention``.
        interm_dim: Feed-forward width; each of the two parallel Dense
            layers gets ``interm_dim // 2`` units.
        hidden_dim: Units of the final Dense layer.
        head_dim: Forwarded to ``CachedGemmaAttention``.

    Returns:
        The output of the block after the second skip connection.
    """
    # Attention mask derived from the padding mask.
    attn_mask = AttentionMask()(padding_mask)

    # Attention sub-block: normalize, attend, then add the block input back.
    attn_input = RMSNormalization()(x)
    attention_layer = CachedGemmaAttention(
        head_dim=head_dim,
        num_query_heads=num_query_heads,
        num_key_value_heads=num_key_value_heads,
        dropout=0.0,
    )
    attn_output = attention_layer(attn_input, attn_mask)
    after_attention = attn_output + x

    # Position-wise feed-forward sub-block: a GELU-activated branch
    # multiplied element-wise with a linear branch, then projected to
    # hidden_dim.
    ffn_input = RMSNormalization()(after_attention)
    half_dim = interm_dim // 2
    gate = layers.Dense(half_dim, activation='gelu', use_bias=False)(ffn_input)
    linear = layers.Dense(half_dim, use_bias=False)(ffn_input)
    gated = gate * linear
    ffn_output = layers.Dense(hidden_dim, use_bias=False)(gated)

    # Second skip connection.
    return ffn_output + after_attention

In [None]:
# Gemma 2B
def make_gemma():
    """Build a Keras functional model using the Gemma 2B hyperparameters.

    The model takes two inputs, ``token_ids`` and ``padding_mask`` (each
    with an unspecified sequence length), embeds the tokens, scales the
    embeddings by sqrt(hidden_dim), runs them through a stack of
    ``gemma_decoder`` blocks and a final RMS normalization, and then calls
    the same embedding layer with ``reverse=True`` to produce the output.

    Returns:
        A ``keras.Model`` with inputs ``(token_ids, padding_mask)``.
    """
    vocab_size = 256000
    hidden_dim = 2048
    num_layers = 18
    decoder_config = dict(
        num_query_heads=8,
        num_key_value_heads=1,
        interm_dim=32768,
        hidden_dim=hidden_dim,
        head_dim=256,
    )

    token_ids = keras.Input(shape=(None,))
    padding_mask = keras.Input(shape=(None,))

    # One embedding layer is shared by the input lookup and the output
    # projection (reverse=True below).
    embedding = keras_nlp.layers.ReversibleEmbedding(vocab_size, hidden_dim)
    hidden = embedding(token_ids)
    hidden = layers.Lambda(lambda emb: emb * keras.ops.sqrt(hidden_dim))(hidden)

    for _ in range(num_layers):
        hidden = gemma_decoder(hidden, padding_mask, **decoder_config)

    hidden = RMSNormalization()(hidden)
    logits = embedding(hidden, reverse=True)
    return keras.Model(inputs=(token_ids, padding_mask), outputs=logits)


# Build the Gemma-sized model and print its summary with 100-character-wide lines.
model = make_gemma()
model.summary(line_length=100)

In [None]:
# Remove the `model` name so this namespace no longer references the model built by make_gemma().
del model

In [None]:
import gc

# Force a garbage-collection pass (presumably to reclaim memory before the
# pretrained model is loaded — TODO confirm).
gc.collect()

### 젬마 모델 사용하기

In [None]:
# Load the 'gemma_2b_en' preset as a GemmaCausalLM and print its summary.
gemma = keras_nlp.models.GemmaCausalLM.from_preset('gemma_2b_en')
gemma.summary()

Downloading from https://www.kaggle.com/api/v1/models/keras/gemma/keras/gemma_2b_en/2/download/task.json...
Downloading from https://www.kaggle.com/api/v1/models/keras/gemma/keras/gemma_2b_en/2/download/preprocessor.json...


In [None]:
# Compile the model with a top-p sampler (p=0.8, fixed seed) and generate
# a continuation of the prompt with max_length=20.
sampler = keras_nlp.samplers.TopPSampler(p=0.8, seed=42)
gemma.compile(sampler=sampler)
gemma.generate('stay hungry, stay', max_length=20)

'stay hungry, stay foolish.  - steve jobs\nThomas Edison would have been proud of us'

In [None]:
# Generate from a Korean prompt ("when spring comes") with the same model.
gemma.generate('봄이 오면', max_length=20)

'봄이 오면 전국에서 생강이 생산된다. 전국생강'

In [None]:
# `pipeline` and `set_seed` are Hugging Face transformers functions. Import
# them here so this cell does not rely on an import made elsewhere.
from transformers import pipeline, set_seed

# Text-generation pipeline backed by the beomi/gemma-ko-2b checkpoint.
gemma_pipe = pipeline("text-generation", model="beomi/gemma-ko-2b")
# Fix the random seed before generating.
set_seed(42)
gemma_pipe('봄이 오면', max_length=20, truncation=True)



special_tokens_map.json:   0%|          | 0.00/555 [00:00<?, ?B/s]

[{'generated_text': '봄이 오면서 봄꽃이 피기 시작하면서 봄꽃축제가'}]

## 심화 예제

In [None]:
# llama3
def make_llama3():
    """Build a Keras functional model using Llama 3 style hyperparameters.

    The model takes ``token_ids`` and ``padding_mask`` inputs (each with
    an unspecified sequence length), embeds the tokens, runs them through
    a stack of ``llama_decoder`` blocks and a final ``LlamaLayerNorm``,
    and then calls the embedding layer with ``reverse=True`` to produce
    the output. The embedding layer is created with ``tie_weights=False``.

    NOTE(review): ``llama_decoder`` and ``LlamaLayerNorm`` must already be
    defined or imported when this function runs — confirm where they come
    from.

    Returns:
        A ``keras.Model`` with inputs ``(token_ids, padding_mask)``.
    """
    vocab_size, hidden_dim = 128256, 4096
    num_layers = 32
    num_query_heads, num_key_value_heads = 32, 8
    interm_dim = 14336

    token_ids = keras.Input(shape=(None,))
    padding_mask = keras.Input(shape=(None,))

    embedding = keras_nlp.layers.ReversibleEmbedding(
        vocab_size, hidden_dim, tie_weights=False
    )
    hidden = embedding(token_ids)

    for _ in range(num_layers):
        hidden = llama_decoder(hidden, padding_mask, num_query_heads,
                               num_key_value_heads, interm_dim, hidden_dim)

    hidden = LlamaLayerNorm()(hidden)
    logits = embedding(hidden, reverse=True)
    return keras.Model(inputs=(token_ids, padding_mask), outputs=logits)


# Build the Llama 3 sized model and print its summary with 100-character-wide lines.
model = make_llama3()
model.summary(line_length=100)