# [PROJECT] 한국어 데이터로 챗봇 만들기

## Step 1. 데이터 수집하기

In [1]:
import tensorflow as tf
import pandas as pd
import numpy as np
import os
import re
import matplotlib.pyplot as plt

In [2]:
# Data source: https://github.com/songys/Chatbot_data/blob/master/ChatbotData.csv
# Load the chatbot corpus into a DataFrame; the 'Q' and 'A' columns are read later on.
# NOTE(review): the file name contains a space before ".csv" — confirm it matches the file on disk.

df = pd.read_csv("ChatbotData .csv")
df

Unnamed: 0,Q,A,label
0,12시 땡!,하루가 또 가네요.,0
1,1지망 학교 떨어졌어,위로해 드립니다.,0
2,3박4일 놀러가고 싶다,여행은 언제나 좋죠.,0
3,3박4일 정도 놀러가고 싶다,여행은 언제나 좋죠.,0
4,PPL 심하네,눈살이 찌푸려지죠.,0
...,...,...,...
11818,훔쳐보는 것도 눈치 보임.,티가 나니까 눈치가 보이는 거죠!,2
11819,훔쳐보는 것도 눈치 보임.,훔쳐보는 거 티나나봐요.,2
11820,흑기사 해주는 짝남.,설렜겠어요.,2
11821,힘든 연애 좋은 연애라는게 무슨 차이일까?,잘 헤어질 수 있는 사이 여부인 거 같아요.,2


## Step 2. 데이터 전처리하기

In [3]:
def preprocessing_sentence(sentence):
    """Normalize one raw sentence for tokenization.

    Steps, in order:
      1. Trim leading/trailing whitespace.
      2. Surround each of ``? . ! ,`` with spaces so it becomes its own token.
      3. Replace every run of characters other than Hangul, digits, the
         punctuation ``? . ! , ~ …`` and the space character with one space.
      4. Collapse whitespace runs to a single space and trim again.

    Args:
        sentence: Raw input string.

    Returns:
        The cleaned string (possibly empty).
    """
    # Pad the sentence punctuation with spaces (this step adds spaces; it does not remove them).
    spaced = re.sub(r"([?.!,])", r" \1 ", sentence.strip())

    # Everything outside the allowed character set (e.g. Latin letters) becomes a space.
    filtered = re.sub(r"[^ㄱ-ㅎㅏ-ㅣ가-힣0-9?.!,~… ]+", " ", spaced)

    # The two substitutions above can leave repeated or edge spaces; squeeze them out.
    return re.sub(r'\s+', ' ', filtered).strip()

In [4]:
# Build the parallel question/answer lists by cleaning every entry of the Q and A columns.

questions = list(map(preprocessing_sentence, df['Q']))
answers = list(map(preprocessing_sentence, df['A']))

# Both counts are printed under the same label; they should match.
print('전체 샘플 수 :', len(questions))
print('전체 샘플 수 :', len(answers))

전체 샘플 수 : 11823
전체 샘플 수 : 11823


In [5]:
# Spot-check one preprocessed pair (index 21, i.e. the 22nd sample).
print('전처리 후의 22번째 질문 샘플: {}'.format(questions[21]))
print('전처리 후의 22번째 답변 샘플: {}'.format(answers[21]))

전처리 후의 22번째 질문 샘플: 가스비 장난 아님
전처리 후의 22번째 답변 샘플: 다음 달에는 더 절약해봐요 .


## Step 3. 단어장 만들기 

SubwordTextEncoder 사용: 문장을 자주 등장하는 부분 단어(subword) 단위로 쪼개어 자체 단어장을 학습하는 방식

### SubwordTextEncoder

In [6]:
import tensorflow_datasets as tfds

# Learn a subword vocabulary from the combined question and answer sentences.
tokenizer = tfds.deprecated.text.SubwordTextEncoder.build_from_corpus(
    questions + answers,
    target_vocab_size=2**13,
)

# Special-token ids: 0 is padding; SOS/EOS sit just above the ids produced by
# shifting every tokenizer id up by one (see encode_ids).
BASE_VOCAB = tokenizer.vocab_size
PAD_ID = 0
SOS_ID = BASE_VOCAB + 1
EOS_ID = BASE_VOCAB + 2
VOCAB_SIZE = BASE_VOCAB + 3

print('SOS_ID:', SOS_ID)
print('EOS_ID:', EOS_ID)
print('PAD_ID:', PAD_ID)
print('VOCAB_SIZE:', VOCAB_SIZE)

  from .autonotebook import tqdm as notebook_tqdm


SOS_ID: 8168
EOS_ID: 8169
PAD_ID: 0
VOCAB_SIZE: 8170


### Encoding & Padding

In [7]:
# Every tokenizer id is shifted up by 1 because id 0 is reserved for PAD_ID.
def encode_ids(text: str):
    """Encode ``text`` as ``[SOS_ID, shifted subword ids..., EOS_ID]``."""
    shifted = [token_id + 1 for token_id in tokenizer.encode(text)]
    return [SOS_ID, *shifted, EOS_ID]

In [8]:
# Encoded length (including the SOS and EOS ids) of every question and every answer.
q_lens = [len(encode_ids(q)) for q in questions]
a_lens = [len(encode_ids(a)) for a in answers]

In [9]:
def stats(name, arr):
    """Print one summary line for a collection of lengths.

    The line reports the count, mean, median, 90th/95th/99th percentiles and
    the maximum of ``arr``, prefixed with ``[name]``.

    Args:
        name: Label shown in brackets at the start of the line.
        arr: Sequence of numbers (converted with ``np.array``).
    """
    values = np.array(arr)
    # One percentile call for all three cut points.
    p90, p95, p99 = np.percentile(values, [90, 95, 99])
    print(f"[{name}] n={len(values)} mean={values.mean():.1f} "
          f"median={np.median(values):.0f} p90={p90:.0f} "
          f"p95={p95:.0f} p99={p99:.0f} max={values.max()}")

# Print the length distribution of the encoded questions and answers.
stats("Q", q_lens)
stats("A", a_lens)

# Use the 95th-percentile encoded length as the maximum sequence length on each side.
MAX_LEN_Q = int(np.percentile(q_lens, 95))  
MAX_LEN_A = int(np.percentile(a_lens, 95))
print("MAX_LEN_Q =", MAX_LEN_Q, " MAX_LEN_A =", MAX_LEN_A)

# Truncation rate
def trunc_rate(arr, maxlen):
    """Return the fraction of entries in ``arr`` that are strictly greater than ``maxlen``."""
    exceeds = np.array(arr) > maxlen
    return float(exceeds.mean())
# Fraction of questions / answers whose encoded length exceeds the chosen maximum.
print("trunc(Q)=", trunc_rate(q_lens, MAX_LEN_Q), " trunc(A)=", trunc_rate(a_lens, MAX_LEN_A))

[Q] n=11823 mean=7.5 median=7 p90=11 p95=12 p99=15 max=23
[A] n=11823 mean=7.8 median=7 p90=11 p95=13 p99=16 max=31
MAX_LEN_Q = 12  MAX_LEN_A = 13
trunc(Q)= 0.041867546308043645  trunc(A)= 0.03400152245622938


In [10]:
# Integer-encode, drop over-length pairs, then pad
def tokenize_and_filter(inputs, outputs):
    """Encode sentence pairs, discard pairs that are too long, and pad the rest.

    A pair is kept only when the encoded question has at most ``MAX_LEN_Q``
    ids and the encoded answer has at most ``MAX_LEN_A`` ids.

    Args:
        inputs: Iterable of question strings.
        outputs: Iterable of answer strings, aligned with ``inputs``.

    Returns:
        A ``(questions, answers)`` tuple of post-padded id arrays produced by
        ``pad_sequences`` with ``PAD_ID`` as the fill value.
    """
    kept_inputs, kept_outputs = [], []

    for raw_question, raw_answer in zip(inputs, outputs):
        question_ids = encode_ids(raw_question)
        answer_ids = encode_ids(raw_answer)

        # Skip the whole pair as soon as either side is over its limit.
        if len(question_ids) > MAX_LEN_Q or len(answer_ids) > MAX_LEN_A:
            continue

        kept_inputs.append(question_ids)
        kept_outputs.append(answer_ids)

    pad = tf.keras.preprocessing.sequence.pad_sequences
    padded_inputs = pad(kept_inputs, maxlen=MAX_LEN_Q, padding='post', value=PAD_ID)
    padded_outputs = pad(kept_outputs, maxlen=MAX_LEN_A, padding='post', value=PAD_ID)

    return padded_inputs, padded_outputs

In [11]:
# Encode, filter and pad the corpus.
# NOTE: this rebinds `questions` and `answers` from lists of strings to the padded id arrays,
# so the cells above cannot be re-run afterwards without reloading the text first.
questions, answers = tokenize_and_filter(questions, answers)
print('단어장의 크기 :',(VOCAB_SIZE))
print('필터링 후의 질문 샘플 개수: {}'.format(len(questions)))
print('필터링 후의 답변 샘플 개수: {}'.format(len(answers)))

단어장의 크기 : 8170
필터링 후의 질문 샘플 개수: 10946
필터링 후의 답변 샘플 개수: 10946


## Step 4. 모델 구성하기

### Teacher Forcing

In [12]:
BATCH_SIZE = 128
# Shuffle over the full dataset. Derived from the data so it stays correct if the
# corpus or the length filter changes (previously hard-coded to the sample count).
BUFFER_SIZE = len(questions)

# Teacher forcing: the decoder is fed the answer without its final position and is
# trained to predict the same answer shifted one step to the left.
dataset = tf.data.Dataset.from_tensor_slices((
    {
        'inputs': questions,
        'dec_inputs': answers[:, :-1]   # drop the last position (EOS or padding)
    },
    {
        'outputs': answers[:, 1:]       # drop the leading SOS
    },
))

dataset = dataset.cache()
dataset = dataset.shuffle(BUFFER_SIZE)
dataset = dataset.batch(BATCH_SIZE)
dataset = dataset.prefetch(tf.data.experimental.AUTOTUNE)

### 모델 정의 및 학습

#### Prior Definitions

In [13]:
# Sinusoidal positional-encoding layer

class PositionalEncoding(tf.keras.layers.Layer):
    """Adds a fixed sinusoidal position signal to its input.

    The table is computed once in the constructor with shape
    ``(1, position, d_model)``; even feature columns hold sines and odd
    feature columns hold cosines. ``d_model`` must be even, otherwise the
    sine and cosine halves cannot be stacked.

    Args:
        position: Number of positions to precompute.
        d_model: Feature dimension of the inputs.
    """

    def __init__(self, position, d_model):
        super().__init__()
        self.pos_encoding = self.positional_encoding(position, d_model)

    def get_angles(self, position, i, d_model):
        """Return ``position / 10000 ** (2 * (i // 2) / d_model)``."""
        exponent = (2 * (i // 2)) / tf.cast(d_model, tf.float32)
        inverse_freq = 1 / tf.pow(10000, exponent)
        return position * inverse_freq

    def positional_encoding(self, position, d_model):
        """Build the ``(1, position, d_model)`` encoding table."""
        positions = tf.range(position, dtype=tf.float32)[:, tf.newaxis]
        dims = tf.range(d_model, dtype=tf.float32)[tf.newaxis, :]
        angle_rads = self.get_angles(position=positions, i=dims, d_model=d_model)

        sines = tf.math.sin(angle_rads[:, 0::2])        # even feature indices
        cosines = tf.math.cos(angle_rads[:, 1::2])      # odd feature indices

        # Stack on a trailing axis -> (position, d_model / 2, 2); flattening the last
        # two axes interleaves the values as sin, cos, sin, cos, ...
        interleaved = tf.reshape(
            tf.stack([sines, cosines], axis=-1), [position, d_model])

        return tf.cast(interleaved[tf.newaxis, ...], tf.float32)

    def call(self, inputs):
        # Only the first seq_len rows of the table are needed.
        seq_len = tf.shape(inputs)[1]
        return inputs + self.pos_encoding[:, :seq_len, :]

In [14]:
# Scaled dot-product attention

def scaled_dot_attention(query, key, value, mask):
    """Compute ``softmax(Q K^T / sqrt(depth) + mask * -1e9) V``.

    Args:
        query: Query tensor.
        key: Key tensor; its last dimension is used as the scaling depth.
        value: Value tensor.
        mask: Tensor with 1 at positions to hide, or ``None`` for no masking.

    Returns:
        The attention-weighted combination of ``value``.
    """
    # Query/key similarity, scaled by the square root of the key depth.
    key_depth = tf.cast(tf.shape(key)[-1], tf.float32)
    scores = tf.matmul(query, key, transpose_b=True) / tf.math.sqrt(key_depth)

    # Masked positions get a large negative score, so softmax sends them to ~0.
    if mask is not None:
        scores += (mask * -1e9)

    weights = tf.nn.softmax(scores, axis=-1)

    # Combine the attention weights with the values.
    return tf.matmul(weights, value)


In [15]:
# Multi-head attention
# Runs several attention heads in parallel so different patterns can be captured.

class MultiHeadAttention(tf.keras.layers.Layer):
    """Multi-head attention built on ``scaled_dot_attention``.

    Args:
        d_model: Model dimension; must be divisible by ``num_heads``.
        num_heads: Number of attention heads.
        name: Layer name.

    The layer is called with a dict holding ``'query'``, ``'key'``,
    ``'value'`` and ``'mask'``.
    """

    def __init__(self, d_model, num_heads, name = 'multi_head_attention'):
        super().__init__(name=name)
        self.num_heads = num_heads
        self.d_model = d_model

        assert d_model % self.num_heads == 0

        # Feature size handled by each head.
        self.depth = d_model // self.num_heads

        self.query_dense = tf.keras.layers.Dense(units=d_model)
        self.key_dense = tf.keras.layers.Dense(units=d_model)
        self.value_dense = tf.keras.layers.Dense(units=d_model)

        # Output projection applied after the heads are merged.
        self.dense = tf.keras.layers.Dense(units=d_model)

    def split_heads(self, inputs, batch_size):
        """Reshape (B, L, d_model) into (B, num_heads, L, depth)."""
        per_head = tf.reshape(
            inputs, shape=(batch_size, -1, self.num_heads, self.depth))
        return tf.transpose(per_head, perm=[0, 2, 1, 3])

    def call(self, inputs):
        mask = inputs['mask']
        batch_size = tf.shape(inputs['query'])[0]

        # Project Q, K and V, then split each projection into heads.
        query = self.split_heads(self.query_dense(inputs['query']), batch_size)
        key = self.split_heads(self.key_dense(inputs['key']), batch_size)
        value = self.split_heads(self.value_dense(inputs['value']), batch_size)

        attended = scaled_dot_attention(query, key, value, mask)

        # (B, num_heads, L, depth) -> (B, L, num_heads, depth) -> (B, L, d_model)
        merged = tf.reshape(
            tf.transpose(attended, perm=[0, 2, 1, 3]),
            (batch_size, -1, self.d_model))

        # Final linear transformation.
        return self.dense(merged)

In [16]:
# Padding mask

def create_padding_mask(x):
    """Return a float mask that is 1.0 where ``x`` equals 0 and 0.0 elsewhere.

    Two singleton axes are inserted after the first axis, so a
    ``(batch_size, sequence_length)`` input yields
    ``(batch_size, 1, 1, sequence_length)``.
    """
    is_pad = tf.math.equal(x, 0)
    return tf.cast(is_pad, tf.float32)[:, tf.newaxis, tf.newaxis, :]

In [17]:
# Look-ahead mask

def create_look_ahead_mask(x):
    """Return a mask hiding both future positions and padding positions of ``x``.

    A position is masked (value 1) when it lies above the diagonal of the
    ``seq_len x seq_len`` matrix or when the padding mask of ``x`` marks it.
    """
    length = tf.shape(x)[1]
    # Ones on and below the diagonal mark the positions each step may attend to.
    lower_triangle = tf.linalg.band_part(tf.ones((length, length)), -1, 0)
    future_mask = 1 - lower_triangle
    return tf.maximum(future_mask, create_padding_mask(x))

In [18]:
# Encoder layer
# Two sub-layers: self-attention and a position-wise feed-forward network

def encoder_layer(units, d_model, num_heads, dropout, name="encoder_layer"):
    """Build one encoder layer as a Keras functional model.

    Args:
        units: Hidden size of the feed-forward network.
        d_model: Feature dimension of the layer's input and output.
        num_heads: Number of attention heads.
        dropout: Dropout rate applied after each sub-layer.
        name: Model name.

    Returns:
        A ``tf.keras.Model`` taking ``[inputs, padding_mask]``.
    """
    inputs = tf.keras.Input(shape=(None, d_model), name="inputs")
    padding_mask = tf.keras.Input(shape=(1, 1, None), name="padding_mask")

    # Sub-layer 1: multi-head self-attention, dropout, residual + layer norm.
    self_attention = MultiHeadAttention(d_model, num_heads, name="attention")({
        'query': inputs,
        'key': inputs,
        'value': inputs,
        'mask': padding_mask,
    })
    self_attention = tf.keras.layers.Dropout(rate=dropout)(self_attention)
    attention_block = tf.keras.layers.LayerNormalization(
        epsilon=1e-6)(inputs + self_attention)

    # Sub-layer 2: feed-forward network, dropout, residual + layer norm.
    hidden = tf.keras.layers.Dense(units=units, activation='relu')(attention_block)
    projected = tf.keras.layers.Dense(units=d_model)(hidden)
    projected = tf.keras.layers.Dropout(rate=dropout)(projected)
    outputs = tf.keras.layers.LayerNormalization(
        epsilon=1e-6)(attention_block + projected)

    return tf.keras.Model(
        inputs=[inputs, padding_mask], outputs=outputs, name=name)

In [19]:
# Encoder stack

def encoder(vocab_size,
            num_layers,
            units,
            d_model,
            num_heads,
            dropout,
            name="encoder"):
    """Build the encoder: embedding, positional encoding, then ``num_layers`` encoder layers.

    Args:
        vocab_size: Size of the embedding table; also passed to
            ``PositionalEncoding`` as the number of positions to precompute.
        num_layers: Number of stacked encoder layers.
        units: Hidden size of each feed-forward network.
        d_model: Embedding / model dimension.
        num_heads: Number of attention heads.
        dropout: Dropout rate.
        name: Model name.

    Returns:
        A ``tf.keras.Model`` taking ``[inputs, padding_mask]``.
    """
    inputs = tf.keras.Input(shape=(None,), name="inputs")
    padding_mask = tf.keras.Input(shape=(1, 1, None), name="padding_mask")

    # Token embeddings scaled by sqrt(d_model), then the position signal is added.
    token_embeddings = tf.keras.layers.Embedding(vocab_size, d_model)(inputs)
    scaled = token_embeddings * tf.math.sqrt(tf.cast(d_model, tf.float32))
    positioned = PositionalEncoding(vocab_size, d_model)(scaled)

    outputs = tf.keras.layers.Dropout(rate=dropout)(positioned)

    # Stack the encoder layers, each fed the previous layer's output.
    for layer_index in range(num_layers):
        layer = encoder_layer(
            units=units,
            d_model=d_model,
            num_heads=num_heads,
            dropout=dropout,
            name="encoder_layer_{}".format(layer_index),
        )
        outputs = layer([outputs, padding_mask])

    return tf.keras.Model(
        inputs=[inputs, padding_mask], outputs=outputs, name=name)

In [20]:
# Decoder layer
# Three sub-layers: masked self-attention, encoder-decoder attention, feed-forward network

def decoder_layer(units, d_model, num_heads, dropout, name="decoder_layer"):
    """Build one decoder layer as a Keras functional model.

    Args:
        units: Hidden size of the feed-forward network.
        d_model: Feature dimension of the layer's input and output.
        num_heads: Number of attention heads.
        dropout: Dropout rate.
        name: Model name.

    Returns:
        A ``tf.keras.Model`` taking
        ``[inputs, enc_outputs, look_ahead_mask, padding_mask]``.
    """
    inputs = tf.keras.Input(shape=(None, d_model), name="inputs")
    enc_outputs = tf.keras.Input(shape=(None, d_model), name="encoder_outputs")
    
    # look-ahead mask, used by the self-attention sub-layer
    look_ahead_mask = tf.keras.Input(
        shape=(1, None, None), name="look_ahead_mask")
    
    # padding mask, used by the encoder-decoder attention sub-layer
    padding_mask = tf.keras.Input(shape=(1, 1, None), name='padding_mask')

    # first sub-layer: masked multi-head self-attention (query, key and value are all `inputs`)
    attention1 = MultiHeadAttention(
        d_model, num_heads, name="attention_1")(inputs={
            'query': inputs,
            'key': inputs,
            'value': inputs,
            'mask': look_ahead_mask
        })

    # NOTE(review): unlike the other two sub-layers, no Dropout is applied here
    # before the residual connection — confirm this is intentional.
    attention1 = tf.keras.layers.LayerNormalization(
        epsilon=1e-6)(attention1 + inputs)

    # second sub-layer: encoder-decoder attention
    # (queries come from the decoder, keys/values from the encoder output)
    attention2 = MultiHeadAttention(
        d_model, num_heads, name="attention_2")(inputs={
            'query': attention1,
            'key': enc_outputs,
            'value': enc_outputs,
            'mask': padding_mask
        })

    attention2 = tf.keras.layers.Dropout(rate=dropout)(attention2)
    attention2 = tf.keras.layers.LayerNormalization(
        epsilon=1e-6)(attention2 + attention1)

    # third sub-layer: position-wise feed-forward network
    outputs = tf.keras.layers.Dense(units=units, activation='relu')(attention2)
    outputs = tf.keras.layers.Dense(units=d_model)(outputs)

    outputs = tf.keras.layers.Dropout(rate=dropout)(outputs)
    outputs = tf.keras.layers.LayerNormalization(
        epsilon=1e-6)(outputs + attention2)

    return tf.keras.Model(
        inputs=[inputs, enc_outputs, look_ahead_mask, padding_mask],
        outputs=outputs,
        name=name)

In [21]:
# Decoder stack

def decoder(vocab_size,
            num_layers,
            units,
            d_model,
            num_heads,
            dropout,
            name='decoder'):
    """Build the decoder: embedding, positional encoding, then ``num_layers`` decoder layers.

    Args:
        vocab_size: Size of the embedding table; also passed to
            ``PositionalEncoding`` as the number of positions to precompute.
        num_layers: Number of stacked decoder layers.
        units: Hidden size of each feed-forward network.
        d_model: Embedding / model dimension.
        num_heads: Number of attention heads.
        dropout: Dropout rate.
        name: Model name.

    Returns:
        A ``tf.keras.Model`` taking
        ``[inputs, enc_outputs, look_ahead_mask, padding_mask]``.
    """
    inputs = tf.keras.Input(shape=(None,), name='inputs')
    enc_outputs = tf.keras.Input(shape=(None, d_model), name='encoder_outputs')
    look_ahead_mask = tf.keras.Input(
        shape=(1, None, None), name='look_ahead_mask')
    padding_mask = tf.keras.Input(shape=(1, 1, None), name='padding_mask')

    # Token embeddings scaled by sqrt(d_model), then the position signal is added.
    token_embeddings = tf.keras.layers.Embedding(vocab_size, d_model)(inputs)
    scaled = token_embeddings * tf.math.sqrt(tf.cast(d_model, tf.float32))
    positioned = PositionalEncoding(vocab_size, d_model)(scaled)

    outputs = tf.keras.layers.Dropout(rate=dropout)(positioned)

    # Stack the decoder layers; every layer sees the same encoder output and masks.
    for layer_index in range(num_layers):
        layer = decoder_layer(
            units=units,
            d_model=d_model,
            num_heads=num_heads,
            dropout=dropout,
            name='decoder_layer_{}'.format(layer_index),
        )
        outputs = layer(
            inputs=[outputs, enc_outputs, look_ahead_mask, padding_mask])

    return tf.keras.Model(
        inputs=[inputs, enc_outputs, look_ahead_mask, padding_mask],
        outputs=outputs,
        name=name)

#### Model Definition

In [22]:
# Full encoder-decoder transformer

def transformer(vocab_size,
                num_layers,
                units,
                d_model,
                num_heads,
                dropout,
                name="transformer"):
    """Assemble the encoder, the decoder and the output projection into one model.

    Args:
        vocab_size: Vocabulary size; used for both embeddings and the output layer.
        num_layers: Number of encoder layers and of decoder layers.
        units: Hidden size of each feed-forward network.
        d_model: Model dimension.
        num_heads: Number of attention heads.
        dropout: Dropout rate.
        name: Model name.

    Returns:
        A ``tf.keras.Model`` with inputs ``inputs`` and ``dec_inputs`` whose
        ``outputs`` layer produces one ``vocab_size``-wide vector (no softmax
        is applied) per decoder position.
    """
    inputs = tf.keras.Input(shape=(None,), name="inputs")
    dec_inputs = tf.keras.Input(shape=(None,), name="dec_inputs")

    # encoder padding mask (built from the encoder input ids)
    enc_padding_mask = tf.keras.layers.Lambda(
        create_padding_mask, output_shape=(1, 1, None),
        name='enc_padding_mask')(inputs)

    # decoder look-ahead mask (built from the decoder input ids)
    look_ahead_mask = tf.keras.layers.Lambda(
        create_look_ahead_mask,
        output_shape=(1, None, None),
        name='look_ahead_mask')(dec_inputs)

    # padding mask for the decoder's attention over the encoder output;
    # it is therefore built from the encoder input ids, not from dec_inputs
    dec_padding_mask = tf.keras.layers.Lambda(
        create_padding_mask, output_shape=(1, 1, None),
        name='dec_padding_mask')(inputs)

    # Encoder
    enc_outputs = encoder(
        vocab_size=vocab_size,
        num_layers=num_layers,
        units=units,
        d_model=d_model,
        num_heads=num_heads,
        dropout=dropout,
    )(inputs=[inputs, enc_padding_mask])

    # Decoder
    dec_outputs = decoder(
        vocab_size=vocab_size,
        num_layers=num_layers,
        units=units,
        d_model=d_model,
        num_heads=num_heads,
        dropout=dropout,
    )(inputs=[dec_inputs, enc_outputs, look_ahead_mask, dec_padding_mask])

    # fully connected output layer projecting to the vocabulary
    outputs = tf.keras.layers.Dense(units=vocab_size, name="outputs")(dec_outputs)

    return tf.keras.Model(inputs=[inputs, dec_inputs], outputs=outputs, name=name)

In [23]:
# Reset Keras global state before building the model.
tf.keras.backend.clear_session()

# Hyperparameters
NUM_LAYERS = 2      # number of encoder layers and of decoder layers
D_MODEL = 256       # fixed input/output dimension inside the encoder and decoder
NUM_HEADS = 8       # number of heads in multi-head attention
UNITS = 512         # hidden size of the feed-forward network
DROPOUT = 0.1       # dropout rate

model = transformer(
    vocab_size=VOCAB_SIZE,
    num_layers=NUM_LAYERS,
    units=UNITS,
    d_model=D_MODEL,
    num_heads=NUM_HEADS,
    dropout=DROPOUT)

model.summary()

Model: "transformer"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 inputs (InputLayer)            [(None, None)]       0           []                               
                                                                                                  
 dec_inputs (InputLayer)        [(None, None)]       0           []                               
                                                                                                  
 enc_padding_mask (Lambda)      (None, 1, 1, None)   0           ['inputs[0][0]']                 
                                                                                                  
 encoder (Functional)           (None, None, 256)    3145728     ['inputs[0][0]',                 
                                                                  'enc_padding_mask[0][0

#### Loss Function & Metrics(ACC)

In [24]:
def loss_function(y_true, y_pred):
    """Padding-masked sparse categorical cross-entropy.

    Targets equal to 0 (padding) contribute nothing. The loss is averaged over
    the non-padding tokens of each sample, then averaged over the batch.

    Args:
        y_true: Integer target ids.
        y_pred: Unnormalized logits over the vocabulary.

    Returns:
        Scalar loss tensor.
    """
    y_true = tf.cast(y_true, tf.int32)
    per_token = tf.keras.losses.SparseCategoricalCrossentropy(
        from_logits=True, reduction='none')(y_true, y_pred)

    # 1 for real tokens, 0 for padding. The mask only needs to be applied once.
    mask = tf.cast(tf.not_equal(y_true, 0), per_token.dtype)

    # The small constant guards against division by zero for an all-padding row.
    per_sample = (tf.reduce_sum(per_token * mask, axis=1)
                  / (tf.reduce_sum(mask, axis=1) + 1e-9))
    return tf.reduce_mean(per_sample)

def masked_accuracy(y_true, y_pred):
    """Token-level accuracy over non-padding targets.

    Args:
        y_true: Integer target ids; positions equal to 0 are ignored.
        y_pred: Scores over the vocabulary; the arg-max is the prediction.

    Returns:
        Scalar tensor: correct non-padding predictions divided by the number
        of non-padding targets.
    """
    labels = tf.cast(y_true, tf.int32)
    predictions = tf.argmax(y_pred, axis=-1, output_type=labels.dtype)

    valid = tf.cast(tf.not_equal(labels, 0), tf.float32)
    hits = tf.cast(tf.equal(labels, predictions), tf.float32) * valid

    # The small constant guards against division by zero when everything is padding.
    return tf.reduce_sum(hits) / (tf.reduce_sum(valid) + 1e-9)


#### Custom Learning rate Scheduling

In [25]:
class CustomSchedule(tf.keras.optimizers.schedules.LearningRateSchedule):
    """Learning-rate schedule with linear warm-up followed by inverse-sqrt decay.

    ``lr(step) = d_model ** -0.5 * min(step ** -0.5, step * warmup_steps ** -1.5)``

    Args:
        d_model: Model dimension used to scale the rate.
        warmup_steps: Number of steps over which the rate increases linearly.
    """

    def __init__(self, d_model, warmup_steps=500):
        super().__init__()

        # Keep the plain Python value so get_config() stays serializable.
        self._d_model_value = d_model
        self.d_model = tf.cast(d_model, tf.float32)

        self.warmup_steps = warmup_steps

    def __call__(self, step):
        # Keras may pass the optimizer's integer iteration counter, while
        # tf.math.rsqrt only accepts floating-point input.
        step = tf.cast(step, tf.float32)

        arg1 = tf.math.rsqrt(step)
        arg2 = step * (self.warmup_steps ** -1.5)

        return tf.math.rsqrt(self.d_model) * tf.math.minimum(arg1, arg2)

    def get_config(self):
        """Return the constructor arguments so the schedule can be serialized."""
        return {
            'd_model': self._d_model_value,
            'warmup_steps': self.warmup_steps,
        }

#### Model Compile & Training

In [26]:
# Learning rate follows CustomSchedule, scaled by the model dimension.
learning_rate = CustomSchedule(D_MODEL)

# Adam optimizer driven by the schedule above.
optimizer = tf.keras.optimizers.Adam(
    learning_rate, beta_1=0.9, beta_2=0.98, epsilon=1e-9)

# Train with the padding-masked loss and report padding-masked accuracy.
model.compile(
    optimizer = optimizer, 
    loss = loss_function, 
    metrics = [masked_accuracy]
    )

In [27]:
# Number of passes over the training dataset.
EPOCHS = 20
model.fit(dataset, epochs=EPOCHS, verbose=1)

# Recorded result at epoch 20/20: loss 0.1133, masked_accuracy 0.9696

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x16689e77130>

## Step 5. 모델 평가하기

### 예측하기

In [28]:
def decoder_inference(sentence):
    """Greedily decode a reply for ``sentence`` with the trained model.

    The sentence is preprocessed and encoded exactly as the training data was
    (via ``encode_ids``). Starting from ``[SOS_ID]``, the most likely next
    token is appended one step at a time until ``EOS_ID`` is predicted or
    ``MAX_LEN_A - 1`` tokens have been generated.

    Args:
        sentence: Raw user input string.

    Returns:
        The decoded reply text.
    """
    cleaned = preprocessing_sentence(sentence)

    # Encoder input: reuse the training-time encoder so the two cannot drift apart.
    enc = tf.constant([encode_ids(cleaned)], dtype=tf.int32)    # (1, T_enc)

    # Decoder start sequence: [SOS]
    dec = tf.constant([[SOS_ID]], dtype=tf.int32)               # (1, 1)

    for _ in range(MAX_LEN_A - 1):                              # generate up to the maximum length
        out = model({'inputs': enc, 'dec_inputs': dec}, training=False)
        logits = out['outputs'] if isinstance(out, dict) else out  # (1, t, V)

        # Greedy choice from the scores at the last time step.
        next_id = tf.argmax(logits[:, -1, :], axis=-1, output_type=tf.int32)  # (1,)

        # Stop once the model emits EOS.
        if next_id[0] == EOS_ID:
            break

        # Append the chosen token to the decoder sequence.
        dec = tf.concat([dec, tf.expand_dims(next_id, 1)], axis=1)  # (1, t+1)

    # Generated id sequence: (1, T) -> (T,)
    gen_ids = dec.numpy().tolist()[0]

    # Drop special tokens and undo the +1 shift applied in encode_ids.
    toks = [tid - 1 for tid in gen_ids if tid not in (SOS_ID, EOS_ID, PAD_ID)]

    return tokenizer.decode(toks)

In [29]:
def sentence_generation(sentence):
    """Generate a reply for ``sentence``, print the input and the reply, and return the reply."""
    reply = decoder_inference(sentence)

    print('입력 :', sentence)
    print('출력 :', reply)

    return reply

In [30]:
# Smoke test: a simple greeting.
sentence_generation('안녕')

입력 : 안녕
출력 : 안녕하세요 .


'안녕하세요 .'

In [31]:
# Ask the bot for its name.
sentence_generation('이름이 뭐야?')

입력 : 이름이 뭐야?
출력 : 위로봇이요 .


'위로봇이요 .'

In [32]:
# Ask for help, addressing the bot by the name it gave in the previous example.
sentence_generation('위로봇아 나 좀 도와줘')

입력 : 위로봇아 나 좀 도와줘
출력 : 저는 사람으로 태어나고 싶어요 .


'저는 사람으로 태어나고 싶어요 .'

In [33]:
# Ask the bot about its birthday.
sentence_generation('넌 생일이 언제야?')

입력 : 넌 생일이 언제야?
출력 : 각자의 삶을 살고 있겠지요 .


'각자의 삶을 살고 있겠지요 .'

In [34]:
# English input: preprocessing_sentence replaces Latin letters with spaces,
# so only the "?" survives preprocessing and reaches the model.
# NOTE(review): "Execuse" looks like a typo for "Excuse"; left as-is because it is runtime text.
sentence_generation('Execuse me?')

입력 : Execuse me?
출력 : 누구보다 조금씩 고치세요 .


'누구보다 조금씩 고치세요 .'