# 실습 [13-1]<br>
**실습명: LSTM 네트워크를 이용한 자연어 생성**<br>

In [1]:
#관련 라이브러리 불러오기
from __future__ import print_function
from tensorflow.keras.callbacks import LambdaCallback

#LSTM 네트워크 구조 짜기 위해 필요함
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import LSTM
from tensorflow.keras.optimizers import RMSprop

from tensorflow.keras.utils import get_file #텍스트 파일 불러오는 용도
import numpy as np
import random
import sys
import io

In [2]:
# Load the training corpus: fetch the Nietzsche text file through Keras'
# get_file helper, then read it in as UTF-8.
fpath = get_file(
    'nietzsche.txt',
    origin='https://s3.amazonaws.com/text-datasets/nietzsche.txt',
)
with io.open(fpath, encoding='utf-8') as f:
  text = f.read()
# Lower-case the whole corpus so the character vocabulary stays small.
text = text.lower()

Downloading data from https://s3.amazonaws.com/text-datasets/nietzsche.txt


In [3]:
# Build the vocabulary. The model works on single characters, so the
# vocabulary is the sorted set of distinct characters in the corpus.
chars = sorted(set(text))
char2index = {ch: idx for idx, ch in enumerate(chars)}  # character -> index
index2char = dict(enumerate(chars))  # index -> character

In [4]:
# Build character-level training data.
maxlen, step = 40, 3

# Slide a window of `maxlen` characters over the corpus, advancing `step`
# characters at a time. Each window is one input sequence and the character
# immediately after it is the prediction target.
window_starts = range(0, len(text) - maxlen, step)
sentences = [text[i : i + maxlen] for i in window_starts]
next_chars = [text[i + maxlen] for i in window_starts]

print('The number of sentences :', len(sentences))

# One-hot encode inputs (x) and targets (y).
# The builtin `bool` is used as the dtype: the `np.bool` alias was deprecated
# in NumPy 1.20 and removed in 1.24, where it raises AttributeError.
x = np.zeros((len(sentences), maxlen, len(chars)), dtype=bool)
y = np.zeros((len(sentences), len(chars)), dtype=bool)

for i, sentence in enumerate(sentences):
  for t, char in enumerate(sentence):
    x[i, t, char2index[char]] = 1
  y[i, char2index[next_chars[i]]] = 1

The number of sentences : 200285


In [5]:
# Define the network: a single 128-unit LSTM layer reading a
# (maxlen, len(chars)) one-hot sequence, followed by a softmax layer with
# one output per vocabulary character.
model = Sequential([
    LSTM(128, input_shape=(maxlen, len(chars))),
    Dense(len(chars), activation='softmax'),
])
optimizer = RMSprop(learning_rate=0.01)
model.compile(optimizer=optimizer, loss='categorical_crossentropy')

In [6]:
def sample(preds, temperature=1.0):
  """Sample the index of the next character from predicted probabilities.

  The probabilities are re-weighted by ``temperature`` (each probability is
  raised to ``1 / temperature`` and the result renormalised), then a single
  index is drawn from the resulting distribution. The sampled character is
  meant to be fed back as input for the next prediction step.

  Args:
    preds: 1-D array-like of non-negative probabilities.
    temperature: Re-weighting factor. Values below 1 sharpen the
      distribution, values above 1 flatten it. ``0`` selects the most
      probable index deterministically.

  Returns:
    The sampled index, as returned by ``np.argmax``.
  """
  preds = np.asarray(preds).astype('float64')
  if temperature == 0:
    # Limit of temperature -> 0: greedy choice (avoids a division by zero).
    return np.argmax(preds)

  # log(0) = -inf is harmless here (it maps back to probability 0), so the
  # divide-by-zero warning NumPy emits for it is suppressed.
  with np.errstate(divide='ignore'):
    logits = np.log(preds) / temperature
  # Shift so the largest logit is 0 before exponentiating. Without this,
  # small temperatures make every exp() underflow to 0 and the
  # normalisation below turns into 0 / 0.
  logits = logits - np.max(logits)
  exp_preds = np.exp(logits)
  preds = exp_preds / np.sum(exp_preds)
  probas = np.random.multinomial(1, preds, 1)
  return np.argmax(probas)

In [8]:
# Epoch-end callback: print text generated by the current model.
def on_epoch_end(epoch, _):
  """Print 400 generated characters per diversity at the end of an epoch.

  A random `maxlen`-character slice of the corpus is used as the seed. For
  each diversity (sampling temperature) the seed is echoed, then the model
  is asked repeatedly for the next character; each sampled character is
  written to stdout and appended to the sliding input window.

  Args:
    epoch: Index of the epoch that just finished.
    _: Second callback argument; unused.
  """
  print('\nEpoch : %d' % epoch)
  seed_start = random.randint(0, len(text) - maxlen - 1)
  seed = text[seed_start : seed_start + maxlen]
  vocab_size = len(chars)

  # The same seed is reused for every diversity value.
  for diversity in (0.2, 0.5, 1.0, 1.2):
    print('\nDiversity :', diversity)
    print('Seed : %s' % seed)
    sys.stdout.write(seed)

    window = seed
    for _step in range(400):
      # One-hot encode the current window as a batch of size 1.
      x_pred = np.zeros((1, maxlen, vocab_size))
      for pos, ch in enumerate(window):
        x_pred[0, pos, char2index[ch]] = 1.0
      probs = model.predict(x_pred, verbose=0)[0]
      next_char = index2char[sample(probs, diversity)]
      # Slide the window: drop the oldest character, append the new one.
      window = window[1:] + next_char
      sys.stdout.write(next_char)
      sys.stdout.flush()

# Train for a single epoch. The callback invokes on_epoch_end when the
# epoch finishes, which prints sample text generated by the model.
print_callback = LambdaCallback(on_epoch_end=on_epoch_end)
model.fit(
    x,
    y,
    batch_size=128,
    epochs=1,
    callbacks=[print_callback],
)


Epoch : 0

Diversity : 0.2
Seed : ue must be
connected with self denial. w
ue must be
connected with self denial. when the interpreted the sense of the sense of the contradiction when the sense of the sense of the more and in the head the sense of the sense is the sentiment of the most self-contrainity of the world of the present the strengen of the strenger and the world be a man who have a consting to the interpreted the person of the strengen of the present of the strengen of the most present the most somet
Diversity : 0.5
Seed : ue must be
connected with self denial. w
ue must be
connected with self denial. we have his ready the interest pain in the desire and desire the sentiment of the stard when he want it is well the leader and respection and metaphysic respection of the strong stronger insight the something the sentiment of the perceative of the something and method of the desermonative of the strong things we ever which is the new most distinction of the distrest in which th

<tensorflow.python.keras.callbacks.History at 0x7f8391a5d110>