# RNN(Recurrent Neural Network, 순환 신경망)

---
##### model.add(SimpleRNN(hidden_size, input_shape=(timesteps, input_dim))) 
##### = model.add(SimpleRNN(hidden_size, input_length=M, input_dim=N))

<br>

- hidden_size: 은닉 상태의 크기를 정의. 메모리 셀이 다음 시점의 메모리 셀과 출력층으로 보내는 값의 크기(output_dim)와 동일. 중소형 모델의 경우 128, 256, 512, 1024 등의 값을 사용
- timesteps: 입력 시퀀스의 길이 (input_length로 표현하기도 함)
- input_dim: 입력의 크기
---

In [2]:
from keras.models import Sequential
from keras.layers import SimpleRNN

# Build a model with a single SimpleRNN layer: 3 hidden units, 2 timesteps, 10 input features.
model = Sequential()
# Equivalent spelling of the next line: model.add(SimpleRNN(3, input_shape=(2, 10)))
model.add(SimpleRNN(3, input_length=2, input_dim=10))
model.summary()

# Parameter count: (10 inputs * 3 hidden units) + (3 hidden * 3 hidden, recurrent weights) + 3 (biases) = 42

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 simple_rnn_1 (SimpleRNN)    (None, 3)                 42        
                                                                 
Total params: 42
Trainable params: 42
Non-trainable params: 0
_________________________________________________________________


In [3]:
from keras.models import Sequential
from keras.layers import SimpleRNN

# Same layer as before, but with the batch size fixed in advance.
model = Sequential()
model.add(SimpleRNN(3, batch_input_shape=(8, 2, 10)))
model.summary()

# batch_input_shape = (batch_size, timesteps, input_dim); with batch_size fixed to 8
# the summary reports an output shape of (8, 3) instead of (None, 3).

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 simple_rnn_2 (SimpleRNN)    (8, 3)                    42        
                                                                 
Total params: 42
Trainable params: 42
Non-trainable params: 0
_________________________________________________________________


In [4]:
from keras.models import Sequential
from keras.layers import SimpleRNN

# Same layer again, this time returning the hidden state of every timestep.
model = Sequential()
model.add(SimpleRNN(3, batch_input_shape=(8, 2, 10), return_sequences=True))
model.summary()

# With return_sequences=True the layer outputs the result of every timestep,
# so the output shape becomes (batch_size, timesteps, hidden_size) = (8, 2, 3).

Model: "sequential_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 simple_rnn_3 (SimpleRNN)    (8, 2, 3)                 42        
                                                                 
Total params: 42
Trainable params: 42
Non-trainable params: 0
_________________________________________________________________


---

## RNN을 이용하여 텍스트 생성하기

### 1. 데이터에 대한 이해와 전처리

In [80]:
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
import numpy as np
from tensorflow.keras.utils import to_categorical

# Toy corpus: three short Korean sentences separated by newlines.
text = """경마장에 있는 말이 뛰고 있다\n그의 말이 법이다\n가는 말이 고와야 오는 말이 곱다\n"""
t = Tokenizer()
t.fit_on_texts([text])
vocab_size = len(t.word_index) + 1   # +1: word indices start at 1, so index 0 (used for padding) needs a slot too
print("단어 집합의 크기: %d" % vocab_size)

단어 집합의 크기: 12


#####  - 각 단어와 단어에 부여된 정수 인덱스 출력

In [81]:
# Show the word -> integer index mapping learned by the tokenizer.
print(t.word_index)

{'말이': 1, '경마장에': 2, '있는': 3, '뛰고': 4, '있다': 5, '그의': 6, '법이다': 7, '가는': 8, '고와야': 9, '오는': 10, '곱다': 11}


In [82]:
# Build the training samples: for every line of the corpus, emit each prefix
# that contains at least two tokens.
sequences = []

for line in text.split('\n'):
    encoded = t.texts_to_sequences([line])[0]
    # Prefixes encoded[:2], encoded[:3], ..., encoded[:len(encoded)].
    sequences.extend(encoded[:end] for end in range(2, len(encoded) + 1))

print("학습에 사용할 샘플의 개수: %d" % len(sequences))
print(sequences)

학습에 사용할 샘플의 개수: 11
[[2, 3], [2, 3, 1], [2, 3, 1, 4], [2, 3, 1, 4, 5], [6, 1], [6, 1, 7], [8, 1], [8, 1, 9], [8, 1, 9, 10], [8, 1, 9, 10, 1], [8, 1, 9, 10, 1, 11]]


In [83]:
# Length of the longest sample in the training set.
max_len = max(map(len, sequences))
print("샘플의 최대 길이: {}".format(max_len))

샘플의 최대 길이: 6


##### - 전체 샘플의 길이를 6으로 패딩

In [84]:
# Left-pad ('pre') every sample with zeros so that all of them have length max_len.
sequences = pad_sequences(sequences, maxlen=max_len, padding='pre')
print(sequences)

[[ 0  0  0  0  2  3]
 [ 0  0  0  2  3  1]
 [ 0  0  2  3  1  4]
 [ 0  2  3  1  4  5]
 [ 0  0  0  0  6  1]
 [ 0  0  0  6  1  7]
 [ 0  0  0  0  8  1]
 [ 0  0  0  8  1  9]
 [ 0  0  8  1  9 10]
 [ 0  8  1  9 10  1]
 [ 8  1  9 10  1 11]]


In [85]:
# Split each padded sample into its context (all tokens but the last)
# and its label (the last token).
sequences = np.array(sequences)
X, y = sequences[:, :-1], sequences[:, -1]

print("X:\n", X)
print("y:\n", y)

X:
 [[ 0  0  0  0  2]
 [ 0  0  0  2  3]
 [ 0  0  2  3  1]
 [ 0  2  3  1  4]
 [ 0  0  0  0  6]
 [ 0  0  0  6  1]
 [ 0  0  0  0  8]
 [ 0  0  0  8  1]
 [ 0  0  8  1  9]
 [ 0  8  1  9 10]
 [ 8  1  9 10  1]]
y:
 [ 3  1  4  5  1  7  1  9 10  1 11]


##### - 원-핫 인코딩 수행

In [86]:
# One-hot encode the label indices with Keras' to_categorical.
y = to_categorical(y, num_classes=vocab_size)
print(y)

# The words are numbered 1 through 11, but one-hot columns are numbered from 0,
# so 11 classes would only cover indices 0-10.
# That is why vocab_size was defined above as len(word_index) + 1:
# with 12 classes the columns cover indices 0-11.

[[0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0.]
 [0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1.]]


### 2.  모델 설계하기

---
<h3><span style="color:#ff6600;">tf.keras.layers.Embedding</span><br><span style="font-weight:500; color:#666;">(input_dim, output_dim, embeddings_initializer='uniform', embeddings_regularizer=None, activity_regularizer=None, embeddings_constraint=None, mask_zero=False, input_length=None, **kwargs)</span></h3>

<br>

- 인수:
    - input_dim: 단어 집합의 크기 (vocab_size)
    - output_dim: 임베딩 벡터의 차원
    - input_length: 입력 시퀀스의 길이

- 역할: 
    - 단어를 밀집 벡터로 만드는 일을 수행한다. 
    - 정수 인코딩이 된 단어들을 입력으로 받아 수행한다. 
    - 단어를 랜덤한 값을 가지는 밀집 벡터로 변환한 뒤에, 인공 신경망의 가중치를 학습하는 것과 같은 방식으로 단어 벡터를 학습하는 방법을 사용한다.
    
    
---

In [87]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, Dense, SimpleRNN

# Next-word prediction model: Embedding -> SimpleRNN -> softmax over the vocabulary.
model = Sequential()
# Each word index becomes a 7-dimensional vector; the input length is max_len - 1
# because the last token of every sample was split off as the label.
model.add(Embedding(vocab_size, 7, input_length=max_len-1))
model.add(SimpleRNN(32))
# One output unit per vocabulary index.
model.add(Dense(vocab_size, activation='softmax'))
model.summary()

Model: "sequential_11"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_7 (Embedding)     (None, 5, 7)              84        
                                                                 
 simple_rnn_11 (SimpleRNN)   (None, 32)                1280      
                                                                 
 dense_6 (Dense)             (None, 12)                396       
                                                                 
Total params: 1,760
Trainable params: 1,760
Non-trainable params: 0
_________________________________________________________________


![image-2.png](attachment:image-2.png)

In [88]:
# Categorical cross-entropy matches the one-hot encoded labels in y.
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
# Train for 200 epochs; verbose=2 logs one line per epoch.
model.fit(X, y, epochs=200, verbose=2)

Epoch 1/200
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: 'arguments' object has no attribute 'posonlyargs'
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: 'arguments' object has no attribute 'posonlyargs'
1/1 - 1s - loss: 2.4891 - accuracy: 0.0909 - 572ms/epoch - 572ms/step
Epoch 2/200
1/1 - 0s - loss: 2.4776 - accuracy: 0.0909 - 3ms/epoch - 3ms/step
Epoch 3/200
1/1 - 0s - loss: 2.4664 - accuracy: 0.0909 - 2ms/epoch - 2ms/step
Epoch 4/200
1/1 - 0s - loss: 2.4554 - accuracy: 0.0909 - 6ms/epoch - 6ms/step
Epoch 5/200
1/1 - 0s - loss: 2.4444 - accuracy: 0.1818 - 3ms/epoch - 3ms/step
Epoch 6/200
1/1 - 0s - loss: 2.4334 - accuracy: 0.1818 - 3ms/epoch - 3ms/step
Epoch 7/200
1/1 - 0s - loss: 2.4220 - accuracy: 0.1818 - 6ms/epoch - 6ms/step
Epoch 8/200
1/1 

1/1 - 0s - loss: 0.8940 - accuracy: 0.7273 - 3ms/epoch - 3ms/step
Epoch 94/200
1/1 - 0s - loss: 0.8784 - accuracy: 0.7273 - 2ms/epoch - 2ms/step
Epoch 95/200
1/1 - 0s - loss: 0.8630 - accuracy: 0.7273 - 3ms/epoch - 3ms/step
Epoch 96/200
1/1 - 0s - loss: 0.8477 - accuracy: 0.7273 - 2ms/epoch - 2ms/step
Epoch 97/200
1/1 - 0s - loss: 0.8325 - accuracy: 0.7273 - 4ms/epoch - 4ms/step
Epoch 98/200
1/1 - 0s - loss: 0.8175 - accuracy: 0.7273 - 2ms/epoch - 2ms/step
Epoch 99/200
1/1 - 0s - loss: 0.8027 - accuracy: 0.7273 - 2ms/epoch - 2ms/step
Epoch 100/200
1/1 - 0s - loss: 0.7880 - accuracy: 0.7273 - 3ms/epoch - 3ms/step
Epoch 101/200
1/1 - 0s - loss: 0.7735 - accuracy: 0.8182 - 2ms/epoch - 2ms/step
Epoch 102/200
1/1 - 0s - loss: 0.7593 - accuracy: 0.8182 - 2ms/epoch - 2ms/step
Epoch 103/200
1/1 - 0s - loss: 0.7452 - accuracy: 0.8182 - 2ms/epoch - 2ms/step
Epoch 104/200
1/1 - 0s - loss: 0.7314 - accuracy: 0.8182 - 3ms/epoch - 3ms/step
Epoch 105/200
1/1 - 0s - loss: 0.7178 - accuracy: 0.8182 - 2

Epoch 196/200
1/1 - 0s - loss: 0.1399 - accuracy: 1.0000 - 4ms/epoch - 4ms/step
Epoch 197/200
1/1 - 0s - loss: 0.1372 - accuracy: 1.0000 - 2ms/epoch - 2ms/step
Epoch 198/200
1/1 - 0s - loss: 0.1345 - accuracy: 1.0000 - 9ms/epoch - 9ms/step
Epoch 199/200
1/1 - 0s - loss: 0.1319 - accuracy: 1.0000 - 2ms/epoch - 2ms/step
Epoch 200/200
1/1 - 0s - loss: 0.1294 - accuracy: 1.0000 - 3ms/epoch - 3ms/step


<keras.callbacks.History at 0x1bc41a42b88>

In [90]:
def sentence_generation(model, t, current_word, n, maxlen=5):
    """Generate text by repeatedly predicting the next word.

    Args:
        model: Trained model; ``model.predict`` is expected to return one
            score per vocabulary index for the padded input.
        t: Fitted tokenizer providing ``texts_to_sequences`` and ``word_index``.
        current_word: Seed text the generation starts from.
        n: Maximum number of words to append to the seed.
        maxlen: Length the encoded context is pre-padded to before prediction.
            Defaults to 5, the value previously hard-coded here; it should
            match the input length the model was built with.

    Returns:
        The seed text followed by the predicted words, separated by spaces.
        Fewer than ``n`` words are appended if the model predicts an index
        that has no word in ``t.word_index``.
    """
    # Build the index -> word lookup once instead of scanning word_index on every step.
    index_to_word = {index: word for word, index in t.word_index.items()}
    init_word = current_word   # keep the seed so it can be prepended to the result
    sentence = ''

    for _ in range(n):
        encoded = t.texts_to_sequences([current_word])[0]   # integer-encode the text so far
        encoded = pad_sequences([encoded], maxlen=maxlen, padding='pre')   # left-pad to the model input length

        # The original note says model.predict_classes() had to be replaced after a
        # Keras version change, so the class is taken as the argmax of predict().
        temp = model.predict(encoded, verbose=0)
        result = int(np.argmax(temp, axis=-1)[0])

        word = index_to_word.get(result)
        if word is None:
            # The predicted index has no word (e.g. the padding index 0). Stop here
            # rather than appending an unrelated word, which the old lookup loop did.
            break

        current_word = current_word + ' ' + word   # extend the context with the predicted word
        sentence = sentence + ' ' + word   # accumulate only the generated words

    return init_word + sentence

# Generate a continuation for each seed word with the given number of predicted words.
for seed, count in (('경마장에', 4), ('그의', 2), ('가는', 5)):
    print(sentence_generation(model, t, seed, count))

경마장에 있는 말이 뛰고 있다
그의 말이 법이다
가는 말이 고와야 오는 말이 곱다


---
## LSTM을 이용하여 텍스트 생성하기

### 1. 데이터에 대한 이해와 전처리

In [118]:
import pandas as pd
from string import punctuation
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
import numpy as np
from tensorflow.keras.utils import to_categorical

# Load the article metadata CSV and preview the first rows.
df = pd.read_csv("./datasets/ArticlesApril2018.csv")
df.head()

Unnamed: 0,articleID,articleWordCount,byline,documentType,headline,keywords,multimedia,newDesk,printPage,pubDate,sectionName,snippet,source,typeOfMaterial,webURL
0,5adf6684068401528a2aa69b,781,By JOHN BRANCH,article,Former N.F.L. Cheerleaders’ Settlement Offer: ...,"['Workplace Hazards and Violations', 'Football...",68,Sports,0,2018-04-24 17:16:49,Pro Football,"“I understand that they could meet with us, pa...",The New York Times,News,https://www.nytimes.com/2018/04/24/sports/foot...
1,5adf653f068401528a2aa697,656,By LISA FRIEDMAN,article,E.P.A. to Unveil a New Rule. Its Effect: Less ...,"['Environmental Protection Agency', 'Pruitt, S...",68,Climate,0,2018-04-24 17:11:21,Unknown,The agency plans to publish a new regulation T...,The New York Times,News,https://www.nytimes.com/2018/04/24/climate/epa...
2,5adf4626068401528a2aa628,2427,By PETE WELLS,article,"The New Noma, Explained","['Restaurants', 'Noma (Copenhagen, Restaurant)...",66,Dining,0,2018-04-24 14:58:44,Unknown,What’s it like to eat at the second incarnatio...,The New York Times,News,https://www.nytimes.com/2018/04/24/dining/noma...
3,5adf40d2068401528a2aa619,626,By JULIE HIRSCHFELD DAVIS and PETER BAKER,article,Unknown,"['Macron, Emmanuel (1977- )', 'Trump, Donald J...",68,Washington,0,2018-04-24 14:35:57,Europe,President Trump welcomed President Emmanuel Ma...,The New York Times,News,https://www.nytimes.com/2018/04/24/world/europ...
4,5adf3d64068401528a2aa60f,815,By IAN AUSTEN and DAN BILEFSKY,article,Unknown,"['Toronto, Ontario, Attack (April, 2018)', 'Mu...",68,Foreign,0,2018-04-24 14:21:21,Canada,"Alek Minassian, 25, a resident of Toronto’s Ri...",The New York Times,News,https://www.nytimes.com/2018/04/24/world/canad...


In [119]:
# Report how many columns the DataFrame has.
print("열의 개수:", len(df.columns))

열의 개수: 15


In [120]:
# List the column names.
df.columns

Index(['articleID', 'articleWordCount', 'byline', 'documentType', 'headline',
       'keywords', 'multimedia', 'newDesk', 'printPage', 'pubDate',
       'sectionName', 'snippet', 'source', 'typeOfMaterial', 'webURL'],
      dtype='object')

In [121]:
# True if at least one value in the 'headline' column is null.
df['headline'].isnull().values.any()

False

In [122]:
# Copy the headline column into a plain Python list and preview the first five entries.
headline = list(df.headline.values)
headline[:5]

['Former N.F.L. Cheerleaders’ Settlement Offer: $1 and a Meeting With Goodell',
 'E.P.A. to Unveil a New Rule. Its Effect: Less Science in Policymaking.',
 'The New Noma, Explained',
 'Unknown',
 'Unknown']

In [123]:
# Total number of headlines before any filtering.
print("총 샘플의 개수: {}".format(len(headline)))

총 샘플의 개수: 1324


In [124]:
# Discard placeholder entries whose headline is exactly "Unknown".
headline = [title for title in headline if title != "Unknown"]
print("노이즈 값 제거 후 샘플의 개수: {}".format(len(headline)))

노이즈 값 제거 후 샘플의 개수: 1214


In [125]:
# Preview the first five headlines after filtering.
headline[:5]

['Former N.F.L. Cheerleaders’ Settlement Offer: $1 and a Meeting With Goodell',
 'E.P.A. to Unveil a New Rule. Its Effect: Less Science in Policymaking.',
 'The New Noma, Explained',
 'How a Bag of Texas Dirt  Became a Times Tradition',
 'Is School a Place for Self-Expression?']

In [140]:
# Display string.punctuation.
punctuation

# The punctuation characters

'!"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~'

In [126]:
def repreprocessing(s):
    """Normalize a headline: drop non-ASCII characters, strip punctuation, lowercase.

    Args:
        s: Raw headline string.

    Returns:
        The cleaned, lowercased string. Whitespace is left untouched.
    """
    # Characters that cannot be represented in ASCII are silently discarded.
    ascii_only = s.encode("utf8").decode("ascii", 'ignore')
    # Delete every character listed in string.punctuation in a single pass.
    without_punctuation = ascii_only.translate(str.maketrans('', '', punctuation))
    return without_punctuation.lower()

# Clean every headline and preview the first five results.
text = list(map(repreprocessing, headline))
text[:5]

['former nfl cheerleaders settlement offer 1 and a meeting with goodell',
 'epa to unveil a new rule its effect less science in policymaking',
 'the new noma explained',
 'how a bag of texas dirt  became a times tradition',
 'is school a place for selfexpression']

In [127]:
# Fit a fresh tokenizer on the cleaned headlines.
t = Tokenizer()
t.fit_on_texts(text)
# +1 so that index 0 (used for padding) is also covered by the vocabulary size.
vocab_size = len(t.word_index) + 1
print("단어 집합의 크기: %d" % vocab_size)

단어 집합의 크기: 3494


In [128]:
# Build the training samples: for every headline, emit each prefix
# that contains at least two tokens.
sequences = []

for line in text:
    encoded = t.texts_to_sequences([line])[0]
    # Prefixes encoded[:2], encoded[:3], ..., encoded[:len(encoded)].
    sequences.extend(encoded[:end] for end in range(2, len(encoded) + 1))

sequences[:11]

[[99, 269],
 [99, 269, 371],
 [99, 269, 371, 1115],
 [99, 269, 371, 1115, 582],
 [99, 269, 371, 1115, 582, 52],
 [99, 269, 371, 1115, 582, 52, 7],
 [99, 269, 371, 1115, 582, 52, 7, 2],
 [99, 269, 371, 1115, 582, 52, 7, 2, 372],
 [99, 269, 371, 1115, 582, 52, 7, 2, 372, 10],
 [99, 269, 371, 1115, 582, 52, 7, 2, 372, 10, 1116],
 [100, 3]]

In [146]:
# Invert the tokenizer's word -> index mapping so indices can be turned back into words.
index_to_word = {index: word for word, index in t.word_index.items()}

print("빈도수 상위 582번 단어: {}".format(index_to_word[582]))
print("빈도수 상위 1번 단어: {}".format(index_to_word[1]))
print("빈도수 상위 50번 단어: {}".format(index_to_word[50]))

빈도수 상위 582번 단어: offer
빈도수 상위 1번 단어: the
빈도수 상위 50번 단어: recap


In [145]:
# Length of the longest sample in the training set.
max_len = max(map(len, sequences))
print("샘플의 최대 길이: {}".format(max_len))

샘플의 최대 길이: 24


##### - 가장 긴 샘플의 길이인 24로 모든 샘플의 길이를 패딩

In [130]:
# Left-pad ('pre') every sample with zeros to length max_len and preview the first three.
sequences = pad_sequences(sequences, maxlen=max_len, padding='pre')
print(sequences[:3])

[[   0    0    0    0    0    0    0    0    0    0    0    0    0    0
     0    0    0    0    0    0    0    0   99  269]
 [   0    0    0    0    0    0    0    0    0    0    0    0    0    0
     0    0    0    0    0    0    0   99  269  371]
 [   0    0    0    0    0    0    0    0    0    0    0    0    0    0
     0    0    0    0    0    0   99  269  371 1115]]


In [131]:
# Split each padded sample into its context (all tokens but the last)
# and its label (the last token), then preview the first three contexts.
sequences = np.array(sequences)
X, y = sequences[:, :-1], sequences[:, -1]
X[:3]

array([[  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,  99],
       [  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,  99, 269],
       [  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,  99, 269, 371]])

In [132]:
# Preview the first three labels (the last token of each sample).
y[:3]

array([ 269,  371, 1115])

##### - 원-핫 인코딩

In [133]:
# One-hot encode the labels over vocab_size classes.
y = to_categorical(y, num_classes=vocab_size)
y

array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 1., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]], dtype=float32)

### 2. 모델 설계하기

In [135]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, Dense, LSTM

# Next-word prediction model: Embedding -> LSTM -> softmax over the vocabulary.
model = Sequential()

# Each word index becomes a 10-dimensional vector; the input length is max_len - 1
# because the last token of every sample was split off as the label.
model.add(Embedding(vocab_size, 10, input_length=max_len-1))
model.add(LSTM(128))
# One output unit per vocabulary index.
model.add(Dense(vocab_size, activation='softmax'))
model.summary()

Model: "sequential_12"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_8 (Embedding)     (None, 23, 10)            34940     
                                                                 
 lstm (LSTM)                 (None, 128)               71168     
                                                                 
 dense_7 (Dense)             (None, 3494)              450726    
                                                                 
Total params: 556,834
Trainable params: 556,834
Non-trainable params: 0
_________________________________________________________________


In [136]:
# Categorical cross-entropy matches the one-hot encoded labels in y.
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
# Train for 200 epochs; verbose=2 logs one line per epoch.
model.fit(X, y, epochs=200, verbose=2)

Epoch 1/200
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: 'arguments' object has no attribute 'posonlyargs'
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: 'arguments' object has no attribute 'posonlyargs'
244/244 - 4s - loss: 7.6275 - accuracy: 0.0245 - 4s/epoch - 17ms/step
Epoch 2/200
244/244 - 3s - loss: 7.1135 - accuracy: 0.0296 - 3s/epoch - 12ms/step
Epoch 3/200
244/244 - 3s - loss: 6.9782 - accuracy: 0.0333 - 3s/epoch - 12ms/step
Epoch 4/200
244/244 - 3s - loss: 6.8539 - accuracy: 0.0396 - 3s/epoch - 12ms/step
Epoch 5/200
244/244 - 3s - loss: 6.7022 - accuracy: 0.0443 - 3s/epoch - 12ms/step
Epoch 6/200
244/244 - 3s - loss: 6.5203 - accuracy: 0.0479 - 3s/epoch - 13ms/step
Epoch 7/200
244/244 - 3s - loss: 6.3243 - accuracy: 0.0519 - 3s/epoch - 13

Epoch 89/200
244/244 - 3s - loss: 0.8955 - accuracy: 0.8246 - 3s/epoch - 14ms/step
Epoch 90/200
244/244 - 3s - loss: 0.8762 - accuracy: 0.8270 - 3s/epoch - 13ms/step
Epoch 91/200
244/244 - 3s - loss: 0.8546 - accuracy: 0.8324 - 3s/epoch - 13ms/step
Epoch 92/200
244/244 - 3s - loss: 0.8398 - accuracy: 0.8360 - 3s/epoch - 13ms/step
Epoch 93/200
244/244 - 3s - loss: 0.8204 - accuracy: 0.8392 - 3s/epoch - 13ms/step
Epoch 94/200
244/244 - 3s - loss: 0.8004 - accuracy: 0.8421 - 3s/epoch - 13ms/step
Epoch 95/200
244/244 - 3s - loss: 0.7840 - accuracy: 0.8448 - 3s/epoch - 13ms/step
Epoch 96/200
244/244 - 3s - loss: 0.7659 - accuracy: 0.8485 - 3s/epoch - 13ms/step
Epoch 97/200
244/244 - 3s - loss: 0.7502 - accuracy: 0.8503 - 3s/epoch - 13ms/step
Epoch 98/200
244/244 - 3s - loss: 0.7355 - accuracy: 0.8534 - 3s/epoch - 13ms/step
Epoch 99/200
244/244 - 3s - loss: 0.7201 - accuracy: 0.8592 - 3s/epoch - 13ms/step
Epoch 100/200
244/244 - 3s - loss: 0.7047 - accuracy: 0.8592 - 3s/epoch - 13ms/step
Epo

Epoch 187/200
244/244 - 3s - loss: 0.2710 - accuracy: 0.9177 - 3s/epoch - 14ms/step
Epoch 188/200
244/244 - 3s - loss: 0.2714 - accuracy: 0.9175 - 3s/epoch - 14ms/step
Epoch 189/200
244/244 - 3s - loss: 0.2705 - accuracy: 0.9157 - 3s/epoch - 14ms/step
Epoch 190/200
244/244 - 3s - loss: 0.2701 - accuracy: 0.9168 - 3s/epoch - 14ms/step
Epoch 191/200
244/244 - 3s - loss: 0.2694 - accuracy: 0.9157 - 3s/epoch - 14ms/step
Epoch 192/200
244/244 - 3s - loss: 0.2694 - accuracy: 0.9168 - 3s/epoch - 14ms/step
Epoch 193/200
244/244 - 4s - loss: 0.2742 - accuracy: 0.9136 - 4s/epoch - 14ms/step
Epoch 194/200
244/244 - 4s - loss: 0.2980 - accuracy: 0.9130 - 4s/epoch - 15ms/step
Epoch 195/200
244/244 - 3s - loss: 0.2767 - accuracy: 0.9164 - 3s/epoch - 14ms/step
Epoch 196/200
244/244 - 3s - loss: 0.2687 - accuracy: 0.9163 - 3s/epoch - 14ms/step
Epoch 197/200
244/244 - 3s - loss: 0.2679 - accuracy: 0.9159 - 3s/epoch - 14ms/step
Epoch 198/200
244/244 - 3s - loss: 0.2656 - accuracy: 0.9162 - 3s/epoch - 14

<keras.callbacks.History at 0x1bc41a42dc8>

In [137]:
def sentence_generation(model, t, current_word, n, maxlen=23):
    """Generate text by repeatedly predicting the next word.

    Args:
        model: Trained model; ``model.predict`` is expected to return one
            score per vocabulary index for the padded input.
        t: Fitted tokenizer providing ``texts_to_sequences`` and ``word_index``.
        current_word: Seed text the generation starts from.
        n: Maximum number of words to append to the seed.
        maxlen: Length the encoded context is pre-padded to before prediction.
            Defaults to 23, the value previously hard-coded here; it should
            match the input length the model was built with.

    Returns:
        The seed text followed by the predicted words, separated by spaces.
        Fewer than ``n`` words are appended if the model predicts an index
        that has no word in ``t.word_index``.
    """
    # Build the index -> word lookup once instead of scanning word_index on every step.
    index_to_word = {index: word for word, index in t.word_index.items()}
    init_word = current_word   # keep the seed so it can be prepended to the result
    sentence = ''

    for _ in range(n):
        encoded = t.texts_to_sequences([current_word])[0]   # integer-encode the text so far
        encoded = pad_sequences([encoded], maxlen=maxlen, padding='pre')   # left-pad to the model input length
        # verbose=1 is kept from the original call.
        temp = model.predict(encoded, verbose=1)
        result = int(np.argmax(temp, axis=-1)[0])

        word = index_to_word.get(result)
        if word is None:
            # The predicted index has no word (e.g. the padding index 0). Stop here
            # rather than appending an unrelated word, which the old lookup loop did.
            break

        current_word = current_word + ' ' + word   # extend the context with the predicted word
        sentence = sentence + ' ' + word   # accumulate only the generated words

    return init_word + sentence

In [141]:
# Generate 10 words starting from the seed word 'i'.
print("*** 임의의 단어 'i'로 문장 만들기 ***")
print(sentence_generation(model, t, 'i', 10))

*** 임의의 단어 'i'로 문장 만들기 ***
i want to be rich and im not sorry think indian


In [142]:
# Generate 10 words starting from the seed word 'how'.
print("*** 임의의 단어 'how'로 문장 만들기 ***")
print(sentence_generation(model, t, 'how', 10))

*** 임의의 단어 'how'로 문장 만들기 ***
how to make facebook more accountable wont too cia i losing


---

## 영화 리뷰 예제

1. JDK 설치
    - https://www.oracle.com/kr/java/technologies/downloads/#java8-windows
    - 
2. JPype1 설치
    - https://www.lfd.uci.edu/~gohlke/pythonlibs/#jpype
    - JPype1-1.3.0-cp37-cp37m-win_amd64.whl 설치
    
3. konlpy 설치