In [None]:
import sys
import re
from keras.callbacks import LambdaCallback
from keras.models import Sequential
from keras.layers import Dense, Activation
from keras.layers import LSTM
from keras.optimizers import RMSprop
from keras.utils.data_utils import get_file
import matplotlib.pyplot as plt  # 追加
import numpy as np
import random
import sys
import io
from keras.callbacks import EarlyStopping 
from keras.models import load_model

# Convert the Shift_JIS source corpus into a single-line UTF-8 file.
path = 'kakugen.txt'
# Both files are managed by `with`, so the handles are closed even when
# decoding fails part-way. The output is opened once in 'w' mode: re-running
# this cell rebuilds data_kakugen.txt instead of appending a second copy of
# the corpus to it.
with open(path, 'rb') as bindata, \
        open('data_kakugen.txt', 'w', encoding='utf-8') as converted:
    lines = bindata.readlines()
    for line in lines:
        text = line.decode('Shift_JIS')  # the source file is read as Shift_JIS
        # Keep only what precedes the first CR or LF, so every line is written
        # back-to-back without line breaks.
        text = re.split(r'[\r\n]', text, maxsplit=1)[0]
        print(text)
        converted.write(text)  # written out as UTF-8
 

In [None]:
# Load the UTF-8 corpus (lower-cased) and report its size.
path = 'data_kakugen.txt'
with io.open(path, encoding='utf-8') as f:
    text = f.read().lower()
print('corpus length:', len(text))

# Vocabulary: every distinct character, sorted, with lookup tables in both
# directions (character -> index, index -> character).
chars = sorted(set(text))
print('total chars:', len(chars))
char_indices = {c: i for i, c in enumerate(chars)}
indices_char = {i: c for i, c in enumerate(chars)}

# Slide a window of `maxlen` characters over the text, `step` characters at a
# time; each window is an input and the character right after it is the target.
maxlen = 4
step = 1
sentences = []
next_chars = []
for i in range(0, len(text) - maxlen, step):
    sentences.append(text[i: i + maxlen])
    next_chars.append(text[i + maxlen])
print('nb sequences:', len(sentences))

# One-hot encode inputs (sequence, position, character) and targets
# (sequence, character).
# The builtin `bool` is used as dtype: the `np.bool` alias was deprecated in
# NumPy 1.20 and removed in 1.24, where it raises AttributeError.
print('Vectorization...')
x = np.zeros((len(sentences), maxlen, len(chars)), dtype=bool)
y = np.zeros((len(sentences), len(chars)), dtype=bool)
for i, sentence in enumerate(sentences):
    for t, char in enumerate(sentence):
        x[i, t, char_indices[char]] = 1
    y[i, char_indices[next_chars[i]]] = 1
 
# Simple LSTM: one 128-unit LSTM layer followed by a softmax over the
# character vocabulary.
print('Build model...')
model = Sequential([
    LSTM(128, input_shape=(maxlen, len(chars))),
    Dense(len(chars)),
    Activation('softmax'),
])

# NOTE(review): `lr` is the legacy keyword for the learning rate; newer Keras
# releases spell it `learning_rate` - confirm against the installed version.
optimizer = RMSprop(lr=0.01)
model.compile(optimizer=optimizer, loss='categorical_crossentropy')
 
# Helper that picks the next character
def sample(preds, temperature=1.0):
    """Draw one index from `preds` after temperature rescaling.

    The probabilities are converted to float64, their logarithm is divided
    by `temperature`, and the result is exponentiated and renormalized so it
    sums to one. A single multinomial draw is then taken from that
    distribution.

    Args:
        preds: array-like of probabilities, one entry per candidate.
        temperature: divisor applied to the log-probabilities.

    Returns:
        The index of the drawn entry (as returned by np.argmax).
    """
    scaled_logits = np.log(np.asarray(preds).astype('float64')) / temperature
    weights = np.exp(scaled_logits)
    distribution = weights / np.sum(weights)
    one_hot_draw = np.random.multinomial(1, distribution, 1)
    return np.argmax(one_hot_draw)
 
# Used to check how text generation progresses during training
def on_epoch_end(epoch, logs):
    """Print a 60-character text sample produced by the current model.

    Takes the (epoch, logs) pair of an epoch-end hook; `logs` is not used.
    Relies on the module-level `text`, `maxlen`, `chars`, `char_indices`,
    `indices_char`, `model` and `sample`.

    Args:
        epoch: epoch number, shown in the header line.
        logs: unused.
    """
    # Generate text once per call
    print()
    print('----- Generating text after Epoch: %d' % epoch)

    seed_start = random.randint(0, len(text) - maxlen - 1)
    for diversity in (0.2,):  # only diversity = 0.2 is used
        print('----- diversity:', diversity)

        # The seed is a random `maxlen`-character slice of the corpus.
        window = text[seed_start: seed_start + maxlen]
        print('----- Generating with seed: "' + window + '"')
        sys.stdout.write(window)

        emitted = 0
        while emitted < 60:
            # One-hot encode the current window as a batch of one.
            encoded = np.zeros((1, maxlen, len(chars)))
            for pos, symbol in enumerate(window):
                encoded[0, pos, char_indices[symbol]] = 1.

            probabilities = model.predict(encoded, verbose=0)[0]
            predicted = indices_char[sample(probabilities, diversity)]

            # Slide the window forward by the character just produced.
            window = window[1:] + predicted

            sys.stdout.write(predicted)
            sys.stdout.flush()
            emitted += 1
        print()
 
# Text-preview callback built from on_epoch_end. NOTE: it is not passed to
# model.fit() below, so no sample text is printed during training; add it to
# `callbacks` to get a preview after every epoch.
print_callback = LambdaCallback(on_epoch_end=on_epoch_end)

# Stop training once the loss has gone 5 epochs without improving.
# fit() below receives no validation data, so a "val_loss" metric is never
# produced and monitoring it would leave early stopping inert; the training
# "loss" is monitored instead. To monitor "val_loss", also pass
# validation_split / validation_data to fit().
early_stopping = EarlyStopping(monitor="loss", patience=5)

history = model.fit(x, y,
                    batch_size=128,
                    epochs=150,
                    callbacks=[early_stopping])

# Save a plot of the training loss (currently disabled)
# loss = history.history["loss"]
# epochs = range(1, len(loss) + 1)
# plt.plot(epochs, loss, "bo", label = "Training loss" )
# plt.title("Training loss")
# plt.legend()
# plt.savefig("loss.png")
# plt.close()

# Save the trained model
model.save('kakugen_model.h5')

In [None]:
# Load the model from 'kakugen_model.h5'; this rebinds the module-level `model`.
model = load_model('kakugen_model.h5')

In [None]:
# Aphorism generation
def _predict_next_char(seed, temperature):
    """Sample the character that the model predicts after `seed`.

    `seed` is one-hot encoded as a batch of one, run through the module-level
    `model`, and the resulting probabilities are passed to `sample` together
    with `temperature`.
    """
    x_pred = np.zeros((1, maxlen, len(chars)))
    for t, char in enumerate(seed):
        x_pred[0, t, char_indices[char]] = 1.

    preds = model.predict(x_pred, verbose=0)[0]
    return indices_char[sample(preds, temperature)]


def make_sentence(min_length=50, max_length=100, max_warmup=1000):
    """Generate one aphorism with the trained model.

    Relies on the module-level `text`, `maxlen`, `chars`, `char_indices`,
    `indices_char`, `model` and `sample`.

    Generation runs in two phases:
      1. Warm-up: starting from a random `maxlen`-character slice of the
         corpus, characters are sampled at temperature 2 until a sentence
         terminator appears. Everything produced so far is discarded, so the
         real output starts at a sentence boundary.
      2. Output: characters are sampled at temperature 0.2 and collected
         until the stop condition below is met.

    Args:
        min_length: a sampled terminator ends the text only once at least
            this many characters have been collected.
        max_length: once more than this many characters have been collected,
            the text is ended regardless of the sampled character.
        max_warmup: upper bound on warm-up characters, so the call cannot
            loop forever when the model never emits a terminator.

    Returns:
        The generated text, always ending with "。".

    Raises:
        ValueError: from random.randint when the corpus is not longer than
            `maxlen` characters.
    """
    terminators = ("。", "!")

    # Take the seed from anywhere in the corpus; deriving the bound from
    # len(text) keeps the slice exactly `maxlen` characters long whatever the
    # corpus size is.
    start_index = random.randint(0, len(text) - maxlen - 1)
    sentence = text[start_index: start_index + maxlen]

    # Warm-up: advance to the first terminator and discard what was generated.
    for _ in range(max_warmup):
        next_char = _predict_next_char(sentence, 2)
        sentence = sentence[1:] + next_char
        if next_char in terminators:
            break

    generated = ''
    while True:
        next_char = _predict_next_char(sentence, 0.2)

        # Stop when a terminator is sampled with at least `min_length` and
        # fewer than `max_length` characters collected, or as soon as the
        # text is longer than `max_length`. The sampled character is dropped
        # and "。" is appended in either case.
        if ((min_length <= len(generated) < max_length and next_char in terminators)
                or len(generated) > max_length):
            generated += "。"
            break

        generated += next_char
        sentence = sentence[1:] + next_char

    return generated

In [None]:
# Generate one aphorism and print it.
print(make_sentence())