# 確率モデル

## データの読み込み

In [1]:
import io

path = "data.txt"
with io.open(path, encoding="utf-8") as f:
    data = f.read().split()
    
data

['100名まで収容可能な会場。',
 'ドレスのご試着は、',
 'ご要望にお応えします。',
 '写真撮影を行います。',
 '宜しくお願い致します。',
 '私は犬を散歩する。']

## 前処理 (分かち書き)

In [2]:
from janome.tokenizer import Tokenizer

t = Tokenizer()
def wakati(text):
    w = t.tokenize(text, wakati=True)
    return " ".join(w)

data = [wakati(w) for w in data]

data

['100 名 まで 収容 可能 な 会場 。',
 'ドレス の ご 試着 は 、',
 'ご 要望 に お 応え し ます 。',
 '写真 撮影 を 行い ます 。',
 '宜しく お願い 致し ます 。',
 '私 は 犬 を 散歩 する 。']

KerasのTokenizerでベクトル化を行う前に, JanomeのTokenizerで分かち書きの前処理を行う. 

## ライブラリの読み込み

In [3]:
from tensorflow import keras
from keras.preprocessing import sequence
from keras import preprocessing
import keras
import numpy as np
from keras.preprocessing.sequence import pad_sequences
from keras.preprocessing.text import Tokenizer
from keras.utils import to_categorical
from keras.models import Sequential
from keras.layers import Dense, Dropout, Embedding, LSTM, Bidirectional

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
Using TensorFlow backend.


## 前処理 (ベクトル化)

In [4]:
tokenizer = Tokenizer()
tokenizer.fit_on_texts(data)
vocab = tokenizer.word_index
seqs = tokenizer.texts_to_sequences(data)

In [5]:
vocab

{'。': 1,
 'ます': 2,
 'ご': 3,
 'は': 4,
 'を': 5,
 '100': 6,
 '名': 7,
 'まで': 8,
 '収容': 9,
 '可能': 10,
 'な': 11,
 '会場': 12,
 'ドレス': 13,
 'の': 14,
 '試着': 15,
 '、': 16,
 '要望': 17,
 'に': 18,
 'お': 19,
 '応え': 20,
 'し': 21,
 '写真': 22,
 '撮影': 23,
 '行い': 24,
 '宜しく': 25,
 'お願い': 26,
 '致し': 27,
 '私': 28,
 '犬': 29,
 '散歩': 30,
 'する': 31}

In [6]:
seqs

[[6, 7, 8, 9, 10, 11, 12, 1],
 [13, 14, 3, 15, 4, 16],
 [3, 17, 18, 19, 20, 21, 2, 1],
 [22, 23, 5, 24, 2, 1],
 [25, 26, 27, 2, 1],
 [28, 4, 29, 5, 30, 31, 1]]

## シーケンスを同じ長さになるように詰める

In [7]:
def prepare_sentence(seq, maxlen):
    x = []
    y = []
    for i, w in enumerate(seq):
        x_padded = pad_sequences([seq[:i]],
                                 maxlen=maxlen - 1,
                                 padding='pre')[0]
        x.append(x_padded)
        y.append(w)
    return x, y

## xとyを準備する

In [8]:
maxlen = max([len(seq) for seq in seqs])
x = []
y = []
for seq in seqs:
    x_windows, y_windows = prepare_sentence(seq, maxlen)
    x += x_windows
    y += y_windows
x = np.array(x)
y = np.array(y) - 1
y = np.eye(len(vocab))[y]

## モデリング

In [9]:
model = Sequential()
model.add(Embedding(input_dim=len(vocab) + 1,  
                    output_dim=5,
                    input_length=maxlen - 1)) 
model.add(Bidirectional(LSTM(64)))
model.add(Dropout(0.5))
model.add(Dense(len(vocab), activation='softmax'))

model.compile('adam', 'binary_crossentropy', metrics=['accuracy'])

## 学習させる

In [10]:
model.fit(x, y, epochs=1000)

Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where

Epoch 1/1000
Epoch 2/1000
Epoch 3/1000
Epoch 4/1000
Epoch 5/1000
Epoch 6/1000
Epoch 7/1000
Epoch 8/1000
Epoch 9/1000
Epoch 10/1000
Epoch 11/1000
Epoch 12/1000
Epoch 13/1000
Epoch 14/1000
Epoch 15/1000
Epoch 16/1000
Epoch 17/1000
Epoch 18/1000
Epoch 19/1000
Epoch 20/1000
Epoch 21/1000
Epoch 22/1000
Epoch 23/1000
Epoch 24/1000
Epoch 25/1000
Epoch 26/1000
Epoch 27/1000
Epoch 28/1000
Epoch 29/1000
Epoch 30/1000
Epoch 31/1000
Epoch 32/1000
Epoch 33/1000
Epoch 34/1000
Epoch 35/1000
Epoch 36/1000
Epoch 37/1000
Epoch 38/1000
Epoch 39/1000
Epoch 40/1000
Epoch 41/1000
Epoch 42/1000
Epoch 43/1000
Epoch 44/1000
Epoch 45/1000
Epoch 46/1000
Epoch 47/1000
Epoch 48/1000
Epoch 49/1000
Epoch 50/1000
Epoch 51/1000
Epoch 52/1000
Epoch 53/1000
Epoch 54/1000
Epoch 55/1000
Epoch 56/1000
Epoch 57/1000
Epoch 58/1000
Epoch 59/1000
Epoch 60/1000
Epoch 61/1000
Epoch 62/1000
Epoch 63/1000
Epoch 64/1000
Epoch 65/1000
Epoc

Epoch 75/1000
Epoch 76/1000
Epoch 77/1000
Epoch 78/1000
Epoch 79/1000
Epoch 80/1000
Epoch 81/1000
Epoch 82/1000
Epoch 83/1000
Epoch 84/1000
Epoch 85/1000
Epoch 86/1000
Epoch 87/1000
Epoch 88/1000
Epoch 89/1000
Epoch 90/1000
Epoch 91/1000
Epoch 92/1000
Epoch 93/1000
Epoch 94/1000
Epoch 95/1000
Epoch 96/1000
Epoch 97/1000
Epoch 98/1000
Epoch 99/1000
Epoch 100/1000
Epoch 101/1000
Epoch 102/1000
Epoch 103/1000
Epoch 104/1000
Epoch 105/1000
Epoch 106/1000
Epoch 107/1000
Epoch 108/1000
Epoch 109/1000
Epoch 110/1000
Epoch 111/1000
Epoch 112/1000
Epoch 113/1000
Epoch 114/1000
Epoch 115/1000
Epoch 116/1000
Epoch 117/1000
Epoch 118/1000
Epoch 119/1000
Epoch 120/1000
Epoch 121/1000
Epoch 122/1000
Epoch 123/1000
Epoch 124/1000
Epoch 125/1000
Epoch 126/1000
Epoch 127/1000
Epoch 128/1000
Epoch 129/1000
Epoch 130/1000
Epoch 131/1000
Epoch 132/1000
Epoch 133/1000
Epoch 134/1000
Epoch 135/1000
Epoch 136/1000
Epoch 137/1000
Epoch 138/1000
Epoch 139/1000
Epoch 140/1000
Epoch 141/1000
Epoch 142/1000
Epoch

Epoch 153/1000
Epoch 154/1000
Epoch 155/1000
Epoch 156/1000
Epoch 157/1000
Epoch 158/1000
Epoch 159/1000
Epoch 160/1000
Epoch 161/1000
Epoch 162/1000
Epoch 163/1000
Epoch 164/1000
Epoch 165/1000
Epoch 166/1000
Epoch 167/1000
Epoch 168/1000
Epoch 169/1000
Epoch 170/1000
Epoch 171/1000
Epoch 172/1000
Epoch 173/1000
Epoch 174/1000
Epoch 175/1000
Epoch 176/1000
Epoch 177/1000
Epoch 178/1000
Epoch 179/1000
Epoch 180/1000
Epoch 181/1000
Epoch 182/1000
Epoch 183/1000
Epoch 184/1000
Epoch 185/1000
Epoch 186/1000
Epoch 187/1000
Epoch 188/1000
Epoch 189/1000
Epoch 190/1000
Epoch 191/1000
Epoch 192/1000
Epoch 193/1000
Epoch 194/1000
Epoch 195/1000
Epoch 196/1000
Epoch 197/1000
Epoch 198/1000
Epoch 199/1000
Epoch 200/1000
Epoch 201/1000
Epoch 202/1000
Epoch 203/1000
Epoch 204/1000
Epoch 205/1000
Epoch 206/1000
Epoch 207/1000
Epoch 208/1000
Epoch 209/1000
Epoch 210/1000
Epoch 211/1000
Epoch 212/1000
Epoch 213/1000
Epoch 214/1000
Epoch 215/1000
Epoch 216/1000
Epoch 217/1000
Epoch 218/1000
Epoch 219/

Epoch 231/1000
Epoch 232/1000
Epoch 233/1000
Epoch 234/1000
Epoch 235/1000
Epoch 236/1000
Epoch 237/1000
Epoch 238/1000
Epoch 239/1000
Epoch 240/1000
Epoch 241/1000
Epoch 242/1000
Epoch 243/1000
Epoch 244/1000
Epoch 245/1000
Epoch 246/1000
Epoch 247/1000
Epoch 248/1000
Epoch 249/1000
Epoch 250/1000
Epoch 251/1000
Epoch 252/1000
Epoch 253/1000
Epoch 254/1000
Epoch 255/1000
Epoch 256/1000
Epoch 257/1000
Epoch 258/1000
Epoch 259/1000
Epoch 260/1000
Epoch 261/1000
Epoch 262/1000
Epoch 263/1000
Epoch 264/1000
Epoch 265/1000
Epoch 266/1000
Epoch 267/1000
Epoch 268/1000
Epoch 269/1000
Epoch 270/1000
Epoch 271/1000
Epoch 272/1000
Epoch 273/1000
Epoch 274/1000
Epoch 275/1000
Epoch 276/1000
Epoch 277/1000
Epoch 278/1000
Epoch 279/1000
Epoch 280/1000
Epoch 281/1000
Epoch 282/1000
Epoch 283/1000
Epoch 284/1000
Epoch 285/1000
Epoch 286/1000
Epoch 287/1000
Epoch 288/1000
Epoch 289/1000
Epoch 290/1000
Epoch 291/1000
Epoch 292/1000
Epoch 293/1000
Epoch 294/1000
Epoch 295/1000
Epoch 296/1000
Epoch 297/

Epoch 309/1000
Epoch 310/1000
Epoch 311/1000
Epoch 312/1000
Epoch 313/1000
Epoch 314/1000
Epoch 315/1000
Epoch 316/1000
Epoch 317/1000
Epoch 318/1000
Epoch 319/1000
Epoch 320/1000
Epoch 321/1000
Epoch 322/1000
Epoch 323/1000
Epoch 324/1000
Epoch 325/1000
Epoch 326/1000
Epoch 327/1000
Epoch 328/1000
Epoch 329/1000
Epoch 330/1000
Epoch 331/1000
Epoch 332/1000
Epoch 333/1000
Epoch 334/1000
Epoch 335/1000
Epoch 336/1000
Epoch 337/1000
Epoch 338/1000
Epoch 339/1000
Epoch 340/1000
Epoch 341/1000
Epoch 342/1000
Epoch 343/1000
Epoch 344/1000
Epoch 345/1000
Epoch 346/1000
Epoch 347/1000
Epoch 348/1000
Epoch 349/1000
Epoch 350/1000
Epoch 351/1000
Epoch 352/1000
Epoch 353/1000
Epoch 354/1000
Epoch 355/1000
Epoch 356/1000
Epoch 357/1000
Epoch 358/1000
Epoch 359/1000
Epoch 360/1000
Epoch 361/1000
Epoch 362/1000
Epoch 363/1000
Epoch 364/1000
Epoch 365/1000
Epoch 366/1000
Epoch 367/1000
Epoch 368/1000
Epoch 369/1000
Epoch 370/1000
Epoch 371/1000
Epoch 372/1000
Epoch 373/1000
Epoch 374/1000
Epoch 375/

Epoch 388/1000
Epoch 389/1000
Epoch 390/1000
Epoch 391/1000
Epoch 392/1000
Epoch 393/1000
Epoch 394/1000
Epoch 395/1000
Epoch 396/1000
Epoch 397/1000
Epoch 398/1000
Epoch 399/1000
Epoch 400/1000
Epoch 401/1000
Epoch 402/1000
Epoch 403/1000
Epoch 404/1000
Epoch 405/1000
Epoch 406/1000
Epoch 407/1000
Epoch 408/1000
Epoch 409/1000
Epoch 410/1000
Epoch 411/1000
Epoch 412/1000
Epoch 413/1000
Epoch 414/1000
Epoch 415/1000
Epoch 416/1000
Epoch 417/1000
Epoch 418/1000
Epoch 419/1000
Epoch 420/1000
Epoch 421/1000
Epoch 422/1000
Epoch 423/1000
Epoch 424/1000
Epoch 425/1000
Epoch 426/1000
Epoch 427/1000
Epoch 428/1000
Epoch 429/1000
Epoch 430/1000
Epoch 431/1000
Epoch 432/1000
Epoch 433/1000
Epoch 434/1000
Epoch 435/1000
Epoch 436/1000
Epoch 437/1000
Epoch 438/1000
Epoch 439/1000
Epoch 440/1000
Epoch 441/1000
Epoch 442/1000
Epoch 443/1000
Epoch 444/1000
Epoch 445/1000
Epoch 446/1000
Epoch 447/1000
Epoch 448/1000
Epoch 449/1000
Epoch 450/1000
Epoch 451/1000
Epoch 452/1000
Epoch 453/1000
Epoch 454/

Epoch 466/1000
Epoch 467/1000
Epoch 468/1000
Epoch 469/1000
Epoch 470/1000
Epoch 471/1000
Epoch 472/1000
Epoch 473/1000
Epoch 474/1000
Epoch 475/1000
Epoch 476/1000
Epoch 477/1000
Epoch 478/1000
Epoch 479/1000
Epoch 480/1000
Epoch 481/1000
Epoch 482/1000
Epoch 483/1000
Epoch 484/1000
Epoch 485/1000
Epoch 486/1000
Epoch 487/1000
Epoch 488/1000
Epoch 489/1000
Epoch 490/1000
Epoch 491/1000
Epoch 492/1000
Epoch 493/1000
Epoch 494/1000
Epoch 495/1000
Epoch 496/1000
Epoch 497/1000
Epoch 498/1000
Epoch 499/1000
Epoch 500/1000
Epoch 501/1000
Epoch 502/1000
Epoch 503/1000
Epoch 504/1000
Epoch 505/1000
Epoch 506/1000
Epoch 507/1000
Epoch 508/1000
Epoch 509/1000
Epoch 510/1000
Epoch 511/1000
Epoch 512/1000
Epoch 513/1000
Epoch 514/1000
Epoch 515/1000
Epoch 516/1000
Epoch 517/1000
Epoch 518/1000
Epoch 519/1000
Epoch 520/1000
Epoch 521/1000
Epoch 522/1000
Epoch 523/1000
Epoch 524/1000
Epoch 525/1000
Epoch 526/1000
Epoch 527/1000
Epoch 528/1000
Epoch 529/1000
Epoch 530/1000
Epoch 531/1000
Epoch 532/

Epoch 544/1000
Epoch 545/1000
Epoch 546/1000
Epoch 547/1000
Epoch 548/1000
Epoch 549/1000
Epoch 550/1000
Epoch 551/1000
Epoch 552/1000
Epoch 553/1000
Epoch 554/1000
Epoch 555/1000
Epoch 556/1000
Epoch 557/1000
Epoch 558/1000
Epoch 559/1000
Epoch 560/1000
Epoch 561/1000
Epoch 562/1000
Epoch 563/1000
Epoch 564/1000
Epoch 565/1000
Epoch 566/1000
Epoch 567/1000
Epoch 568/1000
Epoch 569/1000
Epoch 570/1000
Epoch 571/1000
Epoch 572/1000
Epoch 573/1000
Epoch 574/1000
Epoch 575/1000
Epoch 576/1000
Epoch 577/1000
Epoch 578/1000
Epoch 579/1000
Epoch 580/1000
Epoch 581/1000
Epoch 582/1000
Epoch 583/1000
Epoch 584/1000
Epoch 585/1000
Epoch 586/1000
Epoch 587/1000
Epoch 588/1000
Epoch 589/1000
Epoch 590/1000
Epoch 591/1000
Epoch 592/1000
Epoch 593/1000
Epoch 594/1000
Epoch 595/1000
Epoch 596/1000
Epoch 597/1000
Epoch 598/1000
Epoch 599/1000
Epoch 600/1000
Epoch 601/1000
Epoch 602/1000
Epoch 603/1000
Epoch 604/1000
Epoch 605/1000
Epoch 606/1000
Epoch 607/1000
Epoch 608/1000
Epoch 609/1000
Epoch 610/

Epoch 623/1000
Epoch 624/1000
Epoch 625/1000
Epoch 626/1000
Epoch 627/1000
Epoch 628/1000
Epoch 629/1000
Epoch 630/1000
Epoch 631/1000
Epoch 632/1000
Epoch 633/1000
Epoch 634/1000
Epoch 635/1000
Epoch 636/1000
Epoch 637/1000
Epoch 638/1000
Epoch 639/1000
Epoch 640/1000
Epoch 641/1000
Epoch 642/1000
Epoch 643/1000
Epoch 644/1000
Epoch 645/1000
Epoch 646/1000
Epoch 647/1000
Epoch 648/1000
Epoch 649/1000
Epoch 650/1000
Epoch 651/1000
Epoch 652/1000
Epoch 653/1000
Epoch 654/1000
Epoch 655/1000
Epoch 656/1000
Epoch 657/1000
Epoch 658/1000
Epoch 659/1000
Epoch 660/1000
Epoch 661/1000
Epoch 662/1000
Epoch 663/1000
Epoch 664/1000
Epoch 665/1000
Epoch 666/1000
Epoch 667/1000
Epoch 668/1000
Epoch 669/1000
Epoch 670/1000
Epoch 671/1000
Epoch 672/1000
Epoch 673/1000
Epoch 674/1000
Epoch 675/1000
Epoch 676/1000
Epoch 677/1000
Epoch 678/1000
Epoch 679/1000
Epoch 680/1000
Epoch 681/1000
Epoch 682/1000
Epoch 683/1000
Epoch 684/1000
Epoch 685/1000
Epoch 686/1000
Epoch 687/1000
Epoch 688/1000
Epoch 689/

Epoch 702/1000
Epoch 703/1000
Epoch 704/1000
Epoch 705/1000
Epoch 706/1000
Epoch 707/1000
Epoch 708/1000
Epoch 709/1000
Epoch 710/1000
Epoch 711/1000
Epoch 712/1000
Epoch 713/1000
Epoch 714/1000
Epoch 715/1000
Epoch 716/1000
Epoch 717/1000
Epoch 718/1000
Epoch 719/1000
Epoch 720/1000
Epoch 721/1000
Epoch 722/1000
Epoch 723/1000
Epoch 724/1000
Epoch 725/1000
Epoch 726/1000
Epoch 727/1000
Epoch 728/1000
Epoch 729/1000
Epoch 730/1000
Epoch 731/1000
Epoch 732/1000
Epoch 733/1000
Epoch 734/1000
Epoch 735/1000
Epoch 736/1000
Epoch 737/1000
Epoch 738/1000
Epoch 739/1000
Epoch 740/1000
Epoch 741/1000
Epoch 742/1000
Epoch 743/1000
Epoch 744/1000
Epoch 745/1000
Epoch 746/1000
Epoch 747/1000
Epoch 748/1000
Epoch 749/1000
Epoch 750/1000
Epoch 751/1000
Epoch 752/1000
Epoch 753/1000
Epoch 754/1000
Epoch 755/1000
Epoch 756/1000
Epoch 757/1000
Epoch 758/1000
Epoch 759/1000
Epoch 760/1000
Epoch 761/1000
Epoch 762/1000
Epoch 763/1000
Epoch 764/1000
Epoch 765/1000
Epoch 766/1000
Epoch 767/1000
Epoch 768/

Epoch 780/1000
Epoch 781/1000
Epoch 782/1000
Epoch 783/1000
Epoch 784/1000
Epoch 785/1000
Epoch 786/1000
Epoch 787/1000
Epoch 788/1000
Epoch 789/1000
Epoch 790/1000
Epoch 791/1000
Epoch 792/1000
Epoch 793/1000
Epoch 794/1000
Epoch 795/1000
Epoch 796/1000
Epoch 797/1000
Epoch 798/1000
Epoch 799/1000
Epoch 800/1000
Epoch 801/1000
Epoch 802/1000
Epoch 803/1000
Epoch 804/1000
Epoch 805/1000
Epoch 806/1000
Epoch 807/1000
Epoch 808/1000
Epoch 809/1000
Epoch 810/1000
Epoch 811/1000
Epoch 812/1000
Epoch 813/1000
Epoch 814/1000
Epoch 815/1000
Epoch 816/1000
Epoch 817/1000
Epoch 818/1000
Epoch 819/1000
Epoch 820/1000
Epoch 821/1000
Epoch 822/1000
Epoch 823/1000
Epoch 824/1000
Epoch 825/1000
Epoch 826/1000
Epoch 827/1000
Epoch 828/1000
Epoch 829/1000
Epoch 830/1000
Epoch 831/1000
Epoch 832/1000
Epoch 833/1000
Epoch 834/1000
Epoch 835/1000
Epoch 836/1000
Epoch 837/1000
Epoch 838/1000
Epoch 839/1000
Epoch 840/1000
Epoch 841/1000
Epoch 842/1000
Epoch 843/1000
Epoch 844/1000
Epoch 845/1000
Epoch 846/

Epoch 859/1000
Epoch 860/1000
Epoch 861/1000
Epoch 862/1000
Epoch 863/1000
Epoch 864/1000
Epoch 865/1000
Epoch 866/1000
Epoch 867/1000
Epoch 868/1000
Epoch 869/1000
Epoch 870/1000
Epoch 871/1000
Epoch 872/1000
Epoch 873/1000
Epoch 874/1000
Epoch 875/1000
Epoch 876/1000
Epoch 877/1000
Epoch 878/1000
Epoch 879/1000
Epoch 880/1000
Epoch 881/1000
Epoch 882/1000
Epoch 883/1000
Epoch 884/1000
Epoch 885/1000
Epoch 886/1000
Epoch 887/1000
Epoch 888/1000
Epoch 889/1000
Epoch 890/1000
Epoch 891/1000
Epoch 892/1000
Epoch 893/1000
Epoch 894/1000
Epoch 895/1000
Epoch 896/1000
Epoch 897/1000
Epoch 898/1000
Epoch 899/1000
Epoch 900/1000
Epoch 901/1000
Epoch 902/1000
Epoch 903/1000
Epoch 904/1000
Epoch 905/1000
Epoch 906/1000
Epoch 907/1000
Epoch 908/1000
Epoch 909/1000
Epoch 910/1000
Epoch 911/1000
Epoch 912/1000
Epoch 913/1000
Epoch 914/1000
Epoch 915/1000
Epoch 916/1000
Epoch 917/1000
Epoch 918/1000
Epoch 919/1000
Epoch 920/1000
Epoch 921/1000
Epoch 922/1000
Epoch 923/1000
Epoch 924/1000
Epoch 925/

Epoch 937/1000
Epoch 938/1000
Epoch 939/1000
Epoch 940/1000
Epoch 941/1000
Epoch 942/1000
Epoch 943/1000
Epoch 944/1000
Epoch 945/1000
Epoch 946/1000
Epoch 947/1000
Epoch 948/1000
Epoch 949/1000
Epoch 950/1000
Epoch 951/1000
Epoch 952/1000
Epoch 953/1000
Epoch 954/1000
Epoch 955/1000
Epoch 956/1000
Epoch 957/1000
Epoch 958/1000
Epoch 959/1000
Epoch 960/1000
Epoch 961/1000
Epoch 962/1000
Epoch 963/1000
Epoch 964/1000
Epoch 965/1000
Epoch 966/1000
Epoch 967/1000
Epoch 968/1000
Epoch 969/1000
Epoch 970/1000
Epoch 971/1000
Epoch 972/1000
Epoch 973/1000
Epoch 974/1000
Epoch 975/1000
Epoch 976/1000
Epoch 977/1000
Epoch 978/1000
Epoch 979/1000
Epoch 980/1000
Epoch 981/1000
Epoch 982/1000
Epoch 983/1000
Epoch 984/1000
Epoch 985/1000
Epoch 986/1000
Epoch 987/1000
Epoch 988/1000
Epoch 989/1000
Epoch 990/1000
Epoch 991/1000
Epoch 992/1000
Epoch 993/1000
Epoch 994/1000
Epoch 995/1000
Epoch 996/1000
Epoch 997/1000
Epoch 998/1000
Epoch 999/1000
Epoch 1000/1000


<keras.callbacks.callbacks.History at 0x653a87f50>

## 発生確率を計算する

In [11]:
input_sentence = "私は犬に散歩する。"
sentence = t.tokenize(input_sentence, wakati=True)
tok = tokenizer.texts_to_sequences([sentence])[0]
x_test, y_test = prepare_sentence(tok, maxlen)
x_test = np.array(x_test)
y_test = np.array(y_test) - 1 
p_pred = model.predict(x_test)  
vocab_inv = {v: k for k, v in vocab.items()}

・ input_sentenceに誤字脱字, 衍字をチェックしたい文章を入力する. <br>
・ ある単語の次に来る単語が正しいか, 誤っているかを確率的に判断する.

## 算出した確率をもとに正常か, 異常かを判定する

In [13]:
log_p_sentence = 0
err = []
words = []
for i, prob in enumerate(p_pred):
    word = vocab_inv[y_test[i]+1]
    words.append(word)
    history = ' '.join([vocab_inv[w] for w in x_test[i, :] if w != 0])
    prob_word = prob[y_test[i]]
    log_p_sentence += np.log(prob_word)
    
    if prob_word < 0.01:
        err.append(word)

    print('P(w={}|h={})={}'.format(word, history, prob_word))
print('Prob. sentence: {}'.format(np.exp(log_p_sentence)))

# 「誤字脱字箇所」と「誤字脱字を含む文」を出力.
if len(err) != 0:
    print("NG : " + str(err))
    print(input_sentence)
    
# 訂正箇所なしの場合
else:
    print("OK")

P(w=私|h=)=0.16551995277404785
P(w=は|h=私)=0.9951953291893005
P(w=犬|h=私 は)=0.9960660338401794
P(w=に|h=私 は 犬)=2.611126546980813e-06
P(w=散歩|h=私 は 犬 に)=0.9933903217315674
P(w=する|h=私 は 犬 に 散歩)=0.9988625049591064
P(w=。|h=私 は 犬 に 散歩 する)=0.9998970031738281
Prob. sentence: 4.250651700854218e-07
NG : ['に']
私は犬に散歩する。


&nbsp;

# 言語モデル

## データの読み込み

In [14]:
from janome.tokenizer import Tokenizer

path = "data.txt"
file = open(path, "r", encoding="utf-8")
text = file.read()

text

'100名まで収容可能な会場。\nドレスのご試着は、\nご要望にお応えします。\n写真撮影を行います。\n宜しくお願い致します。\n私は犬を散歩する。'

## 前処理 (分かち書き)

In [15]:
from janome.tokenizer import Tokenizer

t = Tokenizer()
def wakati(text):
    w = t.tokenize(text, wakati=True)
    return " ".join(w)

data = [wakati(w) for w in text]

data

['1',
 '0',
 '0',
 '名',
 'ま',
 'で',
 '収',
 '容',
 '可',
 '能',
 'な',
 '会',
 '場',
 '。',
 '',
 'ド',
 'レ',
 'ス',
 'の',
 'ご',
 '試',
 '着',
 'は',
 '、',
 '',
 'ご',
 '要',
 '望',
 'に',
 'お',
 '応',
 'え',
 'し',
 'ま',
 'す',
 '。',
 '',
 '写',
 '真',
 '撮',
 '影',
 'を',
 '行',
 'い',
 'ま',
 'す',
 '。',
 '',
 '宜',
 'し',
 'く',
 'お',
 '願',
 'い',
 '致',
 'し',
 'ま',
 'す',
 '。',
 '',
 '私',
 'は',
 '犬',
 'を',
 '散',
 '歩',
 'す',
 'る',
 '。']

## モデルから文字列を生成

In [16]:
def generate_seq(model, tokenizer, seed_text, n_words):
    in_text, result = seed_text, seed_text
    for _ in range(n_words):
        encoded = tokenizer.texts_to_sequences([in_text])[0]
        encoded = np.array(encoded)
        # 学習データから文字列を予測。
        yhat = model.predict_classes(encoded, verbose=0)
        out_word = ''
        for word, index in tokenizer.word_index.items():
            if index == yhat:
                out_word = word
                break
        in_text, result = out_word, result + ' ' + out_word
    return result

## 前処理 (ベクトル化)

In [17]:
from keras.preprocessing.text import Tokenizer

tokenizer = Tokenizer()
tokenizer.fit_on_texts([data])
encoded = tokenizer.texts_to_sequences([data])[0]

## 語彙のサイズの決定

In [18]:
vocab_size = len(tokenizer.word_index) + 1
print('Vocabulary Size: %d' % vocab_size)

sequences = list()
for i in range(1, len(encoded)):
    sequence = encoded[i-1:i+1]
    sequences.append(sequence)
print('Total Sequences: %d' % len(sequences))

Vocabulary Size: 48
Total Sequences: 68


## Xとy要素に分割する

In [19]:
sequences = np.array(sequences)
X, y = sequences[:,0],sequences[:,1]

## One-hotエンコーディング

In [20]:
y = to_categorical(y, num_classes=vocab_size)

## モデリング

In [21]:
model = Sequential()
model.add(Embedding(vocab_size, 10, input_length=1))
model.add(Bidirectional(LSTM(64)))
model.add(Dropout(0.5))
model.add(Dense(vocab_size, activation='softmax'))
print(model.summary())

model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_2 (Embedding)      (None, 1, 10)             480       
_________________________________________________________________
bidirectional_2 (Bidirection (None, 128)               38400     
_________________________________________________________________
dropout_2 (Dropout)          (None, 128)               0         
_________________________________________________________________
dense_2 (Dense)              (None, 48)                6192      
Total params: 45,072
Trainable params: 45,072
Non-trainable params: 0
_________________________________________________________________
None


## 学習させる

In [22]:
model.fit(X, y, epochs=500, verbose=2)

Epoch 1/500
 - 1s - loss: 3.8720 - accuracy: 0.0147
Epoch 2/500
 - 0s - loss: 3.8692 - accuracy: 0.0441
Epoch 3/500
 - 0s - loss: 3.8677 - accuracy: 0.0735
Epoch 4/500
 - 0s - loss: 3.8663 - accuracy: 0.1471
Epoch 5/500
 - 0s - loss: 3.8639 - accuracy: 0.1324
Epoch 6/500
 - 0s - loss: 3.8630 - accuracy: 0.1176
Epoch 7/500
 - 0s - loss: 3.8612 - accuracy: 0.1176
Epoch 8/500
 - 0s - loss: 3.8598 - accuracy: 0.1324
Epoch 9/500
 - 0s - loss: 3.8579 - accuracy: 0.1324
Epoch 10/500
 - 0s - loss: 3.8565 - accuracy: 0.1471
Epoch 11/500
 - 0s - loss: 3.8546 - accuracy: 0.1618
Epoch 12/500
 - 0s - loss: 3.8524 - accuracy: 0.1765
Epoch 13/500
 - 0s - loss: 3.8506 - accuracy: 0.1471
Epoch 14/500
 - 0s - loss: 3.8481 - accuracy: 0.1765
Epoch 15/500
 - 0s - loss: 3.8478 - accuracy: 0.1765
Epoch 16/500
 - 0s - loss: 3.8444 - accuracy: 0.1324
Epoch 17/500
 - 0s - loss: 3.8417 - accuracy: 0.1471
Epoch 18/500
 - 0s - loss: 3.8386 - accuracy: 0.1765
Epoch 19/500
 - 0s - loss: 3.8376 - accuracy: 0.1618
Ep

Epoch 155/500
 - 0s - loss: 1.4694 - accuracy: 0.6912
Epoch 156/500
 - 0s - loss: 1.4584 - accuracy: 0.6765
Epoch 157/500
 - 0s - loss: 1.4028 - accuracy: 0.7206
Epoch 158/500
 - 0s - loss: 1.3829 - accuracy: 0.6324
Epoch 159/500
 - 0s - loss: 1.3722 - accuracy: 0.7353
Epoch 160/500
 - 0s - loss: 1.3799 - accuracy: 0.7647
Epoch 161/500
 - 0s - loss: 1.3657 - accuracy: 0.7500
Epoch 162/500
 - 0s - loss: 1.4185 - accuracy: 0.6471
Epoch 163/500
 - 0s - loss: 1.3523 - accuracy: 0.7647
Epoch 164/500
 - 0s - loss: 1.3216 - accuracy: 0.7353
Epoch 165/500
 - 0s - loss: 1.3009 - accuracy: 0.7647
Epoch 166/500
 - 0s - loss: 1.3236 - accuracy: 0.7353
Epoch 167/500
 - 0s - loss: 1.3263 - accuracy: 0.7647
Epoch 168/500
 - 0s - loss: 1.2890 - accuracy: 0.7647
Epoch 169/500
 - 0s - loss: 1.2867 - accuracy: 0.7500
Epoch 170/500
 - 0s - loss: 1.2588 - accuracy: 0.7794
Epoch 171/500
 - 0s - loss: 1.2485 - accuracy: 0.7500
Epoch 172/500
 - 0s - loss: 1.2791 - accuracy: 0.7500
Epoch 173/500
 - 0s - loss: 

Epoch 307/500
 - 0s - loss: 0.5191 - accuracy: 0.8235
Epoch 308/500
 - 0s - loss: 0.5321 - accuracy: 0.7794
Epoch 309/500
 - 0s - loss: 0.5294 - accuracy: 0.8088
Epoch 310/500
 - 0s - loss: 0.4974 - accuracy: 0.7941
Epoch 311/500
 - 0s - loss: 0.4674 - accuracy: 0.8235
Epoch 312/500
 - 0s - loss: 0.5001 - accuracy: 0.8088
Epoch 313/500
 - 0s - loss: 0.5045 - accuracy: 0.8235
Epoch 314/500
 - 0s - loss: 0.4827 - accuracy: 0.7794
Epoch 315/500
 - 0s - loss: 0.4760 - accuracy: 0.8235
Epoch 316/500
 - 0s - loss: 0.4679 - accuracy: 0.8235
Epoch 317/500
 - 0s - loss: 0.4720 - accuracy: 0.8382
Epoch 318/500
 - 0s - loss: 0.5049 - accuracy: 0.8088
Epoch 319/500
 - 0s - loss: 0.4738 - accuracy: 0.8235
Epoch 320/500
 - 0s - loss: 0.5208 - accuracy: 0.7794
Epoch 321/500
 - 0s - loss: 0.5239 - accuracy: 0.7794
Epoch 322/500
 - 0s - loss: 0.4395 - accuracy: 0.8529
Epoch 323/500
 - 0s - loss: 0.4742 - accuracy: 0.8235
Epoch 324/500
 - 0s - loss: 0.4881 - accuracy: 0.7941
Epoch 325/500
 - 0s - loss: 

Epoch 459/500
 - 0s - loss: 0.4220 - accuracy: 0.7500
Epoch 460/500
 - 0s - loss: 0.3594 - accuracy: 0.8529
Epoch 461/500
 - 0s - loss: 0.4042 - accuracy: 0.7941
Epoch 462/500
 - 0s - loss: 0.3776 - accuracy: 0.8382
Epoch 463/500
 - 0s - loss: 0.3931 - accuracy: 0.7941
Epoch 464/500
 - 0s - loss: 0.4318 - accuracy: 0.7794
Epoch 465/500
 - 0s - loss: 0.4031 - accuracy: 0.8235
Epoch 466/500
 - 0s - loss: 0.3604 - accuracy: 0.8235
Epoch 467/500
 - 0s - loss: 0.3557 - accuracy: 0.8824
Epoch 468/500
 - 0s - loss: 0.4209 - accuracy: 0.7794
Epoch 469/500
 - 0s - loss: 0.4152 - accuracy: 0.8088
Epoch 470/500
 - 0s - loss: 0.3995 - accuracy: 0.8235
Epoch 471/500
 - 0s - loss: 0.4136 - accuracy: 0.7794
Epoch 472/500
 - 0s - loss: 0.4211 - accuracy: 0.7794
Epoch 473/500
 - 0s - loss: 0.4156 - accuracy: 0.8235
Epoch 474/500
 - 0s - loss: 0.3790 - accuracy: 0.8235
Epoch 475/500
 - 0s - loss: 0.3955 - accuracy: 0.8088
Epoch 476/500
 - 0s - loss: 0.4011 - accuracy: 0.7941
Epoch 477/500
 - 0s - loss: 

<keras.callbacks.callbacks.History at 0x6560448d0>

## 生成開始地点を決定

In [23]:
typo_words = []
typo_index = []
if len(words) != len(sentence):
    for i in range(len(sentence)):
        if not str(sentence[i]) in words:
            typo_words.append(str(sentence[i]))
            typo_index.append(sentence.index(typo_words[0]))
            
else:
    for i in range(len(err)):
        for j in range(len(sentence)):
            if str(err[i]) == str(sentence[j]):
                typo_words.append(str(err[i]))
                typo_index.append(sentence.index(typo_words[0]))

search_index = []
search_word = []
for i in range(len(typo_index)):
    search_index.append(int(typo_index[i])-1)
    search_word.append(sentence[search_index[i]])

## 評価

In [24]:
# 評価
for i in range(len(search_word)):
    print("prediction: " + generate_seq(model, tokenizer, str(search_word[i]) , 1))

prediction: 犬 を
