## 改良されたCBOWモデル
前章で作成したシンプルなCBOWには、大きなデータを学習させると時間がかかりすぎるという問題があった。そこで改良版のCBOWモデルでは、EmbeddingレイヤとNegativeSamplingLossレイヤを導入し、さらに任意のウィンドウサイズを指定できるようにした。

In [1]:
import sys
sys.path.append("../../deep-learning-from-scratch-2")
import numpy as np
from common.layers import Embedding
from ch04.negative_sampling_layer import NegativeSamplingLoss

In [2]:
class CBOW:
    """CBOW model assembled from Embedding layers and a NegativeSamplingLoss.

    One Embedding layer is created per context position (``2 * window_size``
    in total) and every one of them is constructed with the same input
    weight array.  The averaged embedding output is handed to the
    negative-sampling loss layer.

    Attributes set by the constructor:
        in_layers: list of the Embedding layers, one per context position.
        ns_loss: the NegativeSamplingLoss layer.
        params, grads: flat lists collecting ``params`` / ``grads`` of the
            input layers followed by those of the loss layer.
        word_vecs: the input weight array ``W_in``.
    """

    def __init__(self, vocab_size, hidden_size, window_size, corpus):
        """Create the weights and layers.

        Args:
            vocab_size: number of rows of both weight arrays.
            hidden_size: number of columns of both weight arrays.
            window_size: context width on each side of the target;
                ``2 * window_size`` Embedding layers are created.
            corpus: forwarded unchanged to NegativeSamplingLoss.
        """
        # W_in is drawn before W_out so the global NumPy random stream is
        # consumed in the same order on every construction.
        W_in = 0.01 * np.random.randn(vocab_size, hidden_size).astype("f")
        W_out = 0.01 * np.random.randn(vocab_size, hidden_size).astype("f")

        # Every context position gets its own layer object, all built from
        # the very same W_in array.
        n_context = 2 * window_size
        self.in_layers = [Embedding(W_in) for _ in range(n_context)]
        self.ns_loss = NegativeSamplingLoss(
            W_out, corpus, power=0.75, sample_size=5
        )

        # Flatten parameters and gradients: input layers first, loss last.
        self.params = []
        self.grads = []
        for component in [*self.in_layers, self.ns_loss]:
            self.params.extend(component.params)
            self.grads.extend(component.grads)

        # Expose the input weights as the learned word vectors.
        self.word_vecs = W_in

    def forward(self, contexts, target):
        """Return the loss for one batch.

        Args:
            contexts: indexed as ``contexts[:, i]``; column ``i`` is fed to
                the i-th input layer.
            target: passed through to the loss layer.

        Returns:
            Whatever ``self.ns_loss.forward`` returns for the averaged
            input-layer outputs and ``target``.
        """
        # Start from the integer 0 so the first addition produces a new
        # object instead of modifying a layer's output in place.
        hidden = 0
        for position, embedding in enumerate(self.in_layers):
            hidden += embedding.forward(contexts[:, position])
        # Turn the sum over context positions into a mean.
        hidden *= 1 / len(self.in_layers)
        return self.ns_loss.forward(hidden, target)

    def backward(self, dout=1):
        """Propagate ``dout`` back through the loss and all input layers.

        The gradient returned by the loss layer is scaled in place by
        ``1 / len(self.in_layers)`` (the averaging done in ``forward``) and
        the same scaled gradient is passed to each input layer.

        Returns:
            None.
        """
        grad_hidden = self.ns_loss.backward(dout)
        grad_hidden *= 1 / len(self.in_layers)
        for embedding in self.in_layers:
            embedding.backward(grad_hidden)

In [6]:
#CBOWでの学習
from common import config
import pickle
from common.trainer import Trainer
from common.optimizer import Adam
from common.util import create_contexts_target, to_cpu, to_gpu
from dataset import ptb

In [7]:
# Train the CBOW model on the PTB training split.
# Hyperparameters for this run; window_size is used both to build the
# (contexts, target) pairs and to size the model's input layers.
window_size = 5
hidden_size = 100
batch_size = 100
max_epoch = 10

# Load the training split; the vocabulary size is taken from word_to_id.
corpus, word_to_id, id_to_word = ptb.load_data("train")
vocab_size = len(word_to_id)

# Build the training pairs from the corpus for the chosen window size.
contexts, target = create_contexts_target(corpus, window_size)

# The corpus is also handed to the model, which forwards it to its
# NegativeSamplingLoss layer.
model = CBOW(vocab_size, hidden_size, window_size, corpus)
optimizer = Adam()
trainer = Trainer(model, optimizer)

# NOTE(review): the captured log shows 9295 iterations per epoch at roughly
# 4 s per 20 iterations, so a full run is long; the recorded run was stopped
# with KeyboardInterrupt during epoch 1.
trainer.fit(contexts, target, max_epoch, batch_size)
trainer.plot()

| epoch 1 |  iter 1 / 9295 | time 0[s] | loss 4.16
| epoch 1 |  iter 21 / 9295 | time 4[s] | loss 4.16
| epoch 1 |  iter 41 / 9295 | time 9[s] | loss 4.15
| epoch 1 |  iter 61 / 9295 | time 13[s] | loss 4.12
| epoch 1 |  iter 81 / 9295 | time 17[s] | loss 4.05
| epoch 1 |  iter 101 / 9295 | time 22[s] | loss 3.93
| epoch 1 |  iter 121 / 9295 | time 27[s] | loss 3.78
| epoch 1 |  iter 141 / 9295 | time 31[s] | loss 3.63
| epoch 1 |  iter 161 / 9295 | time 35[s] | loss 3.48
| epoch 1 |  iter 181 / 9295 | time 40[s] | loss 3.35
| epoch 1 |  iter 201 / 9295 | time 44[s] | loss 3.25
| epoch 1 |  iter 221 / 9295 | time 49[s] | loss 3.16
| epoch 1 |  iter 241 / 9295 | time 53[s] | loss 3.08
| epoch 1 |  iter 261 / 9295 | time 57[s] | loss 3.02
| epoch 1 |  iter 281 / 9295 | time 62[s] | loss 2.97
| epoch 1 |  iter 301 / 9295 | time 66[s] | loss 2.92
| epoch 1 |  iter 321 / 9295 | time 70[s] | loss 2.88
| epoch 1 |  iter 341 / 9295 | time 75[s] | loss 2.85
| epoch 1 |  iter 361 / 9295 | time 7

KeyboardInterrupt: 

In [9]:
# 学習結果の評価。学習に半日程度かかってしまうので、学習済みデータを使用

from common.util import most_similar


In [11]:
# Load the pre-trained CBOW parameters shipped as a pickle file.
# NOTE: pickle.load can execute arbitrary code embedded in the file, so only
# point pkl_file at a file obtained from a trusted source.
pkl_file = "../../deep-learning-from-scratch-2/ch04/cbow_params.pkl"
with open(pkl_file, 'rb') as f:
    params = pickle.load(f)

# The file handle is no longer needed once the dictionary is in memory.
# These assignments replace the word_to_id / id_to_word loaded earlier.
word_vecs = params['word_vecs']
word_to_id = params['word_to_id']
id_to_word = params['id_to_word']

In [12]:
# Print the 7 vocabulary words that most_similar scores highest for "you".
most_similar("you", word_to_id, id_to_word, word_vecs, top=7)


[query] you
 we: 0.6103515625
 someone: 0.59130859375
 i: 0.55419921875
 something: 0.48974609375
 anyone: 0.47314453125
 maybe: 0.46484375
 good: 0.4560546875


In [13]:
# Print the 7 vocabulary words that most_similar scores highest for "car".
most_similar("car", word_to_id, id_to_word, word_vecs, top=7)


[query] car
 luxury: 0.497314453125
 arabia: 0.47802734375
 auto: 0.47119140625
 disk-drive: 0.450927734375
 travel: 0.4091796875
 retailer: 0.405517578125
 pse: 0.3935546875


In [14]:
# Print the 7 vocabulary words that most_similar scores highest for "funny".
most_similar("funny", word_to_id, id_to_word, word_vecs, top=7)


[query] funny
 really: 0.5361328125
 stuff: 0.5263671875
 please: 0.5009765625
 everybody: 0.488037109375
 voice: 0.485595703125
 someone: 0.474853515625
 impossible: 0.46240234375


In [15]:
# Print the 7 vocabulary words that most_similar scores highest for "son".
most_similar("son", word_to_id, id_to_word, word_vecs, top=7)


[query] son
 wife: 0.57958984375
 editor: 0.5732421875
 constitution: 0.55029296875
 mother: 0.53515625
 mayor: 0.52685546875
 gallery: 0.51220703125
 owner: 0.50927734375


In [16]:
from common.util import analogy

In [19]:
# Ask the analogy question "king:man = queen:?" and print the 5
# highest-scoring candidate words.
analogy("king", "man", "queen", word_to_id, id_to_word, word_vecs, top=5)


[analogy] king:man = queen:?
 woman: 5.16015625
 veto: 4.9296875
 ounce: 4.69140625
 earthquake: 4.6328125
 successor: 4.609375


In [23]:
# Ask the analogy question "love:like = hate:?" and print the 5
# highest-scoring candidate words.
analogy("love", "like", "hate", word_to_id, id_to_word, word_vecs, top=5)


[analogy] love:like = hate:?
 a.m: 4.71875
 trading: 4.515625
 think: 4.3046875
 at: 4.2734375
 share: 4.14453125
