From 9d742ced7e8725a057098024c94a7c029f3e5790 Mon Sep 17 00:00:00 2001
From: nownabe
Date: Wed, 26 Sep 2018 14:29:39 +0900
Subject: [PATCH 1/3] 3.2.1: CBOW prediction example

---
 ch03/cbow_predict.py | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)
 create mode 100644 ch03/cbow_predict.py

diff --git a/ch03/cbow_predict.py b/ch03/cbow_predict.py
new file mode 100644
index 0000000..ab36aa0
--- /dev/null
+++ b/ch03/cbow_predict.py
@@ -0,0 +1,25 @@
+import sys
+sys.path.append('..')
+import numpy as np
+from common.layers import MatMul
+
+
+# One-hot vectors for the two context words (vocabulary size 7).
+c0 = np.array([[1, 0, 0, 0, 0, 0, 0]])
+c1 = np.array([[0, 0, 1, 0, 0, 0, 0]])
+
+# W_in: one-hot word -> hidden vector; W_out: hidden vector -> scores.
+W_in = np.random.randn(7, 3)
+W_out = np.random.randn(3, 7)
+
+in_layer0 = MatMul(W_in)  # both input layers share W_in
+in_layer1 = MatMul(W_in)
+out_layer = MatMul(W_out)
+
+# Average the two context projections, then score every vocabulary word.
+h0 = in_layer0.forward(c0)
+h1 = in_layer1.forward(c1)
+h = 0.5 * (h0 + h1)
+s = out_layer.forward(h)
+
+print(s)

From 41cc7779a2678a3c6a01ab81c8edba85b26e4073 Mon Sep 17 00:00:00 2001
From: nownabe
Date: Wed, 26 Sep 2018 14:31:36 +0900
Subject: [PATCH 2/3] 3.4: SimpleCBOW model

---
 ch03/simple_cbow.py | 43 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 43 insertions(+)
 create mode 100644 ch03/simple_cbow.py

diff --git a/ch03/simple_cbow.py b/ch03/simple_cbow.py
new file mode 100644
index 0000000..7b0c595
--- /dev/null
+++ b/ch03/simple_cbow.py
@@ -0,0 +1,43 @@
+import sys
+sys.path.append('..')
+import numpy as np
+from common.layers import MatMul, SoftmaxWithLoss
+
+
+class SimpleCBOW:
+    def __init__(self, vocab_size, hidden_size):
+        V, H = vocab_size, hidden_size
+
+        # Initialize weights with small random values (float32).
+        W_in = 0.01 * np.random.randn(V, H).astype('f')
+        W_out = 0.01 * np.random.randn(H, V).astype('f')
+
+        self.in_layer0 = MatMul(W_in)  # both input layers share W_in
+        self.in_layer1 = MatMul(W_in)
+        self.out_layer = MatMul(W_out)
+        self.loss_layer = SoftmaxWithLoss()
+
+        # Collect every layer's parameters and gradients into flat lists.
+        layers = [self.in_layer0, self.in_layer1, self.out_layer]
+        self.params, self.grads = [], []
+        for layer in layers:
+            self.params += layer.params
+            self.grads += layer.grads
+
+        self.word_vecs = W_in  # the input weights double as the word vectors
+
+    def forward(self, contexts, target):
+        h0 = self.in_layer0.forward(contexts[:, 0])
+        h1 = self.in_layer1.forward(contexts[:, 1])
+        h = (h0 + h1) * 0.5
+        score = self.out_layer.forward(h)
+        loss = self.loss_layer.forward(score, target)
+        return loss
+
+    def backward(self, dout=1):
+        ds = self.loss_layer.backward(dout)
+        da = self.out_layer.backward(ds)
+        da *= 0.5  # gradient of the (h0 + h1) * 0.5 averaging
+        self.in_layer1.backward(da)
+        self.in_layer0.backward(da)
+        return None

From 6337163654fc8a9f64c5c62216c421d668c493b5 Mon Sep 17 00:00:00 2001
From: nownabe
Date: Wed, 26 Sep 2018 14:31:51 +0900
Subject: [PATCH 3/3] 3.4.1: CBOW training script and Adam optimizer

---
 ch03/train.py       | 34 ++++++++++++++++++++++++++++++++++
 common/optimizer.py | 34 ++++++++++++++++++++++++++++++++++
 2 files changed, 68 insertions(+)
 create mode 100644 ch03/train.py

diff --git a/ch03/train.py b/ch03/train.py
new file mode 100644
index 0000000..65b39dd
--- /dev/null
+++ b/ch03/train.py
@@ -0,0 +1,34 @@
+import sys
+sys.path.append('..')
+from common.trainer import Trainer
+from common.optimizer import Adam
+from simple_cbow import SimpleCBOW
+from common.util import preprocess, create_contexts_target, convert_one_hot
+
+
+window_size = 1
+hidden_size = 5
+batch_size = 3
+max_epoch = 1000
+
+text = 'You say goodbye and I say hello.'
+corpus, word_to_id, id_to_word = preprocess(text)
+
+# Build (context, target) pairs and one-hot encode both.
+vocab_size = len(word_to_id)
+contexts, target = create_contexts_target(corpus, window_size)
+target = convert_one_hot(target, vocab_size)
+contexts = convert_one_hot(contexts, vocab_size)
+
+model = SimpleCBOW(vocab_size, hidden_size)
+optimizer = Adam()
+trainer = Trainer(model, optimizer)
+
+trainer.fit(contexts, target, max_epoch, batch_size)
+trainer.plot()
+
+
+# Each row of W_in is the learned vector for one word.
+word_vecs = model.word_vecs
+for word_id, word in id_to_word.items():
+    print(word, word_vecs[word_id])
diff --git a/common/optimizer.py b/common/optimizer.py
index bc1b1a7..a56840c 100644
--- a/common/optimizer.py
+++ b/common/optimizer.py
@@ -1,3 +1,7 @@
+import sys
+sys.path.append('..')
+from common.np import *  # exports np (numpy, or cupy in GPU mode)
+
 class SGD:
     def __init__(self, lr=0.01):
         self.lr = lr
@@ -5,3 +9,33 @@ def __init__(self, lr=0.01):
     def update(self, params, grads):
         for i in range(len(params)):
             params[i] -= self.lr * grads[i]
+
+
+class Adam:
+    '''
+    Adam (http://arxiv.org/abs/1412.6980v8)
+    '''
+    def __init__(self, lr=0.001, beta1=0.9, beta2=0.999):
+        self.lr = lr
+        self.beta1 = beta1
+        self.beta2 = beta2
+        self.iter = 0
+        self.m = None  # first moment: moving average of gradients
+        self.v = None  # second moment: moving average of squared gradients
+
+    def update(self, params, grads):
+        if self.m is None:
+            self.m, self.v = [], []
+            for param in params:
+                self.m.append(np.zeros_like(param))
+                self.v.append(np.zeros_like(param))
+
+        self.iter += 1
+        # Fold both bias-correction terms into the step size.
+        lr_t = self.lr * np.sqrt(1.0 - self.beta2**self.iter) / (1.0 - self.beta1**self.iter)
+
+        for i in range(len(params)):
+            self.m[i] += (1 - self.beta1) * (grads[i] - self.m[i])
+            self.v[i] += (1 - self.beta2) * (grads[i]**2 - self.v[i])
+
+            params[i] -= lr_t * self.m[i] / (np.sqrt(self.v[i]) + 1e-7)
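Reviewer note: the scripts above import MatMul and SoftmaxWithLoss from
common/layers.py, which is not part of this series. For review context,
here is a minimal sketch of a MatMul layer consistent with how SimpleCBOW
and the optimizers use it; the sketch assumes the book's convention that
params and grads are lists of arrays and that gradients are written in
place:

    import numpy as np

    class MatMul:
        def __init__(self, W):
            self.params = [W]                # shared with the model's flat params list
            self.grads = [np.zeros_like(W)]  # shared with the model's flat grads list
            self.x = None

        def forward(self, x):
            W, = self.params
            self.x = x                       # cache the input for the backward pass
            return np.dot(x, W)

        def backward(self, dout):
            W, = self.params
            dx = np.dot(dout, W.T)
            self.grads[0][...] = np.dot(self.x.T, dout)  # in-place write
            return dx

The in-place "[...]" assignment matters: SimpleCBOW.__init__ aggregates
each layer.grads into one flat list, and the optimizers mutate params[i]
and grads[i] by index, so both rely on the array objects never being
replaced by new ones.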
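One more note, on the Adam implementation: the moment updates are written
in incremental form, m += (1 - beta1) * (grad - m), which is algebraically
the same as the textbook exponential moving average
m = beta1 * m + (1 - beta1) * grad (and likewise for v), and the two
bias-correction factors are folded into lr_t instead of being applied to
m and v separately, a rearrangement the Adam paper itself suggests for
efficiency. A quick check of the equivalence:

    import numpy as np

    beta1 = 0.9
    m = np.array([0.5])
    g = np.array([2.0])

    incremental = m + (1 - beta1) * (g - m)    # the form used in the patch
    standard = beta1 * m + (1 - beta1) * g     # the textbook EMA
    assert np.allclose(incremental, standard)  # both give [0.65]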