embeddings.py
import math
import sys

import numpy as np
import theano
import theano.tensor as T

import util

class Embeddings(object):
    def __init__(self, vocab_size, embedding_dim,
                 idxs=None, sequence_embeddings=None):
        # exactly one of idxs / sequence_embeddings must be provided
        assert (idxs is None) ^ (sequence_embeddings is None)
        if idxs is not None:
            # not tying weights, build our own set of embeddings
            self.Wx = util.sharedMatrix(vocab_size, embedding_dim, 'Wx',
                                        orthogonal_init=True)
            self.sequence_embeddings = self.Wx[idxs]
            self.using_shared_embeddings = False
        else:
            # using tied weights, we won't be handling the update
            self.sequence_embeddings = sequence_embeddings
            self.using_shared_embeddings = True

    def params_for_l2_penalty(self):
        if self.using_shared_embeddings:
            return []
        return [self.sequence_embeddings]

    def updates_wrt_cost(self, cost, learning_opts):
        if self.using_shared_embeddings:
            return []
        learning_rate = learning_opts.learning_rate
        # gradient w.r.t. only the rows of Wx used by this sequence, applied
        # back into Wx as a sparse inc_subtensor update
        gradient = util.clipped(T.grad(cost=cost, wrt=self.sequence_embeddings))
        return [(self.Wx, T.inc_subtensor(self.sequence_embeddings,
                                          -learning_rate * gradient))]

    def embeddings(self):
        return self.sequence_embeddings
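
# A minimal usage sketch (not from the original module) for the untied case:
# build an Embeddings layer over a symbolic index vector, take a dummy cost
# over its slice of Wx and compile the resulting sparse inc_subtensor update.
# `LearningOpts` is a hypothetical stand-in for whatever object callers pass;
# updates_wrt_cost only reads its learning_rate attribute.
#
#   idxs = T.ivector('idxs')
#   embedding_layer = Embeddings(vocab_size=1000, embedding_dim=50, idxs=idxs)
#   cost = T.sum(embedding_layer.embeddings() ** 2)
#   class LearningOpts(object):
#       learning_rate = 0.05
#   updates = embedding_layer.updates_wrt_cost(cost, LearningOpts())
#   train_step = theano.function([idxs], cost, updates=updates)
#   train_step(np.array([3, 1, 4, 1, 5], dtype=np.int32))
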
class TiedEmbeddings(object):
    def __init__(self, vocab_size, embedding_dim, initial_embeddings_file=None,
                 train_embeddings=True):
        if not train_embeddings and initial_embeddings_file is None:
            print >>sys.stderr, "WARNING: not training embeddings and no initial embeddings provided"
        self.train_embeddings = train_embeddings
        if initial_embeddings_file:
            e = np.load(initial_embeddings_file)
            assert e.shape[0] == vocab_size, \
                "vocab size mismatch? loaded=%s expected=%s" % (e.shape[0], vocab_size)
            # TODO code could handle this but just not wanting --embedding-dim set
            # when using init embeddings
            assert e.shape[1] == embedding_dim, \
                "dimensionality config error. loaded embeddings are %s d but --embedding-dim set to %s d" % (e.shape[1], embedding_dim)
            assert e.dtype == np.float32, "%s" % e.dtype
            self.shared_embeddings = util.shared(e, 'tied_embeddings')
        else:
            self.shared_embeddings = util.sharedMatrix(vocab_size, embedding_dim,
                                                       'tied_embeddings',
                                                       orthogonal_init=True)
def slices_for_idxs(self, idxs): # list of vectors (idxs)
# concat all idx sequences into one sequence so we can slice into shared
# embeddings with a _single_ operation. we need to do this only because
# inc_subtensor only allows for one indexing :/
concatenated_idxs = T.concatenate(idxs)
self.concatenated_sequence_embeddings = self.shared_embeddings[concatenated_idxs]
# but now we have to reslice back into this to pick up the embeddings per original
# index sequence. each of these subslices is given to a seperate rnn to run over.
sub_slices = []
offset = 0
for idx in idxs:
seq_len = idx.shape[0]
sub_slices.append(self.concatenated_sequence_embeddings[offset :
offset + seq_len])
offset += seq_len
return sub_slices
def name(self):
return "tied_embeddings"
    def params_for_l2_penalty(self):
        if not self.train_embeddings:
            return []
        # for the l2 penalty only check the subset of the embeddings related to a
        # specific example, i.e. NOT the entire shared_embeddings, most of which has
        # nothing to do with each example.
        return [self.concatenated_sequence_embeddings]

    def updates_wrt_cost(self, cost, learning_opts):
        if not self.train_embeddings:
            return []
        # _one_ update for the embedding matrix, regardless of the number of rnns
        # running over subslices
        gradient = util.clipped(T.grad(cost=cost,
                                       wrt=self.concatenated_sequence_embeddings))
        learning_rate = learning_opts.learning_rate
        return [(self.shared_embeddings,
                 T.inc_subtensor(self.concatenated_sequence_embeddings,
                                 -learning_rate * gradient))]
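
# A minimal usage sketch (not from the original module) for the tied case: two
# index sequences share one embedding matrix, each returned slice would
# normally feed a separate rnn, and a single inc_subtensor update covers both.
# As above, `LearningOpts` is a hypothetical stand-in exposing learning_rate.
#
#   idxs_a, idxs_b = T.ivector('idxs_a'), T.ivector('idxs_b')
#   tied = TiedEmbeddings(vocab_size=1000, embedding_dim=50)
#   slice_a, slice_b = tied.slices_for_idxs([idxs_a, idxs_b])
#   cost = T.sum(slice_a ** 2) + T.sum(slice_b ** 2)
#   class LearningOpts(object):
#       learning_rate = 0.05
#   updates = tied.updates_wrt_cost(cost, LearningOpts())
#   train_step = theano.function([idxs_a, idxs_b], cost, updates=updates)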