-
Notifications
You must be signed in to change notification settings - Fork 1
/
heads.py
134 lines (103 loc) · 7.59 KB
/
heads.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
import theano
import theano.tensor as T
import numpy as np
from utils import softplus, sigmoid, init_weight, focus_shift, circular_convolution, pass_or_fail, RMSprop
class readHead:
def __init__(self, controller_size,
memory_slots, slot_size, batch_size):
self.controller_size = controller_size
self.memory_slots = memory_slots
self.slot_size = slot_size
self.batch_size = batch_size
# For now we'll only allow one shift backwards or forwards
self.weights = {
"controller->key" : init_weight(self.controller_size, self.slot_size),
"controller->shift" : init_weight(self.controller_size, 3),
"controller->sharpen" : init_weight(self.controller_size, 1),
"controller->strengthen" : init_weight(self.controller_size, 1),
"controller->interpolation" : init_weight(self.controller_size, 1),
}
def get_weights(self):
return [self.weights["controller->key"], self.weights["controller->shift"], self.weights["controller->sharpen"], self.weights["controller->strengthen"], self.weights["controller->interpolation"]]
def produce(self, controller):
"""
readHead.recurrence(controller, previous_weight) -> key (batchsize x N),
shift (batchsize x 3),
sharpen (batchsize x 1),
strengthen (batchsize x 1),
interpolation (batchsize x 1)
produces controller parameters to manipulate/read memory
@param controller: a batchsize x controller_size matrix, representing the output of the controller
"""
# key, add, erase -> batchsize x N
key = T.dot(controller, self.weights["controller->key"])
# shift -> batchsize x 3
shift = T.nnet.softmax(T.dot(controller, self.weights["controller->shift"])) # SOFTMAX
backward_shift = shift[:, 0]
stay_forward_shift = shift[:, 1:3] # represents the shift values for STAY and FORWARD
zeros_size = self.memory_slots - 3
# We are concatenating along the second axis, we're basically moving the first element (which represents the backward shift) to the front
# ex:
# There are 7 memory slots
# Useless zeros are wrapped in [] to increase history.
# 0.2 0.9 0.1 [0.0 0.0 0.0 0.0] -> 0.9 0.1 [0.0 0.0 0.0 0.0] 0.2
true_shift = T.concatenate([stay_forward_shift, T.zeros([self.batch_size, zeros_size]), backward_shift.reshape([self.batch_size, 1])], axis = 1) # WRAP
# sharpen, strengthen, interpolation -> batchsize x 1
# sharpen and strengthen must both be greater than or equal to 1, so we'll apply the softplus function (Graves et al., 2016)
sharpen = softplus(T.dot(controller, self.weights["controller->sharpen"])) # SOFTPLUS
strengthen = softplus(T.dot(controller, self.weights["controller->strengthen"])) # SOFTPLUS
interpolation = T.nnet.sigmoid(T.dot(controller, self.weights["controller->interpolation"])) # SIGMOID
return key, true_shift, T.addbroadcast(sharpen, 1), T.addbroadcast(strengthen, 1), T.addbroadcast(interpolation, 1)
class writeHead:
def __init__(self, controller_size,
memory_slots, slot_size,
batch_size):
self.controller_size = controller_size
self.memory_slots = memory_slots
self.slot_size = slot_size
self.batch_size = batch_size
# For now we'll only allow one shift backwards or forwards
self.weights = {
"controller->key" : init_weight(self.controller_size, self.slot_size),
"controller->add" : init_weight(self.controller_size, self.slot_size),
"controller->erase" : init_weight(self.controller_size, self.slot_size),
"controller->shift" : init_weight(self.controller_size, 3),
"controller->sharpen" : init_weight(self.controller_size, 1),
"controller->strengthen" : init_weight(self.controller_size, 1),
"controller->interpolation" : init_weight(self.controller_size, 1),
}
def get_weights(self):
return [self.weights["controller->key"], self.weights["controller->add"], self.weights["controller->erase"], self.weights["controller->shift"], self.weights["controller->sharpen"], self.weights["controller->strengthen"], self.weights["controller->interpolation"]]
def produce(self, controller):
"""
writeHead.recurrence(controller, previous_weight) -> key (batchsize x N),
add (batchsize x N),
erase (batchsize x N),
shift (batchsize x 3),
sharpen (batchsize x 1),
strengthen (batchsize x 1),
interpolation (batchsize x 1)
produces controller parameters to manipulate/write memory
@param controller: a batchsize x controller_size matrix, representing the output of the controller
"""
# key, add, erase -> batchsize x N
key = T.dot(controller, self.weights["controller->key"])
add = T.tanh(T.dot(controller, self.weights["controller->add"]))
erase = T.nnet.sigmoid(T.dot(controller, self.weights["controller->erase"])) # SIGMOID
# shift -> batchsize x 3
shift = T.nnet.softmax(T.dot(controller, self.weights["controller->shift"])) # SOFTMAX
backward_shift = shift[:, 0]
stay_forward_shift = shift[:, 1:3] # represents the shift values for STAY and FORWARD
zeros_size = self.memory_slots - 3
# We are concatenating along the second axis, we're basically moving the first element (which represents the backward shift) to the front
# ex:
# There are 7 memory slots
# Useless zeros are wrapped in [] to increase history.
# 0.2 0.9 0.1 [0.0 0.0 0.0 0.0] -> 0.9 0.1 [0.0 0.0 0.0 0.0] 0.2
true_shift = T.concatenate([stay_forward_shift, T.zeros([self.batch_size, zeros_size]), backward_shift.reshape([self.batch_size, 1])], axis = 1) # WRAP
# sharpen, strengthen, interpolation -> batchsize x 1
# sharpen and strengthen must both be greater than or equal to 1, so we'll apply the softplus function (Graves et al., 2016)
sharpen = softplus(T.dot(controller, self.weights["controller->sharpen"])) # SOFTPLUS
strengthen = softplus(T.dot(controller, self.weights["controller->strengthen"])) # SOFTPLUS
interpolation = T.nnet.sigmoid(T.dot(controller, self.weights["controller->interpolation"])) # SIGMOID
return key, add, erase, true_shift, T.addbroadcast(sharpen, 1), T.addbroadcast(strengthen, 1), T.addbroadcast(interpolation, 1)