copytask_test.py (forked from loudinthecloud/pytorch-ntm)

"""Copy Task NTM model."""
import random
from attr import attrs, attrib, Factory
import torch
from torch import nn
from torch.autograd import Variable
from torch import optim
import numpy as np
import params
from ntm.aio import EncapsulatedNTM
data_train=[]


def gen(batch_size,
        seq_width,
        min_len,
        max_len):
    seq_len = random.randint(min_len, max_len)
    seq = np.random.binomial(1, 0.5, (seq_len, batch_size, seq_width))
    seq = Variable(torch.from_numpy(seq))

    # The input includes an additional channel used for the delimiter
    inp = Variable(torch.zeros(seq_len + 1, batch_size, seq_width + 1))
    inp[:seq_len, :, :seq_width] = seq
    inp[seq_len, :, seq_width] = 1.0  # delimiter in our control channel
    outp = seq.clone()

    return inp, outp
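
# For example, with batch_size=2, seq_width=8 and a drawn seq_len of 5, `gen`
# returns `inp` of shape (6, 2, 9): 5 random bit vectors followed by one
# delimiter step flagged on the extra control channel. The target `outp` has
# shape (5, 2, 8) and is simply a copy of the original bits.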


# Generator of randomized training sequences
def dataloader_train(num_batches,
                     batch_size,
                     seq_width,
                     min_len,
                     max_len,
                     train_ratio):
    """Generator of random sequences for the copy task.

    Creates random batches of "bits" sequences.

    All the sequences within each batch have the same length.
    The length is drawn from [`min_len`, `max_len`].

    :param num_batches: Total number of batches to generate.
    :param seq_width: The width of each item in the sequence.
    :param batch_size: Batch size.
    :param min_len: Sequence minimum length.
    :param max_len: Sequence maximum length.

    NOTE: The input width is `seq_width + 1`; the additional input
    contains the delimiter.
    """
    for batch_num in range(num_batches):
        if batch_num / num_batches < train_ratio:
            # All batches have the same sequence length
            inp, outp = gen(batch_size, seq_width, min_len, max_len)
            data_train.append((inp, outp))
            yield batch_num + 1, inp.float().to(params.device), outp.float().to(params.device)
        else:
            inp, outp = data_train[batch_num % len(data_train)]
            yield batch_num + 1, inp.float().to(params.device), outp.float().to(params.device)
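
# With train_ratio=0.1, the first 10% of the batches are freshly sampled and
# cached in `data_train`; the remaining 90% cycle through that cache, so the
# model repeatedly revisits the same sequences instead of seeing new data.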


def dataloader_valid(num_batches,
                     batch_size,
                     seq_width,
                     min_len,
                     max_len):
    res = []
    for batch_num in range(num_batches):
        # All batches have the same sequence length
        inp, outp = gen(batch_size, seq_width, min_len, max_len)
        res.append((batch_num + 1, inp.float().to(params.device), outp.float().to(params.device)))
    return res


@attrs
class CopyTaskParams(object):
    name = attrib(default="copy-task-test")
    controller_size = attrib(default=100, convert=int)
    controller_layers = attrib(default=1, convert=int)
    num_heads = attrib(default=1, convert=int)
    sequence_width = attrib(default=1, convert=int)
    slen_min_train = attrib(default=1, convert=int)
    slen_max_train = attrib(default=10, convert=int)
    slen_min_valid = attrib(default=11, convert=int)
    slen_max_valid = attrib(default=19, convert=int)
    memory_n = attrib(default=128, convert=int)
    memory_m = attrib(default=20, convert=int)
    num_samples_train = attrib(default=1000000, convert=int)
    num_samples_valid = attrib(default=1000, convert=int)
    batch_size = attrib(default=1, convert=int)
    rmsprop_lr = attrib(default=1e-4, convert=float)
    rmsprop_momentum = attrib(default=0.9, convert=float)
    rmsprop_alpha = attrib(default=0.95, convert=float)
    train_ratio = attrib(default=0.1, convert=float)
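
# Note: with these defaults the network trains on sequences of length 1-10 but
# is validated on longer sequences of length 11-19, so validation probes how
# well the learned copy algorithm generalizes beyond the training lengths.
# `memory_n` and `memory_m` are the number of memory locations and the width
# of each location, following the NTM memory layout used by EncapsulatedNTM.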

#
# To create a network, simply instantiate `:class:CopyTaskModelTraining`;
# all the components will be wired with the default values.
# In case you'd like to change any of the defaults, do the following:
#
# > params = CopyTaskParams(batch_size=4)
# > model = CopyTaskModelTraining(params=params)
#
# Then use `model.net`, `model.optimizer` and `model.criterion` to train the
# network. Call `model.train_batch` for training and `model.evaluate`
# for evaluating.
#
# You may skip this altogether and use `:class:CopyTaskNTM` directly.
#

@attrs
class CopyTaskModelTraining(object):
    params = attrib(default=Factory(CopyTaskParams))
    net = attrib()
    dataloader_train = attrib()
    dataloader_valid = attrib()
    criterion = attrib()
    optimizer = attrib()

    @net.default
    def default_net(self):
        # We have 1 additional input for the delimiter which is passed on a
        # separate "control" channel
        net = EncapsulatedNTM(self.params.sequence_width + 1, self.params.sequence_width,
                              self.params.controller_size, self.params.controller_layers,
                              self.params.num_heads,
                              self.params.memory_n, self.params.memory_m).to(params.device)
        return net

    @dataloader_train.default
    def default_dataloader_train(self):
        return dataloader_train(int(self.params.num_samples_train / self.params.batch_size),
                                self.params.batch_size,
                                self.params.sequence_width,
                                self.params.slen_min_train,
                                self.params.slen_max_train,
                                self.params.train_ratio)

    @dataloader_valid.default
    def default_dataloader_valid(self):
        return dataloader_valid(int(self.params.num_samples_valid / self.params.batch_size),
                                self.params.batch_size,
                                self.params.sequence_width,
                                self.params.slen_min_valid,
                                self.params.slen_max_valid)

    @criterion.default
    def default_criterion(self):
        return nn.BCELoss()

    @optimizer.default
    def default_optimizer(self):
        return optim.RMSprop(self.net.parameters(),
                             momentum=self.params.rmsprop_momentum,
                             alpha=self.params.rmsprop_alpha,
                             lr=self.params.rmsprop_lr)
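

# A minimal usage sketch (not part of the original file): it builds the model
# with default parameters and runs a single training step. It assumes
# `params.device` is a valid torch device and that EncapsulatedNTM exposes
# `init_sequence(batch_size)` and a per-timestep forward returning
# `(output, state)`, as in the upstream pytorch-ntm train.py.
if __name__ == "__main__":
    model = CopyTaskModelTraining()
    _, X, Y = next(model.dataloader_train)

    model.optimizer.zero_grad()
    inp_seq_len = X.size(0)
    outp_seq_len, batch_size, _ = Y.size()

    # Feed the whole input sequence (bits + delimiter) into the NTM ...
    model.net.init_sequence(batch_size)
    for i in range(inp_seq_len):
        model.net(X[i])

    # ... then read the copy back out with no further input.
    y_out = torch.zeros(Y.size(), device=Y.device)
    for i in range(outp_seq_len):
        y_out[i], _ = model.net()

    loss = model.criterion(y_out, Y)
    loss.backward()
    model.optimizer.step()
    print("single-batch loss: %.4f" % loss.item())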