import math

import numpy as np
import torch
import torch.nn as nn
from torch.autograd import Variable

import global_constants as gc
import pytorch_utils as my_utils

#######################################


class ScaledEmbedding(nn.Embedding):
    """
    Embedding layer that initialises its values from a normal
    distribution with standard deviation scaled by the inverse
    of the embedding dimension.
    """

    def reset_parameters(self):
        """
        Initialize parameters.
        """
        self.weight.data.normal_(0, 1.0 / self.embedding_dim)
        if self.padding_idx is not None:
            # Keep the padding embedding fixed at zero.
            self.weight.data[self.padding_idx].fill_(0)
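
# A minimal usage sketch (illustrative, not part of the original module):
# the scaled initialisation keeps embedding norms small, so early
# dot-product scores start near zero.
#
# >>> emb = ScaledEmbedding(1000, 32, padding_idx=0)
# >>> emb.weight.std().item()  # roughly 1.0 / 32
# >>> emb(torch.LongTensor([[1, 2, 0]])).shape
# torch.Size([1, 3, 32])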


class ZeroEmbedding(nn.Embedding):
    """
    Embedding layer that initialises all of its values to zero.
    Used for biases.
    """

    def reset_parameters(self):
        """
        Initialize parameters.
        """
        self.weight.data.zero_()
        if self.padding_idx is not None:
            self.weight.data[self.padding_idx].fill_(0)
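
# Typical use (a sketch): per-item bias terms stored as 1-dimensional
# embeddings that start at zero.
#
# >>> item_bias = ZeroEmbedding(1000, 1, padding_idx=0)
# >>> item_bias.weight.abs().sum().item()
# 0.0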


class ManualEmbedding(nn.Embedding):
    """
    Embedding layer that initialises its values from a normal
    distribution with a fixed standard deviation of 0.01.
    """
    # NOTE: the standard deviation is currently hard-coded; making it
    # configurable would require overriding __init__.

    def reset_parameters(self):
        """
        Initialize parameters.
        """
        self.weight.data.normal_(0, 0.01)
        if self.padding_idx is not None:
            self.weight.data[self.padding_idx].fill_(0)


class PositionEmbedding(nn.Embedding):

    def __init__(self, num_embeddings, embedding_dim, padding_idx=gc.PADDING_IDX, left_pad=True):
        '''
        :param num_embeddings: length of the input sequence (the max length
            of the user sequences or of the item sequences).
        :param embedding_dim: embedding dim of the user, item, or word embeddings.
        :param padding_idx: normally 0; defined in global_constants.py.
        :param left_pad: default is True, meaning that sequences are padded from the left.
        '''
        super(PositionEmbedding, self).__init__(num_embeddings, embedding_dim, padding_idx)
        self._padding_idx = padding_idx
        self._left_pad = left_pad
        self._num_embeddings = num_embeddings  # max position, i.e. length of the input sequence
        self._embedding_dim = embedding_dim
        self.init_parameters()

    def init_parameters(self):
        '''
        Fill the weight matrix with the fixed sinusoidal encoding: even
        dimensions hold the sine term, odd dimensions the matching cosine,
        and the padding position stays all-zero.
        '''
        position_enc = np.array([
            [pos / np.power(10000, 2 * (j // 2) / self._embedding_dim) for j in range(self._embedding_dim)]
            if pos != gc.PADDING_IDX else np.zeros(self._embedding_dim) for pos in range(self._num_embeddings)])
        # The [1:] slices assume the padding position is index 0.
        position_enc[1:, 0::2] = np.sin(position_enc[1:, 0::2])  # dim 2i
        position_enc[1:, 1::2] = np.cos(position_enc[1:, 1::2])  # dim 2i+1
        self.weight.data = my_utils.numpy2tensor(position_enc).type(torch.FloatTensor)

    def forward(self, input, incremental_state=None):
        """Input is expected to be of size [bsz x seqlen]."""
        if incremental_state is not None:
            # Positions are the same for every token when decoding a single step.
            positions = input.data.new(1, 1).fill_(self.padding_idx + input.size(1))
        else:
            positions = my_utils.make_positions(input.data, self._padding_idx, self._left_pad)
        return super(PositionEmbedding, self).forward(Variable(positions))
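
# A small illustrative check (assumptions: gc.PADDING_IDX == 0, and
# my_utils.make_positions numbers real tokens from padding_idx + 1, as in
# fairseq):
#
# >>> pos_emb = PositionEmbedding(num_embeddings=50, embedding_dim=8)
# >>> tokens = torch.LongTensor([[0, 0, 5, 9, 3]])  # left-padded, batch of 1
# >>> pos_emb(tokens).shape
# torch.Size([1, 5, 8])
#
# Row `pos` of the weight matrix holds sin(pos / 10000 ** (2 * i / d)) in its
# even columns and the matching cos in its odd columns.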


def Linear(in_features, out_features, dropout=0):
    """Weight-normalized Linear layer whose initial variance accounts for dropout."""
    m = nn.Linear(in_features, out_features)
    m.weight.data.normal_(mean=0, std=math.sqrt((1 - dropout) * 1. / in_features))
    m.bias.data.zero_()
    return nn.utils.weight_norm(m)
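
# Why this initialisation (a sketch of the reasoning, fairseq-style): with
# weights ~ N(0, (1 - dropout) / in_features), a unit-variance input whose
# units have been through dropout (which rescales survivors by
# 1 / (1 - dropout)) yields a roughly unit-variance output.
#
# >>> fc = Linear(256, 128, dropout=0.1)
# >>> fc(torch.randn(4, 256)).shape
# torch.Size([4, 128])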


class FasterConvTBC(torch.nn.Module):
    """1D convolution over an input of shape (time x batch x channel).

    The implementation uses gemm to perform the convolution. This
    implementation is faster than cuDNN for small kernel sizes.
    """

    def __init__(self, in_channels, out_channels, kernel_size, padding=0, groups=1):
        super(FasterConvTBC, self).__init__()
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.kernel_size = my_utils._single(kernel_size)
        self.padding = my_utils._single(padding)
        self.groups = groups
        # Weight layout is (kernel_width, in_channels, out_channels), as
        # expected by conv_tbc; values are initialised by the ConvTBC
        # factory below.
        self.weight = torch.nn.Parameter(torch.Tensor(
            self.kernel_size[0], in_channels, out_channels))
        self.bias = torch.nn.Parameter(torch.Tensor(out_channels))

    def forward(self, input):
        return input.contiguous().conv_tbc(self.weight, self.bias, self.padding[0])

    def __repr__(self):
        s = ('{name}({in_channels}, {out_channels}, kernel_size={kernel_size}'
             ', padding={padding}')
        if self.bias is None:
            s += ', bias=False'
        s += ')'
        return s.format(name=self.__class__.__name__, **self.__dict__)


def ConvTBC(in_channels, out_channels, kernel_size, dropout=0, **kwargs):
    """Weight-normalized ConvTBC layer (time x batch x channel input)."""
    m = FasterConvTBC(in_channels, out_channels, kernel_size, **kwargs)
    std = math.sqrt((4 * (1.0 - dropout)) / (m.kernel_size[0] * in_channels))
    m.weight.data.normal_(mean=0, std=std)
    m.bias.data.zero_()
    # out_channels is the last dim of the TBC weight, hence dim=2.
    return nn.utils.weight_norm(m, dim=2)
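
# Shape sketch (illustrative; assumes the conv_tbc op is available in the
# installed PyTorch build): conv_tbc consumes (time, batch, channel)
# tensors, so a width-3 kernel with padding 1 preserves the time dimension.
#
# >>> conv = ConvTBC(16, 32, kernel_size=3, padding=1)
# >>> x = torch.randn(10, 2, 16)  # (time, batch, channel)
# >>> conv(x).shape
# torch.Size([10, 2, 32])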


def ConvBCT(in_channels, out_channels, kernel_size, dropout=0, **kwargs):
    """Weight-normalized Conv1d layer over (batch x channel x time) input.

    B: batch size, C: channel (embedding dim), T: number of words in the sequence.
    """
    m = nn.Conv1d(in_channels, out_channels, kernel_size, **kwargs)
    std = math.sqrt((4 * (1.0 - dropout)) / (m.kernel_size[0] * in_channels))
    m.weight.data.normal_(mean=0, std=std)
    m.bias.data.zero_()
    # nn.Conv1d stores weights as (out_channels, in_channels, kernel_width),
    # so the per-output-channel norm lives on dim 0 (in the TBC layout
    # above, out_channels sits on dim 2 instead).
    return nn.utils.weight_norm(m, dim=0)
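
# Counterpart sketch for the (batch, channel, time) layout used by
# nn.Conv1d (illustrative):
#
# >>> conv = ConvBCT(16, 32, kernel_size=3, padding=1)
# >>> x = torch.randn(2, 16, 10)  # (batch, channel, time)
# >>> conv(x).shape
# torch.Size([2, 32, 10])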