Switch to a better masking scheme.
Layers now have get_input_mask() and get_output_mask() functions
which you can use to get an int8 array representing which data
is masked.
wxs committed Jun 25, 2015
1 parent 4962f18 commit 62392a4
Showing 6 changed files with 90 additions and 118 deletions.
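
As a rough usage sketch (not part of the commit), the new accessors might be exercised as below. It assumes the Embedding mask_zero flag and the connect()/get_output_mask() API introduced in the diffs that follow, with Theano available to compile the symbolic mask; names and values are illustrative only.

import numpy as np
import theano

from keras.layers.embeddings import Embedding
from keras.layers.core import Dropout

emb = Embedding(input_dim=10, output_dim=4, mask_zero=True)  # index 0 is treated as padding
drop = Dropout(0.5)
drop.connect(emb)  # connect() now rejects non-masking layers placed after a masked output

# get_output_mask() returns a symbolic Theano expression; compile it to inspect values.
mask_fn = theano.function([emb.input], drop.get_output_mask(train=False))
X = np.array([[3, 7, 0, 0]], dtype='int32')  # trailing zeros are padding
print(mask_fn(X))  # expected: [[1 1 0 0]]
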
1 change: 0 additions & 1 deletion keras/activations.py
@@ -2,7 +2,6 @@
import theano
import theano.tensor as T
import numpy as np
from .utils.theano_utils import default_mask_val, get_mask
import types

def softmax(x):
58 changes: 34 additions & 24 deletions keras/layers/core.py
@@ -6,7 +6,7 @@
import numpy as np

from .. import activations, initializations
from ..utils.theano_utils import shared_zeros, floatX, shared_scalar, get_mask, default_mask_val
from ..utils.theano_utils import shared_zeros, floatX, shared_scalar
from ..utils.generic_utils import make_tuple
from .. import regularizers
from .. import constraints
@@ -20,6 +20,8 @@ def __init__(self):
self.params = []

def connect(self, layer):
if layer.get_output_mask() is not None and not self.supports_mask():
raise Exception("Attached non-masking layer to layer with masked output")
self.previous = layer

def get_output(self, train):
@@ -31,6 +33,12 @@ def get_input(self, train):
else:
return self.input

def supports_mask(self):
return False

def get_output_mask(self, train=None):
return None

def set_weights(self, weights):
for p, w in zip(self.params, weights):
if p.eval().shape != w.shape:
@@ -74,6 +82,16 @@ def get_params(self):

return self.params, regs, consts

class MaskedLayer(Layer):
def supports_mask(self):
return True

def get_output_mask(self, train=None):
return self.previous.get_output_mask(train)

def get_input_mask(self, train=None):
return self.previous.get_output_mask(train)
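
The Layer/MaskedLayer split is what the connect() check above enforces. As a hedged illustration (assuming Dense is still a plain, non-masking Layer at this commit), attaching Dense directly behind a mask-producing Embedding now fails fast, while any MaskedLayer subclass simply forwards the upstream mask:

# Illustrative sketch only; Dense being non-masking is an assumption, not shown in this diff.
from keras.layers.core import Dense, MaskedLayer
from keras.layers.embeddings import Embedding

emb = Embedding(input_dim=10, output_dim=4, mask_zero=True)

dense = Dense(4, 2)
try:
    dense.connect(emb)  # plain Layer: supports_mask() returns False
except Exception as e:
    print(e)            # "Attached non-masking layer to layer with masked output"

class PassThrough(MaskedLayer):
    # supports_mask() is True and the inherited get_output_mask()
    # forwards the previous layer's mask unchanged.
    def get_output(self, train):
        return self.get_input(train)

ok = PassThrough()
ok.connect(emb)  # accepted; ok.get_output_mask() is emb's mask expression
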


class Merge(object):
def __init__(self, models, mode='sum'):
@@ -139,54 +157,49 @@ def get_config(self):
"mode":self.mode}


class Dropout(Layer):
class Dropout(MaskedLayer):
'''
Hinton's dropout.
'''
def __init__(self, p, mask_val=default_mask_val):
def __init__(self, p):
super(Dropout,self).__init__()
self.p = p
self.mask_val = shared_scalar(mask_val)

def get_output(self, train):
X = self.get_input(train)
mask = get_mask(X, self.mask_val)
mask = self.get_output_mask(train)
if self.p > 0.:
retain_prob = 1. - self.p
if train:
X *= srng.binomial(X.shape, p=retain_prob, dtype=theano.config.floatX)
else:
X *= retain_prob
return mask * X + (1 - mask) * self.mask_val
return X
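
With the per-value mask arithmetic gone, Dropout reverts to the usual drop-at-train / rescale-at-test convention and leaves masking to the MaskedLayer machinery. A small NumPy sketch of that convention (hypothetical values, not taken from the diff):

import numpy as np

rng = np.random.RandomState(0)
p = 0.5                                 # drop probability
X = np.array([[1.0, 2.0, 3.0, 4.0]])

# Training: zero out units with probability p (no rescaling at train time here).
train_out = X * rng.binomial(1, 1.0 - p, size=X.shape)

# Test: keep every unit but scale by the retain probability, so the
# expected magnitude matches what the layer produced during training.
test_out = X * (1.0 - p)
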

def get_config(self):
return {"name":self.__class__.__name__,
"p":self.p,
"mask_val":self.mask_val.eval()}
"p":self.p}


class Activation(Layer):
class Activation(MaskedLayer):
'''
Apply an activation function to an output.
'''
def __init__(self, activation, target=0, beta=0.1, mask_val=default_mask_val):
def __init__(self, activation, target=0, beta=0.1):
super(Activation,self).__init__()
self.activation = activations.get(activation)
self.target = target
self.beta = beta
self.mask_val = shared_scalar(mask_val)

def get_output(self, train):
X = self.get_input(train)
mask = get_mask(X, self.mask_val)
return mask * self.activation(X) + (1 - mask) * self.mask_val
return self.activation(X)

def get_config(self):
return {"name":self.__class__.__name__,
"activation":self.activation.__name__,
"target":self.target,
"beta":self.beta,
"mask_val":self.mask_val.eval()}
"beta":self.beta}


class Reshape(Layer):
@@ -284,7 +297,7 @@ def get_config(self):
"activation":self.activation.__name__}


class TimeDistributedDense(Layer):
class TimeDistributedDense(MaskedLayer):
'''
Apply the same Dense layer to every slice along dimension[1] (the shared dimension) of the input.
Especially useful after a recurrent network with 'return_sequences=True'
@@ -293,7 +306,7 @@ class TimeDistributedDense(Layer):
'''
def __init__(self, input_dim, output_dim, init='glorot_uniform', activation='linear', weights=None,
W_regularizer=None, b_regularizer=None, W_constraint=None, b_constraint=None, mask_val=default_mask_val):
W_regularizer=None, b_regularizer=None, W_constraint=None, b_constraint=None):

super(TimeDistributedDense,self).__init__()
self.init = initializations.get(init)
@@ -309,21 +322,19 @@ def __init__(self, input_dim, output_dim, init='glorot_uniform', activation='lin

self.regularizers = [W_regularizer, b_regularizer]
self.constraints = [W_constraint, b_constraint]
self.mask_val = shared_scalar(mask_val)

if weights is not None:
self.set_weights(weights)

def get_output(self, train):
X = self.get_input(train)
X = X.dimshuffle(1,0,2)
mask = get_mask(X, self.mask_val)

def act_func(X, mask):
return mask * self.activation(T.dot(X, self.W) + self.b) + (1 - mask) * self.mask_val
def act_func(X):
return self.activation(T.dot(X, self.W) + self.b)

output, _ = theano.scan(fn = act_func,
sequences = [X, mask],
sequences = X,
outputs_info=None)
return output.dimshuffle(1,0,2)
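
Equivalently (a NumPy sketch with made-up shapes, not part of the commit), the scan applies one shared weight matrix and bias to every timestep of a (nb_samples, timesteps, input_dim) tensor:

import numpy as np

nb_samples, timesteps, input_dim, output_dim = 2, 5, 3, 4
X = np.random.randn(nb_samples, timesteps, input_dim)
W = np.random.randn(input_dim, output_dim)
b = np.zeros(output_dim)

# The same W and b are reused at every timestep, mirroring the theano.scan above
# (activation omitted; the layer's default is 'linear').
out = np.stack([X[:, t, :].dot(W) + b for t in range(timesteps)], axis=1)
assert out.shape == (nb_samples, timesteps, output_dim)
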

@@ -332,8 +343,7 @@ def get_config(self):
"input_dim":self.input_dim,
"output_dim":self.output_dim,
"init":self.init.__name__,
"activation":self.activation.__name__,
"mask_val":self.mask_val.eval()}
"activation":self.activation.__name__}

class AutoEncoder(Layer):
'''
19 changes: 11 additions & 8 deletions keras/layers/embeddings.py
@@ -3,33 +3,29 @@
import theano.tensor as T

from .. import activations, initializations
from ..layers.core import Layer, default_mask_val
from ..layers.core import Layer, MaskedLayer
from ..utils.theano_utils import sharedX

from ..constraints import unitnorm


class Embedding(Layer):
class Embedding(MaskedLayer):
'''
Turn positive integers (indexes) into dense vectors of fixed size.
e.g. [[4], [20]] -> [[0.25, 0.1], [0.6, -0.2]]
@input_dim: size of vocabulary (highest input integer + 1)
@out_dim: size of dense representation
'''
def __init__(self, input_dim, output_dim, init='uniform', weights=None, W_regularizer=None, W_constraint=None, zero_is_mask=False, mask_val=default_mask_val):
def __init__(self, input_dim, output_dim, init='uniform', weights=None, W_regularizer=None, W_constraint=None, mask_zero=False):
super(Embedding,self).__init__()
self.init = initializations.get(init)
self.input_dim = input_dim
self.output_dim = output_dim

self.input = T.imatrix()
self.W = self.init((self.input_dim, self.output_dim))
self.zero_is_mask = zero_is_mask

if zero_is_mask:
# This doesn't seem particularly elegant
self.W = sharedX(T.set_subtensor(self.W[0, :], mask_val).eval())
self.mask_zero = mask_zero

self.params = [self.W]
self.constraints = [W_constraint]
@@ -38,6 +34,13 @@ def __init__(self, input_dim, output_dim, init='uniform', weights=None, W_regula
if weights is not None:
self.set_weights(weights)

def get_output_mask(self, train=None):
X = self.get_input(train)
if not self.mask_zero:
return T.ones_like(X)
else:
return T.ones_like(X) * (1 - T.eq(X,0))
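
In NumPy terms (a sketch, not part of the diff), the mask returned for a padded batch of indices is a 0/1 indicator of the non-zero entries when mask_zero=True, and all ones otherwise:

import numpy as np

X = np.array([[3, 7, 0, 0],
              [5, 0, 0, 0]], dtype='int32')   # zeros are padding
mask_zero = True

# Mirrors get_output_mask(): ones_like(X) * (1 - eq(X, 0))
mask = np.ones_like(X) * (1 - (X == 0).astype('int32')) if mask_zero else np.ones_like(X)
# mask -> [[1, 1, 0, 0],
#          [1, 0, 0, 0]]
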

def get_output(self, train=False):
X = self.get_input(train)
out = self.W[X]
