Merge pull request #761 from udibr/RecurrentStack

fix minor bugs when handling masks in SequenceGenerator/Attention
mila-iqia · Jul 14, 2015 · 1e0aca9 · 1e0aca9
2 parents cfd0633 + efbe048
commit 1e0aca9
Show file tree

Hide file tree

Showing 3 changed files with 20 additions and 10 deletions.
diff --git a/blocks/bricks/attention.py b/blocks/bricks/attention.py
@@ -605,8 +605,8 @@ def take_glimpses(self, **kwargs):
             kwargs.pop(self.preprocessed_attended_name, None),
             kwargs.pop(self.attended_mask_name, None),
             **dict_union(states, glimpses_needed))
-        if kwargs:
-            raise ValueError("extra args to take_glimpses: {}".format(kwargs))
+        # At this point kwargs may still contain additional items,
+        # e.g. AttentionRecurrent.transition.apply.contexts
         return result
 
     @take_glimpses.property('outputs')
@@ -634,13 +634,15 @@ def compute_states(self, **kwargs):
             Current states computed by `self.transition`.
 
         """
-        # Masks are not mandatory, that's why 'must_have=False'
-        sequences = dict_subset(kwargs, self._sequence_names,
-                                pop=True, must_have=False)
+        # make sure we are not popping the mask
+        normal_inputs = [name for name in self._sequence_names
+                         if 'mask' not in name]
+        sequences = dict_subset(kwargs, normal_inputs, pop=True)
         glimpses = dict_subset(kwargs, self._glimpse_names, pop=True)
         if self.add_contexts:
             kwargs.pop(self.attended_name)
-            kwargs.pop(self.attended_mask_name)
+            # attended_mask_name can be optional
+            kwargs.pop(self.attended_mask_name, None)
 
         sequences.update(self.distribute.apply(
             as_dict=True, **dict_subset(dict_union(sequences, glimpses),

diff --git a/blocks/bricks/sequence_generators.py b/blocks/bricks/sequence_generators.py
@@ -248,7 +248,8 @@ def cost_matrix(self, application_call, outputs, mask=None, **kwargs):
 
         # Prepare input for the iterative part
         states = dict_subset(kwargs, self._state_names, must_have=False)
-        contexts = dict_subset(kwargs, self._context_names)
+        # masks in context are optional (e.g. `attended_mask`)
+        contexts = dict_subset(kwargs, self._context_names, must_have=False)
         feedback = self.readout.feedback(outputs)
         inputs = self.fork.apply(feedback, as_dict=True)
 
@@ -297,7 +298,8 @@ def generate(self, outputs, **kwargs):
 
         """
         states = dict_subset(kwargs, self._state_names)
-        contexts = dict_subset(kwargs, self._context_names)
+        # masks in context are optional (e.g. `attended_mask`)
+        contexts = dict_subset(kwargs, self._context_names, must_have=False)
         glimpses = dict_subset(kwargs, self._glimpse_names)
 
         next_glimpses = self.transition.take_glimpses(

diff --git a/blocks/monitoring/evaluators.py b/blocks/monitoring/evaluators.py
@@ -262,9 +262,15 @@ def _compile(self):
         if self.theano_buffer.accumulation_updates:
             updates = OrderedDict()
             updates.update(self.theano_buffer.accumulation_updates)
-            if self.updates:
-                updates.update(self.updates)
             inputs += self.theano_buffer.inputs
+        if self.updates:
+            # Handle the case in which we don't have any Theano variables
+            # to evaluate but we do have MonitoredQuantity instances
+            # that may require updates of their own
+            if updates is None:
+                updates = self.updates
+            else:
+                updates.update(self.updates)
         inputs += self.monitored_quantities_buffer.inputs
         outputs = self.monitored_quantities_buffer.requires