Skip to content

Commit

Permalink
Changelog
Browse files Browse the repository at this point in the history
  • Loading branch information
dirkgr committed Aug 26, 2020
2 parents 1b08fd6 + 2dd335e commit cde06e6
Show file tree
Hide file tree
Showing 8 changed files with 64 additions and 18 deletions.
8 changes: 8 additions & 0 deletions CHANGELOG.md
Expand Up @@ -9,6 +9,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

### Added

- `Predictor.capture_model_internals()` now accepts a regex specifying which modules to capture
- A new high-performance default `DataLoader`: `MultiProcessDataLoading`.

### Changed
Expand All @@ -18,6 +19,12 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- The `DataLoader` now decides whether to load instances lazily or not.
With the `PyTorchDataLoader` this is controlled with the `lazy` parameter, but with
the `MultiProcessDataLoading` this is controlled by the `max_instances_in_memory` setting.


## [v1.1.0rc4](https://github.com/allenai/allennlp/releases/tag/v1.1.0rc4) - 2020-08-20

### Added

- Added a workflow to GitHub Actions that will automatically close unassigned stale issues and
ping the assignees of assigned stale issues.

Expand All @@ -40,6 +47,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
This affected primarily the Roberta SST model.
- Improved help text for using the --overrides command line flag.


## [v1.1.0rc2](https://github.com/allenai/allennlp/releases/tag/v1.1.0rc2) - 2020-07-31

### Changed
Expand Down
4 changes: 4 additions & 0 deletions RELEASE_PROCESS.md
Expand Up @@ -56,6 +56,10 @@ click edit. Now copy over the latest section from the [`CHANGELOG.md`](https://r

That's it! GitHub Actions will handle the rest.


6. After publishing the release for the core repo, follow the same process to publish a release for the `allennlp-models` repo.


## Fixing a failed release

If for some reason the GitHub Actions release workflow failed with an error that needs to be fixed, you'll have to delete both the tag and corresponding release from GitHub. After you've pushed a fix, delete the tag from your local clone with
Expand Down
13 changes: 6 additions & 7 deletions allennlp/nn/util.py
Expand Up @@ -1271,7 +1271,6 @@ def batched_span_select(target: torch.Tensor, spans: torch.LongTensor) -> torch.
embedding_size)`.
This function returns segmented spans in the target with respect to the provided span indices.
It does not guarantee element order within each span.
# Parameters
Expand Down Expand Up @@ -1318,12 +1317,12 @@ def batched_span_select(target: torch.Tensor, spans: torch.LongTensor) -> torch.
# inclusive, so we want to include indices which are equal to span_widths rather
# than using it as a non-inclusive upper bound.
span_mask = max_span_range_indices <= span_widths
raw_span_indices = span_ends - max_span_range_indices
# We also don't want to include span indices which are less than zero,
# which happens because some spans near the beginning of the sequence
# have an end index < max_batch_span_width, so we add this to the mask here.
span_mask = span_mask & (raw_span_indices >= 0)
span_indices = torch.nn.functional.relu(raw_span_indices.float()).long()
raw_span_indices = span_starts + max_span_range_indices
# We also don't want to include span indices which are greater than the sequence_length,
# which happens because some spans near the end of the sequence
# have a start index + max_batch_span_width > sequence_length, so we add this to the mask here.
span_mask = span_mask & (raw_span_indices < target.size(1)) & (0 <= raw_span_indices)
span_indices = raw_span_indices * span_mask

# Shape: (batch_size, num_spans, max_batch_span_width, embedding_dim)
span_embeddings = batched_index_select(target, span_indices)
Expand Down
8 changes: 5 additions & 3 deletions allennlp/predictors/predictor.py
@@ -1,5 +1,6 @@
from typing import List, Iterator, Dict, Tuple, Any, Type
import json
import re
from contextlib import contextmanager

import numpy
Expand Down Expand Up @@ -144,7 +145,7 @@ def hook_layers(module, grad_in, grad_out):
return backward_hooks

@contextmanager
def capture_model_internals(self) -> Iterator[dict]:
def capture_model_internals(self, module_regex: str = ".*") -> Iterator[dict]:
"""
Context manager that captures the internal-module outputs of
this predictor's model. The idea is that you could use it as follows:
Expand All @@ -166,8 +167,9 @@ def _add_output(mod, _, outputs):

return _add_output

for idx, module in enumerate(self._model.modules()):
if module != self._model:
regex = re.compile(module_regex)
for idx, (name, module) in enumerate(self._model.named_modules()):
if regex.fullmatch(name) and module != self._model:
hook = module.register_forward_hook(add_output(idx))
hooks.append(hook)

Expand Down
2 changes: 1 addition & 1 deletion allennlp/version.py
Expand Up @@ -4,7 +4,7 @@
_MINOR = "1"
# On master and in a nightly release the patch should be one ahead of the last
# released build.
_PATCH = "0rc3"
_PATCH = "0rc4"
# This is mainly for nightly builds which have the suffix ".dev$DATE". See
# https://semver.org/#is-v123-a-semantic-version for the semantics.
_SUFFIX = os.environ.get("ALLENNLP_VERSION_SUFFIX", "")
Expand Down
2 changes: 1 addition & 1 deletion dev-requirements.txt
Expand Up @@ -40,7 +40,7 @@ nr.interface<0.0.4

mkdocs==1.1.2
mkdocs-material>=5.5.0,<5.6.0
markdown-include==0.5.1
markdown-include==0.6.0

#### PACKAGE-UPLOAD PACKAGES ####

Expand Down
12 changes: 6 additions & 6 deletions tests/nn/util_test.py
Expand Up @@ -1121,16 +1121,16 @@ def test_batched_span_select(self):
[
[
[[0, 0], [-1, -1], [-1, -1], [-1, -1]],
[[2, 2], [1, 1], [-1, -1], [-1, -1]],
[[8, 8], [7, 7], [6, 6], [5, 5]],
[[1, 1], [2, 2], [-1, -1], [-1, -1]],
[[5, 5], [6, 6], [7, 7], [8, 8]],
[[10, 10], [-1, -1], [-1, -1], [-1, -1]],
],
[[[i, i], [-1, -1], [-1, -1], [-1, -1]] for i in range(3, -1, -1)],
[
[[3, 3], [2, 2], [1, 1], [0, 0]],
[[4, 4], [3, 3], [2, 2], [1, 1]],
[[5, 5], [4, 4], [3, 3], [2, 2]],
[[11, 11], [10, 10], [-1, -1], [-1, -1]],
[[0, 0], [1, 1], [2, 2], [3, 3]],
[[1, 1], [2, 2], [3, 3], [4, 4]],
[[2, 2], [3, 3], [4, 4], [5, 5]],
[[10, 10], [11, 11], [-1, -1], [-1, -1]],
],
],
)
Expand Down
33 changes: 33 additions & 0 deletions tests/predictors/predictor_test.py
Expand Up @@ -78,3 +78,36 @@ def test_get_gradients_when_requires_grad_is_false(self):
assert bool(grads)
# ensure that no side effects remain
assert not embedding_layer.weight.requires_grad

def test_captures_model_internals(self):
    """Internal module outputs are captured, and the regex argument narrows which ones."""
    payload = {"sentence": "I always write unit tests"}

    fixture_path = (
        self.FIXTURES_ROOT
        / "basic_classifier"
        / "embedding_with_trainable_is_false"
        / "model.tar.gz"
    )
    predictor = Predictor.from_archive(load_archive(fixture_path))

    # With the default pattern (".*") every submodule's output is recorded.
    with predictor.capture_model_internals() as internals:
        predictor.predict_json(payload)

    assert len(internals) == 10

    # Restricting to the text-field embedder should capture only its two modules.
    with predictor.capture_model_internals(r"_text_field_embedder.*") as internals:
        predictor.predict_json(payload)
    assert len(internals) == 2

def test_predicts_batch_json(self):
    """predict_batch_json returns exactly one result per input in the batch."""
    payload = {"sentence": "I always write unit tests"}

    fixture_path = (
        self.FIXTURES_ROOT
        / "basic_classifier"
        / "embedding_with_trainable_is_false"
        / "model.tar.gz"
    )
    predictor = Predictor.from_archive(load_archive(fixture_path))

    batch_results = predictor.predict_batch_json([payload, payload, payload])
    assert len(batch_results) == 3

0 comments on commit cde06e6

Please sign in to comment.