diff --git a/CHANGELOG.md b/CHANGELOG.md index 52e872b2685..bfee8c1faf2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added +- `Predictor.capture_model_internals()` now accepts a regex specifying which modules to capture - A new high-performance default `DataLoader`: `MultiProcessDataLoading`. ### Changed @@ -18,6 +19,12 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - The `DataLoader` now decides whether to load instances lazily or not. With the `PyTorchDataLoader` this is controlled with the `lazy` parameter, but with the `MultiProcessDataLoading` this is controlled by the `max_instances_in_memory` setting. + + +## [v1.1.0rc4](https://github.com/allenai/allennlp/releases/tag/v1.1.0rc4) - 2020-08-20 + +### Added + - Added a workflow to GitHub Actions that will automatically close unassigned stale issues and ping the assignees of assigned stale issues. @@ -40,6 +47,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 This affected primarily the Roberta SST model. - Improved help text for using the --overrides command line flag. + ## [v1.1.0rc2](https://github.com/allenai/allennlp/releases/tag/v1.1.0rc2) - 2020-07-31 ### Changed diff --git a/RELEASE_PROCESS.md b/RELEASE_PROCESS.md index 2b5859d8c44..682377bcce2 100644 --- a/RELEASE_PROCESS.md +++ b/RELEASE_PROCESS.md @@ -56,6 +56,10 @@ click edit. Now copy over the latest section from the [`CHANGELOG.md`](https://r That's it! GitHub Actions will handle the rest. + +6. After publishing the release for the core repo, follow the same process to publish a release for the `allennlp-models` repo. + + ## Fixing a failed release If for some reason the GitHub Actions release workflow failed with an error that needs to be fixed, you'll have to delete both the tag and corresponding release from GitHub. 
After you've pushed a fix, delete the tag from your local clone with diff --git a/allennlp/nn/util.py b/allennlp/nn/util.py index 56b597b5820..91125ce5ca3 100644 --- a/allennlp/nn/util.py +++ b/allennlp/nn/util.py @@ -1271,7 +1271,6 @@ def batched_span_select(target: torch.Tensor, spans: torch.LongTensor) -> torch. embedding_size)`. This function returns segmented spans in the target with respect to the provided span indices. - It does not guarantee element order within each span. # Parameters @@ -1318,12 +1317,12 @@ def batched_span_select(target: torch.Tensor, spans: torch.LongTensor) -> torch. # inclusive, so we want to include indices which are equal to span_widths rather # than using it as a non-inclusive upper bound. span_mask = max_span_range_indices <= span_widths - raw_span_indices = span_ends - max_span_range_indices - # We also don't want to include span indices which are less than zero, - # which happens because some spans near the beginning of the sequence - # have an end index < max_batch_span_width, so we add this to the mask here. - span_mask = span_mask & (raw_span_indices >= 0) - span_indices = torch.nn.functional.relu(raw_span_indices.float()).long() + raw_span_indices = span_starts + max_span_range_indices + # We also don't want to include span indices which are greater than the sequence_length, + # which happens because some spans near the end of the sequence + # have a start index + max_batch_span_width > sequence_length, so we add this to the mask here. 
+ span_mask = span_mask & (raw_span_indices < target.size(1)) & (0 <= raw_span_indices) + span_indices = raw_span_indices * span_mask # Shape: (batch_size, num_spans, max_batch_span_width, embedding_dim) span_embeddings = batched_index_select(target, span_indices) diff --git a/allennlp/predictors/predictor.py b/allennlp/predictors/predictor.py index d57b870c837..1cb52ca0a70 100644 --- a/allennlp/predictors/predictor.py +++ b/allennlp/predictors/predictor.py @@ -1,5 +1,6 @@ from typing import List, Iterator, Dict, Tuple, Any, Type import json +import re from contextlib import contextmanager import numpy @@ -144,7 +145,7 @@ def hook_layers(module, grad_in, grad_out): return backward_hooks @contextmanager - def capture_model_internals(self) -> Iterator[dict]: + def capture_model_internals(self, module_regex: str = ".*") -> Iterator[dict]: """ Context manager that captures the internal-module outputs of this predictor's model. The idea is that you could use it as follows: @@ -166,8 +167,9 @@ def _add_output(mod, _, outputs): return _add_output - for idx, module in enumerate(self._model.modules()): - if module != self._model: + regex = re.compile(module_regex) + for idx, (name, module) in enumerate(self._model.named_modules()): + if regex.fullmatch(name) and module != self._model: hook = module.register_forward_hook(add_output(idx)) hooks.append(hook) diff --git a/allennlp/version.py b/allennlp/version.py index 56640f74e26..d681c220d67 100644 --- a/allennlp/version.py +++ b/allennlp/version.py @@ -4,7 +4,7 @@ _MINOR = "1" # On master and in a nightly release the patch should be one ahead of the last # released build. -_PATCH = "0rc3" +_PATCH = "0rc4" # This is mainly for nightly builds which have the suffix ".dev$DATE". See # https://semver.org/#is-v123-a-semantic-version for the semantics. 
_SUFFIX = os.environ.get("ALLENNLP_VERSION_SUFFIX", "") diff --git a/dev-requirements.txt b/dev-requirements.txt index ee371b0301b..6a896494eaa 100644 --- a/dev-requirements.txt +++ b/dev-requirements.txt @@ -40,7 +40,7 @@ nr.interface<0.0.4 mkdocs==1.1.2 mkdocs-material>=5.5.0,<5.6.0 -markdown-include==0.5.1 +markdown-include==0.6.0 #### PACKAGE-UPLOAD PACKAGES #### diff --git a/tests/nn/util_test.py b/tests/nn/util_test.py index e0a1e2b5331..1655fcf6a53 100644 --- a/tests/nn/util_test.py +++ b/tests/nn/util_test.py @@ -1121,16 +1121,16 @@ def test_batched_span_select(self): [ [ [[0, 0], [-1, -1], [-1, -1], [-1, -1]], - [[2, 2], [1, 1], [-1, -1], [-1, -1]], - [[8, 8], [7, 7], [6, 6], [5, 5]], + [[1, 1], [2, 2], [-1, -1], [-1, -1]], + [[5, 5], [6, 6], [7, 7], [8, 8]], [[10, 10], [-1, -1], [-1, -1], [-1, -1]], ], [[[i, i], [-1, -1], [-1, -1], [-1, -1]] for i in range(3, -1, -1)], [ - [[3, 3], [2, 2], [1, 1], [0, 0]], - [[4, 4], [3, 3], [2, 2], [1, 1]], - [[5, 5], [4, 4], [3, 3], [2, 2]], - [[11, 11], [10, 10], [-1, -1], [-1, -1]], + [[0, 0], [1, 1], [2, 2], [3, 3]], + [[1, 1], [2, 2], [3, 3], [4, 4]], + [[2, 2], [3, 3], [4, 4], [5, 5]], + [[10, 10], [11, 11], [-1, -1], [-1, -1]], ], ], ) diff --git a/tests/predictors/predictor_test.py b/tests/predictors/predictor_test.py index c3e280836e9..1c04e4255af 100644 --- a/tests/predictors/predictor_test.py +++ b/tests/predictors/predictor_test.py @@ -78,3 +78,36 @@ def test_get_gradients_when_requires_grad_is_false(self): assert bool(grads) # ensure that no side effects remain assert not embedding_layer.weight.requires_grad + + def test_captures_model_internals(self): + inputs = {"sentence": "I always write unit tests"} + + archive = load_archive( + self.FIXTURES_ROOT + / "basic_classifier" + / "embedding_with_trainable_is_false" + / "model.tar.gz" + ) + predictor = Predictor.from_archive(archive) + + with predictor.capture_model_internals() as internals: + predictor.predict_json(inputs) + + assert len(internals) == 10 + + 
with predictor.capture_model_internals(r"_text_field_embedder.*") as internals: + predictor.predict_json(inputs) + assert len(internals) == 2 + + def test_predicts_batch_json(self): + inputs = {"sentence": "I always write unit tests"} + + archive = load_archive( + self.FIXTURES_ROOT + / "basic_classifier" + / "embedding_with_trainable_is_false" + / "model.tar.gz" + ) + predictor = Predictor.from_archive(archive) + results = predictor.predict_batch_json([inputs] * 3) + assert len(results) == 3