diff --git a/bleach/html5lib_shim.py b/bleach/html5lib_shim.py
index 6fc90485..d121953b 100644
--- a/bleach/html5lib_shim.py
+++ b/bleach/html5lib_shim.py
@@ -385,7 +385,17 @@ def __iter__(self):
yield token
if last_error_token:
- yield last_error_token
+ if last_error_token["data"] == "eof-in-tag-name":
+ # Handle the case where the text being parsed ends with <
+ # followed by a series of characters. It's treated as a tag
+ # name that abruptly ends, but we should treat that like
+ # character data
+ yield {
+ "type": TAG_TOKEN_TYPE_CHARACTERS,
+ "data": "<" + self.currentToken["name"],
+ }
+ else:
+ yield last_error_token
def consumeEntity(self, allowedChar=None, fromAttribute=False):
# If this tokenizer is set to consume entities, then we can let the
diff --git a/tests/test_clean.py b/tests/test_clean.py
index b9c262ab..ab112536 100644
--- a/tests/test_clean.py
+++ b/tests/test_clean.py
@@ -156,6 +156,22 @@ def test_bare_entities_get_escaped_correctly(text, expected):
assert clean(text) == expected
+@pytest.mark.parametrize(
+    "text, expected",
+    [
+        # "<" in the middle of the text, followed by characters up to EOF:
+        # the tokenizer reports eof-in-tag-name and the shim re-emits the
+        # "<" plus the partial tag name as character data.
+        ("x<y", "x&lt;y"),
+        # Same situation with "<" as the very first character.
+        ("<y", "&lt;y"),
+        # A complete tag-like token. NOTE(review): expected value assumes
+        # "y" is not in the default allowed tags, so it is escaped rather
+        # than passed through -- confirm against clean()'s defaults.
+        ("<y>", "&lt;y&gt;"),
+    ],
+)
+def test_lessthan_escaping(text, expected):
+    # Tests whether < gets escaped correctly in a series of edge cases where
+    # the html5lib tokenizer hits an error because it's not the beginning of a
+    # tag. Every input must actually contain "<", and every expected value
+    # must contain the escaped form "&lt;" rather than a raw "<".
+    assert clean(text) == expected
+
+
@pytest.mark.parametrize(
"text, expected",
[