scrapy · redapple · Apr 6, 2016 · Feb 8, 2016 · Apr 4, 2016 · Feb 8, 2016
diff --git a/NEWS b/NEWS
@@ -13,6 +13,9 @@ Changes to safe_url_string:
 
 Package is now properly marked as ``zip_safe``.
 
+``html_body_declared_encoding`` now also detects the encoding when
+the ``charset`` attribute is not the sole attribute in ``<meta>``.
+
 1.13.0 (2015-11-05)
 -------------------
 

diff --git a/tests/test_encoding.py b/tests/test_encoding.py
@@ -8,10 +8,14 @@ class RequestEncodingTests(unittest.TestCase):
         # Content-Type as meta http-equiv
         b"""<meta http-equiv="content-type" content="text/html;charset=UTF-8" />""",
         b"""\n<meta http-equiv="Content-Type"\ncontent="text/html; charset=utf-8">""",
+        b"""<meta http-equiv="Content-Type" content="text/html" charset="utf-8">""",
+        b"""<meta http-equiv=Content-Type content="text/html" charset='utf-8'>""",
+        b"""<meta http-equiv="Content-Type" content\t=\n"text/html" charset\t="utf-8">""",
         b"""<meta content="text/html; charset=utf-8"\n http-equiv='Content-Type'>""",
         b""" bad html still supported < meta http-equiv='Content-Type'\n content="text/html; charset=utf-8">""",
         # html5 meta charset
         b"""<meta charset="utf-8">""",
+        b"""<meta charset =\n"utf-8">""",
         # xml encoding
         b"""<?xml version="1.0" encoding="utf-8"?>""",
     ]

diff --git a/w3lib/encoding.py b/w3lib/encoding.py
@@ -22,14 +22,25 @@ def http_content_type_encoding(content_type):
 
 # regexp for parsing HTTP meta tags
 _TEMPLATE = r'''%s\s*=\s*["']?\s*%s\s*["']?'''
+_SKIP_ATTRS = '''(?:\\s+
+    [^=<>/\\s"'\x00-\x1f\x7f]+  # Attribute name
+    (?:\\s*=\\s*
+    (?:  # ' and " are entity encoded (&apos;, &quot;), so no need for \', \"
+        '[^']*'   # attr in '
+        |
+        "[^"]*"   # attr in "
+        |
+        [^'"\\s]+  # attr having no ' nor "
+    ))?
+)*?'''  # verbose-mode fragment: relies on the (?x) that opens _BODY_ENCODING_PATTERN
 _HTTPEQUIV_RE = _TEMPLATE % ('http-equiv', 'Content-Type')
 _CONTENT_RE = _TEMPLATE % ('content', r'(?P<mime>[^;]+);\s*charset=(?P<charset>[\w-]+)')
 _CONTENT2_RE = _TEMPLATE % ('charset', r'(?P<charset2>[\w-]+)')
 _XML_ENCODING_RE = _TEMPLATE % ('encoding', r'(?P<xmlcharset>[\w-]+)')
 
 # check for meta tags, or xml decl. and stop search if a body tag is encountered
-_BODY_ENCODING_PATTERN = r'<\s*(?:meta(?:(?:\s+%s|\s+%s){2}|\s+%s)|\?xml\s[^>]+%s|body)' % \
-                        (_HTTPEQUIV_RE, _CONTENT_RE, _CONTENT2_RE, _XML_ENCODING_RE)
+_BODY_ENCODING_PATTERN = r'(?x)<\s*(?:meta%s(?:(?:\s+%s|\s+%s){2}|\s+%s)|\?xml\s[^>]+%s|body)' % (
+    _SKIP_ATTRS, _HTTPEQUIV_RE, _CONTENT_RE, _CONTENT2_RE, _XML_ENCODING_RE)  # (?x) must lead: mid-pattern global flags are an error on Python 3.11+
 _BODY_ENCODING_STR_RE = re.compile(_BODY_ENCODING_PATTERN, re.I)
 _BODY_ENCODING_BYTES_RE = re.compile(_BODY_ENCODING_PATTERN.encode('ascii'), re.I)