diff --git a/CHANGELOG.md b/CHANGELOG.md
index 9fd45dd331..dce13fadbc 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -37,6 +37,7 @@
* Fix processing of `--set` options (#5067, @marwinxxii)
* Lowercase user-added header names and emit a log message to notify the user when using HTTP/2 (#4746, @mhils)
* Exit early if there are errors on startup (#4544, @mhils)
+* Fix encoding guessing: only search for meta tags in HTML bodies (#4566, @Prinzhorn)
## 28 September 2021: mitmproxy 7.0.4
diff --git a/mitmproxy/http.py b/mitmproxy/http.py
index 11af7b3e37..743c46c99b 100644
--- a/mitmproxy/http.py
+++ b/mitmproxy/http.py
@@ -414,9 +414,10 @@ def _guess_encoding(self, content: bytes = b"") -> str:
if "json" in self.headers.get("content-type", ""):
enc = "utf8"
if not enc:
- meta_charset = re.search(rb"""]+charset=['"]?([^'">]+)""", content, re.IGNORECASE)
- if meta_charset:
- enc = meta_charset.group(1).decode("ascii", "ignore")
+ if "html" in self.headers.get("content-type", ""):
+ meta_charset = re.search(rb"""]+charset=['"]?([^'">]+)""", content, re.IGNORECASE)
+ if meta_charset:
+ enc = meta_charset.group(1).decode("ascii", "ignore")
if not enc:
if "text/css" in self.headers.get("content-type", ""):
# @charset rule must be the very first thing.
diff --git a/test/mitmproxy/test_http.py b/test/mitmproxy/test_http.py
index ee8c9600d1..eb64821f3f 100644
--- a/test/mitmproxy/test_http.py
+++ b/test/mitmproxy/test_http.py
@@ -1098,6 +1098,7 @@ def test_guess_json(self):
def test_guess_meta_charset(self):
r = tresp(content=b'\xe6\x98\x8e\xe4\xbc\xaf')
+ r.headers["content-type"] = "text/html"
# "鏄庝集" is decoded form of \xe6\x98\x8e\xe4\xbc\xaf in gb18030
assert "鏄庝集" in r.text