Moved file encoding detection to charset-normalizer instead of chardet that is …

…causing too many issues. #2196
morpheus65535 · Jul 7, 2023 · dd9ce4d · dd9ce4d
1 parent 90ac551
commit dd9ce4d
Show file tree

Hide file tree

Showing 16 changed files with 4,606 additions and 3 deletions.
diff --git a/bazarr/subtitles/indexer/utils.py b/bazarr/subtitles/indexer/utils.py
@@ -7,7 +7,7 @@
 from guess_language import guess_language
 from subliminal_patch import core
 from subzero.language import Language
-from chardet import detect
+from charset_normalizer import detect
 
 from app.config import settings
 from constants import hi_regex

diff --git a/bazarr/utilities/helper.py b/bazarr/utilities/helper.py
@@ -4,7 +4,7 @@
 import logging
 import hashlib
 
-from chardet import detect
+from charset_normalizer import detect
 from bs4 import UnicodeDammit
 
 from app.config import settings

diff --git a/libs/charset_normalizer/__init__.py b/libs/charset_normalizer/__init__.py
@@ -0,0 +1,45 @@
+# -*- coding: utf-8 -*-
+"""
+Charset-Normalizer
+~~~~~~~~~~~~~~~~~~
+The Real First Universal Charset Detector.
+A library that helps you read text from an unknown charset encoding.
+Motivated by chardet, this package is trying to resolve the issue by taking a new approach.
+All IANA character set names for which the Python core library provides codecs are supported.
+
+Basic usage:
+   >>> from charset_normalizer import from_bytes
+   >>> results = from_bytes('Bсеки човек има право на образование. Oбразованието!'.encode('utf_8'))
+   >>> best_guess = results.best()
+   >>> str(best_guess)
+   'Bсеки човек има право на образование. Oбразованието!'
+
+Other methods and usages are available - see the full documentation
+at <https://github.com/Ousret/charset_normalizer>.
+:copyright: (c) 2021 by Ahmed TAHRI
+:license: MIT, see LICENSE for more details.
+"""
+import logging
+
+from .api import from_bytes, from_fp, from_path
+from .legacy import detect
+from .models import CharsetMatch, CharsetMatches
+from .utils import set_logging_handler
+from .version import VERSION, __version__
+
+__all__ = (
+    "from_fp",
+    "from_path",
+    "from_bytes",
+    "detect",
+    "CharsetMatch",
+    "CharsetMatches",
+    "__version__",
+    "VERSION",
+    "set_logging_handler",
+)
+
+# Attach a NullHandler to the top level logger by default
+# https://docs.python.org/3.3/howto/logging.html#configuring-logging-for-a-library
+
+logging.getLogger("charset_normalizer").addHandler(logging.NullHandler())