Skip to content

Commit

Permalink
Moved file encoding to charset-normalizer instead of chardet that is …
Browse files Browse the repository at this point in the history
…causing too many issues. #2196
  • Loading branch information
morpheus65535 committed Jul 7, 2023
1 parent 90ac551 commit dd9ce4d
Show file tree
Hide file tree
Showing 16 changed files with 4,606 additions and 3 deletions.
2 changes: 1 addition & 1 deletion bazarr/subtitles/indexer/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from guess_language import guess_language
from subliminal_patch import core
from subzero.language import Language
from chardet import detect
from charset_normalizer import detect

from app.config import settings
from constants import hi_regex
Expand Down
2 changes: 1 addition & 1 deletion bazarr/utilities/helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import logging
import hashlib

from chardet import detect
from charset_normalizer import detect
from bs4 import UnicodeDammit

from app.config import settings
Expand Down
45 changes: 45 additions & 0 deletions libs/charset_normalizer/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
# -*- coding: utf-8 -*-
"""
Charset-Normalizer
~~~~~~~~~~~~~~~~~~
The Real First Universal Charset Detector.
A library that helps you read text from an unknown charset encoding.
Motivated by chardet, this package tries to resolve the issue by taking a new approach.
All IANA character set names for which the Python core library provides codecs are supported.
Basic usage:
>>> from charset_normalizer import from_bytes
>>> results = from_bytes('Bсеки човек има право на образование. Oбразованието!'.encode('utf_8'))
>>> best_guess = results.best()
>>> str(best_guess)
'Bсеки човек има право на образование. Oбразованието!'
Other methods and usages are available - see the full documentation
at <https://github.com/Ousret/charset_normalizer>.
:copyright: (c) 2021 by Ahmed TAHRI
:license: MIT, see LICENSE for more details.
"""
import logging

# Re-export the package's public entry points from its submodules so callers
# can import them directly from `charset_normalizer`.
from .api import from_bytes, from_fp, from_path
from .legacy import detect
from .models import CharsetMatch, CharsetMatches
from .utils import set_logging_handler
from .version import VERSION, __version__

# Names exposed by `from charset_normalizer import *`; every entry is one of
# the names imported above.
__all__ = (
    "from_fp",
    "from_path",
    "from_bytes",
    "detect",
    "CharsetMatch",
    "CharsetMatches",
    "__version__",
    "VERSION",
    "set_logging_handler",
)

# Attach a NullHandler to the top level logger by default, as recommended for
# libraries: the handler discards records, so output is left to whatever
# logging configuration the application sets up.
# https://docs.python.org/3.3/howto/logging.html#configuring-logging-for-a-library

logging.getLogger("charset_normalizer").addHandler(logging.NullHandler())
Loading

0 comments on commit dd9ce4d

Please sign in to comment.