Skip to content

Commit

Permalink
Upgraded guessit to latest version and include new dependencies.
Browse files Browse the repository at this point in the history
  • Loading branch information
morpheus65535 committed Jan 6, 2022
1 parent 166c2a7 commit f55492a
Show file tree
Hide file tree
Showing 83 changed files with 4,389 additions and 907 deletions.
11 changes: 4 additions & 7 deletions libs/guessit/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ def guess_filename(filename, options):
print(yline)
i += 1
else:
print('GuessIt found:', json.dumps(guess, cls=GuessitEncoder, ensure_ascii=False))
print('GuessIt found:', json.dumps(guess, cls=GuessitEncoder, indent=4, ensure_ascii=False))


def display_properties(options):
Expand Down Expand Up @@ -85,10 +85,10 @@ def display_properties(options):
properties_list = list(sorted(properties.keys()))
for property_name in properties_list:
property_values = properties.get(property_name)
print(2 * ' ' + '[+] %s' % (property_name,))
print(2 * ' ' + f'[+] {property_name}')
if property_values and options.get('values'):
for property_value in property_values:
print(4 * ' ' + '[!] %s' % (property_value,))
print(4 * ' ' + f'[!] {property_value}')


def main(args=None): # pylint:disable=too-many-branches
Expand Down Expand Up @@ -136,11 +136,8 @@ def main(args=None): # pylint:disable=too-many-branches
for filename in options.get('filename'):
filenames.append(filename)
if options.get('input_file'):
input_file = open(options.get('input_file'), 'r', encoding='utf-8')
try:
with open(options.get('input_file'), 'r', encoding='utf-8') as input_file:
filenames.extend([line.strip() for line in input_file.readlines()])
finally:
input_file.close()

filenames = list(filter(lambda f: f, filenames))

Expand Down
2 changes: 1 addition & 1 deletion libs/guessit/__version__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,4 @@
Version module
"""
# pragma: no cover
__version__ = '3.3.1'
__version__ = '3.4.3'
41 changes: 29 additions & 12 deletions libs/guessit/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,11 @@
API functions that can be used by external software
"""

from collections import OrderedDict

from pathlib import Path
import os
import traceback
from collections import OrderedDict
from copy import deepcopy
from pathlib import Path

from rebulk.introspector import introspect

Expand All @@ -25,22 +25,21 @@ class GuessitException(Exception):
def __init__(self, string, options):
super().__init__("An internal error has occured in guessit.\n"
"===================== Guessit Exception Report =====================\n"
"version=%s\n"
"string=%s\n"
"options=%s\n"
f"version={__version__}\n"
f"string={str(string)}\n"
f"options={str(options)}\n"
"--------------------------------------------------------------------\n"
"%s"
f"{traceback.format_exc()}"
"--------------------------------------------------------------------\n"
"Please report at "
"https://github.com/guessit-io/guessit/issues.\n"
"====================================================================" %
(__version__, str(string), str(options), traceback.format_exc()))
"====================================================================")

self.string = string
self.options = options


def configure(options=None, rules_builder=rebulk_builder, force=False):
def configure(options=None, rules_builder=None, force=False):
"""
Load configuration files and initialize rebulk rules if required.
Expand All @@ -55,6 +54,13 @@ def configure(options=None, rules_builder=rebulk_builder, force=False):
default_api.configure(options, rules_builder=rules_builder, force=force)


def reset():
"""
Reset api internal state.
"""
default_api.reset()


def guessit(string, options=None):
"""
Retrieves all matches from string as a dict
Expand Down Expand Up @@ -104,6 +110,12 @@ def __init__(self):
self.load_config_options = None
self.advanced_config = None

def reset(self):
"""
Reset api internal state.
"""
self.__init__()

@classmethod
def _fix_encoding(cls, value):
if isinstance(value, list):
Expand All @@ -121,7 +133,7 @@ def _has_same_properties(cls, dic1, dic2, values):
return False
return True

def configure(self, options=None, rules_builder=rebulk_builder, force=False, sanitize_options=True):
def configure(self, options=None, rules_builder=None, force=False, sanitize_options=True):
"""
Load configuration files and initialize rebulk rules if required.
Expand All @@ -131,9 +143,14 @@ def configure(self, options=None, rules_builder=rebulk_builder, force=False, san
:type rules_builder:
:param force:
:type force: bool
:param sanitize_options:
:type sanitize_options: bool
:return:
:rtype: dict
"""
if not rules_builder:
rules_builder = rebulk_builder

if sanitize_options:
options = parse_options(options, True)
options = self._fix_encoding(options)
Expand All @@ -154,7 +171,7 @@ def configure(self, options=None, rules_builder=rebulk_builder, force=False, san
self.advanced_config != advanced_config

if should_build_rebulk:
self.advanced_config = advanced_config
self.advanced_config = deepcopy(advanced_config)
self.rebulk = rules_builder(advanced_config)

self.config = config
Expand Down
152 changes: 152 additions & 0 deletions libs/guessit/config/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,152 @@
"""
Config module.
"""
from importlib import import_module
from typing import Any, List

from rebulk import Rebulk

# Prefix marking a pattern string as a regular expression instead of a literal string.
_regex_prefix = 're:'
# Prefix marking an option value as a reference to an importable object.
_import_prefix = 'import:'
# Objects already resolved by _import, keyed by "<module>:<target>".
_import_cache = {}
# Prefix marking an option value as a Python expression to evaluate.
_eval_prefix = 'eval:'
# Cache consulted by _eval, keyed by the expression source string.
_eval_cache = {}
# Keys of an entry declaration that hold patterns; each one is also the name of
# the rebulk builder method looked up with getattr in load_patterns.
_pattern_types = ('regex', 'string')
# Module used for an import reference of the given option when the reference
# does not carry a module of its own ('conflict_solver' has no default).
_default_module_names = {
'validator': 'guessit.rules.common.validators',
'formatter': 'guessit.rules.common.formatters'
}


def _process_option(name: str, value: Any):
    """
    Resolve the value of a pattern option.

    Only the ``validator``, ``conflict_solver`` and ``formatter`` options are
    processed; any other option is returned untouched. A dict value is resolved
    item by item (keys are kept), ``None`` is returned as is, and anything else
    is handed to ``_process_option_executable`` together with the default
    module registered for that option, if any.

    :param name: option name
    :param value: raw option value
    :return: the resolved option value
    """
    if name not in ('validator', 'conflict_solver', 'formatter'):
        return value

    if isinstance(value, dict):
        resolved = {}
        for item_key, item_value in value.items():
            resolved[item_key] = _process_option(name, item_value)
        return resolved

    if value is None:
        return value

    default_module_name = _default_module_names.get(name)
    return _process_option_executable(value, default_module_name)


def _import(value: str, default_module_name=None):
    """
    Resolve an import reference to the object it names.

    ``value`` is either ``"<module>:<target>"`` or a bare ``"<target>"`` that is
    looked up in ``default_module_name``. ``<target>`` may be a dotted attribute
    path; it is walked with ``getattr`` starting from the imported module.
    Resolved objects are memoized in ``_import_cache``.

    :param value: import reference
    :param default_module_name: module to import when value has no module part
    :return: the imported object
    """
    # The module part is delimited by ':', so test for that separator itself.
    # Testing for '.' made a dotted target without module ("Class.attr") fail
    # to unpack, and ignored the module of a dot-free "module:attr" reference.
    if ':' in value:
        module_name, target = value.rsplit(':', 1)
    else:
        module_name, target = default_module_name, value

    import_id = module_name + ":" + target
    if import_id in _import_cache:
        return _import_cache[import_id]

    imported = import_module(module_name)
    for item in target.split("."):
        imported = getattr(imported, item)

    _import_cache[import_id] = imported
    return imported


def _eval(value: str):
    """
    Evaluate a Python expression given as source text.

    The compiled code object is stored in ``_eval_cache`` so that a repeated
    expression is compiled only once; the expression itself is evaluated on
    every call.

    :param value: source of a Python expression
    :return: the result of evaluating the expression
    """
    compiled = _eval_cache.get(value)
    if not compiled:
        compiled = compile(value, '<string>', 'eval')
        # Store the code object: the cache was read but never filled before,
        # so every call recompiled its expression.
        _eval_cache[value] = compiled
    # NOTE(security): this executes arbitrary code. It is only safe when the
    # expression comes from trusted configuration.
    return eval(compiled)  # pylint:disable=eval-used


def _process_option_executable(value: str, default_module_name=None):
    """
    Turn an option string into the object it designates.

    * ``import:<ref>`` is resolved with ``_import`` (using
      ``default_module_name`` as fallback module).
    * ``eval:<expr>`` is evaluated with ``_eval`` after removing the prefix.
    * a string starting with ``lambda `` or ``lambda:`` is evaluated as is.
    * any other string is returned unchanged.

    :param value: option string
    :param default_module_name: fallback module for import references
    :return: the imported or evaluated object, or value itself
    """
    if value.startswith(_import_prefix):
        return _import(value[len(_import_prefix):], default_module_name)

    if value.startswith(_eval_prefix):
        return _eval(value[len(_eval_prefix):])

    if value.startswith(('lambda ', 'lambda:')):
        return _eval(value)

    return value


def _process_callable_entry(callable_spec: str, rebulk: Rebulk, entry: dict):
    """
    Resolve a callable entry and invoke it on the rebulk builder.

    :param callable_spec: string resolved with ``_process_option_executable``
    :param rebulk: Rebulk builder passed as first positional argument
    :param entry: remaining entry items, passed as keyword arguments
    """
    target = _process_option_executable(callable_spec)
    target(rebulk, **entry)


def _build_entry_decl(entry, options, value):
    """
    Build the declaration dict of a single config entry.

    The declaration starts as a copy of the default options
    (``options[None]``). ``value`` is stored under the ``value`` key unless it
    starts with an underscore. A string entry becomes a single ``regex``
    pattern when it carries the regex prefix, a single ``string`` pattern
    otherwise. A dict entry is merged into the declaration, and a legacy
    ``pattern`` key is converted the same way into ``regex`` or ``string``.

    :param entry: config entry, either a pattern string or a dict
    :param options: pattern options; the ``None`` key holds the defaults
    :param value: config key the entry belongs to
    :return: the entry declaration
    :rtype: dict
    """
    entry_decl = dict(options.get(None, {}))
    if not value.startswith('_'):
        entry_decl['value'] = value

    if isinstance(entry, str):
        legacy_pattern = entry
    else:
        entry_decl.update(entry)
        # Take the legacy key out of the declaration, which is where its
        # presence is detected. Popping it from ``entry`` raised KeyError when
        # the key came from the defaults, altered the caller's config and left
        # the stale key in the declaration.
        legacy_pattern = entry_decl.pop("pattern", None)

    if legacy_pattern is not None:
        if legacy_pattern.startswith(_regex_prefix):
            entry_decl["regex"] = [legacy_pattern[len(_regex_prefix):]]
        else:
            entry_decl["string"] = [legacy_pattern]
    return entry_decl


def load_patterns(rebulk: Rebulk,
                  pattern_type: str,
                  patterns: List[str],
                  options: dict = None):
    """
    Load patterns for a prepared config entry

    The keyword arguments are the default options (``options[None]``)
    overridden by the options registered for ``pattern_type``; each of them is
    resolved with ``_process_option`` before the call.

    :param rebulk: Rebulk builder to use.
    :param pattern_type: Pattern type, the name of the rebulk method to call.
    :param patterns: Patterns
    :param options: kwargs options to pass to rebulk pattern function.
    :return:
    """
    # ``options`` is optional: fall back to an empty mapping so that both
    # lookups below are safe (the second one used to fail on None).
    if not options:
        options = {}

    item_options = dict(options.get(None) or {})
    item_options.update(options.get(pattern_type) or {})
    item_options = {name: _process_option(name, value) for name, value in item_options.items()}

    getattr(rebulk, pattern_type)(*patterns, **item_options)


def load_config_patterns(rebulk: Rebulk,
                         config: dict,
                         options: dict = None):
    """
    Load patterns defined in given config.

    Each config key maps to one entry or a list of entries. An entry holding a
    ``callable`` key is delegated to that callable with the remaining items as
    keyword arguments. Any other entry is turned into a declaration, and its
    ``regex`` and ``string`` patterns are loaded with the rest of the
    declaration as default options. The given config is not modified.

    :param rebulk: Rebulk builder to use.
    :param config: dict containing pattern definition.
    :param options: Additional pattern options to use.
    :type options: Dict[Dict[str, str]] A dict where key is the pattern type (regex, string, functional) and value is
    the default kwargs options to pass.
    :return:
    """
    if options is None:
        options = {}

    for value, raw_entries in config.items():
        entries = raw_entries if isinstance(raw_entries, list) else [raw_entries]
        for entry in entries:
            if isinstance(entry, dict) and "callable" in entry:
                # Pop from a copy: removing the key from the config itself
                # would make a second load of the same config treat this
                # entry as a plain pattern declaration.
                callable_kwargs = dict(entry)
                callable_spec = callable_kwargs.pop("callable")
                _process_callable_entry(callable_spec, rebulk, callable_kwargs)
                continue

            entry_decl = _build_entry_decl(entry, options, value)

            for pattern_type in _pattern_types:
                patterns = entry_decl.get(pattern_type)
                if not patterns:
                    continue
                if not isinstance(patterns, list):
                    patterns = [patterns]

                # Everything except the pattern lists becomes the default
                # kwargs of this load.
                patterns_entry_decl = {
                    key: item for key, item in entry_decl.items() if key not in _pattern_types
                }

                current_pattern_options = dict(options)
                current_pattern_options[None] = patterns_entry_decl

                load_patterns(rebulk, pattern_type, patterns, current_pattern_options)

0 comments on commit f55492a

Please sign in to comment.