From 3502c64272a19e37451e99539657d9e5ff4a8eb5 Mon Sep 17 00:00:00 2001 From: Goldy <66202304+THEGOLDENPRO@users.noreply.github.com> Date: Thu, 25 Apr 2024 02:16:37 +0100 Subject: [PATCH 1/3] feat: add scraper overrides #291 --- mov_cli/cli/__main__.py | 2 +- mov_cli/cli/scraper.py | 21 ++++++++++++++------- mov_cli/config.py | 6 +++++- mov_cli/config.template.toml | 3 ++- 4 files changed, 22 insertions(+), 10 deletions(-) diff --git a/mov_cli/cli/__main__.py b/mov_cli/cli/__main__.py index ce403126..3a19aa7b 100644 --- a/mov_cli/cli/__main__.py +++ b/mov_cli/cli/__main__.py @@ -84,7 +84,7 @@ def mov_cli( http_client = HTTPClient(config) - selected_scraper = select_scraper(plugins, config.fzf_enabled, config.default_scraper) + selected_scraper = select_scraper(plugins, config.scrapers, config.fzf_enabled, config.default_scraper) if selected_scraper is None: mov_cli_logger.error( diff --git a/mov_cli/cli/scraper.py b/mov_cli/cli/scraper.py index 3150b3ab..128d8d5f 100644 --- a/mov_cli/cli/scraper.py +++ b/mov_cli/cli/scraper.py @@ -46,11 +46,11 @@ def use_scraper( return chosen_scraper -def select_scraper(plugins: Dict[str, str], fzf_enabled: bool, default_scraper: Optional[str] = None) -> Optional[Tuple[str, Type[Scraper]]]: +def select_scraper(plugins: Dict[str, str], scrapers: Dict[str, str], fzf_enabled: bool, default_scraper: Optional[str] = None) -> Optional[Tuple[str, Type[Scraper]]]: plugins_data = get_plugins_data(plugins) if default_scraper is not None: - scraper_name, scraper_or_available_scrapers = get_scraper(default_scraper, plugins_data) + scraper_name, scraper_or_available_scrapers = get_scraper(default_scraper, plugins_data, scrapers) if scraper_name is None: mov_cli_logger.error( @@ -101,16 +101,23 @@ def steal_scraper_args(query: List[str]) -> ScraperOptionsT: [(x.replace("--", "").replace("-", "_"), True) for x in scrape_arguments] ) -def get_scraper(scraper_id: str, plugins_data: List[Tuple[str, str, PluginHookData]]) -> Tuple[str, Type[Scraper] | Tuple[None, List[str]]]: +def get_scraper(scraper_id: str, plugins_data: List[Tuple[str, str, PluginHookData]], user_defined_scrapers: Dict[str, str]) -> Tuple[str, Type[Scraper] | Tuple[None, List[str]]]: available_scrapers = [] + # scraper namespace override. 
+ for scraper_namespace in user_defined_scrapers: + + if scraper_id.lower() == scraper_namespace.lower(): + mov_cli_logger.debug(f"Using the scraper overridden namespace '{scraper_namespace}'...") + scraper_id = user_defined_scrapers[scraper_namespace] + for plugin_namespace, _, plugin_hook_data in plugins_data: - scrapers = plugin_hook_data["scrapers"] + plugin_scrapers = plugin_hook_data["scrapers"] - if scraper_id.lower() == plugin_namespace.lower() and "DEFAULT" in scrapers: - return f"{plugin_namespace}.DEFAULT", scrapers["DEFAULT"] + if scraper_id.lower() == plugin_namespace.lower() and "DEFAULT" in plugin_scrapers: + return f"{plugin_namespace}.DEFAULT", plugin_scrapers["DEFAULT"] - for scraper_name, scraper in scrapers.items(): + for scraper_name, scraper in plugin_scrapers.items(): id = f"{plugin_namespace}.{scraper_name}".lower() available_scrapers.append(id) diff --git a/mov_cli/config.py b/mov_cli/config.py index a889ea31..78929a76 100644 --- a/mov_cli/config.py +++ b/mov_cli/config.py @@ -47,7 +47,7 @@ class ConfigData(TypedDict): ui: ConfigUIData http: ConfigHTTPData downloads: ConfigDownloadsData - scrapers: ScrapersData + scrapers: ScrapersData | Dict[str, str] plugins: Dict[str, str] resolution: int @@ -105,6 +105,10 @@ def player(self) -> Player: def plugins(self) -> Dict[str, str]: return self.data.get("plugins", {"test": "mov-cli-test"}) + @property + def scrapers(self) -> ScrapersData | Dict[str, str]: + return self.data.get("scrapers", {}) + @property def editor(self) -> Optional[str]: """Returns the editor that should be opened while editing.""" diff --git a/mov_cli/config.template.toml b/mov_cli/config.template.toml index fc1e2f14..4c4efa7d 100644 --- a/mov_cli/config.template.toml +++ b/mov_cli/config.template.toml @@ -12,8 +12,9 @@ skip_update_checker = false [mov-cli.plugins] # E.g: namespace = "package-name" test = "mov-cli-test" -# [mov-cli.scrapers] +[mov-cli.scrapers] # # default = "films" +test = "test.DEFAULT" # [mov-cli.http] # Don't mess with it if you don't know what you are doing! 
# headers = { User-Agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/117.0" } From e7656ac9c55eb874ec747d1115786e9b102c3e1c Mon Sep 17 00:00:00 2001 From: Goldy <66202304+THEGOLDENPRO@users.noreply.github.com> Date: Thu, 25 Apr 2024 02:31:43 +0100 Subject: [PATCH 2/3] style: remove whitespace --- mov_cli/config.template.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mov_cli/config.template.toml b/mov_cli/config.template.toml index 4c4efa7d..0eb91350 100644 --- a/mov_cli/config.template.toml +++ b/mov_cli/config.template.toml @@ -12,7 +12,7 @@ skip_update_checker = false [mov-cli.plugins] # E.g: namespace = "package-name" test = "mov-cli-test" -[mov-cli.scrapers] # +[mov-cli.scrapers] # default = "films" test = "test.DEFAULT" From acd81d97df25a0d3e3695105df5dc4f84c6e3ffe Mon Sep 17 00:00:00 2001 From: Goldy <66202304+THEGOLDENPRO@users.noreply.github.com> Date: Thu, 25 Apr 2024 16:28:04 +0100 Subject: [PATCH 3/3] feat: scraper overrides with pre defined scraper options --- mov_cli/cli/__main__.py | 4 +++- mov_cli/cli/scraper.py | 31 ++++++++++++------------- mov_cli/config.py | 50 +++++++++++++++++++++++++++++------------ 3 files changed, 55 insertions(+), 30 deletions(-) diff --git a/mov_cli/cli/__main__.py b/mov_cli/cli/__main__.py index 3a19aa7b..7b49d662 100644 --- a/mov_cli/cli/__main__.py +++ b/mov_cli/cli/__main__.py @@ -93,7 +93,9 @@ def mov_cli( ) return False - chosen_scraper = use_scraper(selected_scraper, config, http_client, scrape_options) + selected_scraper[2].update(scrape_options) + + chosen_scraper = use_scraper(selected_scraper, config, http_client) choice = search(query, auto_select, chosen_scraper, config.fzf_enabled) diff --git a/mov_cli/cli/scraper.py b/mov_cli/cli/scraper.py index 128d8d5f..713472ad 100644 --- a/mov_cli/cli/scraper.py +++ b/mov_cli/cli/scraper.py @@ -4,9 +4,9 @@ if TYPE_CHECKING: from typing import Type, Optional, Tuple, List, Dict - from ..config import Config from ..media import Metadata, Media from ..http_client import HTTPClient + from ..config import Config, ScrapersConfigT from ..utils.episode_selector import EpisodeSelector from ..plugins import PluginHookData @@ -30,12 +30,11 @@ def scrape(choice: Metadata, episode: EpisodeSelector, scraper: Scraper) -> Medi return media def use_scraper( - selected_scraper: Tuple[str, Type[Scraper]], + selected_scraper: Tuple[str, Type[Scraper], ScraperOptionsT], config: Config, - http_client: HTTPClient, - scraper_options: ScraperOptionsT + http_client: HTTPClient ) -> Scraper: - scraper_name, scraper_class = selected_scraper + scraper_name, scraper_class, scraper_options = selected_scraper mov_cli_logger.info(f"Using '{Colours.BLUE.apply(scraper_name)}' scraper...") @@ -46,11 +45,11 @@ def use_scraper( return chosen_scraper -def select_scraper(plugins: Dict[str, str], scrapers: Dict[str, str], fzf_enabled: bool, default_scraper: Optional[str] = None) -> Optional[Tuple[str, Type[Scraper]]]: +def select_scraper(plugins: Dict[str, str], scrapers: ScrapersConfigT, fzf_enabled: bool, default_scraper: Optional[str] = None) -> Optional[Tuple[str, Type[Scraper], ScraperOptionsT]]: plugins_data = get_plugins_data(plugins) if default_scraper is not None: - scraper_name, scraper_or_available_scrapers = get_scraper(default_scraper, plugins_data, scrapers) + scraper_name, scraper_or_available_scrapers, scraper_options = get_scraper(default_scraper, plugins_data, scrapers) if scraper_name is None: mov_cli_logger.error( @@ -61,7 +60,7 @@ def select_scraper(plugins: 
Dict[str, str], scrapers: Dict[str, str], fzf_enable return None - return scraper_name, scraper_or_available_scrapers + return scraper_name, scraper_or_available_scrapers, scraper_options chosen_plugin = prompt( "Select a plugin", @@ -85,7 +84,7 @@ def select_scraper(plugins: Dict[str, str], scrapers: Dict[str, str], fzf_enable scraper_name, scraper = chosen_scraper - return f"{plugin_namespace}.{scraper_name}".lower(), scraper + return f"{plugin_namespace}.{scraper_name}".lower(), scraper, {} return None @@ -101,21 +100,23 @@ def steal_scraper_args(query: List[str]) -> ScraperOptionsT: [(x.replace("--", "").replace("-", "_"), True) for x in scrape_arguments] ) -def get_scraper(scraper_id: str, plugins_data: List[Tuple[str, str, PluginHookData]], user_defined_scrapers: Dict[str, str]) -> Tuple[str, Type[Scraper] | Tuple[None, List[str]]]: +def get_scraper(scraper_id: str, plugins_data: List[Tuple[str, str, PluginHookData]], user_defined_scrapers: ScrapersConfigT) -> Tuple[str, Type[Scraper] | Tuple[None, List[str]], ScraperOptionsT]: + scraper_options = {} available_scrapers = [] # scraper namespace override. - for scraper_namespace in user_defined_scrapers: + for scraper_namespace, scraper_data in user_defined_scrapers.items(): if scraper_id.lower() == scraper_namespace.lower(): mov_cli_logger.debug(f"Using the scraper overridden namespace '{scraper_namespace}'...") - scraper_id = user_defined_scrapers[scraper_namespace] + scraper_id = scraper_data["namespace"] + scraper_options = scraper_data["options"] for plugin_namespace, _, plugin_hook_data in plugins_data: plugin_scrapers = plugin_hook_data["scrapers"] if scraper_id.lower() == plugin_namespace.lower() and "DEFAULT" in plugin_scrapers: - return f"{plugin_namespace}.DEFAULT", plugin_scrapers["DEFAULT"] + return f"{plugin_namespace}.DEFAULT", plugin_scrapers["DEFAULT"], scraper_options for scraper_name, scraper in plugin_scrapers.items(): id = f"{plugin_namespace}.{scraper_name}".lower() @@ -123,6 +124,6 @@ def get_scraper(scraper_id: str, plugins_data: List[Tuple[str, str, PluginHookDa available_scrapers.append(id) if scraper_id.lower() == id: - return id, scraper + return id, scraper, scraper_options - return None, available_scrapers \ No newline at end of file + return None, available_scrapers, scraper_options \ No newline at end of file diff --git a/mov_cli/config.py b/mov_cli/config.py index 78929a76..cde24be0 100644 --- a/mov_cli/config.py +++ b/mov_cli/config.py @@ -4,10 +4,16 @@ if TYPE_CHECKING: from .players import Player - from typing import Dict, Union, Literal, Any, Optional + from typing import Dict, Literal, Any, Optional - JSON_VALUES = Union[str, bool, int, dict] - SUPPORTED_PARSERS = Literal["lxml", "html.parser"] + SupportedParsersT = Literal["lxml", "html.parser"] + + @final + class ScraperData(TypedDict): + namespace: str + options: Dict[str, str | bool] + + ScrapersConfigT = Dict[Literal["default"], str] | Dict[str, ScraperData] import os import toml @@ -20,7 +26,7 @@ from .logger import mov_cli_logger from .utils import get_appdata_directory -__all__ = ("Config", ) +__all__ = ("Config",) @final class ConfigUIData(TypedDict): @@ -34,20 +40,17 @@ class ConfigHTTPData(TypedDict): class ConfigDownloadsData(TypedDict): save_path: str -@final -class ScrapersData(TypedDict): - default: str - @final class ConfigData(TypedDict): version: int debug: bool player: str - parser: SUPPORTED_PARSERS + editor: str + parser: SupportedParsersT ui: ConfigUIData http: ConfigHTTPData downloads: ConfigDownloadsData - scrapers: 
ScrapersData | Dict[str, str] + scrapers: ScrapersConfigT | Dict[str, str] plugins: Dict[str, str] resolution: int @@ -106,13 +109,32 @@ def plugins(self) -> Dict[str, str]: return self.data.get("plugins", {"test": "mov-cli-test"}) @property - def scrapers(self) -> ScrapersData | Dict[str, str]: - return self.data.get("scrapers", {}) + def scrapers(self) -> ScrapersConfigT: + scrapers = self.data.get("scrapers", {}) + + consistent_scrapers: Dict[str, ScraperData] = {} + + for scraper, plugin_namespace_or_dict in scrapers.items(): + + if scraper == "default": + consistent_scrapers["default"] = plugin_namespace_or_dict + + elif isinstance(plugin_namespace_or_dict, str): + consistent_scrapers[scraper] = {"namespace": plugin_namespace_or_dict, "options": {}} + + else: + dict = plugin_namespace_or_dict + consistent_scrapers[scraper] = { + "namespace": dict["namespace"], + "options": dict["options"] + } + + return consistent_scrapers @property def editor(self) -> Optional[str]: """Returns the editor that should be opened while editing.""" - return self.data.get("editor") + return self.data.get("editor", None) @property def skip_update_checker(self) -> bool: @@ -129,7 +151,7 @@ def fzf_enabled(self) -> bool: return self.data.get("ui", {}).get("fzf", True if shutil.which("fzf") is not None else False) @property - def parser(self) -> SUPPORTED_PARSERS | Any: + def parser(self) -> SupportedParsersT | Any: """Returns the parser type configured by the user else it just returns the default.""" default_parser = "lxml" if find_spec("lxml") else "html.parser" return self.data.get("parser", default_parser)
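
Reviewer note (illustrative, not part of the patches above): the sketch below shows end to end what the new `[mov-cli.scrapers]` overrides are meant to do, i.e. how the `Config.scrapers` property from PATCH 3/3 normalizes both the plain string form and the dict-with-options form before `get_scraper()` consumes them. It assumes the third-party `toml` package, which mov_cli/config.py already imports; the scraper name "films.some_scraper" and the "audio" option are hypothetical placeholders, not real plugin identifiers.

from typing import Any, Dict

import toml  # already a mov-cli dependency (mov_cli/config.py imports it)

# Hypothetical user config: "films.some_scraper" and the "audio" option are
# placeholders invented for this sketch, not real plugin identifiers.
EXAMPLE_CONFIG = """
[mov-cli.scrapers]
default = "films"
test = "test.DEFAULT"
films = { namespace = "films.some_scraper", options = { audio = "english" } }
"""

def normalize_scrapers(raw: Dict[str, Any]) -> Dict[str, Any]:
    # Mirrors the Config.scrapers property added in PATCH 3/3: "default" is
    # passed through untouched, a bare string becomes a ScraperData dict with
    # empty options, and the dict form keeps its namespace and options.
    consistent: Dict[str, Any] = {}

    for key, value in raw.items():
        if key == "default":
            consistent["default"] = value
        elif isinstance(value, str):
            consistent[key] = {"namespace": value, "options": {}}
        else:
            consistent[key] = {"namespace": value["namespace"], "options": value["options"]}

    return consistent

if __name__ == "__main__":
    scrapers = toml.loads(EXAMPLE_CONFIG)["mov-cli"]["scrapers"]

    print(normalize_scrapers(scrapers))
    # {'default': 'films',
    #  'test': {'namespace': 'test.DEFAULT', 'options': {}},
    #  'films': {'namespace': 'films.some_scraper', 'options': {'audio': 'english'}}}

With the normalized table in hand, get_scraper() matches the requested scraper id against these keys case-insensitively, swaps in the stored namespace, and returns the stored options alongside the scraper class; __main__.py then merges any command-line scraper options on top via selected_scraper[2].update(scrape_options) before constructing the scraper.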