From 6ceaa8d3eaad3311a1a3355ef4f936f40a2dd112 Mon Sep 17 00:00:00 2001 From: trickerer01 Date: Sun, 10 Dec 2023 21:21:00 +0700 Subject: [PATCH] Rev 485: Allow download order and sorting overrides. Support detailed meta tags: `sort:score:desc`. Update tests. --- src/app_download.py | 29 +++++++++++++++++------------ src/app_revision.py | 4 ++-- src/app_tags_parser.py | 4 ++-- src/app_unittests.py | 42 +++++++++++++++++++++++++++--------------- 4 files changed, 48 insertions(+), 31 deletions(-) diff --git a/src/app_download.py b/src/app_download.py index 5672e2f..beda85d 100644 --- a/src/app_download.py +++ b/src/app_download.py @@ -101,6 +101,7 @@ def __init__(self) -> None: self.item_info_dict_all = dict() # type: Dict[str, ItemInfo] self.neg_and_groups = list() # type: List[List[Pattern[str]]] self.known_parents = set() # type: Set[str] + self.default_sort = True def __del__(self) -> None: self.__cleanup() @@ -855,7 +856,8 @@ def _download_all(self) -> None: trace('\nNothing to download: queue is empty') return - self.items_raw_all = sorted(self.items_raw_all, key=lambda x: int(self._extract_id(x)), reverse=True) # type: List[str] + if self.default_sort: + self.items_raw_all = sorted(self.items_raw_all, key=lambda x: int(self._extract_id(x)), reverse=True) # type: List[str] if self.download_limit > 0: if len(self.items_raw_all) > self.download_limit: @@ -865,12 +867,12 @@ def _download_all(self) -> None: else: trace('\nShrinking queue down is not required!') - item_front = self._extract_item_info(self.items_raw_all[0]).id - item_end = self._extract_item_info(self.items_raw_all[-1]).id - # front item is always >= end item - trace(f'\nProcessing {self.total_count_all:d} items, bound {item_end} to {item_front}') + min_id = self._extract_id(min(self.items_raw_all, key=lambda x: int(self._extract_id(x)))) + max_id = self._extract_id(max(self.items_raw_all, key=lambda x: int(self._extract_id(x)))) + trace(f'\nProcessing {self.total_count_all:d} items, bound {min_id} to {max_id}') - self.items_raw_all.reverse() + if self.default_sort: + self.items_raw_all.reverse() self.current_state = DownloaderStates.STATE_DOWNLOADING trace(f'{self.total_count_all:d} item(s) scheduled, {self.maxthreads_items:d} thread(s) max\nWorking...\n') @@ -900,15 +902,18 @@ def _parse_tags(self, tags_base_arr: Iterable[str]) -> None: tags_list, self.neg_and_groups = extract_neg_and_groups(' '.join(tags_base_arr)) for t in tags_list: if len(t) > 2 and f'{t[0]}{t[-1]}' == '()' and f'{t[:2]}{t[-2:]}' != f'({cc}{cc})': - thread_exit(f'Error: invalid tag \'{t}\'! Looks like \'or\' group but not fully contatenated with \'{cc}\'') + thread_exit(f'Error: invalid tag \'{t}\'! Looks like \'or\' group but not fully contatenated by \'{cc}\'') self.tags_str_arr = split_tags_into_tasks(tags_list, cc, sc, split_always) self.orig_tasks_count = self._tasks_count() - # conflict: sort tag + date filter + # conflict: non-default sorting sort_checker = (lambda s: (s.startswith('order=') and s != 'order=id_desc') if ProcModule.is_rn() else - (s.startswith('sort:') and s != 'sort:id')) - sort_tags = list(filter(sort_checker, tags_list)) - if sort_tags and (self.date_min != DATE_MIN_DEFAULT or self.date_max != datetime.today().strftime(FMT_DATE)): - thread_exit('Error: cannot use both sort tag and date filter at the same time!') + (s.startswith('sort:') and s != 'sort:id' and s != 'sort:id:desc')) + self.default_sort = len(list(filter(sort_checker, tags_list))) == 0 + if not self.default_sort: + if self._tasks_count() > 1: + thread_exit('Error: cannot use non-default sorting with multi-task query!') + if self.date_min != DATE_MIN_DEFAULT or self.date_max != datetime.today().strftime(FMT_DATE): + thread_exit('Error: cannot use both sort tag and date filter at the same time!') def _process_all_tags(self) -> None: if self.warn_nonempty: diff --git a/src/app_revision.py b/src/app_revision.py index e786e72..a4c5485 100644 --- a/src/app_revision.py +++ b/src/app_revision.py @@ -11,10 +11,10 @@ APP_NAME = 'Ruxx' APP_VER_MAJOR = '1' APP_VER_SUB = '3' -APP_REVISION = '483' +APP_REVISION = '485' APP_IS_BETA = False APP_IS_BETA_TEXT = 'b' * APP_IS_BETA -APP_REV_DATE = '08 Dec 2023' +APP_REV_DATE = '10 Dec 2023' APP_VERSION = f'{APP_VER_MAJOR}.{APP_VER_SUB}.{APP_REVISION}{APP_IS_BETA_TEXT}' # diff --git a/src/app_tags_parser.py b/src/app_tags_parser.py index ad60a1e..213f7dd 100644 --- a/src/app_tags_parser.py +++ b/src/app_tags_parser.py @@ -26,11 +26,11 @@ # language=PythonRegExp META_CHAR = r'[a-z\d_]' # language=PythonRegExp -META_COUNT_RX = r':(?:[<>]=?|=)?[a-z\d_]+?' +META_COUNT_RX = r':(?:(?:[<>]=?|=)?[a-z\d_]+?|[a-z\d_]+:[a-z\d_]+)' # language=PythonRegExp META_COUNT_RN = r'(?:[<>]=?|=)[a-z\d_]+?' # language=PythonRegExp -META_COUNT_RS = r':(?:[<>]=?|=)?[a-z\d_]+?' +META_COUNT_RS = r':(?:(?:[<>]=?|=)?[a-z\d_]+?|[a-z\d_]+:[a-z\d_]+)' # language=PythonRegExp RE_ORGR_PART_RX = fr'{TAG_CHAR}+?(?:{META_COUNT_RX})?' # language=PythonRegExp diff --git a/src/app_unittests.py b/src/app_unittests.py index 1ce1880..5099f47 100644 --- a/src/app_unittests.py +++ b/src/app_unittests.py @@ -30,13 +30,15 @@ 'sfw asd ned -nds -proxr ' '-timeout 13 -retries 56 -dmode 0 -skip_img -skip_vid -lowres -noproxy -proxynodown -prefix -dump_tags -dump_sources -append_info' ) -args_argparse_str2 = ( +args_argparse_str2_base = ( 'sfw asd ned -nds -proxt ' - '-mindate 31-12-1950 -maxdate 01-01-2038 -threads 8 -proxy http://8.8.8.8:65333 ' + '-threads 8 -proxy http://8.8.8.8:65333 ' '-headers {"name1":"value1"} -cookies {"name2":"value2"} ' '-path ' + CUR_PATH ) +args_argparse_str2 = args_argparse_str2_base + ' -mindate 31-12-1950 -maxdate 01-01-2038' args_argparse_str3 = args_argparse_str2 + ' sort:score' +args_argparse_str4 = args_argparse_str2_base + ' sort:score:desc score:40' item_str1_rx = ( ' None: self.assertIsNotNone(arglist.tags) # print(str(arglist.tags)) self.assertEqual(5, len(arglist.tags)) - print('test_argparse1 passed') + print(f'{self._testMethodName} passed') def test_argparse2(self) -> None: # 5 tags, value types check @@ -83,7 +85,7 @@ def test_argparse2(self) -> None: self.assertIsNotNone(arglist.proxy) self.assertIsNotNone(arglist.headers) self.assertIsNotNone(arglist.cookies) - print('test_argparse2 passed') + print(f'{self._testMethodName} passed') class DownloaderBaseTests(TestCase): @@ -95,7 +97,7 @@ def test_item1_rx(self) -> None: dwn.parse_args(arglist) self.assertEqual('7869261', dwn._extract_id(dwn._local_addr_from_string(item_str1_rx))) self.assertEqual('06-05-2023', dwn._extract_post_date(item_str1_rx)) - print('test_item1_rx passe') + print(f'{self._testMethodName} passed') def test_item1_rn(self) -> None: Logger.init(True, True) @@ -104,7 +106,7 @@ def test_item1_rn(self) -> None: with DownloaderRn() as dwn: dwn.parse_args(arglist) self.assertEqual('427251', dwn._extract_id(item_str1_rn)) - print('test_item1_rn passe') + print(f'{self._testMethodName} passed') def test_item1_rs(self) -> None: Logger.init(True, True) @@ -114,7 +116,7 @@ def test_item1_rs(self) -> None: dwn.parse_args(arglist) self.assertEqual('7939303', dwn._extract_id(dwn._local_addr_from_string(item_str1_rs))) self.assertEqual(DATE_MIN_DEFAULT, dwn._extract_post_date(item_str1_rs)) - print('test_item1_rs passe') + print(f'{self._testMethodName} passed') def test_cmdline1(self) -> None: Logger.init(True, True) @@ -136,7 +138,7 @@ def test_cmdline1(self) -> None: self.assertTrue(dwn.dump_tags) self.assertTrue(dwn.dump_sources) self.assertTrue(dwn.append_info) - print('test_cmdline1 passed') + print(f'{self._testMethodName} passed') def test_cmdline2(self) -> None: Logger.init(True, True) @@ -153,7 +155,7 @@ def test_cmdline2(self) -> None: self.assertEqual('http://8.8.8.8:65333', dwn.proxies.get('https')) self.assertEqual('value1', dwn.add_headers.get('name1')) self.assertEqual('value2', dwn.add_cookies.get('name2')) - print('test_cmdline2 passed') + print(f'{self._testMethodName} passed') def test_cmdline3(self) -> None: Logger.init(True, True) @@ -161,7 +163,17 @@ def test_cmdline3(self) -> None: arglist = prepare_arglist(args.split()) with DownloaderRx() as dwn: self.assertRaises(ThreadInterruptException, dwn.parse_args, arglist) - print('test_cmdline3 passed') + print(f'{self._testMethodName} passed') + + def test_cmdline4(self) -> None: + Logger.init(True, True) + args = args_argparse_str4 + arglist = prepare_arglist(args.split()) + with DownloaderRx() as dwn: + dwn.parse_args(arglist) + self.assertFalse(dwn.default_sort) + self.assertEqual(7, dwn.get_tags_count()) + print(f'{self._testMethodName} passed') # Tests below require actual connection @@ -178,7 +190,7 @@ def test_connect_rx1(self) -> None: dwn.url = dwn.form_tags_search_address(dwn.tags_str_arr[0]) dwn.total_count = dwn.get_items_query_size_or_html(dwn.url) self.assertEqual(1, dwn.total_count) - print('test_connect_rx1 passed') + print(f'{self._testMethodName} passed') def test_connect_rs1(self) -> None: # connection and downloading for rx is performed using same web address, we are free to use dry run here (-dmode 1) @@ -191,7 +203,7 @@ def test_connect_rs1(self) -> None: dwn.url = dwn.form_tags_search_address(dwn.tags_str_arr[0]) dwn.total_count = dwn.get_items_query_size_or_html(dwn.url) self.assertEqual(1, dwn.total_count) - print('test_connect_rs1 passed') + print(f'{self._testMethodName} passed') class DownloadTests(TestCase): @@ -205,7 +217,7 @@ def test_down_rx1(self) -> None: dwn.launch_download(arglist) self.assertTrue(dwn.fail_count == 0, f'dwn.failCount {dwn.fail_count:d} == 0') self.assertTrue(dwn.processed_count == 1, f'dwn.processed_count {dwn.fail_count:d} == 1') - print('test_down_rx1 passed') + print(f'{self._testMethodName} passed') def test_down_rx2(self) -> None: # this test actually performs a download @@ -220,7 +232,7 @@ def test_down_rx2(self) -> None: dwn.launch_download(arglist) self.assertTrue(path.isfile(tempfile_path)) remove_file(tempfile_path) - print('test_down_rx2 passed') + print(f'{self._testMethodName} passed') def test_down_rs1(self) -> None: # this test actually performs a download @@ -235,7 +247,7 @@ def test_down_rs1(self) -> None: dwn.launch_download(arglist) self.assertTrue(path.isfile(tempfile_path)) remove_file(tempfile_path) - print('test_down_rs1 passed') + print(f'{self._testMethodName} passed') def run_all_tests() -> None: