Skip to content

Commit

Permalink
Rev 485: Allow download order and sorting overrides. Support detailed…
Browse files Browse the repository at this point in the history
… meta tags: `sort:score:desc`. Update tests.
  • Loading branch information
trickerer01 committed Dec 10, 2023
1 parent 48d7d4a commit 6ceaa8d
Show file tree
Hide file tree
Showing 4 changed files with 48 additions and 31 deletions.
29 changes: 17 additions & 12 deletions src/app_download.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,7 @@ def __init__(self) -> None:
self.item_info_dict_all = dict() # type: Dict[str, ItemInfo]
self.neg_and_groups = list() # type: List[List[Pattern[str]]]
self.known_parents = set() # type: Set[str]
self.default_sort = True

def __del__(self) -> None:
self.__cleanup()
Expand Down Expand Up @@ -855,7 +856,8 @@ def _download_all(self) -> None:
trace('\nNothing to download: queue is empty')
return

self.items_raw_all = sorted(self.items_raw_all, key=lambda x: int(self._extract_id(x)), reverse=True) # type: List[str]
if self.default_sort:
self.items_raw_all = sorted(self.items_raw_all, key=lambda x: int(self._extract_id(x)), reverse=True) # type: List[str]

if self.download_limit > 0:
if len(self.items_raw_all) > self.download_limit:
Expand All @@ -865,12 +867,12 @@ def _download_all(self) -> None:
else:
trace('\nShrinking queue down is not required!')

item_front = self._extract_item_info(self.items_raw_all[0]).id
item_end = self._extract_item_info(self.items_raw_all[-1]).id
# front item is always >= end item
trace(f'\nProcessing {self.total_count_all:d} items, bound {item_end} to {item_front}')
min_id = self._extract_id(min(self.items_raw_all, key=lambda x: int(self._extract_id(x))))
max_id = self._extract_id(max(self.items_raw_all, key=lambda x: int(self._extract_id(x))))
trace(f'\nProcessing {self.total_count_all:d} items, bound {min_id} to {max_id}')

self.items_raw_all.reverse()
if self.default_sort:
self.items_raw_all.reverse()
self.current_state = DownloaderStates.STATE_DOWNLOADING
trace(f'{self.total_count_all:d} item(s) scheduled, {self.maxthreads_items:d} thread(s) max\nWorking...\n')

Expand Down Expand Up @@ -900,15 +902,18 @@ def _parse_tags(self, tags_base_arr: Iterable[str]) -> None:
tags_list, self.neg_and_groups = extract_neg_and_groups(' '.join(tags_base_arr))
for t in tags_list:
if len(t) > 2 and f'{t[0]}{t[-1]}' == '()' and f'{t[:2]}{t[-2:]}' != f'({cc}{cc})':
thread_exit(f'Error: invalid tag \'{t}\'! Looks like \'or\' group but not fully contatenated with \'{cc}\'')
thread_exit(f'Error: invalid tag \'{t}\'! Looks like \'or\' group but not fully contatenated by \'{cc}\'')
self.tags_str_arr = split_tags_into_tasks(tags_list, cc, sc, split_always)
self.orig_tasks_count = self._tasks_count()
# conflict: sort tag + date filter
# conflict: non-default sorting
sort_checker = (lambda s: (s.startswith('order=') and s != 'order=id_desc') if ProcModule.is_rn() else
(s.startswith('sort:') and s != 'sort:id'))
sort_tags = list(filter(sort_checker, tags_list))
if sort_tags and (self.date_min != DATE_MIN_DEFAULT or self.date_max != datetime.today().strftime(FMT_DATE)):
thread_exit('Error: cannot use both sort tag and date filter at the same time!')
(s.startswith('sort:') and s != 'sort:id' and s != 'sort:id:desc'))
self.default_sort = len(list(filter(sort_checker, tags_list))) == 0
if not self.default_sort:
if self._tasks_count() > 1:
thread_exit('Error: cannot use non-default sorting with multi-task query!')
if self.date_min != DATE_MIN_DEFAULT or self.date_max != datetime.today().strftime(FMT_DATE):
thread_exit('Error: cannot use both sort tag and date filter at the same time!')

def _process_all_tags(self) -> None:
if self.warn_nonempty:
Expand Down
4 changes: 2 additions & 2 deletions src/app_revision.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,10 @@
APP_NAME = 'Ruxx'
APP_VER_MAJOR = '1'
APP_VER_SUB = '3'
APP_REVISION = '483'
APP_REVISION = '485'
APP_IS_BETA = False
APP_IS_BETA_TEXT = 'b' * APP_IS_BETA
APP_REV_DATE = '08 Dec 2023'
APP_REV_DATE = '10 Dec 2023'
APP_VERSION = f'{APP_VER_MAJOR}.{APP_VER_SUB}.{APP_REVISION}{APP_IS_BETA_TEXT}'

#
Expand Down
4 changes: 2 additions & 2 deletions src/app_tags_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,11 +26,11 @@
# language=PythonRegExp
META_CHAR = r'[a-z\d_]'
# language=PythonRegExp
META_COUNT_RX = r':(?:[<>]=?|=)?[a-z\d_]+?'
META_COUNT_RX = r':(?:(?:[<>]=?|=)?[a-z\d_]+?|[a-z\d_]+:[a-z\d_]+)'
# language=PythonRegExp
META_COUNT_RN = r'(?:[<>]=?|=)[a-z\d_]+?'
# language=PythonRegExp
META_COUNT_RS = r':(?:[<>]=?|=)?[a-z\d_]+?'
META_COUNT_RS = r':(?:(?:[<>]=?|=)?[a-z\d_]+?|[a-z\d_]+:[a-z\d_]+)'
# language=PythonRegExp
RE_ORGR_PART_RX = fr'{TAG_CHAR}+?(?:{META_COUNT_RX})?'
# language=PythonRegExp
Expand Down
42 changes: 27 additions & 15 deletions src/app_unittests.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,13 +30,15 @@
'sfw asd ned -nds -proxr '
'-timeout 13 -retries 56 -dmode 0 -skip_img -skip_vid -lowres -noproxy -proxynodown -prefix -dump_tags -dump_sources -append_info'
)
args_argparse_str2 = (
args_argparse_str2_base = (
'sfw asd ned -nds -proxt '
'-mindate 31-12-1950 -maxdate 01-01-2038 -threads 8 -proxy http://8.8.8.8:65333 '
'-threads 8 -proxy http://8.8.8.8:65333 '
'-headers {"name1":"value1"} -cookies {"name2":"value2"} '
'-path ' + CUR_PATH
)
args_argparse_str2 = args_argparse_str2_base + ' -mindate 31-12-1950 -maxdate 01-01-2038'
args_argparse_str3 = args_argparse_str2 + ' sort:score'
args_argparse_str4 = args_argparse_str2_base + ' sort:score:desc score:40'
item_str1_rx = (
'<post height="1291" score="27" file_url="/images/6898/76dfed93372eb7a373ffe2430379cfb1.jpeg" parent_id="90002"'
' sample_url="/preview/6898/76dfed93372eb7a373ffe2430379cfb1.jpeg" sample_width="961" sample_height="1291"'
Expand Down Expand Up @@ -68,7 +70,7 @@ def test_argparse1(self) -> None:
self.assertIsNotNone(arglist.tags)
# print(str(arglist.tags))
self.assertEqual(5, len(arglist.tags))
print('test_argparse1 passed')
print(f'{self._testMethodName} passed')

def test_argparse2(self) -> None:
# 5 tags, value types check
Expand All @@ -83,7 +85,7 @@ def test_argparse2(self) -> None:
self.assertIsNotNone(arglist.proxy)
self.assertIsNotNone(arglist.headers)
self.assertIsNotNone(arglist.cookies)
print('test_argparse2 passed')
print(f'{self._testMethodName} passed')


class DownloaderBaseTests(TestCase):
Expand All @@ -95,7 +97,7 @@ def test_item1_rx(self) -> None:
dwn.parse_args(arglist)
self.assertEqual('7869261', dwn._extract_id(dwn._local_addr_from_string(item_str1_rx)))
self.assertEqual('06-05-2023', dwn._extract_post_date(item_str1_rx))
print('test_item1_rx passe')
print(f'{self._testMethodName} passed')

def test_item1_rn(self) -> None:
Logger.init(True, True)
Expand All @@ -104,7 +106,7 @@ def test_item1_rn(self) -> None:
with DownloaderRn() as dwn:
dwn.parse_args(arglist)
self.assertEqual('427251', dwn._extract_id(item_str1_rn))
print('test_item1_rn passe')
print(f'{self._testMethodName} passed')

def test_item1_rs(self) -> None:
Logger.init(True, True)
Expand All @@ -114,7 +116,7 @@ def test_item1_rs(self) -> None:
dwn.parse_args(arglist)
self.assertEqual('7939303', dwn._extract_id(dwn._local_addr_from_string(item_str1_rs)))
self.assertEqual(DATE_MIN_DEFAULT, dwn._extract_post_date(item_str1_rs))
print('test_item1_rs passe')
print(f'{self._testMethodName} passed')

def test_cmdline1(self) -> None:
Logger.init(True, True)
Expand All @@ -136,7 +138,7 @@ def test_cmdline1(self) -> None:
self.assertTrue(dwn.dump_tags)
self.assertTrue(dwn.dump_sources)
self.assertTrue(dwn.append_info)
print('test_cmdline1 passed')
print(f'{self._testMethodName} passed')

def test_cmdline2(self) -> None:
Logger.init(True, True)
Expand All @@ -153,15 +155,25 @@ def test_cmdline2(self) -> None:
self.assertEqual('http://8.8.8.8:65333', dwn.proxies.get('https'))
self.assertEqual('value1', dwn.add_headers.get('name1'))
self.assertEqual('value2', dwn.add_cookies.get('name2'))
print('test_cmdline2 passed')
print(f'{self._testMethodName} passed')

def test_cmdline3(self) -> None:
Logger.init(True, True)
args = args_argparse_str3
arglist = prepare_arglist(args.split())
with DownloaderRx() as dwn:
self.assertRaises(ThreadInterruptException, dwn.parse_args, arglist)
print('test_cmdline3 passed')
print(f'{self._testMethodName} passed')

def test_cmdline4(self) -> None:
Logger.init(True, True)
args = args_argparse_str4
arglist = prepare_arglist(args.split())
with DownloaderRx() as dwn:
dwn.parse_args(arglist)
self.assertFalse(dwn.default_sort)
self.assertEqual(7, dwn.get_tags_count())
print(f'{self._testMethodName} passed')


# Tests below require actual connection
Expand All @@ -178,7 +190,7 @@ def test_connect_rx1(self) -> None:
dwn.url = dwn.form_tags_search_address(dwn.tags_str_arr[0])
dwn.total_count = dwn.get_items_query_size_or_html(dwn.url)
self.assertEqual(1, dwn.total_count)
print('test_connect_rx1 passed')
print(f'{self._testMethodName} passed')

def test_connect_rs1(self) -> None:
# connection and downloading for rx are performed using the same web address, so we are free to use a dry run here (-dmode 1)
Expand All @@ -191,7 +203,7 @@ def test_connect_rs1(self) -> None:
dwn.url = dwn.form_tags_search_address(dwn.tags_str_arr[0])
dwn.total_count = dwn.get_items_query_size_or_html(dwn.url)
self.assertEqual(1, dwn.total_count)
print('test_connect_rs1 passed')
print(f'{self._testMethodName} passed')


class DownloadTests(TestCase):
Expand All @@ -205,7 +217,7 @@ def test_down_rx1(self) -> None:
dwn.launch_download(arglist)
self.assertTrue(dwn.fail_count == 0, f'dwn.failCount {dwn.fail_count:d} == 0')
self.assertTrue(dwn.processed_count == 1, f'dwn.processed_count {dwn.fail_count:d} == 1')
print('test_down_rx1 passed')
print(f'{self._testMethodName} passed')

def test_down_rx2(self) -> None:
# this test actually performs a download
Expand All @@ -220,7 +232,7 @@ def test_down_rx2(self) -> None:
dwn.launch_download(arglist)
self.assertTrue(path.isfile(tempfile_path))
remove_file(tempfile_path)
print('test_down_rx2 passed')
print(f'{self._testMethodName} passed')

def test_down_rs1(self) -> None:
# this test actually performs a download
Expand All @@ -235,7 +247,7 @@ def test_down_rs1(self) -> None:
dwn.launch_download(arglist)
self.assertTrue(path.isfile(tempfile_path))
remove_file(tempfile_path)
print('test_down_rs1 passed')
print(f'{self._testMethodName} passed')


def run_all_tests() -> None:
Expand Down

0 comments on commit 6ceaa8d

Please sign in to comment.