From 07d44306bec77f6abf9a3b1eff967d4889852b77 Mon Sep 17 00:00:00 2001
From: Salas
Date: Fri, 29 Jul 2016 01:05:07 +0800
Subject: [PATCH] [Decision] remove reducer-way in get_metas()

- maybe no need for it, memory doesn't take big amount?
---
 dcard/forums.py      | 22 ++++++++++++++--------
 tests/test_forums.py |  7 +++----
 2 files changed, 17 insertions(+), 12 deletions(-)

diff --git a/dcard/forums.py b/dcard/forums.py
index 677fbab..49757aa 100644
--- a/dcard/forums.py
+++ b/dcard/forums.py
@@ -32,24 +32,30 @@ def get_metas(self, num=30, sort='new', callback=None):
         pages = -(-num // self.metas_per_page)
         paged_metas = self._get_paged_metas(pages, sort)
 
-        results = []
-        for page, metas in enumerate(paged_metas, start=1):
-            if page == pages:
-                metas = metas[:num - (pages - 1) * self.metas_per_page]
-            results.append(callback(metas) if callback else metas)  # buffer?
+        # results = []
+        # for page, metas in enumerate(paged_metas, start=1):
+        #     if page == pages:
+        #         metas = metas[:num - (pages - 1) * self.metas_per_page]
+        #     results.append(callback(metas) if callback else metas)  # buffer?
+        #
+        # if len(results) and isinstance(results[0], list):
+        #     results = flatten_lists(results)
 
-        if len(results) and isinstance(results[0], list):
-            results = flatten_lists(results)
+        buff = flatten_lists((metas for metas in paged_metas))[:num]
+        results = callback(buff) if callback else buff
 
-        logger.info('[%s] 資訊蒐集完成,共%d筆' % (self.forum, len(results)))
+        logger.info('[%s] 資訊蒐集完成,共%d筆' % (self.forum, len(buff)))
         return results
 
     def _get_paged_metas(self, pages, sort):
         params = {'popular': False} if sort == 'new' else {}
+
         for page in range(pages):
             data = self.client.get(self.posts_meta_url, params=params)
+
             if len(data) == 0:
                 logger.warning('[%s] 已到最末頁,第%d頁!'
                                % (self.forum, page))
+
             params['before'] = data[-1]['id']
             yield data
diff --git a/tests/test_forums.py b/tests/test_forums.py
index 4084916..4f8327b 100644
--- a/tests/test_forums.py
+++ b/tests/test_forums.py
@@ -35,8 +35,8 @@ def with_no_return(metas):
         return None
 
     def simulate_store_into_db(metas):
-        some_id = 987654
-        return some_id
+        some_ids = [987654, 5156612]
+        return some_ids
 
     forum = forums.get('test')['alias']
 
@@ -46,5 +46,4 @@ def simulate_store_into_db(metas):
     assert len(ids) != 0
     assert len(rids) != 0
 
-    assert len(none) != 0
-    assert none == [None] * len(none)
+    assert none == None