Skip to content

Commit

Permalink
Feature: 为 spider_post 任务增加调用爬虫的定制传参支持，便于后台管理时爬取指定日期的文章
Browse files (browse the repository at this point in the history)
  • Loading branch information
littlemo committed Jun 17, 2018
1 parent f7a340b commit f3f7696
Show file tree
Hide file tree
Showing 2 changed files with 7 additions and 5 deletions.
2 changes: 1 addition & 1 deletion server/spiders/admin.py
Expand Up @@ -25,7 +25,7 @@ def action_spider_post(self, request, queryset):
c = (group(
tasks.spider_post.s(spider.name) for spider in queryset
))
c.delay()
c.delay() # 测试时指定爬取日期: date='20180530'
self.message_user(
request,
_('共触发 {num} 个爬虫源').format(
Expand Down
10 changes: 6 additions & 4 deletions server/spiders/tasks.py
Expand Up @@ -13,7 +13,7 @@


@shared_task
def spider_post(spider_name):
def spider_post(spider_name, **kwargs):
"""
爬取文章数据
"""
Expand All @@ -24,14 +24,16 @@ def spider_post(spider_name):
invoke_on_load=True,
)

def crawl(ext, *args):
rc = ext.obj.crawl() # 测试时指定爬取日期: date='20180507'
def crawl(ext, **kwargs):
log.info('[{name}]调用爬虫传参: {kwargs}'.format(
name=ext.name, kwargs=kwargs))
rc = ext.obj.crawl(**kwargs)
data = json.dumps(rc, ensure_ascii=False)
log.debug('[{name}]爬取返回包:{pack}'.format(
name=ext.name, pack=data))
return (ext.name, rc)

results = mgr.map(crawl)
results = mgr.map(crawl, **kwargs)
log.debug('结果对象:{results}'.format(
results=results))

Expand Down

0 comments on commit f3f7696

Please sign in to comment.