Skip to content

Commit

Permalink
make dupefilter support create from spider
Browse files Browse the repository at this point in the history
  • Loading branch information
zhouyanhui@kanzhun.com committed Jan 5, 2017
1 parent 500039b commit 384996d
Show file tree
Hide file tree
Showing 2 changed files with 10 additions and 9 deletions.
9 changes: 9 additions & 0 deletions src/scrapy_redis/dupefilter.py
Expand Up @@ -114,6 +114,15 @@ def request_fingerprint(self, request):
"""
return request_fingerprint(request)

@classmethod
def from_spider(cls, spider):
    """Create a dupefilter instance configured from a spider.

    The redis client is obtained from the spider's settings via
    ``get_redis_from_settings``. The key template is read from the
    ``SCHEDULER_DUPEFILTER_KEY`` setting (falling back to
    ``defaults.SCHEDULER_DUPEFILTER_KEY``) and interpolated with the
    spider's name. The debug flag is read from ``DUPEFILTER_DEBUG``.

    Parameters
    ----------
    spider
        Object exposing ``settings`` and ``name`` attributes.

    Returns
    -------
    An instance of ``cls`` built as ``cls(server, key=..., debug=...)``.

    """
    conf = spider.settings
    redis_client = get_redis_from_settings(conf)
    key_template = conf.get(
        "SCHEDULER_DUPEFILTER_KEY",
        defaults.SCHEDULER_DUPEFILTER_KEY,
    )
    return cls(
        redis_client,
        key=key_template % {'spider': spider.name},
        debug=conf.getbool('DUPEFILTER_DEBUG'),
    )

def close(self, reason=''):
"""Delete data on close. Called by Scrapy's scheduler.
Expand Down
10 changes: 1 addition & 9 deletions src/scrapy_redis/scheduler.py
Expand Up @@ -134,15 +134,7 @@ def open(self, spider):
raise ValueError("Failed to instantiate queue class '%s': %s",
self.queue_cls, e)

try:
self.df = load_object(self.dupefilter_cls)(
server=self.server,
key=self.dupefilter_key % {'spider': spider.name},
debug=spider.settings.getbool('DUPEFILTER_DEBUG'),
)
except TypeError as e:
raise ValueError("Failed to instantiate dupefilter class '%s': %s",
self.dupefilter_cls, e)
self.df = load_object(self.dupefilter_cls).from_spider(spider)

This comment has been minimized.

Copy link
@acefei

acefei Nov 21, 2017

Why was self.df moved out of the try-except statement?

This comment has been minimized.

Copy link
@rmax

rmax Nov 21, 2017

Owner

The TypeError handling was meant to catch the error from classes that don't support the expected parameters and raise a friendly error. With .from_spider, the class itself is responsible for getting the parameters and/or raising an exception if any are missing.

This comment has been minimized.

Copy link
@acefei

acefei Nov 22, 2017

Makes sense, thanks for the answer and tips!


if self.flush_on_start:
self.flush()
Expand Down

0 comments on commit 384996d

Please sign in to comment.