Skip to content

Commit

Permalink
Adding support for Queen B's
Browse files Browse the repository at this point in the history
  • Loading branch information
mtlynch committed Nov 12, 2017
1 parent c2c9f4f commit 667897e
Show file tree
Hide file tree
Showing 2 changed files with 30 additions and 0 deletions.
1 change: 1 addition & 0 deletions README.md
Expand Up @@ -12,4 +12,5 @@ scrapy crawl ketoconnect -s "DOWNLOAD_ROOT=${OUTPUT_DIR}"
scrapy crawl ruled-me -s "DOWNLOAD_ROOT=${OUTPUT_DIR}"
scrapy crawl ketogasm -s "DOWNLOAD_ROOT=${OUTPUT_DIR}"
scrapy crawl keto-size-me -s "DOWNLOAD_ROOT=${OUTPUT_DIR}"
scrapy crawl queen-bs -s "DOWNLOAD_ROOT=${OUTPUT_DIR}"
```
29 changes: 29 additions & 0 deletions ketohub/spiders.py
Expand Up @@ -145,3 +145,32 @@ class KetoSizeMe(spiders.CrawlSpider):
callback=callback_handler.process_callback,
follow=False)
]


class QueenBs(spiders.CrawlSpider):
    """Crawl spider for the keto category of queenbsincredibleedibles.com.

    Starts at the first keto category page, follows links to further
    category pages, and passes every recipe page it finds to
    ``callback_handler.process_callback``.
    """
    name = 'queen-bs'

    # Built once at class-definition time; the saver is rooted at whatever
    # _get_download_root() returns when this module is imported.
    callback_handler = CallbackHandler(
        content_saver=persist.ContentSaver(_get_download_root()))

    allowed_domains = ['queenbsincredibleedibles.com']
    start_urls = ['http://queenbsincredibleedibles.com/category/keto/page/1/']

    # NOTE: the `allow` patterns are regular expressions, so the dot in the
    # host name is escaped to match a literal '.' rather than any character.
    rules = [
        # Extract links for finding additional keto recipe pages,
        # e.g. http://queenbsincredibleedibles.com/category/keto/page/2/
        # No callback is given, so per Scrapy's Rule semantics these links
        # are followed rather than processed.
        spiders.Rule(
            linkextractors.LinkExtractor(
                allow=(r'http://queenbsincredibleedibles\.com'
                       r'/category/keto/page/\d+/'))),

        # Extract links for recipes,
        # e.g. http://queenbsincredibleedibles.com/2017/09/26/creamy-coconut-kale-sausage-soup/
        # Recipe pages are handed to the callback and not crawled further.
        spiders.Rule(
            linkextractors.LinkExtractor(
                allow=(r'http://queenbsincredibleedibles\.com'
                       r'/\d{4}/\d{2}/\d{2}/.*/$')),
            callback=callback_handler.process_callback,
            follow=False)
    ]

0 comments on commit 667897e

Please sign in to comment.