Skip to content

Commit

Permalink
Adding support for ketovangelist kitchen (#46)
Browse files Browse the repository at this point in the history
  • Loading branch information
mtlynch committed Nov 13, 2017
1 parent 9b4d2f1 commit 8fa54cf
Show file tree
Hide file tree
Showing 2 changed files with 38 additions and 0 deletions.
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,4 +13,5 @@ scrapy crawl ruled-me -s "DOWNLOAD_ROOT=${OUTPUT_DIR}"
scrapy crawl ketogasm -s "DOWNLOAD_ROOT=${OUTPUT_DIR}"
scrapy crawl keto-size-me -s "DOWNLOAD_ROOT=${OUTPUT_DIR}"
scrapy crawl queen-bs -s "DOWNLOAD_ROOT=${OUTPUT_DIR}"
scrapy crawl ketovangelist-kitchen -s "DOWNLOAD_ROOT=${OUTPUT_DIR}"
```
37 changes: 37 additions & 0 deletions ketohub/spiders.py
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,43 @@ class KetoSizeMe(spiders.CrawlSpider):
]


class KetovangelistKitchen(spiders.CrawlSpider):
    """Crawl spider for recipes on ketovangelistkitchen.com.

    Crawling starts from the category index pages listed in ``start_urls``.
    Links located inside each page's ``entry-content`` div whose URL matches
    the recipe pattern are extracted and handed to
    ``callback_handler.process_callback``; they are not followed further.
    """
    name = 'ketovangelist-kitchen'

    # Created once, when the class body is evaluated, and shared by the rules
    # below.
    callback_handler = CallbackHandler(
        content_saver=persist.ContentSaver(_get_download_root()))

    allowed_domains = ['ketovangelistkitchen.com']
    # Organize start URLs in descending order of category strength (e.g. muffins
    # should be categorized as "snack" not "eggs").
    start_urls = [
        'http://www.ketovangelistkitchen.com/category/appetizers/',
        'http://www.ketovangelistkitchen.com/category/sides/',
        'http://www.ketovangelistkitchen.com/category/snack/',
        'http://www.ketovangelistkitchen.com/category/soup/',
        'http://www.ketovangelistkitchen.com/category/sauces-dressings/',
        'http://www.ketovangelistkitchen.com/category/fat-bombs/',
        'http://www.ketovangelistkitchen.com/category/baked-goods/',
        'http://www.ketovangelistkitchen.com/category/beef/',
        'http://www.ketovangelistkitchen.com/category/chicken-turkey/',
        'http://www.ketovangelistkitchen.com/category/chocolate/',
        'http://www.ketovangelistkitchen.com/category/fish/',
        'http://www.ketovangelistkitchen.com/category/pork/',
        'http://www.ketovangelistkitchen.com/category/nuts/',
        'http://www.ketovangelistkitchen.com/category/eggs/',
    ]

    rules = [
        # Extract links for recipes. The dots in the hostname are escaped so
        # they match only a literal "." rather than any character.
        spiders.Rule(
            linkextractors.LinkExtractor(
                allow=r'http://(www\.)?ketovangelistkitchen\.com/.+/$',
                restrict_xpaths='//div[@class="entry-content"]'),
            callback=callback_handler.process_callback,
            follow=False)
    ]


class QueenBs(spiders.CrawlSpider):
name = 'queen-bs'

Expand Down

0 comments on commit 8fa54cf

Please sign in to comment.