Skip to content

Commit

Permalink
Merge 9a7fb68 into 3c275a4
Browse files Browse the repository at this point in the history
  • Loading branch information
mtlynch committed Jul 8, 2018
2 parents 3c275a4 + 9a7fb68 commit ba2736f
Show file tree
Hide file tree
Showing 2 changed files with 26 additions and 0 deletions.
1 change: 1 addition & 0 deletions README.md
Expand Up @@ -12,6 +12,7 @@ scrapy crawl hey-keto-mama -s "DOWNLOAD_ROOT=${OUTPUT_DIR}"
scrapy crawl keto-size-me -s "DOWNLOAD_ROOT=${OUTPUT_DIR}"
scrapy crawl ketoconnect -s "DOWNLOAD_ROOT=${OUTPUT_DIR}"
scrapy crawl ketogasm -s "DOWNLOAD_ROOT=${OUTPUT_DIR}"
scrapy crawl ketovale -s "DOWNLOAD_ROOT=${OUTPUT_DIR}"
scrapy crawl ketovangelist-kitchen -s "DOWNLOAD_ROOT=${OUTPUT_DIR}"
scrapy crawl low-carb-yum -s "DOWNLOAD_ROOT=${OUTPUT_DIR}"
scrapy crawl queen-bs -s "DOWNLOAD_ROOT=${OUTPUT_DIR}"
Expand Down
25 changes: 25 additions & 0 deletions ketohub/spiders.py
Expand Up @@ -209,6 +209,31 @@ class KetovangelistKitchen(spiders.CrawlSpider):
]


class Ketovale(spiders.CrawlSpider):
    """Crawl the ketovale.com recipe category and process each recipe page.

    Starts at the recipe category index, follows the paginated index
    pages, and passes every recipe page linked from an entry title to
    ``callback_handler.process_callback``.
    """
    name = 'ketovale'

    # Built once at class-definition time; the content saver is given the
    # value returned by _get_download_root().
    callback_handler = CallbackHandler(
        content_saver=persist.ContentSaver(_get_download_root()))

    allowed_domains = ['ketovale.com']
    start_urls = ['https://www.ketovale.com/category/recipes/']

    rules = [
        # Extract links for finding additional recipe pages,
        # e.g. https://www.ketovale.com/category/recipes/page/3/
        # The dots in the host are escaped so they match a literal '.'
        # rather than any character. No callback is given, so these
        # links are only followed, not processed.
        spiders.Rule(
            linkextractors.LinkExtractor(
                allow=r'https://www\.ketovale\.com/category/recipes/page/\d+/'
            )),
        # Extract links for recipes. Only links found inside
        # <h2 class="entry-title"> elements are considered, and recipe
        # pages themselves are not crawled further.
        spiders.Rule(
            linkextractors.LinkExtractor(
                allow=r'https://www\.ketovale\.com/recipe/.*/$',
                restrict_xpaths='//h2[@class="entry-title"]'),
            callback=callback_handler.process_callback,
            follow=False),
    ]


class LowCarbYum(spiders.CrawlSpider):
name = 'low-carb-yum'

Expand Down

0 comments on commit ba2736f

Please sign in to comment.