Adding SkinnyTaste (#62)
mtlynch committed Jan 13, 2019
1 parent b9eb8fd commit 6343d21
Showing 2 changed files with 28 additions and 0 deletions.
1 change: 1 addition & 0 deletions README.md
@@ -17,6 +17,7 @@ scrapy crawl ketovangelist-kitchen -s "DOWNLOAD_ROOT=${OUTPUT_DIR}"
scrapy crawl low-carb-yum -s "DOWNLOAD_ROOT=${OUTPUT_DIR}"
scrapy crawl queen-bs -s "DOWNLOAD_ROOT=${OUTPUT_DIR}"
scrapy crawl ruled-me -s "DOWNLOAD_ROOT=${OUTPUT_DIR}"
scrapy crawl skinny-taste -s "DOWNLOAD_ROOT=${OUTPUT_DIR}"
scrapy crawl sugar-free-mom -s "DOWNLOAD_ROOT=${OUTPUT_DIR}"
scrapy crawl wholesome-yum -s "DOWNLOAD_ROOT=${OUTPUT_DIR}"
scrapy crawl your-friends-j -s "DOWNLOAD_ROOT=${OUTPUT_DIR}"
27 changes: 27 additions & 0 deletions ketohub/spiders.py
@@ -296,6 +296,33 @@ class QueenBs(spiders.CrawlSpider):
    ]


class SkinnyTaste(spiders.CrawlSpider):
    name = 'skinny-taste'

    callback_handler = CallbackHandler(
        content_saver=persist.ContentSaver(_get_download_root()))

    allowed_domains = ['skinnytaste.com']
    start_urls = ['https://www.skinnytaste.com/recipes/keto/']

    rules = [
        # Extract links for finding additional recipe pages,
        # e.g. https://www.skinnytaste.com/recipes/keto/page/2/
        spiders.Rule(
            linkextractors.LinkExtractor(
                allow=r'skinnytaste.com/recipes/keto/page/\d+/')),
        # Extract links for recipes.
        spiders.Rule(
            linkextractors.LinkExtractor(
                allow=[
                    r'skinnytaste.com/[^\/]+/$',
                ],
                restrict_xpaths='//div[@class="archives"]'),
            callback=callback_handler.process_callback,
            follow=False),
    ]


class SugarFreeMom(spiders.CrawlSpider):
    name = 'sugar-free-mom'

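The new spider pairs two CrawlSpider rules: a pagination rule with no callback (so matched archive pages are only followed for more links) and a recipe rule with a callback and follow=False (so matched recipe pages are scraped but not crawled further). Below is a minimal, self-contained sketch of that pattern using only Scrapy's public API; CallbackHandler and persist.ContentSaver are project-specific helpers, so the parse_recipe callback and the "-sketch" spider name are illustrative stand-ins, not part of this commit.

from scrapy import linkextractors, spiders


class SkinnyTasteSketch(spiders.CrawlSpider):
    name = 'skinny-taste-sketch'
    allowed_domains = ['skinnytaste.com']
    start_urls = ['https://www.skinnytaste.com/recipes/keto/']

    rules = [
        # Pagination rule: no callback, so follow defaults to True and
        # matched pages are crawled only to discover more links.
        spiders.Rule(
            linkextractors.LinkExtractor(
                allow=r'skinnytaste.com/recipes/keto/page/\d+/')),
        # Recipe rule: links inside the archive listing go to the callback,
        # and follow=False keeps the crawl from descending further.
        spiders.Rule(
            linkextractors.LinkExtractor(
                allow=[r'skinnytaste.com/[^\/]+/$'],
                restrict_xpaths='//div[@class="archives"]'),
            callback='parse_recipe',
            follow=False),
    ]

    def parse_recipe(self, response):
        # Illustrative stand-in for the repo's content saver: yield the
        # recipe URL and page title as a scraped item.
        yield {
            'url': response.url,
            'title': response.css('title::text').get(),
        }

The sketch can be run standalone with "scrapy runspider", and scraped items can be written out with -o, e.g. "-o recipes.json".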
