Adding SkinnyTaste (#62)
mtlynch committed Jan 13, 2019
1 parent b9eb8fd commit 6343d21
Showing 2 changed files with 28 additions and 0 deletions.
1 change: 1 addition & 0 deletions README.md
@@ -17,6 +17,7 @@ scrapy crawl ketovangelist-kitchen -s "DOWNLOAD_ROOT=${OUTPUT_DIR}"
scrapy crawl low-carb-yum -s "DOWNLOAD_ROOT=${OUTPUT_DIR}"
scrapy crawl queen-bs -s "DOWNLOAD_ROOT=${OUTPUT_DIR}"
scrapy crawl ruled-me -s "DOWNLOAD_ROOT=${OUTPUT_DIR}"
scrapy crawl skinny-taste -s "DOWNLOAD_ROOT=${OUTPUT_DIR}"
scrapy crawl sugar-free-mom -s "DOWNLOAD_ROOT=${OUTPUT_DIR}"
scrapy crawl wholesome-yum -s "DOWNLOAD_ROOT=${OUTPUT_DIR}"
scrapy crawl your-friends-j -s "DOWNLOAD_ROOT=${OUTPUT_DIR}"
27 changes: 27 additions & 0 deletions ketohub/spiders.py
@@ -296,6 +296,33 @@ class QueenBs(spiders.CrawlSpider):
    ]


class SkinnyTaste(spiders.CrawlSpider):
    name = 'skinny-taste'

    callback_handler = CallbackHandler(
        content_saver=persist.ContentSaver(_get_download_root()))

    allowed_domains = ['skinnytaste.com']
    start_urls = ['https://www.skinnytaste.com/recipes/keto/']

    rules = [
        # Extract links for finding additional recipe pages,
        # e.g. https://www.skinnytaste.com/recipes/keto/page/2/
        spiders.Rule(
            linkextractors.LinkExtractor(
                allow=r'skinnytaste.com/recipes/keto/page/\d+/')),
        # Extract links for recipes.
        spiders.Rule(
            linkextractors.LinkExtractor(
                allow=[
                    r'skinnytaste.com/[^\/]+/$',
                ],
                restrict_xpaths='//div[@class="archives"]'),
            callback=callback_handler.process_callback,
            follow=False),
    ]


class SugarFreeMom(spiders.CrawlSpider):
    name = 'sugar-free-mom'

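The new spider pairs two CrawlSpider rules: a pagination rule with no callback (so matched archive pages are only followed for more links) and a recipe rule with a callback and follow=False (so matched recipe pages are scraped but not crawled further). Below is a minimal, self-contained sketch of that pattern using only Scrapy's public API; CallbackHandler and persist.ContentSaver are project-specific helpers, so the parse_recipe callback and the "-sketch" spider name are illustrative stand-ins, not part of this commit.

from scrapy import linkextractors, spiders


class SkinnyTasteSketch(spiders.CrawlSpider):
    name = 'skinny-taste-sketch'
    allowed_domains = ['skinnytaste.com']
    start_urls = ['https://www.skinnytaste.com/recipes/keto/']

    rules = [
        # Pagination rule: no callback, so follow defaults to True and
        # matched pages are crawled only to discover more links.
        spiders.Rule(
            linkextractors.LinkExtractor(
                allow=r'skinnytaste.com/recipes/keto/page/\d+/')),
        # Recipe rule: links inside the archive listing go to the callback,
        # and follow=False keeps the crawl from descending further.
        spiders.Rule(
            linkextractors.LinkExtractor(
                allow=[r'skinnytaste.com/[^\/]+/$'],
                restrict_xpaths='//div[@class="archives"]'),
            callback='parse_recipe',
            follow=False),
    ]

    def parse_recipe(self, response):
        # Illustrative stand-in for the repo's content saver: yield the
        # recipe URL and page title as a scraped item.
        yield {
            'url': response.url,
            'title': response.css('title::text').get(),
        }

The sketch can be run standalone with "scrapy runspider", and scraped items can be written out with -o, e.g. "-o recipes.json".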
