MORPH_URLS

Scrape only URLs in environment variable MORPH_URLS
slow-mo · Jan 22, 2022 · d3e74da · d3e74da
1 parent 83d2000
commit d3e74da
Showing 1 changed file with 13 additions and 10 deletions.
diff --git a/scraper.py b/scraper.py
@@ -36,8 +36,13 @@
 try:
     MORPH_WAIT_SECS = os.environ['MORPH_WAIT_SECS']
 except KeyError:
-    MORPH_WAIT_SECS = 1 # Wait n seconds before each url is scraped
+    MORPH_WAIT_SECS = 1 # Wait 1 second before each url is scraped
 
+try:
+    MORPH_URLS = os.environ['MORPH_URLS']
+except KeyError:
+    MORPH_URLS = None
+
 
 def soupify(url, bs_parser='html.parser'):
     """Scrapes an url and returns a BeautifulSoup object"""
@@ -253,12 +258,10 @@ def parse_feed(url='https://www.liberliber.it/online/feed/'):
         scraperwiki.sqlite.save_var('last_pubDate', tmp_postId)
 
 
-def main():
-    if MORPH_MODE == 'feed':
-        parse_feed()
-    else:
-        build_db(MORPH_LETTERS)
-
-
-if __name__ == "__main__":
-    main()        
+if MORPH_MODE == 'feed':
+    parse_feed()
+elif MORPH_URLS:  # set and non-empty: scrape only the listed URLs
+    for i in MORPH_URLS.split(','):  # MORPH_URLS is a comma-separated string
+        scrape_author(i)
+else:
+    build_db(MORPH_LETTERS)