Skip to content

Commit

Permalink
MORPH_URLS
Browse files Browse the repository at this point in the history
Scrape only URLs in environment variable MORPH_URLS
  • Loading branch information
slow-mo committed Jan 22, 2022
1 parent 83d2000 commit d3e74da
Showing 1 changed file with 13 additions and 10 deletions.
23 changes: 13 additions & 10 deletions scraper.py
Expand Up @@ -36,8 +36,13 @@
# Seconds to wait before each URL is scraped; falls back to 1 when the
# MORPH_WAIT_SECS environment variable is not set.
# NOTE(review): a value read from the environment is a str while the
# fallback is an int -- confirm the consumer converts it before use.
MORPH_WAIT_SECS = os.environ.get('MORPH_WAIT_SECS', 1)

# Raw value of the MORPH_URLS environment variable, or None when unset.
MORPH_URLS = os.environ.get('MORPH_URLS')


def soupify(url, bs_parser='html.parser'):
"""Scrapes an url and returns a BeautifulSoup object"""
Expand Down Expand Up @@ -253,12 +258,10 @@ def parse_feed(url='https://www.liberliber.it/online/feed/'):
scraperwiki.sqlite.save_var('last_pubDate', tmp_postId)


def _split_urls(raw):
    """Split a comma-separated string of URLs into a clean list.

    Whitespace around each entry is stripped, and empty entries (for
    example the one produced by a trailing comma) are dropped.
    """
    return [part.strip() for part in raw.split(',') if part.strip()]


def main():
    """Run the scraper in the mode selected by the MORPH_* settings.

    * ``MORPH_MODE == 'feed'``: call ``parse_feed()``.
    * otherwise, if ``MORPH_URLS`` is set and non-empty: call
      ``scrape_author()`` once for each comma-separated entry.
    * otherwise: call ``build_db(MORPH_LETTERS)``.
    """
    if MORPH_MODE == 'feed':
        parse_feed()
    elif MORPH_URLS:
        # str.split() is called on the string being split and takes the
        # separator as its argument; ``','.split(MORPH_URLS)`` would
        # yield [','] instead of the individual URLs.
        for url in _split_urls(MORPH_URLS):
            scrape_author(url)
    else:
        build_db(MORPH_LETTERS)


if __name__ == "__main__":
    main()

0 comments on commit d3e74da

Please sign in to comment.