diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..66d464d --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +# Ignore output of scraper +data.sqlite diff --git a/README.md b/README.md new file mode 100644 index 0000000..e541894 --- /dev/null +++ b/README.md @@ -0,0 +1 @@ +This is a scraper that runs on [Morph](https://morph.io). To get started, [see the documentation](https://morph.io/documentation) \ No newline at end of file diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..fce25cc --- /dev/null +++ b/requirements.txt @@ -0,0 +1,9 @@ +# It's easy to add more libraries or choose different versions. Any libraries +# specified here will be installed and made available to your morph.io scraper. +# Find out more: https://morph.io/documentation/python + +# Custom version of scraperwiki library +-e git+http://github.com/openaustralia/scraperwiki-python.git@morph_defaults#egg=scraperwiki + +lxml==3.4.4 +cssselect==0.9.1 diff --git a/runtime.txt b/runtime.txt new file mode 100644 index 0000000..c47075b --- /dev/null +++ b/runtime.txt @@ -0,0 +1 @@ +python-2.7.9 diff --git a/scraper.py b/scraper.py new file mode 100644 index 0000000..69bea68 --- /dev/null +++ b/scraper.py @@ -0,0 +1,24 @@ +# This is a template for a Python scraper on morph.io (https://morph.io) +# including some code snippets below that you should find helpful + +# import scraperwiki +# import lxml.html +# +# # Read in a page +# html = scraperwiki.scrape("http://foo.com") +# +# # Find something on the page using css selectors +# root = lxml.html.fromstring(html) +# root.cssselect("div[align='left']") +# +# # Write out to the sqlite database using scraperwiki library +# scraperwiki.sqlite.save(unique_keys=['name'], data={"name": "susan", "occupation": "software developer"}) +# +# # An arbitrary query against the database +# scraperwiki.sql.select("* from data where name='peter'") + +# You don't have to do things with the ScraperWiki and lxml libraries. 
+# You can use whatever libraries you want: https://morph.io/documentation/python +# All that matters is that your final data is written to an SQLite database +# called "data.sqlite" in the current working directory, and that the database +# contains at least a table called "data".