Skip to content

Commit

Permalink
Update scraper.py
Browse files Browse the repository at this point in the history
  • Loading branch information
paulbradshaw committed Jan 25, 2018
1 parent 17903a2 commit 84eec5f
Showing 1 changed file with 9 additions and 5 deletions.
14 changes: 9 additions & 5 deletions scraper.py
@@ -1,15 +1,19 @@
# This is a template for a Python scraper on morph.io (https://morph.io)
# including some code snippets below that you should find helpful

# import scraperwiki
# import lxml.html
import scraperwiki
import lxml.html
#
# # Read in a page
# html = scraperwiki.scrape("http://foo.com")
# Download the IMDb top-TV chart page (URL below) via the scraperwiki helper.
html = scraperwiki.scrape("http://www.imdb.com/chart/toptv/?ref_=nv_tvv_250_3")
#
# # Find something on the page using css selectors
# root = lxml.html.fromstring(html)
# root.cssselect("div[align='left']")
# Parse the downloaded markup into an lxml element tree for CSS querying.
root = lxml.html.fromstring(html)
# Longer selector path, apparently for the first title link; kept for reference:
##main > div > span > div > div > div.lister > table > tbody > tr:nth-child(1) > td.titleColumn > a
# Select every anchor nested inside a cell with class "titleColumn".
links = root.cssselect("td.titleColumn a")
for link in links:
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print(link)
    # `.text` is an attribute (a string or None), not a method; calling it
    # as `link.text()` raises TypeError.
    print(link.text)
#
# # Write out to the sqlite database using scraperwiki library
# scraperwiki.sqlite.save(unique_keys=['name'], data={"name": "susan", "occupation": "software developer"})
Expand Down

0 comments on commit 84eec5f

Please sign in to comment.