From 84eec5f39edad65f2a92ee260e383899ea2de3dd Mon Sep 17 00:00:00 2001
From: Paul Bradshaw
Date: Thu, 25 Jan 2018 12:03:41 +0000
Subject: [PATCH] Update scraper.py

---
Review note: line breaks of this patch were restored, and the added
`print link.text()` was corrected to `print link.text`. On lxml elements
`.text` is an attribute (a string or None), not a method, so calling it
raises TypeError on the first matched link.

 scraper.py | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/scraper.py b/scraper.py
index 69bea68..c04b7b7 100644
--- a/scraper.py
+++ b/scraper.py
@@ -1,15 +1,19 @@
 # This is a template for a Python scraper on morph.io (https://morph.io)
 # including some code snippets below that you should find helpful
 
-# import scraperwiki
-# import lxml.html
+import scraperwiki
+import lxml.html
 #
 # # Read in a page
-# html = scraperwiki.scrape("http://foo.com")
+html = scraperwiki.scrape("http://www.imdb.com/chart/toptv/?ref_=nv_tvv_250_3")
 #
 # # Find something on the page using css selectors
-# root = lxml.html.fromstring(html)
-# root.cssselect("div[align='left']")
+root = lxml.html.fromstring(html)
+##main > div > span > div > div > div.lister > table > tbody > tr:nth-child(1) > td.titleColumn > a
+links = root.cssselect("td.titleColumn a")
+for link in links:
+    print link
+    print link.text
 #
 # # Write out to the sqlite database using scraperwiki library
 # scraperwiki.sqlite.save(unique_keys=['name'], data={"name": "susan", "occupation": "software developer"})