Skip to content

Commit

Permalink
Update scraper.py
Browse files Browse the repository at this point in the history
  • Loading branch information
mcvax committed Feb 6, 2015
1 parent eb15779 commit cb91220
Showing 1 changed file with 10 additions and 23 deletions.
33 changes: 10 additions & 23 deletions scraper.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,11 +43,20 @@

# Text of the current element as returned by its text_content() method.
mytest = el.text_content()

# Debug dump of that text, encoded to UTF-8 before printing; the second copy
# of the statement below is disabled.
print str(mytest.encode('utf-8'))
#print str(mytest.encode('utf-8'))

# Probe the element for an <h1> at increasing nesting depth and print the
# first match of every selector that hits, prefixed with 'One'.
#
# The guard is the cssselect() result itself: hasattr(el, 'div h1') only asks
# whether the Python object has an attribute with that literal name, it never
# runs the selector, so it cannot tell whether a heading is present.
#
# Parenthesised single-argument print behaves the same under Python 2's
# print statement, which the rest of this script uses.
for selector in ('h1', 'div h1', 'div div h1', 'div div div h1'):
    matches = el.cssselect(selector)
    if matches:
        print('One' + matches[0].text_content())

# Same probe for the first <h2>, prefixed with 'Two'.
matches = el.cssselect('h2')
if matches:
    print('Two' + matches[0].text_content())

Expand All @@ -60,26 +69,4 @@
artist = el.cssselect("div div div H2")[0].text_content()
else:
artist = ''

# Disabled experiments for extracting further fields from the element.
#artist2 = el.cssselect("H2").text_content()
#publisher = el.cssselect("H3")[0].text_content()
#link = el.attrib['href']
#isbn = link.split("/")[2]
# Running counter. NOTE(review): presumably the position of this item in the
# scraped list; the "pos" field of the record below is disabled, so the value
# is not stored -- confirm whether it is still needed.
pos += 1

# Echo the extracted values for debugging.
print title
print artist
#print link
#print isbn
#link = "http://www.readings.com.au" + link
# Row to persist: the title and artist extracted earlier plus the current
# date formatted as YYYY-MM-DD. The commented-out keys are disabled fields.
record = {"title" : title,
"artist" : artist,
#"artist2" : artist,
#"publisher" : publisher,
#"isbn" : isbn,
#"link" : link,
#"pos" : pos,
"sdate" : time.strftime( "%Y-%m-%d" )
}

# NOTE(review): "sdate" is the only unique key and it holds just the date, so
# rows saved on the same day would appear to replace one another -- confirm
# against the scraperwiki save() semantics that one row per day is intended.
scraperwiki.sqlite.save(unique_keys=["sdate"], data=record)

0 comments on commit cb91220

Please sign in to comment.