Skip to content
This repository has been archived by the owner on Nov 18, 2023. It is now read-only.

Commit

Permalink
Adjust scraper to new table design.
Browse files Browse the repository at this point in the history
  • Loading branch information
residuum committed Jul 22, 2016
1 parent 1cbf2de commit e412c13
Showing 1 changed file with 14 additions and 13 deletions.
27 changes: 14 additions & 13 deletions scraper.py
Expand Up @@ -3,26 +3,28 @@
import lxml.html
import scraperwiki

# NOTE(review): this span is a rendered commit diff of scraper.py with the
# +/- markers and the original indentation stripped, so pre-commit and
# post-commit versions of a statement appear back to back. The old/new labels
# below follow the hunk line counts and the commit title ("Adjust scraper to
# new table design"); it is not runnable as shown.

# Fetch the league-table page. First line: pre-commit URL; second line: its
# post-commit replacement.
html = requests.get('http://www.premierleague.com/en-gb/matchday/league-table.html').content
html = requests.get('http://www.premierleague.com/tables').content

# Parse the downloaded markup into an lxml element tree.
dom = lxml.html.fromstring(html)

# One dict per table row is collected here.
premierLeagueData = []

# Pre-commit row loop: every value is located through a per-column CSS class
# on 'tr.club-row' rows.
for row in dom.cssselect('tr.club-row'):
pos = int(row.cssselect('.col-pos')[0].text_content())
team = row.cssselect('.col-club')[0].text_content()
goalsFor = int(row.cssselect('.col-gf')[0].text_content())
goalsAgainst = int(row.cssselect('.col-ga')[0].text_content())
goalDifference = int(row.cssselect('.col-gd')[0].text_content())
points = int(row.cssselect('.col-pts')[0].text_content())
# Post-commit row loop: selects the direct <tr> children of
# '.tableBodyContainer' inside '.mainTableTab table', skipping rows that carry
# the 'expandable' class.
for row in dom.cssselect('.mainTableTab table .tableBodyContainer > tr:not(.expandable)'):
pos = int(row.cssselect('.pos .value')[0].text_content())
team = row.cssselect('.team .long')[0].text_content()
# Goals for / against / difference are read by cell position (td indexes 7, 8
# and 9) rather than by class, so they depend on the column order of the page.
goalsFor = int(row.cssselect('td')[7].text_content())
goalsAgainst = int(row.cssselect('td')[8].text_content())
goalDifference = int(row.cssselect('td')[9].text_content())
points = int(row.cssselect('.points')[0].text_content())
#print pos, team,"gf", goalsFor, "ga", goalsAgainst, "gd", goalDifference, "pts", points
# Build the record for this row. The first opening line is the pre-commit
# form; the commit only reflows the literal (same keys and values).
teamItem = {'pos':pos,
teamItem = {
'pos':pos,
'team':team,
'gf':goalsFor,
'ga':goalsAgainst,
'gd':goalDifference,
# Pre-commit closing of the literal, followed by the post-commit closing.
'pts':points}
'pts':points
}
premierLeagueData.append(teamItem)

# Only write to the data store when at least one row was parsed.
if len(premierLeagueData) > 0:
# The next line is the diff viewer's hunk separator, not part of scraper.py.
Expand All @@ -31,6 +33,5 @@
#add each table line to data store
for teamItem in premierLeagueData:
# ['team'] is passed as the first argument; presumably the unique-key list
# used to upsert one row per team -- confirm against the scraperwiki API.
scraperwiki.sql.save(['team'], teamItem)



# Added by this commit: fail loudly instead of finishing silently when no rows
# were parsed.
else:
raise ValueError('Cannot read table, maybe format or URL has changed.')

0 comments on commit e412c13

Please sign in to comment.