Skip to content

Commit

Permalink
Initial commit
Browse files · Browse the repository at this point in the history
  • Loading branch information
mhl committed Sep 15, 2016
0 parents commit 78bbf12
Show file tree
Hide file tree
Showing 3 changed files with 49 additions and 0 deletions.
2 changes: 2 additions & 0 deletions .gitignore
@@ -0,0 +1,2 @@
/data.sqlite
*~
10 changes: 10 additions & 0 deletions requirements.txt
@@ -0,0 +1,10 @@
# It's easy to add more libraries or choose different versions. Any libraries
# specified here will be installed and made available to your morph.io scraper.
# Find out more: https://morph.io/documentation/python

# Custom version of scraperwiki library
-e git+http://github.com/openaustralia/scraperwiki-python.git@morph_defaults#egg=scraperwiki

lxml==3.4.4
cssselect==0.9.1
requests==2.7.0
37 changes: 37 additions & 0 deletions scraper.py
@@ -0,0 +1,37 @@
#!/usr/bin/env python

import csv
import re
from urlparse import urlsplit

import requests
import scraperwiki

'''This "scraper" just changes the columns in the YourNextMP elected
candidates data from the UK 2015 general election'''

url = 'https://candidates.democracyclub.org.uk/media/candidates-elected-2015.csv'

# Stream the response so DictReader can consume the raw file object
# directly instead of buffering the whole CSV in memory first.
r = requests.get(url, stream=True)

for row in csv.DictReader(r.raw):
    # parlparse IDs look like ".../person/12345" -- keep only the trailing
    # number. (If the pattern doesn't match, re.sub leaves the value as-is.)
    parlparse_person_id = re.sub(r'^.*/(\d+)$', r'\1', row['parlparse_id'])
    wikiname = ''
    if row['wikipedia_url']:
        split = urlsplit(row['wikipedia_url'])
        # The article title is whatever follows "/wiki/" in the URL path.
        wikiname = split.path[len('/wiki/'):]
        # BUG FIX: str.replace returns a new string -- the original code
        # discarded the result, so underscores were never turned into
        # spaces. Assign it back so "Jo_Bloggs" is stored as "Jo Bloggs".
        wikiname = wikiname.replace('_', ' ')
    # Upsert one row per person, keyed on the parlparse person id.
    scraperwiki.sqlite.save(
        unique_keys=['id'],
        data={
            'id': parlparse_person_id,
            'name': row['name'],
            'twitter': row['twitter_username'],
            'facebook': row['facebook_page_url'],
            'wikipedia': row['wikipedia_url'],
            'wikiname': wikiname,
            'birth_date': row['birth_date'],
            'linkedin': row['linkedin_url'],
            'image': row['image_url'],
        }
    )

0 comments on commit 78bbf12

Please sign in to comment.