Skip to content

Commit

Permalink
Merge pull request #3 from pyconsk/master
Browse files Browse the repository at this point in the history
Fixed an issue where morph.io was breaking due to a problem with Unicode names
  • Loading branch information
Richard Kellner committed Oct 10, 2015
2 parents 4c92df2 + 9d719af commit 1b8d366
Showing 1 changed file with 9 additions and 6 deletions.
15 changes: 9 additions & 6 deletions scraper.py
Expand Up @@ -118,20 +118,23 @@ def parse_html(html, term_nr):
try:
data_row['speech_video'] = links[0].attrs['href']
except IndexError:
print(u'Meeting no. %s (%s - %s) speech video link for %s has not been found!' %
(data_row['meeting_number'], data_row['time_from'], data_row['time_to'], data_row['member']))
print('Meeting no. %s (%s - %s) speech video link for %s has not been found!' %
(data_row['meeting_number'], data_row['time_from'], data_row['time_to'],
data_row['member'].encode('ascii','ignore')))

try:
data_row['proceedings_video'] = links[1].attrs['href']
except IndexError:
print(u'Meeting no. %s (%s - %s) proceedings video link for %s link has not been found!' % \
(data_row['meeting_number'], data_row['time_from'], data_row['time_to'], data_row['member']))
print('Meeting no. %s (%s - %s) proceedings video link for %s link has not been found!' % \
(data_row['meeting_number'], data_row['time_from'], data_row['time_to'],
data_row['member'].encode('ascii','ignore')))

try:
data_row['transcript'] = links[2].attrs['href']
except IndexError:
print(u'Meeting no. %s (%s - %s) transcript link for %s has not been found!' % \
(data_row['meeting_number'], data_row['time_from'], data_row['time_to'], data_row['member']))
print('Meeting no. %s (%s - %s) transcript link for %s has not been found!' % \
(data_row['meeting_number'], data_row['time_from'], data_row['time_to'],
data_row['member'].encode('ascii','ignore')))

data_rows.append(data_row)

Expand Down

0 comments on commit 1b8d366

Please sign in to comment.