Permalink
Browse files

Merge pull request #2 from j91321/master

Get filenames using re, fixing KeyError: "There is no item named 'OBCE.XLS' in the archive"
  • Loading branch information...
2 parents 030057d + 4a0e251 · commit 602cde0ba857fd4369dba8735e15ecf8a6fdaa41 · @hanecak (hanecak) committed on GitHub, Oct 10, 2016
Showing 1 changed file with 8 additions and 2 deletions.
  1. +8 −2 scraper.py
View
@@ -4,15 +4,21 @@
import urllib2
import xlrd
import zipfile
+import re
from cStringIO import StringIO
url = "http://www.posta.sk/subory/322/psc-obci-a-ulic.zip"
archive_file = StringIO(urllib2.urlopen(url).read())
archive = zipfile.ZipFile(archive_file)
+archive_namelist = archive.namelist()
+r1 = re.compile('^obce.', re.IGNORECASE)
+r2 = re.compile('^ulice.', re.IGNORECASE)
+obce_filename = filter(r1.match, archive_namelist)[0]
+ulice_filename = filter(r2.match, archive_namelist)[0]
# Post codes for cities
-wb = xlrd.open_workbook(file_contents=archive.read('OBCE.XLS'))
+wb = xlrd.open_workbook(file_contents=archive.read(obce_filename))
sheet = wb.sheets()[0]
for row in range(1, sheet.nrows):
data = {
@@ -24,7 +30,7 @@
scraperwiki.sqlite.save(unique_keys=['obec'], data=data, table_name="towns")
# Streets
-wb = xlrd.open_workbook(file_contents=archive.read('ULICE.XLS'))
+wb = xlrd.open_workbook(file_contents=archive.read(ulice_filename))
sheet = wb.sheets()[0]
for row in range(1, sheet.nrows):
data = {

0 comments on commit 602cde0

Please sign in to comment.