Permalink
Browse files

check map link

  • Loading branch information...
1 parent 5ad4b8e commit e136eedd91879eab85076f4b048b597bce88a356 @tlevine committed Jun 16, 2013
Showing with 15 additions and 5 deletions.
  1. +7 −1 reader/src/finalip_lib.py
  2. +3 −4 reader/src/finalip_parse.py
  3. +5 −0 reader/src/test_finalip_lib.py
@@ -2,9 +2,15 @@
import datetime
from collections import OrderedDict
+from lxml.html import tostring
+
def parse_row(tr):
row = OrderedDict([(unicode(td.xpath('@headers')[0]), unicode(td.text_content())) for td in tr.xpath('td[@headers!="Map"]')])
- row[u'Map'] = unicode(tr.xpath('descendant::td[@headers="Map"]/a/@href')[0])
+
+ # Map link
+ map_hrefs = tr.xpath('descendant::td[@headers="Map"]/a/@href')
+ row[u'Map'] = None if map_hrefs == [] else unicode(map_hrefs[0])
+
for key in [u'Date Issued\\Denied', u'Public Notice Date']:
row[key] = datetime.datetime.strptime(row[key], '%d-%b-%Y').date()
return row
@@ -12,13 +12,13 @@ def read_finalip(path):
return map(l.parse_row, trs[2:])
# Schema
-dt = Dumptruck(dbname = '/tmp/finalip.db')
+dt = DumpTruck(dbname = '/tmp/finalip.db')
dt.create_table({u'DA Number': u'NAE-2009-01067'}, 'finalip', if_not_exists = True)
-dt.create_index(['Da Number'], unique = True, if_not_exists = True)
+dt.create_index(['Da Number'], 'finalip', unique = True, if_not_exists = True)
# Populate
for dirname, subdirnames, filenames in os.walk(os.path.join(os.environ['READER_ROOT'], '..', 'finalips')):
- if subdirs != []:
+ if subdirnames != []:
continue
for filename in filenames:
year, month = map(int, dirname.split('/')[-2:])
@@ -28,4 +28,3 @@ def read_finalip(path):
row['Month'] = month
row['Page'] = filename
dt.upsert(data, 'finalip')
-
@@ -23,3 +23,8 @@ def test_parse_row():
n.assert_list_equal(observed.keys(),expected.keys())
for k in observed.keys():
n.assert_equal(observed[k], expected[k])
+
+def test_parse_row_empty_map():
+ tr = fromstring('''<tr class="ui-widget-content jqgrow ui-row-ltr"><td headers="District">Nashville</td><td headers="DA Number">LRN-2009-00420</td><td headers="Applicant">Nashville District Corps of Engineers, Regulatory Branch</td><td headers="Project Name">Re-Issuance of Regional Permit for Additions to Existing Commercial Marinas in the Tennessee River Basin</td><td headers="Permit Type">Standard Permit</td><td headers="Public Notice Date">09-MAR-2009</td><td headers="Action Taken">Issued With Special Conditions</td><td headers="Date Issued\Denied">22-APR-2009</td><td align="center" headers="Map"> - </td></tr>''')
+ observed = l.parse_row(tr)
+ n.assert_equal(observed[u'Map'], None)

0 comments on commit e136eed

Please sign in to comment.