Permalink
Browse files

skip finished things

  • Loading branch information...
tlevine committed Jun 16, 2013
1 parent c9b440d commit 419b4eacc69b256b10b4ada3d25f9c4914f18fce
Showing with 14 additions and 4 deletions.
  1. +14 −4 reader/src/finalip_parse.py
@@ -22,15 +22,25 @@ def do_row(tr):
dt.create_table({u'DA Number': u'NAE-2009-01067'}, 'finalip', if_not_exists = True)
dt.create_index(['Da Number'], 'finalip', unique = True, if_not_exists = True)
+# Skip pages already in the finalip table: collect their (Year, Month, Page) keys
+pages = set([(row['Year'], row['Month'], row['Page']) for row in dt.execute('SELECT Year, Month, Page FROM finalip')])
+
# Populate
for dirname, subdirnames, filenames in os.walk(os.path.join(os.environ['READER_ROOT'], '..', 'finalips')):
if subdirnames != []:
continue
for filename in filenames:
year, month = map(int, dirname.split('/')[-2:])
- data = read_finalip(os.path.join(dirname, filename))
+ page = (year, month, filename)
+ if page in pages:
+ continue
+
+ path = os.path.join(dirname, filename)
+ try:
+ data = read_finalip(path)
+ except:
+ print path
+ raise
for row in data:
- row['Year'] = year
- row['Month'] = month
- row['Page'] = filename
+ row['Year'], row['Month'], row['Page'] = page
dt.upsert(data, 'finalip')

0 comments on commit 419b4ea

Please sign in to comment.