Skip to content

Commit

Permalink
ahroeuhs
Browse files Browse the repository at this point in the history
  • Loading branch information
tlevine committed Jan 19, 2013
1 parent af75309 commit b096d5c
Show file tree
Hide file tree
Showing 2 changed files with 35 additions and 0 deletions.
33 changes: 33 additions & 0 deletions reader/src/public_notice.py
Expand Up @@ -30,6 +30,36 @@ def main():
url = 'http://localhost:' + os.environ['PORT'] + '/applications/' + permitApplicationNumber
requests.put(url, doc, auth = ('bot', os.environ['SCRAPER_PASSWORD']))

# Matches a line starting with "LOCATION OF WORK:" or "LOCATION:".
LOCATION_OF_WORK = re.compile(r'^(LOCATION OF WORK|LOCATION):.*$')
# Matches a line starting with "CHARACTER OF WORK:" or "DESCRIPTION:".
CHARACTER_OF_WORK = re.compile(r'^(CHARACTER OF WORK|DESCRIPTION):.*$')
def _location_of_work(text):
    """Return the "location of work" section of a public notice.

    Collection begins at the first line matching LOCATION_OF_WORK (the
    heading line itself is kept).  Scanning stops at the first line that
    matches CHARACTER_OF_WORK, which is not included.  The kept lines are
    concatenated with no separator; if scanning stops before a location
    heading is seen, the result is an empty string.
    """
    collected = []
    started = False
    for row in text.split('\n'):
        if LOCATION_OF_WORK.match(row):
            started = True
        elif CHARACTER_OF_WORK.match(row):
            # The following section's heading ends the scan.
            break
        if not started:
            continue
        collected.append(row)
    return ''.join(collected)

def _character_of_work(text):
    """Return the "character of work" section of a public notice.

    Collection begins at the first line matching CHARACTER_OF_WORK (the
    heading line itself is kept) and ends at the first empty line that
    follows it.  The kept lines are concatenated with no separator; if no
    heading is found, the result is an empty string.
    """
    out = []
    in_window = False
    for line in text.split('\n'):
        if CHARACTER_OF_WORK.match(line):
            in_window = True
        elif in_window and line == '':
            # A blank line terminates the section only once we are inside
            # it.  Blank lines earlier in the notice must not stop the scan
            # before the heading has been reached.
            break

        if in_window:
            out.append(line)
    return ''.join(out)

def parse(text):
# Parse
guess = read_public_notice(text)
Expand All @@ -44,6 +74,9 @@ def parse(text):
if len(guess['Coords']) > 0:
doc['latitude'], doc['longitude'] = guess['Coords'][0]

doc['description'] = _location_of_work(text)
doc['description'] = _character_of_work(text)

return doc

def read_public_notice(rawtext):
Expand Down
2 changes: 2 additions & 0 deletions server/schema.sql
Expand Up @@ -19,6 +19,8 @@ CREATE TABLE application (
"WQC" TEXT NOT NULL,

-- Manually taken from the public notice
"locationOfWork" TEXT NOT NULL,
"characterOfWork" TEXT NOT NULL,
"longitude" FLOAT,
"latitude" FLOAT,
"acreage" FLOAT,
Expand Down

0 comments on commit b096d5c

Please sign in to comment.