Detect florida-shelters missing from irma-shelters
This code compares the shelters listed in florida-shelters.json with the
shelters we have manually added to irma-shelters.json via the
https://irma-api.herokuapp.com/ tool. It generates a list of potentially
missing shelters, based on geohash location comparisons.
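
A minimal sketch of that comparison (is_missing is a hypothetical helper, not part
of this commit; the scraper performs the same check inline):

    import Geohash

    def is_missing(candidate_lat, candidate_lon, known_geohashes):
        # A shelter is flagged as potentially missing when no manually added
        # shelter shares its precision-6 geohash cell (roughly 1.2 km x 0.6 km)
        return Geohash.encode(candidate_lat, candidate_lon, 6) not in known_geohashes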

These can be fixed either by adding the missing shelter, or by pasting the map
URL of the suggested shelter into a comment on
https://github.com/simonw/irma-scraped-data/issues/2 to mark it as whitelisted.
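
For example, a whitelist comment containing a pasted map URL would be picked up by
the regex added in this commit (the comment text below is invented):

    import re

    map_url_re = re.compile(
        r'http://maps.google.com/maps\?saddr=&daddr=-?\d+\.\d+,-?\d+\.\d+'
    )
    comment_body = 'Already covered: http://maps.google.com/maps?saddr=&daddr=26.1224,-80.1373'
    print(map_url_re.findall(comment_body))
    # ['http://maps.google.com/maps?saddr=&daddr=26.1224,-80.1373']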

Changes to the list of missing shelters will be published to Slack.

References simonw/disaster-data#1
Simon Willison committed Sep 12, 2017
1 parent 0884f55 commit 113b221
Showing 2 changed files with 105 additions and 0 deletions.
irma.py: 2 additions & 0 deletions
@@ -2,6 +2,7 @@
from irma_shelters import (
    IrmaShelters,
    IrmaShelterDupes,
    IrmaSheltersFloridaMissing,
)
from gis_scrapers import (
    FemaOpenShelters,
@@ -450,6 +451,7 @@ def fetch_data(self):
            GemaAnimalShelters,
            GemaActiveShelters,
            ScegOutages,
            IrmaSheltersFloridaMissing,
        )
    ]
    while True:
irma_shelters.py: 103 additions & 0 deletions
@@ -1,6 +1,7 @@
from base_scraper import BaseScraper
import requests
import Geohash
import re

IGNORE_DUPE_IDS = {
    456,  # Hialeah Middle School
@@ -215,3 +216,105 @@ def fetch_data(self):
                'view_url': 'https://irma-api.herokuapp.com/shelters/%s' % shelter['id'],
            } for shelter in no_latlons]
        }


# Matches Google Maps URLs pasted into the whitelist issue's comments
map_url_re = re.compile(
    r'http://maps.google.com/maps\?saddr=&daddr=-?\d+\.\d+,-?\d+\.\d+'
)


# Flags shelters listed in florida-shelters.json that have no geohash match in irma-shelters.json
class IrmaSheltersFloridaMissing(BaseScraper):
    filepath = 'florida-shelters-missing.json'
    our_url = 'https://raw.githubusercontent.com/simonw/irma-scraped-data/master/irma-shelters.json'
    their_url = 'https://raw.githubusercontent.com/simonw/irma-scraped-data/master/florida-shelters.json'
    issue_comments_url = 'https://api.github.com/repos/simonw/irma-scraped-data/issues/2/comments'

    def create_message(self, new_data):
        return self.update_message([], new_data, 'Created')

    def update_message(self, old_data, new_data, verb='Updated'):
        previous_map_urls = [
            d['map_url'] for d in old_data
        ]
        current_map_urls = [
            d['map_url'] for d in new_data
        ]
        added_map_urls = [
            map_url for map_url in current_map_urls
            if map_url not in previous_map_urls
        ]
        removed_map_urls = [
            map_url for map_url in previous_map_urls
            if map_url not in current_map_urls
        ]

        message = []

        if added_map_urls:
            message.append('New potentially missing shelters:')

        for map_url in added_map_urls:
            shelter = [s for s in new_data if s['map_url'] == map_url][0]
            message.append(' %s (%s County)' % (shelter['name'], shelter['county']))
            message.append(' Type: ' + shelter['type'])
            message.append(' ' + shelter['address'])
            message.append(' ' + shelter['city'])
            message.append(' ' + shelter['map_url'])
            message.append('')

        if added_map_urls and removed_map_urls:
            message.append('')

        if removed_map_urls:
            message.append('Previous missing shelters now resolved:')

        for map_url in removed_map_urls:
            shelter = [s for s in old_data if s['map_url'] == map_url][0]
            message.append(' %s (%s County)' % (shelter['name'], shelter['county']))

        body = '\n'.join(message)
        summary = []
        if added_map_urls:
            summary.append('%d potentially missing shelter%s detected' % (
                len(added_map_urls), '' if len(added_map_urls) == 1 else 's',
            ))
        if removed_map_urls:
            summary.append('%d shelter%s resolved' % (
                len(removed_map_urls), '' if len(removed_map_urls) == 1 else 's',
            ))
        if summary:
            summary_text = self.filepath + ': ' + (', '.join(summary))
        else:
            summary_text = '%s %s' % (verb, self.filepath)
        return summary_text + '\n\n' + body

    def fetch_data(self):
        our_shelters = requests.get(self.our_url).json()
        their_shelters = requests.get(self.their_url).json()
        # Geohash our manually curated shelters at precision 6
        # (cells of roughly 1.2 km x 0.6 km)
        our_geohashes = set([
            Geohash.encode(s['latitude'], s['longitude'], 6)
            for s in our_shelters
        ])
        # Parse coordinates for their shelters out of the Google Maps URL
        for shelter in their_shelters:
            coords = shelter['map_url'].split('daddr=')[1]
            latitude, longitude = map(float, coords.split(','))
            geohash = Geohash.encode(latitude, longitude, 6)
            shelter['geohash'] = geohash
        maybe_missing_shelters = [
            s for s in their_shelters
            if s['geohash'] not in our_geohashes
        ]
        # Map URLs pasted into the whitelist issue mark those shelters as ignored
        comments = requests.get(
            self.issue_comments_url,
            headers={
                'Authorization': 'token %s' % self.github_token,
            },
        ).json()
        ignore_map_urls = []
        for comment in comments:
            ignore_map_urls.extend(map_url_re.findall(comment['body']))
        maybe_missing_shelters = [
            s for s in maybe_missing_shelters
            if s['map_url'] not in ignore_map_urls
        ]
        return maybe_missing_shelters
