Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.
Download ZIP
Browse files

Update ScraperWiki base class to record batch import stats

  • Loading branch information...
commit 85436650866b942df12ce3318503cec9df86fdcc 1 parent 214b7a1
@copelco copelco authored
Showing with 18 additions and 9 deletions.
  1. +18 −9 openrural/retrieval/scraperwiki.py
View
27 openrural/retrieval/scraperwiki.py
@@ -3,6 +3,7 @@
import urllib
import urllib2
import logging
+import datetime
import traceback
import ebdata.retrieval.log # sets up base handlers.
@@ -10,10 +11,12 @@
from ebpub.geocoder import GeocodingException, ParsingError, AmbiguousResult
from ebpub.streets.models import ImproperCity
-from openrural.error_log.models import Error as ScraperError
+from openrural.error_log import models as error_log
+
logging.getLogger().setLevel(logging.DEBUG)
+
class ScraperWikiScraper(NewsItemListDetailScraper):
url = "http://api.scraperwiki.com/api/1.0/datastore/sqlite"
@@ -26,10 +29,12 @@ def __init__(self, *args, **kwargs):
super(ScraperWikiScraper, self).__init__(*args, **kwargs)
if clear:
self._create_schema()
+ # these are incremented by NewsItemListDetailScraper
self.num_added = 0
- self.num_total = 0
- self.num_geocode = 0
- self.num_geocode_success = 0
+ self.num_changed = 0
+ self.num_skipped = 0
+ self.batch = \
+ error_log.Batch.objects.create(scraper=self.schema_slugs[0])
def get_query(self, select='*', limit=10, offset=0):
where = ''
@@ -72,19 +77,23 @@ def list_pages(self):
def parse_list(self, data):
for row in json.loads(data):
+ self.batch.num += 1
yield row
def update(self):
super(ScraperWikiScraper, self).update()
- geocode_rate = float(self.num_geocode_success) / self.num_geocode
- self.logger.info('Geocode success rate {:.2%}'.format(geocode_rate))
+ self.batch.end_time = datetime.datetime.now()
+ self.batch.num_added = self.num_added
+ self.batch.num_changed = self.num_changed
+ self.batch.num_skipped = self.num_skipped
+ self.batch.save()
def geocode(self, location_name, zipcode=None):
"""
Tries to geocode the given location string, returning a Point object
or None.
"""
- self.num_geocode += 1
+ self.batch.num_geocoded += 1
# Try to look up the address; if it is ambiguous, attempt to use
# any provided zipcode information to resolve the ambiguity.
# The zipcode is not included in the initial pass because it
@@ -93,7 +102,7 @@ def geocode(self, location_name, zipcode=None):
# or street number data.
try:
loc = self._geocoder.geocode(location_name)
- self.num_geocode_success += 1
+ self.batch.num_geocoded_success += 1
return loc
except AmbiguousResult as result:
# try to resolve based on zipcode...
@@ -116,7 +125,7 @@ def geocode(self, location_name, zipcode=None):
else:
return in_zip[0]
except (GeocodingException, ParsingError, ImproperCity) as e:
- ScraperError.objects.create(
+ self.batch.errors.create(
scraper=self.schema_slugs[0],
name=type(e).__name__,
location=location_name,
Please sign in to comment.
Something went wrong with that request. Please try again.