Skip to content
Browse files

Issue #262 - fixed message, improved tests, added debug message level to Monitor, and added --verbose option to ng_scraper, both to help with getting diagnostics from the scraper.
  • Loading branch information...
1 parent d068475 commit 86486031479c09336b81ac91e86a4eaa55567034 @nwinklareth nwinklareth committed Mar 2, 2014
View
20 countyapi/management/commands/ng_scraper.py
@@ -6,27 +6,33 @@
from django.core.management.base import BaseCommand
from countyapi.management.scraper.scraper import Scraper
-
-START_DATE = 'start_date'
+from countyapi.management.scraper.monitor import Monitor
log = logging.getLogger('main')
class Command(BaseCommand):
+
+ START_DATE = 'start_date'
+ VERBOSE_MODE = 'verbose_mode'
+
help = "Scrape inmate data from Cook County Sheriff's site."
option_list = BaseCommand.option_list + (
make_option('-d', '--day', type='string', action='store', dest=START_DATE, default=None,
help='%s %s' % ('Specify day to search for missing inmates, format is YYYY-MM-DD.',
'Not specified then searches all')),
+ make_option('--verbose', action="store_true", dest=VERBOSE_MODE, default=False,
+ help='Turn on verbose mode.'),
)
def handle(self, *args, **options):
- log.debug("%s - Started scraping inmates from Cook County Sheriff's site." % datetime.now())
+ monitor = Monitor(log, verbose_debug_level=options[self.VERBOSE_MODE])
+ monitor.debug("%s - Started scraping inmates from Cook County Sheriff's site." % datetime.now())
- scraper = Scraper(log)
- if options[START_DATE]:
- scraper.check_for_missing_inmates(datetime.strptime(options[START_DATE], '%Y-%m-%d').date())
+ scraper = Scraper(monitor)
+ if options[self.START_DATE]:
+ scraper.check_for_missing_inmates(datetime.strptime(options[self.START_DATE], '%Y-%m-%d').date())
else:
scraper.run()
- log.debug("%s - Finished scraping inmates from Cook County Sheriff's site." % datetime.now())
+ monitor.debug("%s - Finished scraping inmates from Cook County Sheriff's site." % datetime.now())
View
4 countyapi/management/scraper/controller.py
@@ -90,7 +90,7 @@ def _find_missing_inmates(self):
self._known_inmates()
elif msg == self._RECEIVED_KNOWN_INMATES_COMMAND:
self._debug('find missing inmates')
- self._search_commands.find_inmates(self._inmates_response,
+ self._search_commands.find_inmates(exclude_list=self._inmates_response,
start_date=self._start_date_missing_inmates)
else:
self._debug('Unknown notification from %s, received - %s' % (notifier, msg))
@@ -100,7 +100,7 @@ def _find_missing_inmates(self):
def _find_new_inmates(self):
end_index = self._end_index_active_inmate_ids_in_search_window()
- self._search_commands.find_inmates(self._active_inmate_ids[0:end_index],
+ self._search_commands.find_inmates(exclude_list=self._active_inmate_ids[0:end_index],
start_date=self._today - ONE_DAY * (NEW_INMATE_SEARCH_WINDOW_SIZE + 1))
def _known_inmates(self):
View
15 countyapi/management/scraper/inmates_scraper.py
@@ -2,6 +2,7 @@
import gevent
from gevent.queue import JoinableQueue
+from monitor import MONITOR_VERBOSE_DMSG_LEVEL
from throwable_commands_queue import ThrowawayCommandsQueue
WORKERS_TO_START = 25
@@ -13,28 +14,26 @@ class InmatesScraper:
FINISHED_PROCESSING = 'InmatesScraper: finished processing'
- def __init__(self, http, inmates, inmate_details_class, monitor, workers_to_start=WORKERS_TO_START, verbose=False):
+ def __init__(self, http, inmates, inmate_details_class, monitor, workers_to_start=WORKERS_TO_START):
self._http = http
self._inmates = inmates
self._inmate_details_class = inmate_details_class
self._monitor = monitor
self._workers_to_start = workers_to_start
- self._verbose = verbose
self._read_commands_q, self._workers = self._setup_command_system()
self._write_commands_q = self._read_commands_q
def create_if_exists(self, arg):
self._put(self._create_if_exists, arg)
def _create_if_exists(self, inmate_id):
- if self._verbose:
- self._debug('check for inmate - %s' % inmate_id)
+ self._debug('check for inmate - %s' % inmate_id, MONITOR_VERBOSE_DMSG_LEVEL)
worked, inmate_details_in_html = self._http.get(CCJ_INMATE_DETAILS_URL + inmate_id)
if worked:
self._inmates.add(self._inmate_details_class(inmate_details_in_html))
- def _debug(self, msg):
- self._monitor.debug('InmatesScraper: %s' % msg)
+ def _debug(self, msg, debug_level=None):
+ self._monitor.debug('InmatesScraper: %s' % msg, debug_level)
def finish(self):
self._prevent_new_requests_from_being_processed()
@@ -60,10 +59,10 @@ def resurrect_if_found(self, inmate_id):
self._put(self._resurrect_if_found, inmate_id)
def _resurrect_if_found(self, inmate_id):
- if self._verbose:
- self._debug('check if really discharged inmate %s' % inmate_id)
+ self._debug('check if really discharged inmate %s' % inmate_id, MONITOR_VERBOSE_DMSG_LEVEL)
worked, inmate_details_in_html = self._http.get(CCJ_INMATE_DETAILS_URL + inmate_id)
if worked:
+ self._debug('resurrected discharged inmate %s' % inmate_id, MONITOR_VERBOSE_DMSG_LEVEL)
self._inmates.update(self._inmate_details_class(inmate_details_in_html))
def _setup_command_system(self):
View
12 countyapi/management/scraper/monitor.py
@@ -3,6 +3,9 @@
from gevent.queue import Queue
from datetime import datetime
+MONITOR_DEFAULT_DMSG_LEVEL = 1
+MONITOR_VERBOSE_DMSG_LEVEL = 2
+
class Monitor:
"""
@@ -12,14 +15,17 @@ class Monitor:
notifications
"""
- def __init__(self, log, no_debug_msgs=False):
+ def __init__(self, log, no_debug_msgs=False, verbose_debug_mode=False):
self._log = log
self._debug_msgs = not no_debug_msgs
+ self._debug_msg_level = MONITOR_VERBOSE_DMSG_LEVEL if verbose_debug_mode else MONITOR_DEFAULT_DMSG_LEVEL
self._messages = self._setup_msg_system()
self._notifications = self._setup_notification_queue()
- def debug(self, msg):
- if self._debug_msgs:
+ def debug(self, msg, debug_level=None):
+ if debug_level is None:
+ debug_level = MONITOR_DEFAULT_DMSG_LEVEL
+ if self._debug_msgs and debug_level <= self._debug_msg_level:
self._debug(datetime.now(), msg)
def _debug(self, timestamp, msg):
View
8 countyapi/management/scraper/scraper.py
@@ -1,6 +1,5 @@
from controller import Controller
-from monitor import Monitor
from search_commands import SearchCommands
from inmates_scraper import InmatesScraper
from inmates import Inmates
@@ -11,14 +10,13 @@
class Scraper:
- def __init__(self, log):
- self._monitor = Monitor(log)
+ def __init__(self, monitor):
+ self._monitor = monitor
def check_for_missing_inmates(self, start_date):
self._debug('started check_for_missing_inmates')
inmates = Inmates(Inmate, self._monitor)
- inmates_scraper = InmatesScraper(Http(), inmates, InmateDetails, self._monitor, workers_to_start=70,
- verbose=True)
+ inmates_scraper = InmatesScraper(Http(), inmates, InmateDetails, self._monitor, workers_to_start=70)
search_commands = SearchCommands(inmates_scraper, self._monitor)
controller = Controller(self._monitor, search_commands, inmates_scraper, inmates)
controller.find_missing_inmates(start_date)
View
2 countyapi/management/scraper/search_commands.py
@@ -15,7 +15,7 @@ class SearchCommands:
_NOTIFICATION_MSG_TEMPLATE = 'SearchCommands: finished generating %s'
FINISHED_FIND_INMATES = _NOTIFICATION_MSG_TEMPLATE % 'find inmates commands'
FINISHED_CHECK_OF_RECENTLY_DISCHARGED_INMATES = \
- _NOTIFICATION_MSG_TEMPLATE % 'finished generate check of recently discharged inmates commands'
+ _NOTIFICATION_MSG_TEMPLATE % 'check of recently discharged inmates commands'
FINISHED_UPDATE_INMATES_STATUS = _NOTIFICATION_MSG_TEMPLATE % 'update inmates status'
def __init__(self, inmate_scraper, monitor):
View
4 scripts/scraper.sh
@@ -16,7 +16,9 @@ INMATE_API='http://cookcountyjail.recoveredfactory.net/api/1.0/countyinmate/'
DB_BACKUPS_DIR=${HOME}/website/1.0/db_backups
DB_BACKUP_FILE=cookcountyjail-$(date +%Y-%m-%d).json
-${MANAGE} ng_scraper
+SCRAPER_OPTIONS=--verbose
+
+${MANAGE} ng_scraper ${SCRAPER_OPTIONS}
echo "Cook County Jail scraper finished scrapping at `date`"
View
6 tests/test_controller.py
@@ -66,7 +66,7 @@ def test_scraping(self):
send_response(controller, active_jail_ids)
assert self._search.update_inmates_status.call_args_list == [call(active_jail_ids)]
self.send_notification(self._search, SearchCommands.FINISHED_UPDATE_INMATES_STATUS)
- assert self._search.find_inmates.call_args_list == [call(missing_inmate_exclude_list,
+ assert self._search.find_inmates.call_args_list == [call(exclude_list=missing_inmate_exclude_list,
start_date=date.today() - ONE_DAY * 6)]
self.send_notification(self._search, SearchCommands.FINISHED_FIND_INMATES)
assert inmates.recently_discharged_inmates_ids.call_args_list == [call(controller.inmates_response_q)]
@@ -86,9 +86,9 @@ def test_search_missing_inmates(self):
controller_missing_inmates(controller, start_date)
assert inmates.known_inmates_ids_starting_with.call_args_list == [call(controller.inmates_response_q,
start_date)]
- known_inmate_ids = []
+ known_inmate_ids = ['1', '2']
send_response(controller, known_inmate_ids)
- assert self._search.find_inmates.call_args_list == [call([], start_date=start_date)]
+ assert self._search.find_inmates.call_args_list == [call(exclude_list=known_inmate_ids, start_date=start_date)]
self.send_notification(self._search, SearchCommands.FINISHED_FIND_INMATES)
assert self._inmate_scraper.finish.call_args_list == [call()]
self.send_notification(self._inmate_scraper, InmatesScraper.FINISHED_PROCESSING)
View
17 tests/test_monitor.py
@@ -1,7 +1,7 @@
-from countyapi.management.scraper.monitor import Monitor
+from countyapi.management.scraper.monitor import Monitor, MONITOR_VERBOSE_DMSG_LEVEL
-from mock import Mock
+from mock import Mock, call
class Test_Monitor:
@@ -22,6 +22,19 @@ def test_debug_msgs_off(self):
monitor.debug(expected)
assert not log.debug.called, 'log.debug should not have been called'
+ def test_verbose_debug_mode(self):
+ expected = 'hi'
+ log = Mock()
+ monitor = Monitor(log)
+ monitor.debug(expected)
+ monitor.debug(expected, debug_level=MONITOR_VERBOSE_DMSG_LEVEL)
+ assert len(log.debug.call_args_list) == 1
+ log = Mock()
+ monitor = Monitor(log, verbose_debug_mode=True)
+ monitor.debug(expected)
+ monitor.debug(expected, debug_level=MONITOR_VERBOSE_DMSG_LEVEL)
+ assert len(log.debug.call_args_list) == 2
+
def test_notify(self):
notifier = Mock(spec=Test_Monitor)
expected = (notifier, '')

0 comments on commit 8648603

Please sign in to comment.
Something went wrong with that request. Please try again.