Skip to content

Commit

Permalink
Issue sc3#262 - fixed message, improved tests, added debug message le…
Browse files Browse the repository at this point in the history
…vel to Monitor and added --verbose option to ng_scraper both to help with getting diagnostics from scraper.
  • Loading branch information
nwinklareth committed Mar 2, 2014
1 parent d068475 commit 8648603
Show file tree
Hide file tree
Showing 9 changed files with 56 additions and 32 deletions.
20 changes: 13 additions & 7 deletions countyapi/management/commands/ng_scraper.py
Expand Up @@ -6,27 +6,33 @@
from django.core.management.base import BaseCommand

from countyapi.management.scraper.scraper import Scraper

START_DATE = 'start_date'
from countyapi.management.scraper.monitor import Monitor

log = logging.getLogger('main')


class Command(BaseCommand):
    """Management command that scrapes inmate data from the Cook County Sheriff's site.

    Options:
        -d / --day  Day (YYYY-MM-DD) handed to Scraper.check_for_missing_inmates;
                    when omitted, Scraper.run is called instead.
        --verbose   Passed to Monitor as ``verbose_debug_mode``.
    """

    # Keys under which the parsed command-line options are stored.
    START_DATE = 'start_date'
    VERBOSE_MODE = 'verbose_mode'

    help = "Scrape inmate data from Cook County Sheriff's site."
    option_list = BaseCommand.option_list + (
        make_option('-d', '--day', type='string', action='store', dest=START_DATE, default=None,
                    help='%s %s' % ('Specify day to search for missing inmates, format is YYYY-MM-DD.',
                                    'Not specified then searches all')),
        make_option('--verbose', action="store_true", dest=VERBOSE_MODE, default=False,
                    help='Turn on verbose mode.'),
    )

    def handle(self, *args, **options):
        """Build the Monitor and Scraper, then run the requested kind of scrape."""
        # Monitor.__init__ declares this keyword as ``verbose_debug_mode``;
        # any other spelling raises TypeError before scraping starts.
        monitor = Monitor(log, verbose_debug_mode=options[self.VERBOSE_MODE])
        monitor.debug("%s - Started scraping inmates from Cook County Sheriff's site." % datetime.now())

        scraper = Scraper(monitor)
        start_day = options[self.START_DATE]
        if start_day:
            scraper.check_for_missing_inmates(datetime.strptime(start_day, '%Y-%m-%d').date())
        else:
            scraper.run()

        monitor.debug("%s - Finished scraping inmates from Cook County Sheriff's site." % datetime.now())
4 changes: 2 additions & 2 deletions countyapi/management/scraper/controller.py
Expand Up @@ -90,7 +90,7 @@ def _find_missing_inmates(self):
self._known_inmates()
elif msg == self._RECEIVED_KNOWN_INMATES_COMMAND:
self._debug('find missing inmates')
self._search_commands.find_inmates(self._inmates_response,
self._search_commands.find_inmates(exclude_list=self._inmates_response,
start_date=self._start_date_missing_inmates)
else:
self._debug('Unknown notification from %s, received - %s' % (notifier, msg))
Expand All @@ -100,7 +100,7 @@ def _find_missing_inmates(self):

def _find_new_inmates(self):
    """Issue a find_inmates search that excludes the active inmate ids inside the search window."""
    end_index = self._end_index_active_inmate_ids_in_search_window()
    # The exclusion list is passed by keyword so the call does not depend on
    # the positional parameter order of find_inmates.
    self._search_commands.find_inmates(exclude_list=self._active_inmate_ids[0:end_index],
                                       start_date=self._today - ONE_DAY * (NEW_INMATE_SEARCH_WINDOW_SIZE + 1))

def _known_inmates(self):
Expand Down
15 changes: 7 additions & 8 deletions countyapi/management/scraper/inmates_scraper.py
Expand Up @@ -2,6 +2,7 @@
import gevent
from gevent.queue import JoinableQueue

from monitor import MONITOR_VERBOSE_DMSG_LEVEL
from throwable_commands_queue import ThrowawayCommandsQueue

WORKERS_TO_START = 25
Expand All @@ -13,28 +14,26 @@ class InmatesScraper:

FINISHED_PROCESSING = 'InmatesScraper: finished processing'

def __init__(self, http, inmates, inmate_details_class, monitor, workers_to_start=WORKERS_TO_START):
    """Store the collaborators and set up the command system.

    http                  -- object whose ``get`` fetches an inmate details page
    inmates               -- collection that receives ``add`` / ``update`` calls
    inmate_details_class  -- callable that wraps the fetched HTML
    monitor               -- receives debug messages; verbosity is decided there,
                             not by a flag on this class
    workers_to_start      -- stored for use by the command system setup
    """
    self._http = http
    self._inmates = inmates
    self._inmate_details_class = inmate_details_class
    self._monitor = monitor
    self._workers_to_start = workers_to_start
    self._read_commands_q, self._workers = self._setup_command_system()
    # Both names start out bound to the same queue object.
    self._write_commands_q = self._read_commands_q

# Hands the private _create_if_exists worker and its argument to self._put
# (presumably the command queue -- confirm against _put, which is not shown here).
def create_if_exists(self, arg):
self._put(self._create_if_exists, arg)

def _create_if_exists(self, inmate_id):
    """Fetch the details page for ``inmate_id`` and add the inmate when the fetch worked."""
    # Always emitted at verbose level; the monitor decides whether to log it.
    self._debug('check for inmate - %s' % inmate_id, MONITOR_VERBOSE_DMSG_LEVEL)
    worked, inmate_details_in_html = self._http.get(CCJ_INMATE_DETAILS_URL + inmate_id)
    if worked:
        self._inmates.add(self._inmate_details_class(inmate_details_in_html))

def _debug(self, msg):
self._monitor.debug('InmatesScraper: %s' % msg)
def _debug(self, msg, debug_level=None):
self._monitor.debug('InmatesScraper: %s' % msg, debug_level)

def finish(self):
self._prevent_new_requests_from_being_processed()
Expand All @@ -60,10 +59,10 @@ def resurrect_if_found(self, inmate_id):
self._put(self._resurrect_if_found, inmate_id)

def _resurrect_if_found(self, inmate_id):
    """Fetch the details page for ``inmate_id`` and update the inmate when the fetch worked."""
    # Both messages are emitted at verbose level; the monitor decides whether to log them.
    self._debug('check if really discharged inmate %s' % inmate_id, MONITOR_VERBOSE_DMSG_LEVEL)
    worked, inmate_details_in_html = self._http.get(CCJ_INMATE_DETAILS_URL + inmate_id)
    if worked:
        self._debug('resurrected discharged inmate %s' % inmate_id, MONITOR_VERBOSE_DMSG_LEVEL)
        self._inmates.update(self._inmate_details_class(inmate_details_in_html))

def _setup_command_system(self):
Expand Down
12 changes: 9 additions & 3 deletions countyapi/management/scraper/monitor.py
Expand Up @@ -3,6 +3,9 @@
from gevent.queue import Queue
from datetime import datetime

MONITOR_DEFAULT_DMSG_LEVEL = 1
MONITOR_VERBOSE_DMSG_LEVEL = 2


class Monitor:
"""
Expand All @@ -12,14 +15,17 @@ class Monitor:
notifications
"""

def __init__(self, log, no_debug_msgs=False, verbose_debug_mode=False):
    """Set up the monitor.

    log                 -- logger object kept for later use
    no_debug_msgs       -- when true, debug messages are switched off
    verbose_debug_mode  -- when true, the message-level threshold is raised to
                           MONITOR_VERBOSE_DMSG_LEVEL; otherwise it stays at
                           MONITOR_DEFAULT_DMSG_LEVEL
    """
    self._log = log
    self._debug_msgs = not no_debug_msgs
    self._debug_msg_level = MONITOR_VERBOSE_DMSG_LEVEL if verbose_debug_mode else MONITOR_DEFAULT_DMSG_LEVEL
    self._messages = self._setup_msg_system()
    self._notifications = self._setup_notification_queue()

def debug(self, msg, debug_level=None):
    """Emit ``msg`` if debug messages are on and its level is within the threshold.

    ``debug_level`` defaults to MONITOR_DEFAULT_DMSG_LEVEL when omitted, so
    existing one-argument callers keep their behaviour.
    """
    if debug_level is None:
        debug_level = MONITOR_DEFAULT_DMSG_LEVEL
    if self._debug_msgs and debug_level <= self._debug_msg_level:
        self._debug(datetime.now(), msg)

def _debug(self, timestamp, msg):
Expand Down
8 changes: 3 additions & 5 deletions countyapi/management/scraper/scraper.py
@@ -1,6 +1,5 @@

from controller import Controller
from monitor import Monitor
from search_commands import SearchCommands
from inmates_scraper import InmatesScraper
from inmates import Inmates
Expand All @@ -11,14 +10,13 @@

class Scraper:

def __init__(self, log):
self._monitor = Monitor(log)
def __init__(self, monitor):
self._monitor = monitor

def check_for_missing_inmates(self, start_date):
self._debug('started check_for_missing_inmates')
inmates = Inmates(Inmate, self._monitor)
inmates_scraper = InmatesScraper(Http(), inmates, InmateDetails, self._monitor, workers_to_start=70,
verbose=True)
inmates_scraper = InmatesScraper(Http(), inmates, InmateDetails, self._monitor, workers_to_start=70)
search_commands = SearchCommands(inmates_scraper, self._monitor)
controller = Controller(self._monitor, search_commands, inmates_scraper, inmates)
controller.find_missing_inmates(start_date)
Expand Down
2 changes: 1 addition & 1 deletion countyapi/management/scraper/search_commands.py
Expand Up @@ -15,7 +15,7 @@ class SearchCommands:
_NOTIFICATION_MSG_TEMPLATE = 'SearchCommands: finished generating %s'
FINISHED_FIND_INMATES = _NOTIFICATION_MSG_TEMPLATE % 'find inmates commands'
FINISHED_CHECK_OF_RECENTLY_DISCHARGED_INMATES = \
_NOTIFICATION_MSG_TEMPLATE % 'finished generate check of recently discharged inmates commands'
_NOTIFICATION_MSG_TEMPLATE % 'check of recently discharged inmates commands'
FINISHED_UPDATE_INMATES_STATUS = _NOTIFICATION_MSG_TEMPLATE % 'update inmates status'

def __init__(self, inmate_scraper, monitor):
Expand Down
4 changes: 3 additions & 1 deletion scripts/scraper.sh
Expand Up @@ -16,7 +16,9 @@ INMATE_API='http://cookcountyjail.recoveredfactory.net/api/1.0/countyinmate/'
DB_BACKUPS_DIR=${HOME}/website/1.0/db_backups
DB_BACKUP_FILE=cookcountyjail-$(date +%Y-%m-%d).json

${MANAGE} ng_scraper
SCRAPER_OPTIONS=--verbose

${MANAGE} ng_scraper ${SCRAPER_OPTIONS}

# Report completion time; $(...) matches the command substitution style used for DB_BACKUP_FILE.
echo "Cook County Jail scraper finished scraping at $(date)"

Expand Down
6 changes: 3 additions & 3 deletions tests/test_controller.py
Expand Up @@ -66,7 +66,7 @@ def test_scraping(self):
send_response(controller, active_jail_ids)
assert self._search.update_inmates_status.call_args_list == [call(active_jail_ids)]
self.send_notification(self._search, SearchCommands.FINISHED_UPDATE_INMATES_STATUS)
assert self._search.find_inmates.call_args_list == [call(missing_inmate_exclude_list,
assert self._search.find_inmates.call_args_list == [call(exclude_list=missing_inmate_exclude_list,
start_date=date.today() - ONE_DAY * 6)]
self.send_notification(self._search, SearchCommands.FINISHED_FIND_INMATES)
assert inmates.recently_discharged_inmates_ids.call_args_list == [call(controller.inmates_response_q)]
Expand All @@ -86,9 +86,9 @@ def test_search_missing_inmates(self):
controller_missing_inmates(controller, start_date)
assert inmates.known_inmates_ids_starting_with.call_args_list == [call(controller.inmates_response_q,
start_date)]
known_inmate_ids = []
known_inmate_ids = ['1', '2']
send_response(controller, known_inmate_ids)
assert self._search.find_inmates.call_args_list == [call([], start_date=start_date)]
assert self._search.find_inmates.call_args_list == [call(exclude_list=known_inmate_ids, start_date=start_date)]
self.send_notification(self._search, SearchCommands.FINISHED_FIND_INMATES)
assert self._inmate_scraper.finish.call_args_list == [call()]
self.send_notification(self._inmate_scraper, InmatesScraper.FINISHED_PROCESSING)
Expand Down
17 changes: 15 additions & 2 deletions tests/test_monitor.py
@@ -1,7 +1,7 @@

from countyapi.management.scraper.monitor import Monitor
from countyapi.management.scraper.monitor import Monitor, MONITOR_VERBOSE_DMSG_LEVEL

from mock import Mock
from mock import Mock, call


class Test_Monitor:
Expand All @@ -22,6 +22,19 @@ def test_debug_msgs_off(self):
monitor.debug(expected)
assert not log.debug.called, 'log.debug should not have been called'

def test_verbose_debug_mode(self):
    """Verbose-level messages reach the log only when verbose_debug_mode is on."""
    expected = 'hi'

    # Default monitor: only the default-level message is logged.
    default_log = Mock()
    default_monitor = Monitor(default_log)
    default_monitor.debug(expected)
    default_monitor.debug(expected, debug_level=MONITOR_VERBOSE_DMSG_LEVEL)
    assert len(default_log.debug.call_args_list) == 1

    # Verbose monitor: both messages are logged.
    verbose_log = Mock()
    verbose_monitor = Monitor(verbose_log, verbose_debug_mode=True)
    verbose_monitor.debug(expected)
    verbose_monitor.debug(expected, debug_level=MONITOR_VERBOSE_DMSG_LEVEL)
    assert len(verbose_log.debug.call_args_list) == 2

def test_notify(self):
notifier = Mock(spec=Test_Monitor)
expected = (notifier, '')
Expand Down

0 comments on commit 8648603

Please sign in to comment.