Skip to content

Commit

Permalink
Merge pull request #29 from open-contracting/cron
Browse files Browse the repository at this point in the history
New command check-collections
  • Loading branch information
odscjames committed Jan 24, 2019
2 parents 104153f + 0ac5d20 commit 3110882
Show file tree
Hide file tree
Showing 4 changed files with 78 additions and 1 deletion.
30 changes: 30 additions & 0 deletions docs/cli-check-collections.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
Command line tool - check-collections option
============================================

This command checks all data so far in all collections.

It can be run multiple times, and data already checked will not be rechecked.

You should only run one instance of this command at a time; if two run at once, they may try to do the same work.

.. code-block:: shell-session

    python ocdskingfisher-process-cli check-collections

Running from cron
-----------------

You can also pass a maximum number of seconds that the process should run for.

.. code-block:: shell-session

    python ocdskingfisher-process-cli check-collections --runforseconds 60

Soon after that number of seconds has passed, the command will exit.
(The command will finish the check it's currently doing before stopping, so it may run slightly longer than specified. Allow a minute extra to be safe.)

You can use this option with a cron entry; set a cron entry for this command to run every hour and pass runforseconds as 3540 (60 seconds/minute * 59 minutes).

Then when new data appears in the system, there is no need for someone to run :doc:`cli-check-collection` by hand - the process run by cron will pick up the new data itself eventually.

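The runforseconds option helps make sure that only one of these cron processes runs at once: each run stops itself soon after the given number of seconds has passed, so it should have exited before cron starts the next one.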
1 change: 1 addition & 0 deletions docs/cli.rst
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ You can pass the `verbose` flag to all sub commands, to get more output printed
cli-list-collections.rst
cli-local-load.rst
cli-check-collection.rst
cli-check-collections.rst
cli-new-transform-compile-releases.rst
cli-new-transform-upgrade-1-0-to-1-1.rst
cli-transform-collection.rst
Expand Down
18 changes: 17 additions & 1 deletion ocdskingfisherprocess/checks.py
Original file line number Diff line number Diff line change
@@ -1,21 +1,31 @@
from libcoveocds.api import ocds_json_output, APIException
import tempfile
import shutil
import datetime


class Checks:

def __init__(self, database, collection):
def __init__(self, database, collection, run_until_timestamp=None):
self.database = database
self.collection = collection
self.run_until_timestamp = run_until_timestamp

def process_all_files(self):
if not self.collection.check_data and not self.collection.check_older_data_with_schema_version_1_1:
# nothing to do here, so ...
return

for file_model in self.database.get_all_files_in_collection(self.collection.database_id):
self.process_file(file_model=file_model)
if self.run_until_timestamp and self.run_until_timestamp < datetime.datetime.utcnow().timestamp():
return

def process_file(self, file_model):
for file_item_model in self.database.get_all_files_items_in_file(file_model):
self.process_file_item(file_item_model=file_item_model)
if self.run_until_timestamp and self.run_until_timestamp < datetime.datetime.utcnow().timestamp():
return

def process_file_item(self, file_item_model):
with self.database.get_engine().begin() as connection:
Expand All @@ -32,6 +42,9 @@ def process_file_item(self, file_item_model):
and self.is_schema_version_less_than_1_1(release_row['package_data_id']) \
and not self.database.is_release_check_done(release_row['id'], override_schema_version="1.1"):
self.check_release_row(release_row, override_schema_version="1.1")
# Early return?
if self.run_until_timestamp and self.run_until_timestamp < datetime.datetime.utcnow().timestamp():
return

del release_rows

Expand All @@ -49,6 +62,9 @@ def process_file_item(self, file_item_model):
and self.is_schema_version_less_than_1_1(record_row['package_data_id']) \
and not self.database.is_record_check_done(record_row['id'], override_schema_version="1.1"):
self.check_record_row(record_row, override_schema_version="1.1")
# Early return?
if self.run_until_timestamp and self.run_until_timestamp < datetime.datetime.utcnow().timestamp():
return

def handle_package(self, package):
cove_temp_folder = tempfile.mkdtemp(prefix='ocdskingfisher-cove-', dir=tempfile.gettempdir())
Expand Down
30 changes: 30 additions & 0 deletions ocdskingfisherprocess/cli/commands/check_collections.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
import ocdskingfisherprocess.database
import ocdskingfisherprocess.cli.commands.base
from ocdskingfisherprocess.checks import Checks
import datetime
from threading import Timer
import os


class CheckCollectionsCLICommand(ocdskingfisherprocess.cli.commands.base.CLICommand):
    """CLI command ``check-collections``: run outstanding checks on all collections.

    With ``--runforseconds N`` the command stops starting new work once N
    seconds have passed, which makes it suitable for running from cron.
    """
    command = 'check-collections'

    def configure_subparser(self, subparser):
        """Register the optional ``--runforseconds`` argument on the subparser."""
        subparser.add_argument("--runforseconds",
                               help="Run for this many seconds only.")

    def run_command(self, args):
        """Check every collection, honouring the optional time limit.

        :param args: parsed command line arguments; ``args.runforseconds`` may
            be unset, or a value convertible to ``int`` (values <= 0 mean no limit)
        """
        run_until_timestamp = None
        run_for_seconds = int(args.runforseconds) if args.runforseconds else 0
        if run_for_seconds > 0:
            # NOTE: Checks compares against datetime.datetime.utcnow().timestamp(),
            # so the deadline has to be computed with the very same expression.
            run_until_timestamp = datetime.datetime.utcnow().timestamp() + run_for_seconds

            # This is a safeguard - the process should stop itself but this will kill it if it does not.
            def exitfunc():
                os._exit(0)

            watchdog = Timer(run_for_seconds + 60, exitfunc)
            # A non-daemon timer thread would keep the interpreter alive until
            # it fires, even when all checks finished long before the limit.
            # As a daemon it still kills a stuck process but never delays a
            # normal exit.
            watchdog.daemon = True
            watchdog.start()

        for collection in self.database.get_all_collections():
            checks = Checks(self.database, collection, run_until_timestamp=run_until_timestamp)
            checks.process_all_files()
            # Once the time limit has passed, do not start on further
            # collections; each one would otherwise still run at least one check.
            if run_until_timestamp and run_until_timestamp < datetime.datetime.utcnow().timestamp():
                break

0 comments on commit 3110882

Please sign in to comment.