Skip to content

Commit

Permalink
Document s3stalkers
Browse files Browse the repository at this point in the history
  • Loading branch information
mraspaud committed Feb 15, 2023
1 parent 685975c commit 8da86da
Show file tree
Hide file tree
Showing 3 changed files with 50 additions and 6 deletions.
15 changes: 15 additions & 0 deletions doc/source/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -456,6 +456,21 @@ trollstalker2
New, alternative implementation of trollstalker. Not really needed,
as trollstalker works fine and is actively maintained.


s3stalker
^^^^^^^^^

A counterpart to trollstalker for polling for new files on an s3 bucket.
This is meant to be run regularly, e.g. from cron. For a daemon version of
this, check the next item.

s3stalker_daemon
^^^^^^^^^^^^^^^^

The daemon version of s3stalker, which keeps running and polls until it is
stopped (preferably with a SIGTERM).


zipcollector_runner
^^^^^^^^^^^^^^^^^^^

Expand Down
16 changes: 15 additions & 1 deletion pytroll_collectors/s3stalker.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,21 @@

# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
"""Module to find new files on an s3 bucket."""
"""Module to find new files on an s3 bucket.
The contents of the yaml configuration file should look like this::

    s3_kwargs:
      anon: false
      client_kwargs:
        aws_access_key_id: my_accesskey
        aws_secret_access_key: my_secret_key
        endpoint_url: https://xxx.yyy.zz
    fetch_back_to:
      hours: 20
    file_pattern: '{platform_name:3s}_OL_2_{datatype_id:_<6s}_{start_time:%Y%m%dT%H%M%S}_{end_time:%Y%m%dT%H%M%S}_{creation_time:%Y%m%dT%H%M%S}_{duration:4d}_{cycle:3d}_{relative_orbit:3d}_{frame:4d}_{centre:3s}_{mode:1s}_{timeliness:2s}_{collection:3s}.zip'
    subject: /segment/2/safe-olci/S3/

""" # noqa

import logging
import posixpath
Expand Down
25 changes: 20 additions & 5 deletions pytroll_collectors/s3stalker_daemon_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,23 @@

# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
"""S3stalker daemon."""
"""S3stalker daemon.
The contents of the yaml configuration file should look like this::

    s3_kwargs:
      anon: false
      client_kwargs:
        aws_access_key_id: my_accesskey
        aws_secret_access_key: my_secret_key
        endpoint_url: https://xxx.yyy.zz
    fetch_back_to:
      hours: 20
    polling_interval:
      minutes: 2
    file_pattern: '{platform_name:3s}_OL_2_{datatype_id:_<6s}_{start_time:%Y%m%dT%H%M%S}_{end_time:%Y%m%dT%H%M%S}_{creation_time:%Y%m%dT%H%M%S}_{duration:4d}_{cycle:3d}_{relative_orbit:3d}_{frame:4d}_{centre:3s}_{mode:1s}_{timeliness:2s}_{collection:3s}.zip'
    subject: /segment/2/safe-olci/S3/

""" # noqa
import signal
import time
from datetime import timedelta, datetime
Expand Down Expand Up @@ -41,7 +57,7 @@ def __init__(self, bucket, config, publisher_ready_time=2.5):

self._publisher_ready_time = publisher_ready_time
self._publisher = None
self.loop = False
self.loop = True
self._set_signal_shutdown()

last_fetch_time = datetime.now(UTC) - startup_time
Expand All @@ -51,17 +67,16 @@ def _set_signal_shutdown(self):
"""Set a signal to handle shutdown."""
signal.signal(signal.SIGTERM, self.close)

def _setup_and_start_communication(self):
def _start_communication(self):
"""Set up the Posttroll communication and start the publisher."""
self._publisher = create_publisher_from_dict_config(self.config['publisher'])
with sleeper(self._publisher_ready_time):
self._publisher.start()
self.loop = True

def run(self):
"""Start the s3-stalker daemon/runner in a thread."""
logger.info("Starting up s3stalker.")
self._setup_and_start_communication()
self._start_communication()

while self.loop:
self._fetch_bucket_content_and_publish_new_files()
Expand Down

0 comments on commit 8da86da

Please sign in to comment.