diff --git a/debug-temporal.sh b/debug-temporal.sh new file mode 100644 index 00000000..bbf43acc --- /dev/null +++ b/debug-temporal.sh @@ -0,0 +1,12 @@ +# >>> json.dumps(asdict(ObservationsWorkflowParams(probe_cc=["IT"], start_day="2024-01-01", end_day="2024-01-02", clickhouse="clickhouse://localhost/", data_dir="/Users/art/repos/ooni/data/tests/data/", parallelism=10, fast_fail=False, test_name=["signal"]))) +# +# +INPUT_JSON="{\"probe_cc\": [\"IT\"], \"test_name\": [\"signal\"], \"start_day\": \"2024-01-01\", \"end_day\": \"2024-01-20\", \"clickhouse\": \"clickhouse://localhost/\", \"data_dir\": \"$(pwd)/tests/data/datadir/\", \"parallelism\": 10, \"fast_fail\": false, \"log_level\": 20}" + +echo $INPUT_JSON +temporal workflow start \ + --task-queue oonidatapipeline-task-queue \ + --type ObservationsWorkflow \ + --namespace default \ + --input "$INPUT_JSON" + diff --git a/oonidata/cli/command.py b/oonidata/cli/command.py index 60b664d2..62b556d7 100644 --- a/oonidata/cli/command.py +++ b/oonidata/cli/command.py @@ -16,13 +16,6 @@ from oonidata.db.connections import ClickhouseConnection from oonidata.db.create_tables import create_queries, list_all_table_diffs from oonidata.netinfo import NetinfoDB -from oonidata.workers import ( - start_fingerprint_hunter, - start_observation_maker, - start_ground_truth_builder, - start_response_archiver, -) -from oonidata.workers.analysis import start_analysis log = logging.getLogger("oonidata") @@ -125,294 +118,6 @@ def sync( ) -@cli.command() -@probe_cc_option -@test_name_option -@start_day_option -@end_day_option -@click.option("--clickhouse", type=str) -@click.option( - "--data-dir", - type=Path, - required=True, - help="data directory to store fingerprint and geoip databases", -) -@click.option( - "--parallelism", - type=int, - default=multiprocessing.cpu_count() + 2, - help="number of processes to use. Only works when writing to a database", -) -@click.option( - "--fast-fail", - is_flag=True, - help="should we fail immediately when we encounter an error?", -) -@click.option( - "--create-tables", - is_flag=True, - help="should we attempt to create the required clickhouse tables", -) -@click.option( - "--drop-tables", - is_flag=True, - help="should we drop tables before creating them", -) -def mkobs( - probe_cc: List[str], - test_name: List[str], - start_day: date, - end_day: date, - clickhouse: Optional[str], - data_dir: Path, - parallelism: int, - fast_fail: bool, - create_tables: bool, - drop_tables: bool, -): - """ - Make observations for OONI measurements and write them into clickhouse or a CSV file - """ - if create_tables: - if not clickhouse: - click.echo("--clickhouse needs to be specified when creating tables") - return 1 - if drop_tables: - click.confirm( - "Are you sure you want to drop the tables before creation?", abort=True - ) - - with ClickhouseConnection(clickhouse) as db: - for query, table_name in create_queries: - if drop_tables: - db.execute(f"DROP TABLE IF EXISTS {table_name};") - db.execute(query) - - NetinfoDB(datadir=data_dir, download=True) - - start_observation_maker( - probe_cc=probe_cc, - test_name=test_name, - start_day=start_day, - end_day=end_day, - clickhouse=clickhouse, - data_dir=data_dir, - parallelism=parallelism, - fast_fail=fast_fail, - ) - - -@cli.command() -@probe_cc_option -@test_name_option -@start_day_option -@end_day_option -@click.option("--clickhouse", type=str, required=True) -@click.option( - "--data-dir", - type=Path, - required=True, - help="data directory to store fingerprint and geoip databases", -) -@click.option( - "--parallelism", - type=int, - default=multiprocessing.cpu_count(), - help="number of processes to use. Only works when writing to a database", -) -@click.option( - "--fast-fail", - is_flag=True, - help="should we fail immediately when we encounter an error?", -) -@click.option( - "--create-tables", - is_flag=True, - help="should we attempt to create the required clickhouse tables", -) -@click.option( - "--rebuild-ground-truths", - is_flag=True, - help="should we force the rebuilding of ground truths", -) -def mker( - probe_cc: List[str], - test_name: List[str], - start_day: date, - end_day: date, - clickhouse: str, - data_dir: Path, - parallelism: int, - fast_fail: bool, - create_tables: bool, - rebuild_ground_truths: bool, -): - if create_tables: - with ClickhouseConnection(clickhouse) as db: - for query, table_name in create_queries: - click.echo(f"Running create query for {table_name}") - db.execute(query) - raise Exception("Run this via the analysis command") - - -@cli.command() -@probe_cc_option -@test_name_option -@start_day_option -@end_day_option -@click.option("--clickhouse", type=str, required=True) -@click.option( - "--data-dir", - type=Path, - required=True, - help="data directory to store fingerprint and geoip databases", -) -@click.option( - "--parallelism", - type=int, - default=multiprocessing.cpu_count() + 2, - help="number of processes to use. Only works when writing to a database", -) -@click.option( - "--fast-fail", - is_flag=True, - help="should we fail immediately when we encounter an error?", -) -@click.option( - "--create-tables", - is_flag=True, - help="should we attempt to create the required clickhouse tables", -) -@click.option( - "--rebuild-ground-truths", - is_flag=True, - help="should we force the rebuilding of ground truths", -) -def mkanalysis( - probe_cc: List[str], - test_name: List[str], - start_day: date, - end_day: date, - clickhouse: str, - data_dir: Path, - parallelism: int, - fast_fail: bool, - create_tables: bool, - rebuild_ground_truths: bool, -): - if create_tables: - with ClickhouseConnection(clickhouse) as db: - for query, table_name in create_queries: - click.echo(f"Running create query for {table_name}") - db.execute(query) - - start_analysis( - probe_cc=probe_cc, - test_name=test_name, - start_day=start_day, - end_day=end_day, - clickhouse=clickhouse, - data_dir=data_dir, - parallelism=parallelism, - fast_fail=fast_fail, - rebuild_ground_truths=rebuild_ground_truths, - ) - - -@cli.command() -@start_day_option -@end_day_option -@click.option("--clickhouse", type=str, required=True) -@click.option( - "--data-dir", - type=Path, - required=True, - help="data directory to store fingerprint and geoip databases", -) -@click.option( - "--parallelism", - type=int, - default=multiprocessing.cpu_count() + 2, - help="number of processes to use. Only works when writing to a database", -) -def mkgt( - start_day: date, - end_day: date, - clickhouse: str, - data_dir: Path, - parallelism: int, -): - start_ground_truth_builder( - start_day=start_day, - end_day=end_day, - clickhouse=clickhouse, - data_dir=data_dir, - parallelism=parallelism, - ) - - -@cli.command() -@probe_cc_option -@test_name_option -@start_day_option -@end_day_option -@click.option("--clickhouse", type=str) -@click.option("--data-dir", type=Path, required=True) -@click.option("--archives-dir", type=Path, required=True) -@click.option( - "--parallelism", - type=int, - default=multiprocessing.cpu_count() + 2, - help="number of processes to use. Only works when writing to a database", -) -def mkbodies( - probe_cc: List[str], - test_name: List[str], - start_day: date, - end_day: date, - clickhouse: str, - data_dir: Path, - archives_dir: Path, - parallelism: int, -): - """ - Make response body archives - """ - start_response_archiver( - probe_cc=probe_cc, - test_name=test_name, - start_day=start_day, - end_day=end_day, - data_dir=data_dir, - archives_dir=archives_dir, - clickhouse=clickhouse, - parallelism=parallelism, - ) - - -@cli.command() -@click.option( - "--data-dir", - type=Path, - required=True, - help="data directory to store fingerprint and geoip databases", -) -@click.option("--archives-dir", type=Path, required=True) -@click.option( - "--parallelism", - type=int, - default=multiprocessing.cpu_count() + 2, - help="number of processes to use", -) -def fphunt(data_dir: Path, archives_dir: Path, parallelism: int): - click.echo("🏹 starting the hunt for blockpage fingerprints!") - start_fingerprint_hunter( - archives_dir=archives_dir, - data_dir=data_dir, - parallelism=parallelism, - ) - - @cli.command() @click.option("--clickhouse", type=str) @click.option( diff --git a/oonidata/datapipeline/__init__.py b/oonidata/datapipeline/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/oonidata/datapipeline/run_worker.py b/oonidata/datapipeline/run_worker.py new file mode 100644 index 00000000..e097bc2d --- /dev/null +++ b/oonidata/datapipeline/run_worker.py @@ -0,0 +1,31 @@ +import asyncio + +import concurrent.futures + +from temporalio.client import Client +from temporalio.worker import Worker + +from .workflows.observations import ObservationsWorkflow +from .workflows.observations import make_observation_in_day + + +async def async_main(): + client = await Client.connect("localhost:7233") + with concurrent.futures.ThreadPoolExecutor(max_workers=100) as activity_executor: + worker = Worker( + client, + task_queue="oonidatapipeline-task-queue", + workflows=[ObservationsWorkflow], + activities=[make_observation_in_day], + activity_executor=activity_executor, + ) + + await worker.run() + + +def main(): + asyncio.run(async_main()) + + +if __name__ == "__main__": + main() diff --git a/oonidata/datapipeline/workflows/__init__.py b/oonidata/datapipeline/workflows/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/oonidata/workers/analysis.py b/oonidata/datapipeline/workflows/analysis.py similarity index 100% rename from oonidata/workers/analysis.py rename to oonidata/datapipeline/workflows/analysis.py diff --git a/oonidata/workers/ground_truths.py b/oonidata/datapipeline/workflows/ground_truths.py similarity index 100% rename from oonidata/workers/ground_truths.py rename to oonidata/datapipeline/workflows/ground_truths.py diff --git a/oonidata/datapipeline/workflows/observations.py b/oonidata/datapipeline/workflows/observations.py new file mode 100644 index 00000000..49c00710 --- /dev/null +++ b/oonidata/datapipeline/workflows/observations.py @@ -0,0 +1,283 @@ +import asyncio +import pathlib +import logging +import dataclasses +from datetime import date, datetime, timedelta, timezone + +from typing import ( + List, + Sequence, + Tuple, +) +from temporalio import workflow, activity +from dataclasses import dataclass + +with workflow.unsafe.imports_passed_through(): + import clickhouse_driver + +with workflow.unsafe.imports_passed_through(): + import statsd + from oonidata.analysis.datasources import load_measurement + from oonidata.datautils import PerfTimer + from oonidata.models.nettests import SupportedDataformats + + from oonidata.netinfo import NetinfoDB + + from oonidata.dataclient import ( + date_interval, + list_file_entries_batches, + stream_measurements, + ccs_set, + ) + from oonidata.db.connections import ( + ClickhouseConnection, + ) + from oonidata.transforms import measurement_to_observations + from oonidata.workers.common import ( + get_prev_range, + make_db_rows, + maybe_delete_prev_range, + optimize_all_tables, + ) + +log = logging.getLogger("oonidata.processing") + + +@dataclass +class ObservationsWorkflowParams: + probe_cc: List[str] + test_name: List[str] + start_day: str + end_day: str + clickhouse: str + data_dir: str + parallelism: int + fast_fail: bool + log_level: int = logging.INFO + + +@dataclass +class MakeObservationsParams: + probe_cc: List[str] + test_name: List[str] + clickhouse: str + data_dir: str + fast_fail: bool + bucket_date: str + + +def write_observations_to_db( + msmt: SupportedDataformats, + netinfodb: NetinfoDB, + db: ClickhouseConnection, + bucket_date: str, +): + for observations in measurement_to_observations(msmt, netinfodb=netinfodb): + if len(observations) == 0: + continue + + column_names = [f.name for f in dataclasses.fields(observations[0])] + table_name, rows = make_db_rows( + bucket_date=bucket_date, + dc_list=observations, + column_names=column_names, + ) + db.write_rows(table_name=table_name, rows=rows, column_names=column_names) + + +def make_observations_for_file_entry_batch( + file_entry_batch: Sequence[Tuple[str, str, str, int]], + clickhouse: str, + row_buffer_size: int, + data_dir: pathlib.Path, + bucket_date: str, + probe_cc: List[str], + fast_fail: bool, +): + netinfodb = NetinfoDB(datadir=data_dir, download=False) + tbatch = PerfTimer() + with ClickhouseConnection(clickhouse, row_buffer_size=row_buffer_size) as db: + statsd_client = statsd.StatsClient("localhost", 8125) + ccs = ccs_set(probe_cc) + idx = 0 + for bucket_name, s3path, ext, fe_size in file_entry_batch: + log.info(f"processing file s3://{bucket_name}/{s3path}") + t = PerfTimer() + try: + for msmt_dict in stream_measurements( + bucket_name=bucket_name, s3path=s3path, ext=ext + ): + # Legacy cans don't allow us to pre-filter on the probe_cc, so + # we need to check for probe_cc consistency in here. + if ccs and msmt_dict["probe_cc"] not in ccs: + continue + msmt = None + try: + t = PerfTimer() + msmt = load_measurement(msmt_dict) + if not msmt.test_keys: + log.error( + f"measurement with empty test_keys: ({msmt.measurement_uid})", + exc_info=True, + ) + continue + write_observations_to_db(msmt, netinfodb, db, bucket_date) + # following types ignored due to https://github.com/jsocol/pystatsd/issues/146 + statsd_client.timing("oonidata.make_observations.timed", t.ms, rate=0.1) # type: ignore + statsd_client.incr("oonidata.make_observations.msmt_count", rate=0.1) # type: ignore + idx += 1 + except Exception as exc: + msmt_str = msmt_dict.get("report_id", None) + if msmt: + msmt_str = msmt.measurement_uid + log.error(f"failed at idx: {idx} ({msmt_str})", exc_info=True) + + if fast_fail: + db.close() + raise exc + log.info(f"done processing file s3://{bucket_name}/{s3path}") + except Exception as exc: + log.error( + f"failed to stream measurements from s3://{bucket_name}/{s3path}" + ) + log.error(exc) + statsd_client.timing("oonidata.dataclient.stream_file_entry.timed", t.ms, rate=0.1) # type: ignore + statsd_client.gauge("oonidata.dataclient.file_entry.kb_per_sec.gauge", fe_size / 1024 / t.s, rate=0.1) # type: ignore + statsd_client.timing("oonidata.dataclient.batch.timed", tbatch.ms) # type: ignore + return idx + + +@activity.defn +def make_observation_in_day(params: MakeObservationsParams) -> dict: + statsd_client = statsd.StatsClient("localhost", 8125) + + day = datetime.strptime(params.bucket_date, "%Y-%m-%d").date() + + with ClickhouseConnection(params.clickhouse, row_buffer_size=10_000) as db: + prev_ranges = [] + for table_name in ["obs_web"]: + prev_ranges.append( + ( + table_name, + get_prev_range( + db=db, + table_name=table_name, + bucket_date=params.bucket_date, + test_name=params.test_name, + probe_cc=params.probe_cc, + ), + ) + ) + + t = PerfTimer() + total_t = PerfTimer() + file_entry_batches, total_size = list_file_entries_batches( + probe_cc=params.probe_cc, + test_name=params.test_name, + start_day=day, + end_day=day + timedelta(days=1), + ) + log.info(f"running {len(file_entry_batches)} batches took {t.pretty}") + + total_msmt_count = 0 + for batch in file_entry_batches: + # TODO(art): add extra parallelism here + msmt_cnt = make_observations_for_file_entry_batch( + batch, + params.clickhouse, + 10_000, + pathlib.Path(params.data_dir), + params.bucket_date, + params.probe_cc, + params.fast_fail, + ) + total_msmt_count += msmt_cnt + + mb_per_sec = round(total_size / total_t.s / 10**6, 1) + msmt_per_sec = round(total_msmt_count / total_t.s) + log.info( + f"finished processing all batches in {total_t.pretty} speed: {mb_per_sec}MB/s ({msmt_per_sec}msmt/s)" + ) + statsd_client.timing("oonidata.dataclient.daily.timed", total_t.ms) + + if len(prev_ranges) > 0: + with ClickhouseConnection(params.clickhouse, row_buffer_size=10_000) as db: + for table_name, pr in prev_ranges: + maybe_delete_prev_range(db=db, prev_range=pr) + + return {"size": total_size, "measurement_count": total_msmt_count} + + +@workflow.defn +class ObservationsWorkflow: + @workflow.run + async def run(self, params: ObservationsWorkflowParams) -> dict: + log.info("Optimizing all tables") + optimize_all_tables(params.clickhouse) + + t_total = PerfTimer() + log.info( + f"Starting observation making on {params.probe_cc} ({params.start_day} - {params.end_day})" + ) + task_list = [] + start_day = datetime.strptime(params.start_day, "%Y-%m-%d").date() + end_day = datetime.strptime(params.end_day, "%Y-%m-%d").date() + + async with asyncio.TaskGroup() as tg: + for day in date_interval(start_day, end_day): + task = tg.create_task( + workflow.execute_activity( + make_observation_in_day, + MakeObservationsParams( + probe_cc=params.probe_cc, + test_name=params.test_name, + clickhouse=params.clickhouse, + data_dir=params.data_dir, + fast_fail=params.fast_fail, + bucket_date=day.strftime("%Y-%m-%d"), + ), + start_to_close_timeout=timedelta(minutes=30), + ) + ) + task_list.append(task) + + t = PerfTimer() + # size, msmt_count = + total_size, total_msmt_count = 0, 0 + for task in task_list: + res = task.result() + + total_size += res["size"] + total_msmt_count += res["measurement_count"] + + # This needs to be adjusted once we get the the per entry concurrency working + # mb_per_sec = round(total_size / t.s / 10**6, 1) + # msmt_per_sec = round(total_msmt_count / t.s) + # log.info( + # f"finished processing {day} speed: {mb_per_sec}MB/s ({msmt_per_sec}msmt/s)" + # ) + + # with ClickhouseConnection(params.clickhouse) as db: + # db.execute( + # "INSERT INTO oonidata_processing_logs (key, timestamp, runtime_ms, bytes, msmt_count, comment) VALUES", + # [ + # [ + # "oonidata.bucket_processed", + # datetime.now(timezone.utc).replace(tzinfo=None), + # int(t.ms), + # total_size, + # total_msmt_count, + # day.strftime("%Y-%m-%d"), + # ] + # ], + # ) + + mb_per_sec = round(total_size / t_total.s / 10**6, 1) + msmt_per_sec = round(total_msmt_count / t_total.s) + log.info( + f"finished processing {params.start_day} - {params.end_day} speed: {mb_per_sec}MB/s ({msmt_per_sec}msmt/s)" + ) + log.info( + f"{round(total_size/10**9, 2)}GB {total_msmt_count} msmts in {t_total.pretty}" + ) + return {"size": total_size, "measurement_count": total_msmt_count} diff --git a/oonidata/workers/__init__.py b/oonidata/workers/__init__.py index f22d4557..e69de29b 100644 --- a/oonidata/workers/__init__.py +++ b/oonidata/workers/__init__.py @@ -1,4 +0,0 @@ -from .fingerprint_hunter import start_fingerprint_hunter -from .observations import start_observation_maker -from .ground_truths import start_ground_truth_builder -from .response_archiver import start_response_archiver diff --git a/oonidata/workers/observations.py b/oonidata/workers/observations.py deleted file mode 100644 index 23bdf925..00000000 --- a/oonidata/workers/observations.py +++ /dev/null @@ -1,270 +0,0 @@ -import pathlib -import logging -import dataclasses -from datetime import date, datetime, timedelta, timezone - -from typing import ( - List, - Optional, - Sequence, - Tuple, -) - -import statsd - -from dask.distributed import Client as DaskClient -from dask.distributed import progress as dask_progress -from dask.distributed import wait as dask_wait -from dask.distributed import as_completed - -from oonidata.analysis.datasources import load_measurement -from oonidata.datautils import PerfTimer -from oonidata.models.nettests import SupportedDataformats - -from oonidata.netinfo import NetinfoDB - -from oonidata.dataclient import ( - date_interval, - list_file_entries_batches, - stream_measurements, - ccs_set, -) -from oonidata.db.connections import ( - ClickhouseConnection, -) -from oonidata.transforms import measurement_to_observations -from oonidata.workers.common import ( - get_prev_range, - make_db_rows, - maybe_delete_prev_range, - optimize_all_tables, -) - -log = logging.getLogger("oonidata.processing") - - -def write_observations_to_db( - msmt: SupportedDataformats, - netinfodb: NetinfoDB, - db: ClickhouseConnection, - bucket_date: str, -): - for observations in measurement_to_observations(msmt, netinfodb=netinfodb): - if len(observations) == 0: - continue - - column_names = [f.name for f in dataclasses.fields(observations[0])] - table_name, rows = make_db_rows( - bucket_date=bucket_date, - dc_list=observations, - column_names=column_names, - ) - db.write_rows(table_name=table_name, rows=rows, column_names=column_names) - - -def make_observations_for_file_entry_batch( - file_entry_batch: Sequence[Tuple[str, str, str, int]], - clickhouse: str, - row_buffer_size: int, - data_dir: pathlib.Path, - bucket_date: str, - probe_cc: str, - fast_fail: bool, -): - netinfodb = NetinfoDB(datadir=data_dir, download=False) - tbatch = PerfTimer() - with ClickhouseConnection(clickhouse, row_buffer_size=row_buffer_size) as db: - statsd_client = statsd.StatsClient("localhost", 8125) - ccs = ccs_set(probe_cc) - idx = 0 - for bucket_name, s3path, ext, fe_size in file_entry_batch: - log.info(f"processing file s3://{bucket_name}/{s3path}") - t = PerfTimer() - try: - for msmt_dict in stream_measurements( - bucket_name=bucket_name, s3path=s3path, ext=ext - ): - # Legacy cans don't allow us to pre-filter on the probe_cc, so - # we need to check for probe_cc consistency in here. - if ccs and msmt_dict["probe_cc"] not in ccs: - continue - msmt = None - try: - t = PerfTimer() - msmt = load_measurement(msmt_dict) - if not msmt.test_keys: - log.error( - f"measurement with empty test_keys: ({msmt.measurement_uid})", - exc_info=True, - ) - continue - write_observations_to_db(msmt, netinfodb, db, bucket_date) - # following types ignored due to https://github.com/jsocol/pystatsd/issues/146 - statsd_client.timing("oonidata.make_observations.timed", t.ms, rate=0.1) # type: ignore - statsd_client.incr("oonidata.make_observations.msmt_count", rate=0.1) # type: ignore - idx += 1 - except Exception as exc: - msmt_str = msmt_dict.get("report_id", None) - if msmt: - msmt_str = msmt.measurement_uid - log.error(f"failed at idx: {idx} ({msmt_str})", exc_info=True) - - if fast_fail: - db.close() - raise exc - log.info(f"done processing file s3://{bucket_name}/{s3path}") - except Exception as exc: - log.error( - f"failed to stream measurements from s3://{bucket_name}/{s3path}" - ) - log.error(exc) - statsd_client.timing("oonidata.dataclient.stream_file_entry.timed", t.ms, rate=0.1) # type: ignore - statsd_client.gauge("oonidata.dataclient.file_entry.kb_per_sec.gauge", fe_size / 1024 / t.s, rate=0.1) # type: ignore - statsd_client.timing("oonidata.dataclient.batch.timed", tbatch.ms) # type: ignore - return idx - - -def make_observation_in_day( - dask_client: DaskClient, - probe_cc: List[str], - test_name: List[str], - clickhouse: Optional[str], - data_dir: pathlib.Path, - fast_fail: bool, - day: date, -): - statsd_client = statsd.StatsClient("localhost", 8125) - - bucket_date = day.strftime("%Y-%m-%d") - - with ClickhouseConnection(clickhouse, row_buffer_size=10_000) as db: - prev_ranges = [] - for table_name in ["obs_web"]: - prev_ranges.append( - ( - table_name, - get_prev_range( - db=db, - table_name=table_name, - bucket_date=bucket_date, - test_name=test_name, - probe_cc=probe_cc, - ), - ) - ) - - t = PerfTimer() - total_t = PerfTimer() - file_entry_batches, total_size = list_file_entries_batches( - probe_cc=probe_cc, - test_name=test_name, - start_day=day, - end_day=day + timedelta(days=1), - ) - log.info(f"running {len(file_entry_batches)} batches took {t.pretty}") - - future_list = [] - for batch in file_entry_batches: - t = dask_client.submit( - make_observations_for_file_entry_batch, - batch, - clickhouse, - 10_000, - data_dir, - bucket_date, - probe_cc, - fast_fail, - ) - future_list.append(t) - - log.debug("starting progress monitoring") - dask_progress(future_list) - total_msmt_count = 0 - for _, result in as_completed(future_list, with_results=True): - total_msmt_count += result # type: ignore - - log.debug("waiting on task_list") - dask_wait(future_list) - mb_per_sec = round(total_size / total_t.s / 10**6, 1) - msmt_per_sec = round(total_msmt_count / total_t.s) - log.info( - f"finished processing all batches in {total_t.pretty} speed: {mb_per_sec}MB/s ({msmt_per_sec}msmt/s)" - ) - statsd_client.timing("oonidata.dataclient.daily.timed", total_t.ms) - - if len(prev_ranges) > 0: - with ClickhouseConnection(clickhouse, row_buffer_size=10_000) as db: - for table_name, pr in prev_ranges: - maybe_delete_prev_range(db=db, prev_range=pr) - - return total_size, total_msmt_count - - -def start_observation_maker( - probe_cc: List[str], - test_name: List[str], - start_day: date, - end_day: date, - clickhouse: Optional[str], - data_dir: pathlib.Path, - parallelism: int, - fast_fail: bool, - log_level: int = logging.INFO, -): - log.info("Optimizing all tables") - optimize_all_tables(clickhouse) - - dask_client = DaskClient( - threads_per_worker=2, - n_workers=parallelism, - ) - - t_total = PerfTimer() - total_size, total_msmt_count = 0, 0 - day_list = list(date_interval(start_day, end_day)) - # See: https://stackoverflow.com/questions/51099685/best-practices-in-setting-number-of-dask-workers - - log.info(f"Starting observation making on {probe_cc} ({start_day} - {end_day})") - for day in day_list: - t = PerfTimer() - size, msmt_count = make_observation_in_day( - dask_client=dask_client, - probe_cc=probe_cc, - test_name=test_name, - clickhouse=clickhouse, - data_dir=data_dir, - fast_fail=fast_fail, - day=day, - ) - total_size += size - total_msmt_count += msmt_count - mb_per_sec = round(size / t.s / 10**6, 1) - msmt_per_sec = round(msmt_count / t.s) - log.info( - f"finished processing {day} speed: {mb_per_sec}MB/s ({msmt_per_sec}msmt/s)" - ) - with ClickhouseConnection(clickhouse) as db: - db.execute( - "INSERT INTO oonidata_processing_logs (key, timestamp, runtime_ms, bytes, msmt_count, comment) VALUES", - [ - [ - "oonidata.bucket_processed", - datetime.now(timezone.utc).replace(tzinfo=None), - int(t.ms), - size, - msmt_count, - day.strftime("%Y-%m-%d"), - ] - ], - ) - - mb_per_sec = round(total_size / t_total.s / 10**6, 1) - msmt_per_sec = round(total_msmt_count / t_total.s) - log.info( - f"finished processing {start_day} - {end_day} speed: {mb_per_sec}MB/s ({msmt_per_sec}msmt/s)" - ) - log.info( - f"{round(total_size/10**9, 2)}GB {total_msmt_count} msmts in {t_total.pretty}" - ) - - dask_client.shutdown() diff --git a/poetry.lock b/poetry.lock index 5b796d03..d82fbcf7 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,10 +1,9 @@ -# This file is automatically @generated by Poetry and should not be changed by hand. +# This file is automatically @generated by Poetry 1.7.1 and should not be changed by hand. [[package]] name = "altair" version = "4.2.0" description = "Altair: A declarative statistical visualization library for Python." -category = "main" optional = true python-versions = ">=3.7" files = [ @@ -27,7 +26,6 @@ dev = ["black", "docutils", "flake8", "ipython", "m2r", "mistune (<2.0.0)", "pyt name = "anyio" version = "4.0.0" description = "High level compatibility layer for multiple asynchronous event loop implementations" -category = "main" optional = true python-versions = ">=3.8" files = [ @@ -49,7 +47,6 @@ trio = ["trio (>=0.22)"] name = "appnope" version = "0.1.3" description = "Disable App Nap on macOS >= 10.9" -category = "main" optional = true python-versions = "*" files = [ @@ -61,7 +58,6 @@ files = [ name = "argon2-cffi" version = "23.1.0" description = "Argon2 for Python" -category = "main" optional = true python-versions = ">=3.7" files = [ @@ -82,7 +78,6 @@ typing = ["mypy"] name = "argon2-cffi-bindings" version = "21.2.0" description = "Low-level CFFI bindings for Argon2" -category = "main" optional = true python-versions = ">=3.6" files = [ @@ -120,7 +115,6 @@ tests = ["pytest"] name = "arrow" version = "1.3.0" description = "Better dates & times for Python" -category = "main" optional = true python-versions = ">=3.8" files = [ @@ -134,13 +128,12 @@ types-python-dateutil = ">=2.8.10" [package.extras] doc = ["doc8", "sphinx (>=7.0.0)", "sphinx-autobuild", "sphinx-autodoc-typehints", "sphinx_rtd_theme (>=1.3.0)"] -test = ["dateparser (>=1.0.0,<2.0.0)", "pre-commit", "pytest", "pytest-cov", "pytest-mock", "pytz (==2021.1)", "simplejson (>=3.0.0,<4.0.0)"] +test = ["dateparser (==1.*)", "pre-commit", "pytest", "pytest-cov", "pytest-mock", "pytz (==2021.1)", "simplejson (==3.*)"] [[package]] name = "asttokens" version = "2.4.1" description = "Annotate AST trees with source code positions" -category = "main" optional = true python-versions = "*" files = [ @@ -159,7 +152,6 @@ test = ["astroid (>=1,<2)", "astroid (>=2,<4)", "pytest"] name = "async-lru" version = "2.0.4" description = "Simple LRU cache for asyncio" -category = "main" optional = true python-versions = ">=3.8" files = [ @@ -174,7 +166,6 @@ typing-extensions = {version = ">=4.0.0", markers = "python_version < \"3.11\""} name = "attrs" version = "22.1.0" description = "Classes Without Boilerplate" -category = "main" optional = false python-versions = ">=3.5" files = [ @@ -192,7 +183,6 @@ tests-no-zope = ["cloudpickle", "coverage[toml] (>=5.0.2)", "hypothesis", "mypy name = "babel" version = "2.13.1" description = "Internationalization utilities" -category = "main" optional = true python-versions = ">=3.7" files = [ @@ -211,7 +201,6 @@ dev = ["freezegun (>=1.0,<2.0)", "pytest (>=6.0)", "pytest-cov"] name = "backcall" version = "0.2.0" description = "Specifications for callback functions passed in to an API" -category = "main" optional = true python-versions = "*" files = [ @@ -223,7 +212,6 @@ files = [ name = "backports.zoneinfo" version = "0.2.1" description = "Backport of the standard library zoneinfo module" -category = "main" optional = false python-versions = ">=3.6" files = [ @@ -252,7 +240,6 @@ tzdata = ["tzdata"] name = "beautifulsoup4" version = "4.12.2" description = "Screen-scraping library" -category = "main" optional = true python-versions = ">=3.6.0" files = [ @@ -271,7 +258,6 @@ lxml = ["lxml"] name = "black" version = "22.10.0" description = "The uncompromising code formatter." -category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -316,7 +302,6 @@ uvloop = ["uvloop (>=0.15.2)"] name = "bleach" version = "6.1.0" description = "An easy safelist-based HTML-sanitizing tool." -category = "main" optional = true python-versions = ">=3.8" files = [ @@ -335,7 +320,6 @@ css = ["tinycss2 (>=1.1.0,<1.3)"] name = "bokeh" version = "2.4.3" description = "Interactive plots and applications in the browser from Python" -category = "main" optional = false python-versions = ">=3.7" files = [ @@ -356,7 +340,6 @@ typing-extensions = ">=3.10.0" name = "boto3" version = "1.26.20" description = "The AWS SDK for Python" -category = "main" optional = false python-versions = ">= 3.7" files = [ @@ -376,7 +359,6 @@ crt = ["botocore[crt] (>=1.21.0,<2.0a0)"] name = "botocore" version = "1.29.20" description = "Low-level, data-driven core of boto 3." -category = "main" optional = false python-versions = ">= 3.7" files = [ @@ -396,7 +378,6 @@ crt = ["awscrt (==0.15.3)"] name = "certifi" version = "2022.9.24" description = "Python package for providing Mozilla's CA Bundle." -category = "main" optional = false python-versions = ">=3.6" files = [ @@ -408,7 +389,6 @@ files = [ name = "cffi" version = "1.15.1" description = "Foreign Function Interface for Python calling C code." -category = "main" optional = false python-versions = "*" files = [ @@ -485,7 +465,6 @@ pycparser = "*" name = "charset-normalizer" version = "2.1.1" description = "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet." -category = "main" optional = false python-versions = ">=3.6.0" files = [ @@ -500,7 +479,6 @@ unicode-backport = ["unicodedata2"] name = "click" version = "8.1.3" description = "Composable command line interface toolkit" -category = "main" optional = false python-versions = ">=3.7" files = [ @@ -515,7 +493,6 @@ colorama = {version = "*", markers = "platform_system == \"Windows\""} name = "click-loglevel" version = "0.5.0" description = "Log level parameter type for Click" -category = "main" optional = false python-versions = ">=3.7" files = [ @@ -530,7 +507,6 @@ click = ">=8.0" name = "clickhouse-driver" version = "0.2.5" description = "Python driver with native interface for ClickHouse" -category = "main" optional = false python-versions = ">=3.6, <4" files = [ @@ -643,7 +619,6 @@ zstd = ["clickhouse-cityhash (>=1.0.2.1)", "zstd"] name = "cloudpickle" version = "2.2.1" description = "Extended pickling support for Python objects" -category = "main" optional = false python-versions = ">=3.6" files = [ @@ -655,7 +630,6 @@ files = [ name = "colorama" version = "0.4.6" description = "Cross-platform colored terminal text." -category = "main" optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7" files = [ @@ -667,7 +641,6 @@ files = [ name = "comm" version = "0.1.4" description = "Jupyter Python Comm implementation, for usage in ipykernel, xeus-python etc." -category = "main" optional = true python-versions = ">=3.6" files = [ @@ -687,7 +660,6 @@ typing = ["mypy (>=0.990)"] name = "commonmark" version = "0.9.1" description = "Python parser for the CommonMark Markdown spec" -category = "dev" optional = false python-versions = "*" files = [ @@ -702,7 +674,6 @@ test = ["flake8 (==3.7.8)", "hypothesis (==3.55.3)"] name = "coverage" version = "6.5.0" description = "Code coverage measurement for Python" -category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -768,7 +739,6 @@ toml = ["tomli"] name = "cryptography" version = "38.0.4" description = "cryptography is a package which provides cryptographic recipes and primitives to Python developers." -category = "main" optional = false python-versions = ">=3.6" files = [ @@ -815,7 +785,6 @@ test = ["hypothesis (>=1.11.4,!=3.79.2)", "iso8601", "pretend", "pytest (>=6.2.0 name = "dask" version = "2023.3.2" description = "Parallel PyData with Task Scheduling" -category = "main" optional = false python-versions = ">=3.8" files = [ @@ -852,7 +821,6 @@ test = ["pandas[test]", "pre-commit", "pytest", "pytest-rerunfailures", "pytest- name = "debugpy" version = "1.8.0" description = "An implementation of the Debug Adapter Protocol for Python" -category = "main" optional = true python-versions = ">=3.8" files = [ @@ -880,7 +848,6 @@ files = [ name = "decorator" version = "5.1.1" description = "Decorators for Humans" -category = "main" optional = true python-versions = ">=3.5" files = [ @@ -892,7 +859,6 @@ files = [ name = "defusedxml" version = "0.7.1" description = "XML bomb protection for Python stdlib modules" -category = "main" optional = true python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" files = [ @@ -904,7 +870,6 @@ files = [ name = "distributed" version = "2023.3.2" description = "Distributed scheduler for Dask" -category = "main" optional = false python-versions = ">=3.8" files = [ @@ -933,7 +898,6 @@ zict = ">=2.1.0" name = "entrypoints" version = "0.4" description = "Discover and load entry points from installed packages." -category = "main" optional = true python-versions = ">=3.6" files = [ @@ -945,7 +909,6 @@ files = [ name = "exceptiongroup" version = "1.0.4" description = "Backport of PEP 654 (exception groups)" -category = "main" optional = false python-versions = ">=3.7" files = [ @@ -960,7 +923,6 @@ test = ["pytest (>=6)"] name = "executing" version = "2.0.1" description = "Get the currently executing AST node of a frame, and other information" -category = "main" optional = true python-versions = ">=3.5" files = [ @@ -975,7 +937,6 @@ tests = ["asttokens (>=2.1.0)", "coverage", "coverage-enable-subprocess", "ipyth name = "fastjsonschema" version = "2.18.1" description = "Fastest Python implementation of JSON schema" -category = "main" optional = true python-versions = "*" files = [ @@ -990,7 +951,6 @@ devel = ["colorama", "json-spec", "jsonschema", "pylint", "pytest", "pytest-benc name = "flask" version = "2.2.2" description = "A simple framework for building complex web applications." -category = "main" optional = true python-versions = ">=3.7" files = [ @@ -1013,7 +973,6 @@ dotenv = ["python-dotenv"] name = "fqdn" version = "1.5.1" description = "Validates fully-qualified domain names against RFC 1123, so that they are acceptable to modern bowsers" -category = "main" optional = true python-versions = ">=2.7, !=3.0, !=3.1, !=3.2, !=3.3, !=3.4, <4" files = [ @@ -1025,7 +984,6 @@ files = [ name = "fsspec" version = "2023.4.0" description = "File-system specification" -category = "main" optional = false python-versions = ">=3.8" files = [ @@ -1061,7 +1019,6 @@ tqdm = ["tqdm"] name = "gprof2dot" version = "2022.7.29" description = "Generate a dot graph from the output of several profilers." -category = "dev" optional = false python-versions = ">=2.7" files = [ @@ -1073,7 +1030,6 @@ files = [ name = "heapdict" version = "1.0.1" description = "a heap with decrease-key and increase-key operations" -category = "main" optional = false python-versions = "*" files = [ @@ -1085,7 +1041,6 @@ files = [ name = "idna" version = "3.4" description = "Internationalized Domain Names in Applications (IDNA)" -category = "main" optional = false python-versions = ">=3.5" files = [ @@ -1097,7 +1052,6 @@ files = [ name = "importlib-metadata" version = "5.1.0" description = "Read metadata from Python packages" -category = "main" optional = false python-versions = ">=3.7" files = [ @@ -1117,7 +1071,6 @@ testing = ["flake8 (<5)", "flufl.flake8", "importlib-resources (>=1.3)", "packag name = "importlib-resources" version = "5.10.1" description = "Read resources from Python packages" -category = "main" optional = true python-versions = ">=3.7" files = [ @@ -1136,7 +1089,6 @@ testing = ["flake8 (<5)", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-chec name = "iniconfig" version = "1.1.1" description = "iniconfig: brain-dead simple config-ini parsing" -category = "dev" optional = false python-versions = "*" files = [ @@ -1148,7 +1100,6 @@ files = [ name = "ipykernel" version = "6.26.0" description = "IPython Kernel for Jupyter" -category = "main" optional = true python-versions = ">=3.8" files = [ @@ -1162,7 +1113,7 @@ comm = ">=0.1.1" debugpy = ">=1.6.5" ipython = ">=7.23.1" jupyter-client = ">=6.1.12" -jupyter-core = ">=4.12,<5.0.0 || >=5.1.0" +jupyter-core = ">=4.12,<5.0.dev0 || >=5.1.dev0" matplotlib-inline = ">=0.1" nest-asyncio = "*" packaging = "*" @@ -1182,7 +1133,6 @@ test = ["flaky", "ipyparallel", "pre-commit", "pytest (>=7.0)", "pytest-asyncio" name = "ipython" version = "8.12.3" description = "IPython: Productive Interactive Computing" -category = "main" optional = true python-versions = ">=3.8" files = [ @@ -1222,7 +1172,6 @@ test-extra = ["curio", "matplotlib (!=3.2.0)", "nbformat", "numpy (>=1.21)", "pa name = "isoduration" version = "20.11.0" description = "Operations with ISO 8601 durations" -category = "main" optional = true python-versions = ">=3.7" files = [ @@ -1237,7 +1186,6 @@ arrow = ">=0.15.0" name = "itsdangerous" version = "2.1.2" description = "Safely pass data to untrusted environments and back." -category = "main" optional = true python-versions = ">=3.7" files = [ @@ -1249,7 +1197,6 @@ files = [ name = "jedi" version = "0.19.1" description = "An autocompletion tool for Python that can be used for text editors." -category = "main" optional = true python-versions = ">=3.6" files = [ @@ -1269,7 +1216,6 @@ testing = ["Django", "attrs", "colorama", "docopt", "pytest (<7.0.0)"] name = "jinja2" version = "3.1.2" description = "A very fast and expressive template engine." -category = "main" optional = false python-versions = ">=3.7" files = [ @@ -1287,7 +1233,6 @@ i18n = ["Babel (>=2.7)"] name = "jmespath" version = "1.0.1" description = "JSON Matching Expressions" -category = "main" optional = false python-versions = ">=3.7" files = [ @@ -1299,7 +1244,6 @@ files = [ name = "json5" version = "0.9.14" description = "A Python implementation of the JSON5 data format." -category = "main" optional = true python-versions = "*" files = [ @@ -1314,7 +1258,6 @@ dev = ["hypothesis"] name = "jsonpointer" version = "2.4" description = "Identify specific nodes in a JSON document (RFC 6901)" -category = "main" optional = true python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*, !=3.6.*" files = [ @@ -1326,7 +1269,6 @@ files = [ name = "jsonschema" version = "4.17.3" description = "An implementation of JSON Schema validation for Python" -category = "main" optional = true python-versions = ">=3.7" files = [ @@ -1356,7 +1298,6 @@ format-nongpl = ["fqdn", "idna", "isoduration", "jsonpointer (>1.13)", "rfc3339- name = "jupyter-client" version = "8.5.0" description = "Jupyter protocol implementation and client libraries" -category = "main" optional = true python-versions = ">=3.8" files = [ @@ -1366,7 +1307,7 @@ files = [ [package.dependencies] importlib-metadata = {version = ">=4.8.3", markers = "python_version < \"3.10\""} -jupyter-core = ">=4.12,<5.0.0 || >=5.1.0" +jupyter-core = ">=4.12,<5.0.dev0 || >=5.1.dev0" python-dateutil = ">=2.8.2" pyzmq = ">=23.0" tornado = ">=6.2" @@ -1380,7 +1321,6 @@ test = ["coverage", "ipykernel (>=6.14)", "mypy", "paramiko", "pre-commit", "pyt name = "jupyter-core" version = "5.5.0" description = "Jupyter core package. A base package on which Jupyter projects rely." -category = "main" optional = true python-versions = ">=3.8" files = [ @@ -1401,7 +1341,6 @@ test = ["ipykernel", "pre-commit", "pytest", "pytest-cov", "pytest-timeout"] name = "jupyter-events" version = "0.6.3" description = "Jupyter Event System library" -category = "main" optional = true python-versions = ">=3.7" files = [ @@ -1426,7 +1365,6 @@ test = ["click", "coverage", "pre-commit", "pytest (>=7.0)", "pytest-asyncio (>= name = "jupyter-lsp" version = "2.2.0" description = "Multi-Language Server WebSocket proxy for Jupyter Notebook/Lab server" -category = "main" optional = true python-versions = ">=3.8" files = [ @@ -1442,7 +1380,6 @@ jupyter-server = ">=1.1.2" name = "jupyter-server" version = "2.9.1" description = "The backend—i.e. core services, APIs, and REST endpoints—to Jupyter web applications." -category = "main" optional = true python-versions = ">=3.8" files = [ @@ -1455,7 +1392,7 @@ anyio = ">=3.1.0" argon2-cffi = "*" jinja2 = "*" jupyter-client = ">=7.4.4" -jupyter-core = ">=4.12,<5.0.0 || >=5.1.0" +jupyter-core = ">=4.12,<5.0.dev0 || >=5.1.dev0" jupyter-events = ">=0.6.0" jupyter-server-terminals = "*" nbconvert = ">=6.4.4" @@ -1479,7 +1416,6 @@ test = ["flaky", "ipykernel", "pre-commit", "pytest (>=7.0)", "pytest-console-sc name = "jupyter-server-terminals" version = "0.4.4" description = "A Jupyter Server Extension Providing Terminals." -category = "main" optional = true python-versions = ">=3.8" files = [ @@ -1499,7 +1435,6 @@ test = ["coverage", "jupyter-server (>=2.0.0)", "pytest (>=7.0)", "pytest-cov", name = "jupyterlab" version = "4.0.7" description = "JupyterLab computational environment" -category = "main" optional = true python-versions = ">=3.8" files = [ @@ -1533,7 +1468,6 @@ test = ["coverage", "pytest (>=7.0)", "pytest-check-links (>=0.7)", "pytest-cons name = "jupyterlab-pygments" version = "0.2.2" description = "Pygments theme using JupyterLab CSS variables" -category = "main" optional = true python-versions = ">=3.7" files = [ @@ -1545,7 +1479,6 @@ files = [ name = "jupyterlab-server" version = "2.24.0" description = "A set of server components for JupyterLab and JupyterLab like applications." -category = "main" optional = true python-versions = ">=3.7" files = [ @@ -1572,7 +1505,6 @@ test = ["hatch", "ipykernel", "jupyterlab-server[openapi]", "openapi-spec-valida name = "locket" version = "1.0.0" description = "File-based locks for Python on Linux and Windows" -category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" files = [ @@ -1584,7 +1516,6 @@ files = [ name = "lxml" version = "4.9.1" description = "Powerful and Pythonic XML processing library combining libxml2/libxslt with the ElementTree API." -category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, != 3.4.*" files = [ @@ -1670,7 +1601,6 @@ source = ["Cython (>=0.29.7)"] name = "lz4" version = "4.3.2" description = "LZ4 Bindings for Python" -category = "main" optional = false python-versions = ">=3.7" files = [ @@ -1720,7 +1650,6 @@ tests = ["psutil", "pytest (!=3.3.0)", "pytest-cov"] name = "markupsafe" version = "2.1.1" description = "Safely add untrusted strings to HTML/XML markup." -category = "main" optional = false python-versions = ">=3.7" files = [ @@ -1770,7 +1699,6 @@ files = [ name = "mashumaro" version = "3.1.1" description = "Fast serialization framework on top of dataclasses" -category = "main" optional = false python-versions = ">=3.6" files = [ @@ -1791,7 +1719,6 @@ yaml = ["pyyaml (>=3.13)"] name = "matplotlib-inline" version = "0.1.6" description = "Inline Matplotlib backend for Jupyter" -category = "main" optional = true python-versions = ">=3.5" files = [ @@ -1806,7 +1733,6 @@ traitlets = "*" name = "maxminddb" version = "2.2.0" description = "Reader for the MaxMind DB format" -category = "main" optional = false python-versions = ">=3.6" files = [ @@ -1817,7 +1743,6 @@ files = [ name = "memray" version = "1.4.1" description = "A memory profiler for Python applications" -category = "dev" optional = false python-versions = ">=3.7.0" files = [ @@ -1857,7 +1782,6 @@ test = ["Cython", "greenlet", "ipython", "pytest", "pytest-cov"] name = "mistune" version = "3.0.2" description = "A sane and fast Markdown parser with useful plugins and renderers" -category = "main" optional = true python-versions = ">=3.7" files = [ @@ -1869,7 +1793,6 @@ files = [ name = "msgpack" version = "1.0.4" description = "MessagePack serializer" -category = "main" optional = false python-versions = "*" files = [ @@ -1931,7 +1854,6 @@ files = [ name = "mypy" version = "0.991" description = "Optional static typing for Python" -category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -1982,7 +1904,6 @@ reports = ["lxml"] name = "mypy-extensions" version = "0.4.3" description = "Experimental type system extensions for programs checked with the mypy typechecker." -category = "dev" optional = false python-versions = "*" files = [ @@ -1994,7 +1915,6 @@ files = [ name = "nbclient" version = "0.8.0" description = "A client library for executing notebooks. Formerly nbconvert's ExecutePreprocessor." -category = "main" optional = true python-versions = ">=3.8.0" files = [ @@ -2004,7 +1924,7 @@ files = [ [package.dependencies] jupyter-client = ">=6.1.12" -jupyter-core = ">=4.12,<5.0.0 || >=5.1.0" +jupyter-core = ">=4.12,<5.0.dev0 || >=5.1.dev0" nbformat = ">=5.1" traitlets = ">=5.4" @@ -2017,7 +1937,6 @@ test = ["flaky", "ipykernel (>=6.19.3)", "ipython", "ipywidgets", "nbconvert (>= name = "nbconvert" version = "7.10.0" description = "Converting Jupyter Notebooks" -category = "main" optional = true python-versions = ">=3.8" files = [ @@ -2056,7 +1975,6 @@ webpdf = ["playwright"] name = "nbformat" version = "5.9.2" description = "The Jupyter Notebook format" -category = "main" optional = true python-versions = ">=3.8" files = [ @@ -2078,7 +1996,6 @@ test = ["pep440", "pre-commit", "pytest", "testpath"] name = "nest-asyncio" version = "1.5.8" description = "Patch asyncio to allow nested event loops" -category = "main" optional = true python-versions = ">=3.5" files = [ @@ -2090,7 +2007,6 @@ files = [ name = "notebook-shim" version = "0.2.3" description = "A shim layer for notebook traits and config" -category = "main" optional = true python-versions = ">=3.7" files = [ @@ -2108,7 +2024,6 @@ test = ["pytest", "pytest-console-scripts", "pytest-jupyter", "pytest-tornasync" name = "numpy" version = "1.24.2" description = "Fundamental package for array computing in Python" -category = "main" optional = false python-versions = ">=3.8" files = [ @@ -2146,7 +2061,6 @@ files = [ name = "objprint" version = "0.2.2" description = "A library that can print Python objects in human readable format" -category = "dev" optional = false python-versions = ">=3.6" files = [ @@ -2158,7 +2072,6 @@ files = [ name = "orjson" version = "3.8.2" description = "Fast, correct Python JSON library supporting dataclasses, datetimes, and numpy" -category = "main" optional = false python-versions = ">=3.7" files = [ @@ -2217,7 +2130,6 @@ files = [ name = "overrides" version = "7.4.0" description = "A decorator to automatically detect mismatch when overriding a method." -category = "main" optional = true python-versions = ">=3.6" files = [ @@ -2229,7 +2141,6 @@ files = [ name = "packaging" version = "21.3" description = "Core utilities for Python packages" -category = "main" optional = false python-versions = ">=3.6" files = [ @@ -2244,7 +2155,6 @@ pyparsing = ">=2.0.2,<3.0.5 || >3.0.5" name = "pandas" version = "2.0.0" description = "Powerful data structures for data analysis, time series, and statistics" -category = "main" optional = false python-versions = ">=3.8" files = [ @@ -2278,8 +2188,8 @@ files = [ [package.dependencies] numpy = [ {version = ">=1.20.3", markers = "python_version < \"3.10\""}, - {version = ">=1.21.0", markers = "python_version >= \"3.10\""}, {version = ">=1.23.2", markers = "python_version >= \"3.11\""}, + {version = ">=1.21.0", markers = "python_version >= \"3.10\" and python_version < \"3.11\""}, ] python-dateutil = ">=2.8.2" pytz = ">=2020.1" @@ -2312,7 +2222,6 @@ xml = ["lxml (>=4.6.3)"] name = "pandocfilters" version = "1.5.0" description = "Utilities for writing pandoc filters in python" -category = "main" optional = true python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" files = [ @@ -2324,7 +2233,6 @@ files = [ name = "parso" version = "0.8.3" description = "A Python Parser" -category = "main" optional = true python-versions = ">=3.6" files = [ @@ -2340,7 +2248,6 @@ testing = ["docopt", "pytest (<6.0.0)"] name = "partd" version = "1.3.0" description = "Appendable key-value storage" -category = "main" optional = false python-versions = ">=3.7" files = [ @@ -2359,7 +2266,6 @@ complete = ["blosc", "numpy (>=1.9.0)", "pandas (>=0.19.0)", "pyzmq"] name = "pathspec" version = "0.10.2" description = "Utility library for gitignore style pattern matching of file paths." -category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -2371,7 +2277,6 @@ files = [ name = "pexpect" version = "4.8.0" description = "Pexpect allows easy control of interactive console applications." -category = "main" optional = true python-versions = "*" files = [ @@ -2386,7 +2291,6 @@ ptyprocess = ">=0.5" name = "pickleshare" version = "0.7.5" description = "Tiny 'shelve'-like database with concurrency support" -category = "main" optional = true python-versions = "*" files = [ @@ -2398,7 +2302,6 @@ files = [ name = "pillow" version = "9.5.0" description = "Python Imaging Library (Fork)" -category = "main" optional = false python-versions = ">=3.7" files = [ @@ -2478,7 +2381,6 @@ tests = ["check-manifest", "coverage", "defusedxml", "markdown2", "olefile", "pa name = "pkgutil-resolve-name" version = "1.3.10" description = "Resolve a name to an object." -category = "main" optional = true python-versions = ">=3.6" files = [ @@ -2490,7 +2392,6 @@ files = [ name = "platformdirs" version = "2.5.4" description = "A small Python package for determining appropriate platform-specific dirs, e.g. a \"user data dir\"." -category = "main" optional = false python-versions = ">=3.7" files = [ @@ -2506,7 +2407,6 @@ test = ["appdirs (==1.4.4)", "pytest (>=7.2)", "pytest-cov (>=4)", "pytest-mock name = "pluggy" version = "1.0.0" description = "plugin and hook calling mechanisms for python" -category = "dev" optional = false python-versions = ">=3.6" files = [ @@ -2522,7 +2422,6 @@ testing = ["pytest", "pytest-benchmark"] name = "prometheus-client" version = "0.17.1" description = "Python client for the Prometheus monitoring system." -category = "main" optional = true python-versions = ">=3.6" files = [ @@ -2537,7 +2436,6 @@ twisted = ["twisted"] name = "prompt-toolkit" version = "3.0.39" description = "Library for building powerful interactive command lines in Python" -category = "main" optional = true python-versions = ">=3.7.0" files = [ @@ -2548,11 +2446,30 @@ files = [ [package.dependencies] wcwidth = "*" +[[package]] +name = "protobuf" +version = "4.25.3" +description = "" +optional = true +python-versions = ">=3.8" +files = [ + {file = "protobuf-4.25.3-cp310-abi3-win32.whl", hash = "sha256:d4198877797a83cbfe9bffa3803602bbe1625dc30d8a097365dbc762e5790faa"}, + {file = "protobuf-4.25.3-cp310-abi3-win_amd64.whl", hash = "sha256:209ba4cc916bab46f64e56b85b090607a676f66b473e6b762e6f1d9d591eb2e8"}, + {file = "protobuf-4.25.3-cp37-abi3-macosx_10_9_universal2.whl", hash = "sha256:f1279ab38ecbfae7e456a108c5c0681e4956d5b1090027c1de0f934dfdb4b35c"}, + {file = "protobuf-4.25.3-cp37-abi3-manylinux2014_aarch64.whl", hash = "sha256:e7cb0ae90dd83727f0c0718634ed56837bfeeee29a5f82a7514c03ee1364c019"}, + {file = "protobuf-4.25.3-cp37-abi3-manylinux2014_x86_64.whl", hash = "sha256:7c8daa26095f82482307bc717364e7c13f4f1c99659be82890dcfc215194554d"}, + {file = "protobuf-4.25.3-cp38-cp38-win32.whl", hash = "sha256:f4f118245c4a087776e0a8408be33cf09f6c547442c00395fbfb116fac2f8ac2"}, + {file = "protobuf-4.25.3-cp38-cp38-win_amd64.whl", hash = "sha256:c053062984e61144385022e53678fbded7aea14ebb3e0305ae3592fb219ccfa4"}, + {file = "protobuf-4.25.3-cp39-cp39-win32.whl", hash = "sha256:19b270aeaa0099f16d3ca02628546b8baefe2955bbe23224aaf856134eccf1e4"}, + {file = "protobuf-4.25.3-cp39-cp39-win_amd64.whl", hash = "sha256:e3c97a1555fd6388f857770ff8b9703083de6bf1f9274a002a332d65fbb56c8c"}, + {file = "protobuf-4.25.3-py3-none-any.whl", hash = "sha256:f0700d54bcf45424477e46a9f0944155b46fb0639d69728739c0e47bab83f2b9"}, + {file = "protobuf-4.25.3.tar.gz", hash = "sha256:25b5d0b42fd000320bd7830b349e3b696435f3b329810427a6bcce6a5492cc5c"}, +] + [[package]] name = "psutil" version = "5.9.4" description = "Cross-platform lib for process and system monitoring in Python." -category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" files = [ @@ -2579,7 +2496,6 @@ test = ["enum34", "ipaddress", "mock", "pywin32", "wmi"] name = "ptyprocess" version = "0.7.0" description = "Run a subprocess in a pseudo terminal" -category = "main" optional = true python-versions = "*" files = [ @@ -2591,7 +2507,6 @@ files = [ name = "pure-eval" version = "0.2.2" description = "Safely evaluate AST nodes without side effects" -category = "main" optional = true python-versions = "*" files = [ @@ -2606,7 +2521,6 @@ tests = ["pytest"] name = "py-cpuinfo" version = "9.0.0" description = "Get CPU info with pure Python" -category = "dev" optional = false python-versions = "*" files = [ @@ -2618,7 +2532,6 @@ files = [ name = "pyarrow" version = "11.0.0" description = "Python library for Apache Arrow" -category = "main" optional = false python-versions = ">=3.7" files = [ @@ -2656,7 +2569,6 @@ numpy = ">=1.16.6" name = "pycparser" version = "2.21" description = "C parser in Python" -category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" files = [ @@ -2668,7 +2580,6 @@ files = [ name = "pygments" version = "2.13.0" description = "Pygments is a syntax highlighting package written in Python." -category = "main" optional = false python-versions = ">=3.6" files = [ @@ -2683,7 +2594,6 @@ plugins = ["importlib-metadata"] name = "pyopenssl" version = "22.1.0" description = "Python wrapper module around the OpenSSL library" -category = "main" optional = false python-versions = ">=3.6" files = [ @@ -2702,7 +2612,6 @@ test = ["flaky", "pretend", "pytest (>=3.0.1)"] name = "pyparsing" version = "3.0.9" description = "pyparsing module - Classes and methods to define and execute parsing grammars" -category = "main" optional = false python-versions = ">=3.6.8" files = [ @@ -2717,7 +2626,6 @@ diagrams = ["jinja2", "railroad-diagrams"] name = "pyrsistent" version = "0.19.2" description = "Persistent/Functional/Immutable data structures" -category = "main" optional = true python-versions = ">=3.7" files = [ @@ -2749,7 +2657,6 @@ files = [ name = "pytest" version = "7.2.0" description = "pytest: simple powerful testing with Python" -category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -2773,7 +2680,6 @@ testing = ["argcomplete", "hypothesis (>=3.56)", "mock", "nose", "pygments (>=2. name = "pytest-benchmark" version = "4.0.0" description = "A ``pytest`` fixture for benchmarking code. It will group the tests into rounds that are calibrated to the chosen timer." -category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -2794,7 +2700,6 @@ histogram = ["pygal", "pygaljs"] name = "pytest-cov" version = "4.0.0" description = "Pytest plugin for measuring coverage." -category = "dev" optional = false python-versions = ">=3.6" files = [ @@ -2813,7 +2718,6 @@ testing = ["fields", "hunter", "process-tests", "pytest-xdist", "six", "virtuale name = "pytest-profiling" version = "1.7.0" description = "Profiling plugin for py.test" -category = "dev" optional = false python-versions = "*" files = [ @@ -2833,7 +2737,6 @@ tests = ["pytest-virtualenv"] name = "python-dateutil" version = "2.8.2" description = "Extensions to the standard Python datetime module" -category = "main" optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7" files = [ @@ -2848,7 +2751,6 @@ six = ">=1.5" name = "python-json-logger" version = "2.0.7" description = "A python library adding a json log formatter" -category = "main" optional = true python-versions = ">=3.6" files = [ @@ -2860,7 +2762,6 @@ files = [ name = "pytz" version = "2022.6" description = "World timezone definitions, modern and historical" -category = "main" optional = false python-versions = "*" files = [ @@ -2872,7 +2773,6 @@ files = [ name = "pytz-deprecation-shim" version = "0.1.0.post0" description = "Shims to make deprecation of pytz easier" -category = "main" optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,>=2.7" files = [ @@ -2888,7 +2788,6 @@ tzdata = {version = "*", markers = "python_version >= \"3.6\""} name = "pywin32" version = "306" description = "Python for Window Extensions" -category = "main" optional = true python-versions = "*" files = [ @@ -2912,7 +2811,6 @@ files = [ name = "pywinpty" version = "2.0.12" description = "Pseudo terminal support for Windows from Python." -category = "main" optional = true python-versions = ">=3.8" files = [ @@ -2928,7 +2826,6 @@ files = [ name = "pyyaml" version = "6.0" description = "YAML parser and emitter for Python" -category = "main" optional = false python-versions = ">=3.6" files = [ @@ -2978,7 +2875,6 @@ files = [ name = "pyzmq" version = "25.1.1" description = "Python bindings for 0MQ" -category = "main" optional = true python-versions = ">=3.6" files = [ @@ -3084,7 +2980,6 @@ cffi = {version = "*", markers = "implementation_name == \"pypy\""} name = "requests" version = "2.28.1" description = "Python HTTP for Humans." -category = "main" optional = false python-versions = ">=3.7, <4" files = [ @@ -3106,7 +3001,6 @@ use-chardet-on-py3 = ["chardet (>=3.0.2,<6)"] name = "rfc3339-validator" version = "0.1.4" description = "A pure python RFC3339 validator" -category = "main" optional = true python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" files = [ @@ -3121,7 +3015,6 @@ six = "*" name = "rfc3986-validator" version = "0.1.1" description = "Pure python rfc3986 validator" -category = "main" optional = true python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" files = [ @@ -3133,7 +3026,6 @@ files = [ name = "rich" version = "12.6.0" description = "Render rich text, tables, progress bars, syntax highlighting, markdown and more to the terminal" -category = "dev" optional = false python-versions = ">=3.6.3,<4.0.0" files = [ @@ -3153,7 +3045,6 @@ jupyter = ["ipywidgets (>=7.5.1,<8.0.0)"] name = "s3transfer" version = "0.6.0" description = "An Amazon S3 Transfer Manager" -category = "main" optional = false python-versions = ">= 3.7" files = [ @@ -3171,7 +3062,6 @@ crt = ["botocore[crt] (>=1.20.29,<2.0a.0)"] name = "send2trash" version = "1.8.2" description = "Send file to trash natively under Mac OS X, Windows and Linux" -category = "main" optional = true python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,>=2.7" files = [ @@ -3188,7 +3078,6 @@ win32 = ["pywin32"] name = "setuptools" version = "68.2.2" description = "Easily download, build, install, upgrade, and uninstall Python packages" -category = "main" optional = true python-versions = ">=3.8" files = [ @@ -3205,7 +3094,6 @@ testing-integration = ["build[virtualenv] (>=1.0.3)", "filelock (>=3.4.0)", "jar name = "six" version = "1.16.0" description = "Python 2 and 3 compatibility utilities" -category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*" files = [ @@ -3217,7 +3105,6 @@ files = [ name = "snakeviz" version = "2.1.1" description = "A web-based viewer for Python profiler output" -category = "dev" optional = false python-versions = "*" files = [ @@ -3232,7 +3119,6 @@ tornado = ">=2.0" name = "sniffio" version = "1.3.0" description = "Sniff out which async library your code is running under" -category = "main" optional = true python-versions = ">=3.7" files = [ @@ -3244,7 +3130,6 @@ files = [ name = "sortedcontainers" version = "2.4.0" description = "Sorted Containers -- Sorted List, Sorted Dict, Sorted Set" -category = "main" optional = false python-versions = "*" files = [ @@ -3256,7 +3141,6 @@ files = [ name = "soupsieve" version = "2.5" description = "A modern CSS selector implementation for Beautiful Soup." -category = "main" optional = true python-versions = ">=3.8" files = [ @@ -3268,7 +3152,6 @@ files = [ name = "stack-data" version = "0.6.3" description = "Extract data from python stack frames and tracebacks for informative displays" -category = "main" optional = true python-versions = "*" files = [ @@ -3288,7 +3171,6 @@ tests = ["cython", "littleutils", "pygments", "pytest", "typeguard"] name = "statsd" version = "4.0.1" description = "A simple statsd client." -category = "main" optional = false python-versions = "*" files = [ @@ -3300,7 +3182,6 @@ files = [ name = "tabulate" version = "0.9.0" description = "Pretty-print tabular data" -category = "main" optional = false python-versions = ">=3.7" files = [ @@ -3315,7 +3196,6 @@ widechars = ["wcwidth"] name = "tblib" version = "1.7.0" description = "Traceback serialization library." -category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" files = [ @@ -3323,11 +3203,35 @@ files = [ {file = "tblib-1.7.0.tar.gz", hash = "sha256:059bd77306ea7b419d4f76016aef6d7027cc8a0785579b5aad198803435f882c"}, ] +[[package]] +name = "temporalio" +version = "1.5.1" +description = "Temporal.io Python SDK" +optional = true +python-versions = ">=3.8,<4.0" +files = [ + {file = "temporalio-1.5.1-cp38-abi3-macosx_10_9_x86_64.whl", hash = "sha256:cd1f8930787c728e30ca2fecf86175cafd1781d97e3ee7cdf6e41915c566a835"}, + {file = "temporalio-1.5.1-cp38-abi3-macosx_11_0_arm64.whl", hash = "sha256:2b3765e0b6b0ef0b670cf39720a80280fd35be2444633c715b741d2b5428ceb6"}, + {file = "temporalio-1.5.1-cp38-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:47149204b6430c8553d5dd6dfe2fbc6830bf6fd8ab08463ee4c97885c68f3082"}, + {file = "temporalio-1.5.1-cp38-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:1167f6fc31355170cdb4f5f7b89f0f7e36c54d0aecb0ee9aa611f73e32db7d78"}, + {file = "temporalio-1.5.1-cp38-abi3-win_amd64.whl", hash = "sha256:15d36d2038b0ac33511163619bea7ead6f10aca3db5bad4b9d464d3fa0f4ff48"}, + {file = "temporalio-1.5.1.tar.gz", hash = "sha256:4c7bbc8a3e8df1ffc0c7d213bdcad26ae055bdd615567ce1ca4bfa9f28f831b8"}, +] + +[package.dependencies] +protobuf = ">=3.20" +python-dateutil = {version = ">=2.8.2,<3.0.0", markers = "python_version < \"3.11\""} +types-protobuf = ">=3.20" +typing-extensions = ">=4.2.0,<5.0.0" + +[package.extras] +grpc = ["grpcio (>=1.59.0,<2.0.0)"] +opentelemetry = ["opentelemetry-api (>=1.11.1,<2.0.0)", "opentelemetry-sdk (>=1.11.1,<2.0.0)"] + [[package]] name = "terminado" version = "0.17.1" description = "Tornado websocket backend for the Xterm.js Javascript terminal emulator library." -category = "main" optional = true python-versions = ">=3.7" files = [ @@ -3348,7 +3252,6 @@ test = ["pre-commit", "pytest (>=7.0)", "pytest-timeout"] name = "tinycss2" version = "1.2.1" description = "A tiny CSS parser" -category = "main" optional = true python-versions = ">=3.7" files = [ @@ -3367,7 +3270,6 @@ test = ["flake8", "isort", "pytest"] name = "tomli" version = "2.0.1" description = "A lil' TOML parser" -category = "main" optional = false python-versions = ">=3.7" files = [ @@ -3379,7 +3281,6 @@ files = [ name = "toolz" version = "0.12.0" description = "List processing tools and functional utilities" -category = "main" optional = false python-versions = ">=3.5" files = [ @@ -3391,7 +3292,6 @@ files = [ name = "tornado" version = "6.2" description = "Tornado is a Python web framework and asynchronous networking library, originally developed at FriendFeed." -category = "main" optional = false python-versions = ">= 3.7" files = [ @@ -3412,7 +3312,6 @@ files = [ name = "tqdm" version = "4.64.1" description = "Fast, Extensible Progress Meter" -category = "main" optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,>=2.7" files = [ @@ -3433,7 +3332,6 @@ telegram = ["requests"] name = "traitlets" version = "5.13.0" description = "Traitlets Python configuration system" -category = "main" optional = true python-versions = ">=3.8" files = [ @@ -3445,11 +3343,21 @@ files = [ docs = ["myst-parser", "pydata-sphinx-theme", "sphinx"] test = ["argcomplete (>=3.0.3)", "mypy (>=1.6.0)", "pre-commit", "pytest (>=7.0,<7.5)", "pytest-mock", "pytest-mypy-testing"] +[[package]] +name = "types-protobuf" +version = "4.24.0.20240302" +description = "Typing stubs for protobuf" +optional = true +python-versions = ">=3.8" +files = [ + {file = "types-protobuf-4.24.0.20240302.tar.gz", hash = "sha256:f22c00cc0cea9722e71e14d389bba429af9e35a74a949719c167203a5abbe2e4"}, + {file = "types_protobuf-4.24.0.20240302-py3-none-any.whl", hash = "sha256:5c607990f50f14606c2edaf379f8acc7418fef1451b227aa3c6a8a2cbc6ff14a"}, +] + [[package]] name = "types-python-dateutil" version = "2.8.19.14" description = "Typing stubs for python-dateutil" -category = "main" optional = true python-versions = "*" files = [ @@ -3461,7 +3369,6 @@ files = [ name = "typing-extensions" version = "4.4.0" description = "Backported and Experimental Type Hints for Python 3.7+" -category = "main" optional = false python-versions = ">=3.7" files = [ @@ -3473,7 +3380,6 @@ files = [ name = "tzdata" version = "2022.7" description = "Provider of IANA time zone data" -category = "main" optional = false python-versions = ">=2" files = [ @@ -3485,7 +3391,6 @@ files = [ name = "tzlocal" version = "4.2" description = "tzinfo object for the local timezone" -category = "main" optional = false python-versions = ">=3.6" files = [ @@ -3506,7 +3411,6 @@ test = ["pytest (>=4.3)", "pytest-mock (>=3.3)"] name = "uri-template" version = "1.3.0" description = "RFC 6570 URI Template Processor" -category = "main" optional = true python-versions = ">=3.7" files = [ @@ -3521,7 +3425,6 @@ dev = ["flake8", "flake8-annotations", "flake8-bandit", "flake8-bugbear", "flake name = "urllib3" version = "1.26.13" description = "HTTP library with thread-safe connection pooling, file post, and more." -category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*" files = [ @@ -3538,7 +3441,6 @@ socks = ["PySocks (>=1.5.6,!=1.5.7,<2.0)"] name = "viztracer" version = "0.15.6" description = "A debugging and profiling tool that can trace and visualize python code execution" -category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -3590,7 +3492,6 @@ full = ["orjson", "rich"] name = "warcio" version = "1.7.4" description = "Streaming WARC (and ARC) IO library" -category = "main" optional = false python-versions = "*" files = [ @@ -3605,7 +3506,6 @@ six = "*" name = "wcwidth" version = "0.2.8" description = "Measures the displayed width of unicode strings in a terminal" -category = "main" optional = true python-versions = "*" files = [ @@ -3617,7 +3517,6 @@ files = [ name = "webcolors" version = "1.13" description = "A library for working with the color formats defined by HTML and CSS." -category = "main" optional = true python-versions = ">=3.7" files = [ @@ -3633,7 +3532,6 @@ tests = ["pytest", "pytest-cov"] name = "webencodings" version = "0.5.1" description = "Character encoding aliases for legacy web content" -category = "main" optional = true python-versions = "*" files = [ @@ -3645,7 +3543,6 @@ files = [ name = "websocket-client" version = "1.6.4" description = "WebSocket client for Python with low level API options" -category = "main" optional = true python-versions = ">=3.8" files = [ @@ -3662,7 +3559,6 @@ test = ["websockets"] name = "werkzeug" version = "2.2.2" description = "The comprehensive WSGI web application library." -category = "main" optional = true python-versions = ">=3.7" files = [ @@ -3680,7 +3576,6 @@ watchdog = ["watchdog"] name = "zict" version = "2.2.0" description = "Mutable mapping tools" -category = "main" optional = false python-versions = ">=3.7" files = [ @@ -3695,7 +3590,6 @@ heapdict = "*" name = "zipp" version = "3.11.0" description = "Backport of pathlib-compatible object wrapper for zip files" -category = "main" optional = false python-versions = ">=3.7" files = [ @@ -3708,10 +3602,11 @@ docs = ["furo", "jaraco.packaging (>=9)", "jaraco.tidelift (>=1.4)", "rst.linker testing = ["flake8 (<5)", "func-timeout", "jaraco.functools", "jaraco.itertools", "more-itertools", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=1.3)", "pytest-flake8", "pytest-mypy (>=0.9.1)"] [extras] -dataviz = ["altair", "numpy", "pandas", "flask"] +datapipeline = ["temporalio"] +dataviz = ["altair", "flask", "numpy", "pandas"] research = ["jupyterlab"] [metadata] lock-version = "2.0" python-versions = ">=3.8,<4" -content-hash = "dd5586d165d8e6d333c44d10223f88126f09700de8509a9674adc28adbc584ee" +content-hash = "d9d5bf32044045c897156b1c51d94161503c05395239b74a31fc838d74de9e34" diff --git a/pyproject.toml b/pyproject.toml index 6e6b5f0d..7e585242 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -24,13 +24,15 @@ tabulate = "^0.9.0" warcio = "^1.7.4" msgpack = "^1.0.4" statsd = "^4.0.1" +click-loglevel = "^0.5.0" + dask = {extras = ["complete"], version = "^2023.3.2"} altair = {version = "^4.2.0", optional = true} numpy = {version = "^1.23.5", optional = true, python = ">=3.8"} pandas = {version = "^2.0.0", optional = true, python = ">=3.8"} flask = {version = "^2.2.2", optional = true} jupyterlab = {version = "^4.0.7", optional = true} -click-loglevel = "^0.5.0" +temporalio = {version = "^1.5.1", optional = true} [tool.poetry.dev-dependencies] pytest = ">=7.2" @@ -45,10 +47,12 @@ viztracer = "^0.15.6" [tool.poetry.scripts] oonidata = 'oonidata.cli:cli' +datapipeline = 'oonidata.datapipeline.run_worker:main' [tool.poetry.extras] dataviz = ["altair", "numpy", "pandas", "flask"] research = ["jupyterlab"] +datapipeline = ["temporalio"] [build-system] requires = ["poetry-core>=1.0.0"]