From 13dff2b1fb196ba43a7700e3839d8ae01384a658 Mon Sep 17 00:00:00 2001
From: jillnogold <88145832+jillnogold@users.noreply.github.com>
Date: Wed, 6 Jul 2022 09:52:05 +0300
Subject: [PATCH] [Docs] Add mount_v3io_to_executor to with_igz_spark (#2090)

---
 docs/feature-store/feature-sets.md | 6 ++++--
 docs/runtimes/spark-operator.ipynb | 6 +++++-
 2 files changed, 9 insertions(+), 3 deletions(-)

diff --git a/docs/feature-store/feature-sets.md b/docs/feature-store/feature-sets.md
index ec236cf92c9..4242b7eeba1 100644
--- a/docs/feature-store/feature-sets.md
+++ b/docs/feature-store/feature-sets.md
@@ -190,10 +190,12 @@ To learn more about deploy_ingestion_service go to {py:class}`~mlrun.feature_sto
 
 You can schedule an ingestion job for a feature set on an ongoing basis. The first scheduled job runs on all the data in the source and the subsequent jobs ingest only the deltas since the previous run (from the last timestamp of the previous run until `datetime.now`).
 Example:
-`cron_trigger = "* */1 * * *" #will run every hour
+```
+cron_trigger = "* */1 * * *" # will run every hour
 source = ParquetSource("myparquet", path=path, time_field="time", schedule=cron_trigger)
 feature_set = fs.FeatureSet(name=name, entities=[fs.Entity("first_name")], timestamp_key="time",)
-fs.ingest(feature_set, source, run_config=fs.RunConfig())`
+fs.ingest(feature_set, source, run_config=fs.RunConfig())
+```
 
 The default value for the `overwrite` parameter in the ingest function for scheduled ingest is `False`, meaning that the target from the previous ingest is not deleted.
 For the storey engine, the feature is currently implemented for ParquetSource only. (CsvSource will be supported in a future release). For Spark engine, other sources are also supported.
diff --git a/docs/runtimes/spark-operator.ipynb b/docs/runtimes/spark-operator.ipynb
index bedf0919690..9542efd467d 100644
--- a/docs/runtimes/spark-operator.ipynb
+++ b/docs/runtimes/spark-operator.ipynb
@@ -54,6 +54,10 @@
     "# adds fuse, daemon & iguazio's jars support\n",
     "sj.with_igz_spark() \n",
     "\n",
+    "# Alternatively, move the volume mounts to the driver- and executor-specific fields and\n",
+    "# leave the v3io mounts out of the executor mounts by passing mount_v3io_to_executor=False\n",
+    "# sj.with_igz_spark(mount_v3io_to_executor=False)\n",
+    "\n",
     "# set spark driver volume mount\n",
     "# sj.function.with_driver_host_path_volume(\"/host/path\", \"/mount/path\")\n",
     "\n",
@@ -127,7 +131,7 @@
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "Python 3",
+   "display_name": "Python 3 (ipykernel)",
    "language": "python",
    "name": "python3"
   },
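For context, a minimal usage sketch of the option this patch documents. Only `with_igz_spark(mount_v3io_to_executor=False)` comes from the patch itself; the function name, script path, and resource values below are illustrative assumptions, and the surrounding calls follow the MLRun Spark-operator runtime shown in the notebook being edited.

```python
# Sketch only: names and values are placeholders, not part of this patch.
import mlrun

# Spark-operator runtime function; "spark_read_csv.py" is a hypothetical PySpark script
sj = mlrun.new_function(name="my-spark-job", kind="spark", command="spark_read_csv.py")

# driver/executor resource requests (placeholder values)
sj.with_driver_requests(cpu=1, mem="512m")
sj.with_executor_requests(cpu=1, mem="512m")

# add fuse, daemon & Iguazio's jars support, but keep the v3io mounts
# off the executors, as described by the comment this patch adds
sj.with_igz_spark(mount_v3io_to_executor=False)

# submit the job
sj.run(artifact_path="/User")
```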