NewTask -> new_task + added docstring (#442)

mlrun · Sep 22, 2020 · dce2ee3 · dce2ee3
1 parent 61b0c2c
commit dce2ee3
Show file tree

Hide file tree

Showing 24 changed files with 122 additions and 55 deletions.
diff --git a/README.md b/README.md
@@ -167,7 +167,7 @@ You run the task on a "job" function, and print the result output (in this case,
 For more information and examples, see the [**examples/mlrun_basics.ipynb**](examples/mlrun_basics.ipynb) notebook.
 ```python
 # Create a task and set its attributes
-task = NewTask(handler=handler, name='demo', params={'p1': 5})
+task = new_task(handler=handler, name='demo', params={'p1': 5})
 task.with_secrets('file', 'secrets.txt').set_label('type', 'demo')
 
 run = new_function(command='myfile.py', kind='job').run(task)
@@ -361,7 +361,7 @@ For example, the following code demonstrates how to use hyperparameters to run t
          "gamma":     [0.0, 0.1, 0.2, 0.3],
          }
 
-    task = NewTask(handler=xgb_train, out_path='/User/mlrun/data').with_hyper_params(parameters, 'max.accuracy')
+    task = new_task(handler=xgb_train, out_path='/User/mlrun/data').with_hyper_params(parameters, 'max.accuracy')
     run = run_local(task)
 ```
 
@@ -377,7 +377,7 @@ mlrun run --name train_hyper -x p1="[3,7,5]" -x p2="[5,2,9]" --out-path '/User/m
 You can also use a parameters file if you want to control the parameter combinations or if the parameters are more complex.
 The following code from the example [**mlrun_basics.ipynb**](examples/mlrun_basics.ipynb) notebook demonstrates how to run a task that uses a CSV parameters file (**params.csv** in the current directory):
 ```python
-    task = NewTask(handler=xgb_train).with_param_file('params.csv', 'max.accuracy')
+    task = new_task(handler=xgb_train).with_param_file('params.csv', 'max.accuracy')
     run = run_local(task)
 ```
 

diff --git a/docs/end-to-end-pipeline.rst b/docs/end-to-end-pipeline.rst
@@ -85,7 +85,7 @@ git tracking on that
 .. code:: ipython3
 
     from os import path
-    from mlrun import run_local, NewTask, mlconf, import_function, mount_v3io
+    from mlrun import run_local, mlconf, import_function, mount_v3io
     mlconf.dbpath = mlconf.dbpath or 'http://mlrun-api:8080'
     
     # specify artifacts target location

diff --git a/docs/job-submission-and-tracking.md b/docs/job-submission-and-tracking.md
@@ -72,7 +72,7 @@ You run the task on a "job" function, and print the result output (in this case,
 For more information and examples, see the [**Examples section**](examples.html).
 ```python
 # Create a task and set its attributes
-task = NewTask(handler=handler, name='demo', params={'p1': 5})
+task = new_task(handler=handler, name='demo', params={'p1': 5})
 task.with_secrets('file', 'secrets.txt').set_label('type', 'demo')
 
 run = new_function(command='myfile.py', kind='job').run(task)
@@ -280,7 +280,7 @@ For example, the following code demonstrates how to use hyperparameters to run t
          "gamma":     [0.0, 0.1, 0.2, 0.3],
          }
 
-    task = NewTask(handler=xgb_train, out_path='/User/mlrun/data').with_hyper_params(parameters, 'max.accuracy')
+    task = new_task(handler=xgb_train, out_path='/User/mlrun/data').with_hyper_params(parameters, 'max.accuracy')
     run = run_local(task)
 ```
 
@@ -296,7 +296,7 @@ mlrun run --name train_hyper -x p1="[3,7,5]" -x p2="[5,2,9]" --out-path '/User/m
 You can also use a parameters file if you want to control the parameter combinations or if the parameters are more complex.
 The following code from the [**Examples section**](examples.html) demonstrates how to run a task that uses a CSV parameters file (**params.csv** in the current directory):
 ```python
-    task = NewTask(handler=xgb_train).with_param_file('params.csv', 'max.accuracy')
+    task = new_task(handler=xgb_train).with_param_file('params.csv', 'max.accuracy')
     run = run_local(task)
 ```
 

diff --git a/docs/load-from-marketplace.md b/docs/load-from-marketplace.md
@@ -45,7 +45,7 @@ print(f'Project path: {project_path}\nProject name: {project_name}')
 The artifact path is the default path for saving all the artifacts that the functions generate:
 
 ```python
-from mlrun import run_local, NewTask, mlconf, import_function, mount_v3io
+from mlrun import run_local, mlconf, import_function, mount_v3io
 
 # Target location for storing pipeline artifacts
 artifact_path = path.abspath('jobs')

diff --git a/examples/load-project.ipynb b/examples/load-project.ipynb
@@ -518,8 +518,8 @@
     }
    ],
    "source": [
-    "from mlrun import run_local, NewTask\n",
-    "run_local(NewTask(handler='iris_generator'), proj.func('xgb'), workdir='./')"
+    "from mlrun import run_local, new_task\n",
+    "run_local(new_task(handler='iris_generator'), proj.func('xgb'), workdir='./')"
    ]
   },
   {

diff --git a/examples/mlrun_basics.ipynb b/examples/mlrun_basics.ipynb
@@ -97,7 +97,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "from mlrun import run_local, RunTemplate, NewTask, mlconf\n",
+    "from mlrun import run_local, RunTemplate, new_task, mlconf\n",
     "from os import path\n",
     "mlconf.dbpath = mlconf.dbpath or './'"
    ]
@@ -177,7 +177,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "task = NewTask(name='demo', params={'p1': 5}, artifact_path=artifact_path).with_secrets('file', 'secrets.txt').set_label('type', 'demo')"
+    "task = new_task(name='demo', params={'p1': 5}, artifact_path=artifact_path).with_secrets('file', 'secrets.txt').set_label('type', 'demo')"
    ]
   },
   {
@@ -1723,7 +1723,7 @@
     }
    ],
    "source": [
-    "task = NewTask(name='demo2', handler=handler, artifact_path=artifact_path).with_params(p1=7)\n",
+    "task = new_task(name='demo2', handler=handler, artifact_path=artifact_path).with_params(p1=7)\n",
     "run = run_local(task)"
    ]
   },
@@ -2025,7 +2025,7 @@
     }
    ],
    "source": [
-    "task = NewTask(name='demo2', handler=handler, artifact_path=artifact_path).with_param_file('params.csv', 'max.accuracy')\n",
+    "task = new_task(name='demo2', handler=handler, artifact_path=artifact_path).with_param_file('params.csv', 'max.accuracy')\n",
     "run = run_local(task)"
    ]
   },
@@ -2058,4 +2058,4 @@
  },
  "nbformat": 4,
  "nbformat_minor": 4
-}
+}
diff --git a/examples/mlrun_dask.ipynb b/examples/mlrun_dask.ipynb
@@ -85,7 +85,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "from mlrun import new_function, mlconf, code_to_function, mount_v3io, NewTask\n",
+    "from mlrun import new_function, mlconf, code_to_function, mount_v3io, new_task\n",
     "#mlconf.dbpath = 'http://mlrun-api:8080'"
    ]
   },
@@ -562,7 +562,7 @@
     "@dsl.pipeline(name=\"dask_pipeline\")\n",
     "def dask_pipe(x=1,y=10):\n",
     "    # use_db option will use a function (DB) pointer instead of adding the function spec to the YAML\n",
-    "    myrun = dsf.as_step(NewTask(handler=hndlr, name=\"dask_pipeline\", params={'x': x, 'y': y}), use_db=True)\n",
+    "    myrun = dsf.as_step(new_task(handler=hndlr, name=\"dask_pipeline\", params={'x': x, 'y': y}), use_db=True)\n",
     "    \n",
     "    # if the step (dask client) need v3io access u should add: .apply(mount_v3io())\n",
     "    \n",

diff --git a/examples/mlrun_export_import.ipynb b/examples/mlrun_export_import.ipynb
@@ -234,7 +234,7 @@
     "\n",
     "# create and run the task\n",
     "images_path = path.abspath('images')\n",
-    "open_archive_task = mlrun.NewTask('download',  \n",
+    "open_archive_task = mlrun.new_task('download',\n",
     "    params={'target_dir': images_path},\n",
     "    inputs={'archive_url': 'http://iguazio-sample-data.s3.amazonaws.com/catsndogs.zip'})"
    ]

diff --git a/examples/mlrun_jobs.ipynb b/examples/mlrun_jobs.ipynb
@@ -261,7 +261,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "from mlrun import run_local, code_to_function, mlconf, NewTask\n",
+    "from mlrun import run_local, code_to_function, mlconf, new_task\n",
     "from mlrun.platforms.other import auto_mount\n",
     "mlconf.dbpath = mlconf.dbpath or 'http://mlrun-api:8080'"
    ]
@@ -539,7 +539,7 @@
     }
    ],
    "source": [
-    "train_run = run_local(NewTask(handler=training, params={'p1': 5}, artifact_path=out))"
+    "train_run = run_local(new_task(handler=training, params={'p1': 5}, artifact_path=out))"
    ]
   },
   {
@@ -813,7 +813,7 @@
    "source": [
     "model = train_run.outputs['mymodel']\n",
     "\n",
-    "validation_run = run_local(NewTask(handler=validation, inputs={'model': model}, artifact_path=out))"
+    "validation_run = run_local(new_task(handler=validation, inputs={'model': model}, artifact_path=out))"
    ]
   },
   {
@@ -1057,7 +1057,7 @@
    "outputs": [],
    "source": [
     "# create the base task (common to both steps), and set the output path and experiment label\n",
-    "base_task = NewTask(artifact_path=out).set_label('stage', 'dev')"
+    "base_task = new_task(artifact_path=out).set_label('stage', 'dev')"
    ]
   },
   {
@@ -1296,7 +1296,7 @@
    ],
    "source": [
     "# run our training task, with hyper params, and select the one with max accuracy\n",
-    "train_task = NewTask(name='my-training', handler='training', params={'p1': 9}, base=base_task)\n",
+    "train_task = new_task(name='my-training', handler='training', params={'p1': 9}, base=base_task)\n",
     "train_run = trainer.run(train_task)"
    ]
   },

diff --git a/examples/mlrun_sparkk8s.ipynb b/examples/mlrun_sparkk8s.ipynb
@@ -29,7 +29,7 @@
    "source": [
     "import os\n",
     "from os.path import isfile, join\n",
-    "from mlrun import new_function, NewTask, mlconf\n",
+    "from mlrun import new_function, new_task, mlconf\n",
     "\n",
     "#Set the mlrun database/api\n",
     "mlconf.dbpath = 'http://mlrun-api:8080'\n",
@@ -83,7 +83,7 @@
     "         'query': QUERY,\n",
     "         'write_options': WRITE_OPTIONS}\n",
     "\n",
-    "SPARK_TASK = NewTask(params=PARAMS)"
+    "SPARK_TASK = new_task(params=PARAMS)"
    ]
   },
   {
@@ -192,4 +192,4 @@
  },
  "nbformat": 4,
  "nbformat_minor": 4
-}
+}
diff --git a/mlrun/__init__.py b/mlrun/__init__.py
@@ -29,7 +29,7 @@
 from .datastore import DataItem
 from .db import get_run_db
 from .execution import MLClientCtx
-from .model import RunTemplate, NewTask, RunObject
+from .model import RunTemplate, NewTask, new_task, RunObject
 from .platforms import mount_v3io, v3io_cred
 from .projects import load_project, new_project
 from .run import (

diff --git a/mlrun/model.py b/mlrun/model.py
@@ -13,12 +13,13 @@
 # limitations under the License.
 
 import inspect
+import warnings
 from copy import deepcopy
 from os import environ
 
+from .config import config
 from .db import get_run_db
 from .utils import dict_to_yaml, get_in, dict_to_json, get_artifact_target
-from .config import config
 
 
 class ModelObj:
@@ -440,6 +441,7 @@ def logs(self, watch=True, db=None):
         return state
 
 
+# TODO: remove in 0.7.0
 def NewTask(
     name=None,
     project=None,
@@ -457,7 +459,72 @@ def NewTask(
     secrets=None,
     base=None,
 ):
-    """Create new task"""
+    """Creates a new task - see new_task
+    """
+    warnings.warn(
+        "NewTask is deprecated and will be removed in 0.7.0, use new_task instead",
+        FutureWarning,
+    )
+    return new_task(
+        name,
+        project,
+        handler,
+        params,
+        hyper_params,
+        param_file,
+        selector,
+        tuning_strategy,
+        inputs,
+        outputs,
+        in_path,
+        out_path,
+        artifact_path,
+        secrets,
+        base,
+    )
+
+
+def new_task(
+    name=None,
+    project=None,
+    handler=None,
+    params=None,
+    hyper_params=None,
+    param_file=None,
+    selector=None,
+    tuning_strategy=None,
+    inputs=None,
+    outputs=None,
+    in_path=None,
+    out_path=None,
+    artifact_path=None,
+    secrets=None,
+    base=None,
+):
+    """Creates a new task
+
+    :param name:            task name
+    :param project:         task project
+    :param handler:         code entry-point/handler name
+    :param params:          input parameters (dict)
+    :param hyper_params:    dictionary of hyper parameters and list values, each
+                            hyper param holds a list of values, the run will be
+                            executed for every parameter combination (GridSearch)
+    :param param_file:      a csv file with parameter combinations, first row holds
+                            the parameter names, following rows hold param values
+    :param selector:        selection criteria for hyper params e.g. "max.accuracy"
+    :param tuning_strategy: selection strategy for hyper params e.g. list, grid, random
+    :param inputs:          dictionary of input objects + optional paths (if path is
+                            omitted the path will be the in_path/key)
+    :param outputs:         dictionary of output objects + optional paths (if path is
+                            omitted the path will be the out_path/key)
+    :param in_path:         default input path/url (prefix) for inputs
+    :param out_path:        default output path/url (prefix) for artifacts
+    :param artifact_path:   default artifact output path
+    :param secrets:         extra secrets specs, will be injected into the runtime
+                            e.g. ['file=<filename>', 'env=ENV_KEY1,ENV_KEY2']
+    :param base:            task instance to use as a base instead of a fresh new task instance
+    """
 
     if base:
         run = deepcopy(base)

diff --git a/mlrun/run.py b/mlrun/run.py
@@ -108,7 +108,7 @@ def run_local(
 
     e.g.:
            # define a task
-           task = NewTask(params={'p1': 8}, out_path=out_path)
+           task = new_task(params={'p1': 8}, out_path=out_path)
            # run
            run = run_local(task, command='src/training.py', workdir='src')
 
@@ -176,7 +176,7 @@ def function_to_module(code="", workdir=None, secrets=None):
     example:
 
         mod = mlrun.function_to_module('./examples/training.py')
-        task = mlrun.NewTask(inputs={'infile.txt': '../examples/infile.txt'})
+        task = mlrun.new_task(inputs={'infile.txt': '../examples/infile.txt'})
         context = mlrun.get_or_create_ctx('myfunc', spec=task)
         mod.my_job(context, p1=1, p2='x')
         print(context.to_yaml())

diff --git a/tests/system/examples/basics/test_basics.py b/tests/system/examples/basics/test_basics.py
@@ -4,7 +4,7 @@
 import numpy as np
 import pandas as pd
 
-from mlrun import run_local, NewTask
+from mlrun import run_local, new_task
 from mlrun.artifacts import PlotArtifact
 
 from tests.system.base import TestMLRunSystem
@@ -18,13 +18,13 @@ def custom_setup(self):
         # {{run.uid}} will be substituted with the run id, so output will be written to different directories per run
         output_path = str(self.results_path / "{{run.uid}}")
         self._basics_task = (
-            NewTask(name="demo", params={"p1": 5}, artifact_path=output_path)
+            new_task(name="demo", params={"p1": 5}, artifact_path=output_path)
             .with_secrets("file", self.assets_path / "secrets.txt")
             .set_label("type", "demo")
         )
 
         self._logger.debug("Creating inline task")
-        self._inline_task = NewTask(
+        self._inline_task = new_task(
             name="demo2",
             handler=self._get_inline_handler(),
             artifact_path=str(self.results_path / "{{run.uid}}"),

diff --git a/tests/system/examples/basics/test_db.py b/tests/system/examples/basics/test_db.py
@@ -1,4 +1,4 @@
-from mlrun import get_run_db, run_local, NewTask
+from mlrun import get_run_db, run_local, new_task
 
 from tests.system.base import TestMLRunSystem
 
@@ -16,7 +16,7 @@ def custom_setup(self):
         # {{run.uid}} will be substituted with the run id, so output will be written to different directories per run
         output_path = str(self.results_path / "{{run.uid}}")
         task = (
-            NewTask(name="demo", params={"p1": 5}, artifact_path=output_path)
+            new_task(name="demo", params={"p1": 5}, artifact_path=output_path)
             .with_secrets("file", self.assets_path / "secrets.txt")
             .set_label("type", "demo")
         )