[3/n] Lightweight Ray AIR API refactor (#38379)
Continuation of #37123
pcmoritz committed Aug 14, 2023
1 parent 5435807 commit d9dcc3f
Showing 52 changed files with 469 additions and 339 deletions.
10 changes: 5 additions & 5 deletions doc/source/data/batch_inference.rst
@@ -423,15 +423,15 @@ Suppose your cluster has 4 nodes, each with 16 CPUs. To limit to at most
Using models from Ray Train
---------------------------

- Models that have been trained with :ref:`Ray Train <train-docs>` can then be used for batch inference with :ref:`Ray Data <data>` via the :class:`Checkpoint <ray.air.checkpoint.Checkpoint>` that is returned by :ref:`Ray Train <train-docs>`.
+ Models that have been trained with :ref:`Ray Train <train-docs>` can then be used for batch inference with :ref:`Ray Data <data>` via the :class:`Checkpoint <ray.train.Checkpoint>` that is returned by :ref:`Ray Train <train-docs>`.

**Step 1:** Train a model with :ref:`Ray Train <train-docs>`.

.. testcode::

import ray
+ from ray.train import ScalingConfig
from ray.train.xgboost import XGBoostTrainer
- from ray.air.config import ScalingConfig

dataset = ray.data.read_csv("s3://anonymous@air-example-data/breast_cancer.csv")
train_dataset, valid_dataset = dataset.train_test_split(test_size=0.3)
@@ -456,13 +456,13 @@ Models that have been trained with :ref:`Ray Train <train-docs>` can then be use

...

- **Step 2:** Extract the :class:`Checkpoint <ray.air.checkpoint.Checkpoint>` from the training :class:`Result <ray.air.Result>`.
+ **Step 2:** Extract the :class:`Checkpoint <ray.train.Checkpoint>` from the training :class:`Result <ray.train.Result>`.

.. testcode::

checkpoint = result.checkpoint

- **Step 3:** Use Ray Data for batch inference. To load in the model from the :class:`Checkpoint <ray.air.checkpoint.Checkpoint>` inside the Python class, use one of the framework-specific Checkpoint classes.
+ **Step 3:** Use Ray Data for batch inference. To load in the model from the :class:`Checkpoint <ray.train.Checkpoint>` inside the Python class, use one of the framework-specific Checkpoint classes.

In this case, we use the :class:`XGBoostCheckpoint <ray.train.xgboost.XGBoostCheckpoint>` to load the model.

@@ -475,7 +475,7 @@ The rest of the logic looks the same as in the `Quickstart <#quickstart>`_.
import numpy as np
import xgboost

- from ray.air import Checkpoint
+ from ray.train import Checkpoint
from ray.train.xgboost import XGBoostCheckpoint

test_dataset = valid_dataset.drop_columns(["target"])
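For reference, a minimal end-to-end sketch of the post-refactor inference step. The `XGBoostPredictor` class name, the actor-pool size, and the constructor-argument plumbing are illustrative assumptions, not part of this diff; the sketch also assumes the Ray 2.6-era `XGBoostCheckpoint.from_checkpoint()` and `get_model()` API.

import pandas as pd
import xgboost

import ray
from ray.train.xgboost import XGBoostCheckpoint


class XGBoostPredictor:
    # Hypothetical callable class for Dataset.map_batches.
    def __init__(self, checkpoint):
        # Re-interpret the generic checkpoint as a framework-specific one,
        # then pull out the trained xgboost.Booster.
        self.model = XGBoostCheckpoint.from_checkpoint(checkpoint).get_model()

    def __call__(self, batch: pd.DataFrame) -> pd.DataFrame:
        return pd.DataFrame({"predictions": self.model.predict(xgboost.DMatrix(batch))})


# `result` and `test_dataset` come from the surrounding steps above.
predictions = test_dataset.map_batches(
    XGBoostPredictor,
    fn_constructor_args=[result.checkpoint],
    compute=ray.data.ActorPoolStrategy(size=2),
    batch_format="pandas",
)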
@@ -224,7 +224,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"You may notice that we are not using an AIR {class}`Predictor <ray.train.predictor.Predictor>` here. This is because AIR does not implement an out of the box Predictor for Diffusers. We could implement it ourselves, but Predictors are mainly intended to be used with AIR {class}`Checkpoints <ray.air.checkpoint.Checkpoint>`, and those are not necessary for this example. See {class}`ray.train.predictor.Predictor` for more information and usage examples."
"You may notice that we are not using an AIR {class}`Predictor <ray.train.predictor.Predictor>` here. This is because AIR does not implement an out of the box Predictor for Diffusers. We could implement it ourselves, but Predictors are mainly intended to be used with {class}`Checkpoints <ray.train.Checkpoint>`, and those are not necessary for this example. See {class}`ray.train.predictor.Predictor` for more information and usage examples."
]
}
],
13 changes: 5 additions & 8 deletions doc/source/ray-air/examples/torch_detection.ipynb
@@ -825,10 +825,14 @@
}
],
"source": [
"from ray.air.config import DatasetConfig, ScalingConfig\n",
"from ray.train import ScalingConfig\n",
"from ray.train.torch import TorchTrainer\n",
"\n",
"\n",
"# The following transform operation is lazy.\n",
"# It will be re-run every epoch.\n",
"train_dataset = per_epoch_preprocessor.transform(train_dataset)\n",
"\n",
"trainer = TorchTrainer(\n",
" train_loop_per_worker=train_loop_per_worker,\n",
" train_loop_config={\n",
@@ -842,13 +846,6 @@
" },\n",
" scaling_config=ScalingConfig(num_workers=4, use_gpu=True),\n",
" datasets={\"train\": train_dataset},\n",
" dataset_config={\n",
" # Don't augment test images. Only apply `per_epoch_preprocessor` to the train\n",
" # set.\n",
" \"train\": DatasetConfig(\n",
" per_epoch_preprocessor=per_epoch_preprocessor\n",
" ),\n",
" },\n",
" preprocessor=preprocessor,\n",
")\n",
"results = trainer.fit()"
2 changes: 1 addition & 1 deletion doc/source/ray-air/examples/xgboost_starter.py
@@ -22,7 +22,7 @@
# __air_xgb_preprocess_end__

# __air_xgb_train_start__
- from ray.air.config import ScalingConfig
+ from ray.train import ScalingConfig
from ray.train.xgboost import XGBoostTrainer

trainer = XGBoostTrainer(
21 changes: 5 additions & 16 deletions doc/source/ray-contribute/docs.ipynb
@@ -184,15 +184,11 @@
"cell_type": "code",
"execution_count": null,
"id": "ba88d95f",
"metadata": {
"vscode": {
"languageId": "python"
}
},
"metadata": {},
"outputs": [],
"source": [
"# __function_api_start__\n",
"from ray.air import session\n",
"from ray import train\n",
"\n",
"\n",
"def objective(x, a, b): # Define an objective function.\n",
@@ -204,7 +200,7 @@
" for x in range(20): # \"Train\" for 20 iterations and compute intermediate scores.\n",
" score = objective(x, config[\"a\"], config[\"b\"])\n",
"\n",
" session.report({\"score\": score}) # Send the score to Tune.\n",
" train.report({\"score\": score}) # Send the score to Tune.\n",
"\n",
"\n",
"# __function_api_end__"
@@ -320,10 +316,7 @@
"metadata": {
"tags": [
"hide-cell"
- ],
- "vscode": {
- "languageId": "python"
- }
+ ]
},
"outputs": [],
"source": [
@@ -385,11 +378,7 @@
"cell_type": "code",
"execution_count": null,
"id": "8412103e",
"metadata": {
"vscode": {
"languageId": "python"
}
},
"metadata": {},
"outputs": [],
"source": [
"num_workers = 8\n",
@@ -18,8 +18,7 @@
import boto3
import mlflow
import pandas as pd
- from ray.air.config import ScalingConfig
- from ray.train import DataConfig
+ from ray.train import DataConfig, ScalingConfig
from ray.train.torch.torch_trainer import TorchTrainer
import torch
import torch.nn as nn
10 changes: 5 additions & 5 deletions doc/source/train/distributed-tensorflow-keras.rst
@@ -85,7 +85,7 @@ that you can setup like this:

.. code-block:: python
- from ray.air import ScalingConfig
+ from ray.train import ScalingConfig
from ray.train.tensorflow import TensorflowTrainer
# For GPU Training, set `use_gpu` to True.
use_gpu = False
@@ -99,7 +99,7 @@ To customize the backend setup, you can pass a

.. code-block:: python
- from ray.air import ScalingConfig
+ from ray.train import ScalingConfig
from ray.train.tensorflow import TensorflowTrainer, TensorflowConfig
trainer = TensorflowTrainer(
@@ -224,7 +224,7 @@ training function. This will cause the checkpoint state from the distributed
workers to be saved on the ``Trainer`` (where your python script is executed).

The latest saved checkpoint can be accessed through the ``checkpoint`` attribute of
- the :py:class:`~ray.air.result.Result`, and the best saved checkpoints can be accessed by the ``best_checkpoints``
+ the :py:class:`~ray.train.Result`, and the best saved checkpoints can be accessed by the ``best_checkpoints``
attribute.

Concrete examples are provided to demonstrate how checkpoints (model weights but not models) are saved
@@ -235,7 +235,7 @@ appropriately in distributed training.
:emphasize-lines: 23
from ray import train
- from ray.air import Checkpoint, ScalingConfig
+ from ray.train import Checkpoint, ScalingConfig
from ray.train.tensorflow import TensorflowTrainer
import numpy as np
@@ -285,7 +285,7 @@ Loading checkpoints
:emphasize-lines: 15, 21, 22, 25, 26, 27, 30
from ray import train
- from ray.air import Checkpoint, ScalingConfig
+ from ray.train import Checkpoint, ScalingConfig
from ray.train.tensorflow import TensorflowTrainer
import numpy as np
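As the hunks above show, only the `Checkpoint` and `ScalingConfig` imports move; the dict-based checkpointing pattern in these docs is unchanged. A runnable sketch of the save side, where the toy model and data are illustrative assumptions (a real multi-worker setup would also build the model inside a tf.distribute strategy scope):

import numpy as np
from tensorflow import keras

from ray import train
from ray.train import Checkpoint, ScalingConfig
from ray.train.tensorflow import TensorflowTrainer


def train_func():
    model = keras.Sequential([keras.layers.Dense(1, input_shape=(1,))])
    model.compile(optimizer="sgd", loss="mse")
    x = np.arange(8, dtype="float32").reshape(-1, 1)
    y = 2 * x
    for _ in range(3):
        history = model.fit(x, y, verbose=0)
        # Report a dict-based checkpoint alongside metrics each epoch.
        train.report(
            {"loss": history.history["loss"][-1]},
            checkpoint=Checkpoint.from_dict({"weights": model.get_weights()}),
        )


trainer = TensorflowTrainer(train_func, scaling_config=ScalingConfig(num_workers=2))
result = trainer.fit()
print(result.checkpoint)  # the latest checkpoint, per the paragraph above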
4 changes: 2 additions & 2 deletions doc/source/train/distributed-xgboost-lightgbm.rst
@@ -65,7 +65,7 @@ both implement checkpointing out of the box and will create
:class:`~ray.train.xgboost.XGBoostCheckpoint`\s and :class:`~ray.train.lightgbm.LightGBMCheckpoint`\s
respectively.

- The only required change is to configure :class:`~ray.air.CheckpointConfig` to set
+ The only required change is to configure :class:`~ray.train.CheckpointConfig` to set
the checkpointing frequency. For example, the following configuration will
save a checkpoint on every boosting round and will only keep the latest checkpoint:
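(The configuration block itself is collapsed in this diff. A hedged sketch of what such a configuration looks like, assuming the `checkpoint_frequency` and `num_to_keep` fields of `ray.train.CheckpointConfig`:)

from ray.train import CheckpointConfig, RunConfig

run_config = RunConfig(
    checkpoint_config=CheckpointConfig(
        checkpoint_frequency=1,  # save on every boosting round
        num_to_keep=1,  # retain only the latest checkpoint
    )
)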

@@ -86,7 +86,7 @@ How to scale out training?
--------------------------

The benefit of using Ray Train is that you can seamlessly scale up your training by
- adjusting the :class:`ScalingConfig <ray.air.config.ScalingConfig>`.
+ adjusting the :class:`ScalingConfig <ray.train.ScalingConfig>`.

.. note::
Ray Train does not modify or otherwise alter the working
8 changes: 4 additions & 4 deletions doc/source/train/doc_code/gbdt_user_guide.py
@@ -3,8 +3,8 @@

# __xgboost_start__
import ray
+ from ray.train import ScalingConfig
from ray.train.xgboost import XGBoostTrainer
- from ray.air.config import ScalingConfig

# Load data.
dataset = ray.data.read_csv("s3://anonymous@air-example-data/breast_cancer.csv")
@@ -44,7 +44,7 @@
# __xgb_detail_intro_end__

# __xgb_detail_scaling_start__
- from ray.air.config import ScalingConfig
+ from ray.train import ScalingConfig

scaling_config = ScalingConfig(
# Number of workers to use for data parallelism.
@@ -79,8 +79,8 @@

# __lightgbm_start__
import ray
+ from ray.train import ScalingConfig
from ray.train.lightgbm import LightGBMTrainer
- from ray.air.config import ScalingConfig

# Load data.
dataset = ray.data.read_csv("s3://anonymous@air-example-data/breast_cancer.csv")
Expand Down Expand Up @@ -120,7 +120,7 @@
# __lgbm_detail_intro_end__

# __lgbm_detail_scaling_start__
- from ray.air.config import ScalingConfig
+ from ray.train import ScalingConfig

scaling_config = ScalingConfig(
# Number of workers to use for data parallelism.
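Both `ScalingConfig` blocks above are truncated by the diff view; a completed sketch with illustrative values, using only documented fields:

from ray.train import ScalingConfig

scaling_config = ScalingConfig(
    # Number of workers to use for data parallelism.
    num_workers=2,
    # Whether to schedule one GPU per worker.
    use_gpu=False,
    # Illustrative per-worker resource request.
    resources_per_worker={"CPU": 2},
)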
@@ -227,8 +227,8 @@
"metadata": {},
"outputs": [],
"source": [
"from ray.train import RunConfig, ScalingConfig, CheckpointConfig\n",
"from ray.train.lightning import LightningTrainer, LightningConfigBuilder\n",
"from ray.air.config import RunConfig, ScalingConfig, CheckpointConfig\n",
"\n",
"# Define the configs for LightningTrainer\n",
"lightning_config = (\n",
Expand All @@ -252,7 +252,7 @@
":::{note}\n",
"{meth}`LightningConfigBuilder.checkpointing() <ray.train.lightning.LightningConfigBuilder.checkpointing>` creates a [ModelCheckpoint](https://lightning.ai/docs/pytorch/stable/api/lightning.pytorch.callbacks.ModelCheckpoint.html#lightning.pytorch.callbacks.ModelCheckpoint) callback. This callback defines the checkpoint frequency and saves checkpoint files in Lightning style. \n",
"\n",
"If you want to save AIR checkpoints for Batch Prediction, please also provide an AIR {class}`CheckpointConfig <ray.air.config.CheckpointConfig>`.\n",
"If you want to save AIR checkpoints for Batch Prediction, please also provide a {class}`CheckpointConfig <ray.train.CheckpointConfig>`.\n",
":::"
]
},
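A sketch of the pairing the note describes (Lightning-style checkpointing through the builder plus a Train-side `CheckpointConfig`), where the monitored metric name and keep-count are assumptions:

from ray.train import CheckpointConfig, RunConfig
from ray.train.lightning import LightningConfigBuilder

# Lightning-side checkpointing: these kwargs are forwarded to Lightning's
# ModelCheckpoint callback.
lightning_config = (
    LightningConfigBuilder()
    .checkpointing(monitor="val_loss", save_top_k=2, mode="min")
    .build()
)

# Train-side checkpoint bookkeeping, as the note recommends.
run_config = RunConfig(
    checkpoint_config=CheckpointConfig(
        num_to_keep=2,
        checkpoint_score_attribute="val_loss",
        checkpoint_score_order="min",
    ),
)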
@@ -215,7 +215,7 @@
"metadata": {},
"outputs": [],
"source": [
"from ray.air.config import RunConfig, ScalingConfig\n",
"from ray.train import RunConfig, ScalingConfig\n",
"from ray.train.lightning import LightningConfigBuilder, LightningTrainer\n",
"\n",
"builder = LightningConfigBuilder()\n",