[3/n] Lightweight Ray AIR API refactor (#38379)
Continuation of #37123
pcmoritz committed Aug 14, 2023
1 parent 5435807 commit d9dcc3f
Showing 52 changed files with 469 additions and 339 deletions.
10 changes: 5 additions & 5 deletions doc/source/data/batch_inference.rst
@@ -423,15 +423,15 @@ Suppose your cluster has 4 nodes, each with 16 CPUs. To limit to at most
Using models from Ray Train
---------------------------

- Models that have been trained with :ref:`Ray Train <train-docs>` can then be used for batch inference with :ref:`Ray Data <data>` via the :class:`Checkpoint <ray.air.checkpoint.Checkpoint>` that is returned by :ref:`Ray Train <train-docs>`.
+ Models that have been trained with :ref:`Ray Train <train-docs>` can then be used for batch inference with :ref:`Ray Data <data>` via the :class:`Checkpoint <ray.train.Checkpoint>` that is returned by :ref:`Ray Train <train-docs>`.

**Step 1:** Train a model with :ref:`Ray Train <train-docs>`.

.. testcode::

import ray
+ from ray.train import ScalingConfig
from ray.train.xgboost import XGBoostTrainer
- from ray.air.config import ScalingConfig

dataset = ray.data.read_csv("s3://anonymous@air-example-data/breast_cancer.csv")
train_dataset, valid_dataset = dataset.train_test_split(test_size=0.3)
@@ -456,13 +456,13 @@ Models that have been trained with :ref:`Ray Train <train-docs>` can then be use

...

- **Step 2:** Extract the :class:`Checkpoint <ray.air.checkpoint.Checkpoint>` from the training :class:`Result <ray.air.Result>`.
+ **Step 2:** Extract the :class:`Checkpoint <ray.train.Checkpoint>` from the training :class:`Result <ray.train.Result>`.

.. testcode::

checkpoint = result.checkpoint

- **Step 3:** Use Ray Data for batch inference. To load in the model from the :class:`Checkpoint <ray.air.checkpoint.Checkpoint>` inside the Python class, use one of the framework-specific Checkpoint classes.
+ **Step 3:** Use Ray Data for batch inference. To load in the model from the :class:`Checkpoint <ray.train.Checkpoint>` inside the Python class, use one of the framework-specific Checkpoint classes.

In this case, we use the :class:`XGBoostCheckpoint <ray.train.xgboost.XGBoostCheckpoint>` to load the model.

@@ -475,7 +475,7 @@ The rest of the logic looks the same as in the `Quickstart <#quickstart>`_.
import numpy as np
import xgboost

- from ray.air import Checkpoint
+ from ray.train import Checkpoint
from ray.train.xgboost import XGBoostCheckpoint

test_dataset = valid_dataset.drop_columns(["target"])
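For reference, a minimal end-to-end sketch of the post-refactor inference step. The `XGBoostPredictor` class name, the actor-pool size, and the constructor-argument plumbing are illustrative assumptions, not part of this diff; the sketch also assumes the Ray 2.6-era `XGBoostCheckpoint.from_checkpoint()` and `get_model()` API.

import pandas as pd
import xgboost

import ray
from ray.train.xgboost import XGBoostCheckpoint


class XGBoostPredictor:
    # Hypothetical callable class for Dataset.map_batches.
    def __init__(self, checkpoint):
        # Re-interpret the generic checkpoint as a framework-specific one,
        # then pull out the trained xgboost.Booster.
        self.model = XGBoostCheckpoint.from_checkpoint(checkpoint).get_model()

    def __call__(self, batch: pd.DataFrame) -> pd.DataFrame:
        return pd.DataFrame({"predictions": self.model.predict(xgboost.DMatrix(batch))})


# `result` and `test_dataset` come from the surrounding steps above.
predictions = test_dataset.map_batches(
    XGBoostPredictor,
    fn_constructor_args=[result.checkpoint],
    compute=ray.data.ActorPoolStrategy(size=2),
    batch_format="pandas",
)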
@@ -224,7 +224,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"You may notice that we are not using an AIR {class}`Predictor <ray.train.predictor.Predictor>` here. This is because AIR does not implement an out of the box Predictor for Diffusers. We could implement it ourselves, but Predictors are mainly intended to be used with AIR {class}`Checkpoints <ray.air.checkpoint.Checkpoint>`, and those are not necessary for this example. See {class}`ray.train.predictor.Predictor` for more information and usage examples."
"You may notice that we are not using an AIR {class}`Predictor <ray.train.predictor.Predictor>` here. This is because AIR does not implement an out of the box Predictor for Diffusers. We could implement it ourselves, but Predictors are mainly intended to be used with {class}`Checkpoints <ray.train.Checkpoint>`, and those are not necessary for this example. See {class}`ray.train.predictor.Predictor` for more information and usage examples."
]
}
],
13 changes: 5 additions & 8 deletions doc/source/ray-air/examples/torch_detection.ipynb
@@ -825,10 +825,14 @@
}
],
"source": [
"from ray.air.config import DatasetConfig, ScalingConfig\n",
"from ray.train import ScalingConfig\n",
"from ray.train.torch import TorchTrainer\n",
"\n",
"\n",
"# The following transform operation is lazy.\n",
"# It will be re-run every epoch.\n",
"train_dataset = per_epoch_preprocessor.transform(train_dataset)\n",
"\n",
"trainer = TorchTrainer(\n",
" train_loop_per_worker=train_loop_per_worker,\n",
" train_loop_config={\n",
@@ -842,13 +846,6 @@
" },\n",
" scaling_config=ScalingConfig(num_workers=4, use_gpu=True),\n",
" datasets={\"train\": train_dataset},\n",
" dataset_config={\n",
" # Don't augment test images. Only apply `per_epoch_preprocessor` to the train\n",
" # set.\n",
" \"train\": DatasetConfig(\n",
" per_epoch_preprocessor=per_epoch_preprocessor\n",
" ),\n",
" },\n",
" preprocessor=preprocessor,\n",
")\n",
"results = trainer.fit()"
2 changes: 1 addition & 1 deletion doc/source/ray-air/examples/xgboost_starter.py
@@ -22,7 +22,7 @@
# __air_xgb_preprocess_end__

# __air_xgb_train_start__
- from ray.air.config import ScalingConfig
+ from ray.train import ScalingConfig
from ray.train.xgboost import XGBoostTrainer

trainer = XGBoostTrainer(
21 changes: 5 additions & 16 deletions doc/source/ray-contribute/docs.ipynb
@@ -184,15 +184,11 @@
"cell_type": "code",
"execution_count": null,
"id": "ba88d95f",
"metadata": {
"vscode": {
"languageId": "python"
}
},
"metadata": {},
"outputs": [],
"source": [
"# __function_api_start__\n",
"from ray.air import session\n",
"from ray import train\n",
"\n",
"\n",
"def objective(x, a, b): # Define an objective function.\n",
@@ -204,7 +200,7 @@
" for x in range(20): # \"Train\" for 20 iterations and compute intermediate scores.\n",
" score = objective(x, config[\"a\"], config[\"b\"])\n",
"\n",
" session.report({\"score\": score}) # Send the score to Tune.\n",
" train.report({\"score\": score}) # Send the score to Tune.\n",
"\n",
"\n",
"# __function_api_end__"
@@ -320,10 +316,7 @@
"metadata": {
"tags": [
"hide-cell"
- ],
- "vscode": {
- "languageId": "python"
- }
+ ]
},
"outputs": [],
"source": [
@@ -385,11 +378,7 @@
"cell_type": "code",
"execution_count": null,
"id": "8412103e",
"metadata": {
"vscode": {
"languageId": "python"
}
},
"metadata": {},
"outputs": [],
"source": [
"num_workers = 8\n",
@@ -18,8 +18,7 @@
import boto3
import mlflow
import pandas as pd
- from ray.air.config import ScalingConfig
- from ray.train import DataConfig
+ from ray.train import DataConfig, ScalingConfig
from ray.train.torch.torch_trainer import TorchTrainer
import torch
import torch.nn as nn
10 changes: 5 additions & 5 deletions doc/source/train/distributed-tensorflow-keras.rst
@@ -85,7 +85,7 @@ that you can setup like this:

.. code-block:: python
- from ray.air import ScalingConfig
+ from ray.train import ScalingConfig
from ray.train.tensorflow import TensorflowTrainer
# For GPU Training, set `use_gpu` to True.
use_gpu = False
@@ -99,7 +99,7 @@ To customize the backend setup, you can pass a

.. code-block:: python
- from ray.air import ScalingConfig
+ from ray.train import ScalingConfig
from ray.train.tensorflow import TensorflowTrainer, TensorflowConfig
trainer = TensorflowTrainer(
@@ -224,7 +224,7 @@ training function. This will cause the checkpoint state from the distributed
workers to be saved on the ``Trainer`` (where your python script is executed).

The latest saved checkpoint can be accessed through the ``checkpoint`` attribute of
- the :py:class:`~ray.air.result.Result`, and the best saved checkpoints can be accessed by the ``best_checkpoints``
+ the :py:class:`~ray.train.Result`, and the best saved checkpoints can be accessed by the ``best_checkpoints``
attribute.

Concrete examples are provided to demonstrate how checkpoints (model weights but not models) are saved
@@ -235,7 +235,7 @@ appropriately in distributed training.
:emphasize-lines: 23
from ray import train
- from ray.air import Checkpoint, ScalingConfig
+ from ray.train import Checkpoint, ScalingConfig
from ray.train.tensorflow import TensorflowTrainer
import numpy as np
@@ -285,7 +285,7 @@ Loading checkpoints
:emphasize-lines: 15, 21, 22, 25, 26, 27, 30
from ray import train
- from ray.air import Checkpoint, ScalingConfig
+ from ray.train import Checkpoint, ScalingConfig
from ray.train.tensorflow import TensorflowTrainer
import numpy as np
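As the hunks above show, only the `Checkpoint` and `ScalingConfig` imports move; the dict-based checkpointing pattern in these docs is unchanged. A runnable sketch of the save side, where the toy model and data are illustrative assumptions (a real multi-worker setup would also build the model inside a tf.distribute strategy scope):

import numpy as np
from tensorflow import keras

from ray import train
from ray.train import Checkpoint, ScalingConfig
from ray.train.tensorflow import TensorflowTrainer


def train_func():
    model = keras.Sequential([keras.layers.Dense(1, input_shape=(1,))])
    model.compile(optimizer="sgd", loss="mse")
    x = np.arange(8, dtype="float32").reshape(-1, 1)
    y = 2 * x
    for _ in range(3):
        history = model.fit(x, y, verbose=0)
        # Report a dict-based checkpoint alongside metrics each epoch.
        train.report(
            {"loss": history.history["loss"][-1]},
            checkpoint=Checkpoint.from_dict({"weights": model.get_weights()}),
        )


trainer = TensorflowTrainer(train_func, scaling_config=ScalingConfig(num_workers=2))
result = trainer.fit()
print(result.checkpoint)  # the latest checkpoint, per the paragraph above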
4 changes: 2 additions & 2 deletions doc/source/train/distributed-xgboost-lightgbm.rst
@@ -65,7 +65,7 @@ both implement checkpointing out of the box and will create
:class:`~ray.train.xgboost.XGBoostCheckpoint`\s and :class:`~ray.train.lightgbm.LightGBMCheckpoint`\s
respectively.

- The only required change is to configure :class:`~ray.air.CheckpointConfig` to set
+ The only required change is to configure :class:`~ray.train.CheckpointConfig` to set
the checkpointing frequency. For example, the following configuration will
save a checkpoint on every boosting round and will only keep the latest checkpoint:
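(The configuration block itself is collapsed in this diff. A hedged sketch of what such a configuration looks like, assuming the `checkpoint_frequency` and `num_to_keep` fields of `ray.train.CheckpointConfig`:)

from ray.train import CheckpointConfig, RunConfig

run_config = RunConfig(
    checkpoint_config=CheckpointConfig(
        checkpoint_frequency=1,  # save on every boosting round
        num_to_keep=1,  # retain only the latest checkpoint
    )
)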

@@ -86,7 +86,7 @@ How to scale out training?
--------------------------

The benefit of using Ray Train is that you can seamlessly scale up your training by
- adjusting the :class:`ScalingConfig <ray.air.config.ScalingConfig>`.
+ adjusting the :class:`ScalingConfig <ray.train.ScalingConfig>`.

.. note::
Ray Train does not modify or otherwise alter the working
8 changes: 4 additions & 4 deletions doc/source/train/doc_code/gbdt_user_guide.py
@@ -3,8 +3,8 @@

# __xgboost_start__
import ray
+ from ray.train import ScalingConfig
from ray.train.xgboost import XGBoostTrainer
- from ray.air.config import ScalingConfig

# Load data.
dataset = ray.data.read_csv("s3://anonymous@air-example-data/breast_cancer.csv")
@@ -44,7 +44,7 @@
# __xgb_detail_intro_end__

# __xgb_detail_scaling_start__
- from ray.air.config import ScalingConfig
+ from ray.train import ScalingConfig

scaling_config = ScalingConfig(
# Number of workers to use for data parallelism.
@@ -79,8 +79,8 @@

# __lightgbm_start__
import ray
+ from ray.train import ScalingConfig
from ray.train.lightgbm import LightGBMTrainer
- from ray.air.config import ScalingConfig

# Load data.
dataset = ray.data.read_csv("s3://anonymous@air-example-data/breast_cancer.csv")
Expand Down Expand Up @@ -120,7 +120,7 @@
# __lgbm_detail_intro_end__

# __lgbm_detail_scaling_start__
- from ray.air.config import ScalingConfig
+ from ray.train import ScalingConfig

scaling_config = ScalingConfig(
# Number of workers to use for data parallelism.
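Both `ScalingConfig` blocks above are truncated by the diff view; a completed sketch with illustrative values, using only documented fields:

from ray.train import ScalingConfig

scaling_config = ScalingConfig(
    # Number of workers to use for data parallelism.
    num_workers=2,
    # Whether to schedule one GPU per worker.
    use_gpu=False,
    # Illustrative per-worker resource request.
    resources_per_worker={"CPU": 2},
)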
@@ -227,8 +227,8 @@
"metadata": {},
"outputs": [],
"source": [
"from ray.train import RunConfig, ScalingConfig, CheckpointConfig\n",
"from ray.train.lightning import LightningTrainer, LightningConfigBuilder\n",
"from ray.air.config import RunConfig, ScalingConfig, CheckpointConfig\n",
"\n",
"# Define the configs for LightningTrainer\n",
"lightning_config = (\n",
Expand All @@ -252,7 +252,7 @@
":::{note}\n",
"{meth}`LightningConfigBuilder.checkpointing() <ray.train.lightning.LightningConfigBuilder.checkpointing>` creates a [ModelCheckpoint](https://lightning.ai/docs/pytorch/stable/api/lightning.pytorch.callbacks.ModelCheckpoint.html#lightning.pytorch.callbacks.ModelCheckpoint) callback. This callback defines the checkpoint frequency and saves checkpoint files in Lightning style. \n",
"\n",
"If you want to save AIR checkpoints for Batch Prediction, please also provide an AIR {class}`CheckpointConfig <ray.air.config.CheckpointConfig>`.\n",
"If you want to save AIR checkpoints for Batch Prediction, please also provide a {class}`CheckpointConfig <ray.train.CheckpointConfig>`.\n",
":::"
]
},
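A sketch of the pairing the note describes (Lightning-style checkpointing through the builder plus a Train-side `CheckpointConfig`), where the monitored metric name and keep-count are assumptions:

from ray.train import CheckpointConfig, RunConfig
from ray.train.lightning import LightningConfigBuilder

# Lightning-side checkpointing: these kwargs are forwarded to Lightning's
# ModelCheckpoint callback.
lightning_config = (
    LightningConfigBuilder()
    .checkpointing(monitor="val_loss", save_top_k=2, mode="min")
    .build()
)

# Train-side checkpoint bookkeeping, as the note recommends.
run_config = RunConfig(
    checkpoint_config=CheckpointConfig(
        num_to_keep=2,
        checkpoint_score_attribute="val_loss",
        checkpoint_score_order="min",
    ),
)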
@@ -215,7 +215,7 @@
"metadata": {},
"outputs": [],
"source": [
"from ray.air.config import RunConfig, ScalingConfig\n",
"from ray.train import RunConfig, ScalingConfig\n",
"from ray.train.lightning import LightningConfigBuilder, LightningTrainer\n",
"\n",
"builder = LightningConfigBuilder()\n",