Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Initial pass at wandb Ludwig integration #514

Merged
merged 20 commits into from Feb 2, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
6 changes: 4 additions & 2 deletions ludwig/contribs/__init__.py
Expand Up @@ -35,13 +35,15 @@
method with `pass`, or just don't implement the method.
"""

## Contributors, import your class here:
# Contributors, import your class here:
from .comet import Comet
from .wandb import Wandb

# Registry of contrib integrations: 'classes' maps a contrib name to the class
# that implements its hooks, and 'instances' starts out as an empty list.
# NOTE(review): presumably 'instances' is filled with the activated contribs
# by the contrib import step -- confirm against the code that consumes this.
contrib_registry = {
# Contributors, add your class here:
'classes': {
'comet': Comet,
'wandb': Wandb,
},
'instances': [],
}
63 changes: 63 additions & 0 deletions ludwig/contribs/wandb.py
@@ -0,0 +1,63 @@
# coding=utf-8
# Copyright (c) 2019 Uber Technologies, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

import logging
import os


logger = logging.getLogger(__name__)


class Wandb():
"""Class that defines the methods necessary to hook into process."""

@staticmethod
def import_call(argv, *args, **kwargs):
    """
    Enable third-party experiment tracking, visualization, and
    management through wandb.ai.

    Returns a ``Wandb`` hook instance when the ``wandb`` package can be
    imported and reports a truthy ``__version__``. Returns ``None``
    otherwise; a failed import is logged as an error.
    """
    try:
        import wandb as wandb_module
        # Reading an attribute of the imported module keeps DeepSource from
        # flagging the import as unused.
        hook = Wandb() if wandb_module.__version__ else None
    except ImportError:
        logger.error(
            "Ignored --wandb: Please install wandb; see https://docs.wandb.com")
        hook = None
    return hook

def train_model(self, model, *args, **kwargs):
    """
    Push the model's hyperparameters into the wandb run config.

    A copy of ``model.hyperparameters`` is taken, its "input_features" and
    "output_features" entries are removed, and the remainder is passed to
    ``wandb.config.update``. The model's own dictionary keeps both keys.
    """
    import wandb
    logger.info("wandb.train_model() called...")
    hyperparameters = model.hyperparameters.copy()
    # Feature definitions are dropped from the copy before it is logged.
    for feature_key in ("input_features", "output_features"):
        del hyperparameters[feature_key]
    wandb.config.update(hyperparameters)

def train_init(self, experiment_directory, experiment_name, model_name,
resume, output_directory):
import wandb
logger.info("wandb.train_init() called...")
wandb.init(project=os.getenv("WANDB_PROJECT", experiment_name),
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would add name=model_name to the init parameters.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good idea, I was not aware of what model_name was used for initially but now it makes sense to use it as the W&B run name

name=model_name, sync_tensorboard=True, dir=output_directory)
wandb.save(os.path.join(experiment_directory, "*"))

def visualize_figure(self, fig):
    """
    Log ``fig`` to wandb under the "figure" key.

    Nothing is logged when ``wandb.run`` is falsy, i.e. when no run is
    active.
    """
    import wandb
    logger.info("wandb.visualize_figure() called...")
    if not wandb.run:
        return
    wandb.log({"figure": fig})
5 changes: 4 additions & 1 deletion ludwig/predict.py
Expand Up @@ -216,7 +216,10 @@ def save_prediction_outputs(
for output_field, outputs in postprocessed_output.items():
for output_type, values in outputs.items():
if output_type not in skip_output_types:
save_csv(csv_filename.format(output_field, output_type), values)
save_csv(
csv_filename.format(output_field, output_type),
values
)


def save_test_statistics(test_stats, experiment_dir_name):
Expand Down
5 changes: 5 additions & 0 deletions ludwig/train.py
Expand Up @@ -329,6 +329,11 @@ def full_train(
train_set_metadata
)

contrib_command("train_init", experiment_directory=experiment_dir_name,
experiment_name=experiment_name, model_name=model_name,
output_directory=output_directory,
resume=model_resume_path is not None)

# run the experiment
model, result = train(
training_set=training_set,
Expand Down
2 changes: 2 additions & 0 deletions mkdocs/docs/user_guide.md
Expand Up @@ -3016,6 +3016,8 @@ Ludwig supports the following integrations:

- `--comet` - logs training metrics, environment details, test results, visualizations, and more to [Comet.ML](https://comet.ml). Requires a freely available account. For more details, see Comet's [Running Ludwig with Comet](https://www.comet.ml/docs/python-sdk/ludwig/#running-ludwig-with-comet).

- `--wandb` - logs training metrics, configuration parameters, environment details, and the trained model to [Weights & Biases](https://www.wandb.com/). For more details, refer to the [W&B Quickstart](https://docs.wandb.com/quickstart).

For more information about integration contributions, please see the [Developer Guide](developer_guide.md).

Programmatic API
Expand Down
3 changes: 2 additions & 1 deletion requirements_test.txt
@@ -1 +1,2 @@
pytest
pytest
wandb
58 changes: 58 additions & 0 deletions tests/integration_tests/test_contrib_wandb.py
@@ -0,0 +1,58 @@
import logging
import os
import shutil
import sys

import ludwig.contrib
from tests.integration_tests.test_experiment import run_experiment
from tests.integration_tests.utils import image_feature
from tests.integration_tests.utils import category_feature
from tests.integration_tests.utils import generate_data

import wandb

logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)
logging.getLogger("ludwig").setLevel(logging.INFO)


def test_wandb_experiment(csv_filename):
    """
    Test the W&B integration end to end.

    Enables the ``--wandb`` contrib flag, runs a small image-to-category
    experiment with cloud sync disabled, and checks that a W&B run was
    created.

    All global state touched by the test (``sys.argv``, the ``WANDB_MODE``
    environment variable, the contrib registry and the generated image
    folder) is restored in a ``finally`` block, so a failing experiment or
    assertion does not leak into tests that run afterwards.

    :param csv_filename: path of the CSV file the synthetic dataset is
        written to (passed through to ``generate_data``).
    """
    # Image Inputs
    image_dest_folder = os.path.join(os.getcwd(), 'generated_images')

    # Snapshot the global state this test mutates so it can be restored.
    previous_wandb_mode = os.environ.get('WANDB_MODE')
    instances = ludwig.contrib.contrib_registry['instances']
    instances_before = len(instances)

    # add wandb arg and detect flag
    sys.argv.append('--wandb')
    try:
        ludwig.contrib.contrib_import()

        # disable sync to cloud
        os.environ['WANDB_MODE'] = 'dryrun'

        # Inputs & Outputs
        input_features = [image_feature(folder=image_dest_folder)]
        output_features = [category_feature()]
        rel_path = generate_data(input_features, output_features, csv_filename)

        # Run experiment
        run_experiment(input_features, output_features, data_csv=rel_path)

        # Check a W&B run was created
        assert wandb.run is not None

        # End session
        wandb.join()
    finally:
        # Remove only the instance(s) registered during this test
        del instances[instances_before:]

        # Undo the command-line flag added above
        if '--wandb' in sys.argv:
            sys.argv.remove('--wandb')

        # Restore the previous sync mode
        if previous_wandb_mode is None:
            os.environ.pop('WANDB_MODE', None)
        else:
            os.environ['WANDB_MODE'] = previous_wandb_mode

        # Delete the temporary data created; the folder may not exist if
        # data generation itself failed.
        shutil.rmtree(image_dest_folder, ignore_errors=True)


# Script entry guard: the body is only a bare string and `pass`, so running
# this file directly does nothing; the tests are meant to be run via pytest.
if __name__ == '__main__':
"""
To run tests individually, run:
```python -m pytest tests/integration_tests/test_contrib_wandb.py::test_name```
"""
pass