Skip to content

Update the wandb logger #590

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 8 commits into from
Sep 12, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,14 @@ openai.Model.delete("ft:gpt-3.5-turbo:acemeco:suffix:abc123")

You can learn more in our [fine-tuning guide](https://platform.openai.com/docs/guides/fine-tuning).

To log the training results from fine-tuning to Weights & Biases, run:

```
openai wandb sync
```

For more information, read the [Weights & Biases documentation](https://docs.wandb.ai/guides/integrations/openai) on the OpenAI integration.

### Moderation

OpenAI provides a free Moderation endpoint that can be used to check whether content complies with the OpenAI [content policy](https://platform.openai.com/docs/usage-policies).
Expand Down
2 changes: 1 addition & 1 deletion openai/_openai_scripts.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ def help(args):
subparsers = parser.add_subparsers()
sub_api = subparsers.add_parser("api", help="Direct API calls")
sub_tools = subparsers.add_parser("tools", help="Client side tools for convenience")
sub_wandb = subparsers.add_parser("wandb", help="Logging with Weights & Biases")
sub_wandb = subparsers.add_parser("wandb", help="Logging with Weights & Biases, see https://docs.wandb.ai/guides/integrations/openai for documentation")

api_register(sub_api)
tools_register(sub_tools)
Expand Down
14 changes: 10 additions & 4 deletions openai/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -1375,7 +1375,7 @@ def help(args):

def wandb_register(parser):
subparsers = parser.add_subparsers(
title="wandb", help="Logging with Weights & Biases"
title="wandb", help="Logging with Weights & Biases, see https://docs.wandb.ai/guides/integrations/openai for documentation"
)

def help(args):
Expand All @@ -1394,17 +1394,23 @@ def help(args):
)
sub.add_argument(
"--project",
default="GPT-3",
help="""Name of the project where you're sending runs. By default, it is "GPT-3".""",
default="OpenAI-Fine-Tune",
help="""Name of the Weights & Biases project where you're sending runs. By default, it is "OpenAI-Fine-Tune".""",
)
sub.add_argument(
"--entity",
help="Username or team name where you're sending runs. By default, your default entity is used, which is usually your username.",
help="Weights & Biases username or team name where you're sending runs. By default, your default entity is used, which is usually your username.",
)
sub.add_argument(
"--force",
action="store_true",
help="Forces logging and overwrite existing wandb run of the same fine-tune.",
)
sub.add_argument(
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

One question - will this break existing workflows for users who expect to be able to use WandB sync with the old version of the fine-tuning API? That might be okay; we'll just need to note as much in the release notes. My understanding is that if an existing user of the old API (/v1/fine-tunes) upgraded their openai-python version and tried to run fine-tuning, this would break unless they provided --legacy?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yep that’s correct

"--legacy",
action="store_true",
help="Log results from legacy OpenAI /v1/fine-tunes api",
)
sub.set_defaults(force=False)
sub.set_defaults(legacy=False)
sub.set_defaults(func=WandbLogger.sync)
28 changes: 21 additions & 7 deletions openai/wandb_logger.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,9 @@
import re
from pathlib import Path

from openai import File, FineTune
from openai import File, FineTune, FineTuningJob
from openai.datalib.numpy_helper import numpy as np
from openai.datalib.pandas_helper import pandas as pd
from openai.datalib.pandas_helper import assert_has_pandas, pandas as pd


class WandbLogger:
Expand All @@ -34,9 +34,10 @@ def sync(
cls,
id=None,
n_fine_tunes=None,
project="GPT-3",
project="OpenAI-Fine-Tune",
entity=None,
force=False,
legacy=False,
**kwargs_wandb_init,
):
"""
Expand All @@ -47,18 +48,26 @@ def sync(
:param entity: Username or team name where you're sending runs. By default, your default entity is used, which is usually your username.
:param force: Forces logging and overwrite existing wandb run of the same fine-tune.
"""

assert_has_pandas()

if not WANDB_AVAILABLE:
return

if id:
fine_tune = FineTune.retrieve(id=id)
print("Retrieving fine-tune job...")
if legacy:
fine_tune = FineTune.retrieve(id=id)
else:
fine_tune = FineTuningJob.retrieve(id=id)
fine_tune.pop("events", None)
fine_tunes = [fine_tune]

else:
# get list of fine_tune to log
fine_tunes = FineTune.list()
if legacy:
fine_tunes = FineTune.list()
else:
fine_tunes = list(FineTuningJob.auto_paging_iter())
if not fine_tunes or fine_tunes.get("data") is None:

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

`fine_tunes` is a list when not legacy, so `.get()` now fails (lists have no `.get()` method).

print("No fine-tune has been retrieved")
return
Expand All @@ -76,6 +85,7 @@ def sync(
project,
entity,
force,
legacy,
show_individual_warnings,
**kwargs_wandb_init,
)
Expand All @@ -94,6 +104,7 @@ def _log_fine_tune(
project,
entity,
force,
legacy,
show_individual_warnings,
**kwargs_wandb_init,
):
Expand All @@ -110,7 +121,10 @@ def _log_fine_tune(

# check results are present
try:
results_id = fine_tune["result_files"][0]["id"]
if legacy:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Looks good

results_id = fine_tune["result_files"][0]["id"]
else:
results_id = fine_tune["result_files"][0]
results = File.download(id=results_id).decode("utf-8")
except:
if show_individual_warnings:
Expand Down