Fix for 'upload_to_hf_hub()' path mismatch with 'save()' (#3977)
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
sanjaydasgupta and pre-commit-ci[bot] authored Mar 24, 2024
1 parent b2cb8b0 commit 4b07ce4
Showing 38 changed files with 192 additions and 121 deletions.
40 changes: 28 additions & 12 deletions ludwig/api.py
@@ -64,7 +64,9 @@
from ludwig.features.feature_registries import update_config_with_metadata, update_config_with_model
from ludwig.globals import (
LUDWIG_VERSION,
MODEL_FILE_NAME,
MODEL_HYPERPARAMETERS_FILE_NAME,
MODEL_WEIGHTS_FILE_NAME,
set_disable_progressbar,
TRAIN_SET_METADATA_FILE_NAME,
TRAINING_CHECKPOINTS_DIR_PATH,
@@ -110,6 +112,7 @@
from ludwig.utils.torch_utils import DEVICE
from ludwig.utils.trainer_utils import get_training_report
from ludwig.utils.types import DataFrame, TorchDevice
from ludwig.utils.upload_utils import HuggingFaceHub

logger = logging.getLogger(__name__)

@@ -1938,35 +1941,48 @@ def upload_to_hf_hub(
# Inputs
:param repo_id (`str`):
:param repo_id: (`str`)
A namespace (user or an organization) and a repo name separated
by a `/`.
:param model_path (`str`):
The path of the saved model. This is the top level directory where
the models weights as well as other associated training artifacts
are saved.
:param private (`bool`, *optional*, defaults to `False`):
:param model_path: (`str`)
The path of the saved model. This is either (a) the folder where
the 'model_weights' folder and the 'model_hyperparameters.json' file
are stored, or (b) the parent of that folder.
:param private: (`bool`, *optional*, defaults to `False`)
Whether the model repo should be private.
:param repo_type (`str`, *optional*):
:param repo_type: (`str`, *optional*)
Set to `"dataset"` or `"space"` if uploading to a dataset or
space, `None` or `"model"` if uploading to a model. Default is
`None`.
:param commit_message (`str`, *optional*):
:param commit_message: (`str`, *optional*)
The summary / title / first line of the generated commit. Defaults to:
`f"Upload {path_in_repo} with huggingface_hub"`
:param commit_description (`str` *optional*):
:param commit_description: (`str` *optional*)
The description of the generated commit
# Returns
:return: (bool) True for success, False for failure.
"""
if os.path.exists(os.path.join(model_path, MODEL_FILE_NAME, MODEL_WEIGHTS_FILE_NAME)) and os.path.exists(
os.path.join(model_path, MODEL_FILE_NAME, MODEL_HYPERPARAMETERS_FILE_NAME)
):
experiment_path = model_path
elif os.path.exists(os.path.join(model_path, MODEL_WEIGHTS_FILE_NAME)) and os.path.exists(
os.path.join(model_path, MODEL_HYPERPARAMETERS_FILE_NAME)
):
experiment_path = os.path.dirname(model_path)
else:
raise ValueError(
f"Can't find 'model_weights' and '{MODEL_HYPERPARAMETERS_FILE_NAME}' either at "
f"'{model_path}' or at '{model_path}/model'"
)
model_service = get_upload_registry()["hf_hub"]
hub = model_service()
hub: HuggingFaceHub = model_service()
hub.login()
upload_status = hub.upload(
upload_status: bool = hub.upload(
repo_id=repo_id,
model_path=model_path,
model_path=experiment_path,
repo_type=repo_type,
private=private,
commit_message=commit_message,
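The net effect of this hunk: upload_to_hf_hub() now accepts either the experiment directory produced by train() (the folder containing the 'model' subfolder) or that 'model' folder itself as written by save(), and resolves both to the same experiment directory before uploading. A minimal usage sketch under that assumption; the repo id and output paths below are hypothetical:

from ludwig.api import LudwigModel

model = LudwigModel(config="config.yaml")
model.train(dataset="train.csv", output_directory="results")

# (a) the experiment directory, which contains the 'model' subfolder
model.upload_to_hf_hub(repo_id="my-org/my-ludwig-model", model_path="results/experiment_run")

# (b) the 'model' directory itself, as written by save(); before this fix,
#     this form pointed the upload at the wrong directory level
model.upload_to_hf_hub(repo_id="my-org/my-ludwig-model", model_path="results/experiment_run/model")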
6 changes: 3 additions & 3 deletions ludwig/automl/automl.py
@@ -50,7 +50,7 @@
from ludwig.contrib import add_contrib_callback_args
from ludwig.data.cache.types import CacheableDataset
from ludwig.datasets import load_dataset_uris
from ludwig.globals import LUDWIG_VERSION
from ludwig.globals import LUDWIG_VERSION, MODEL_FILE_NAME
from ludwig.hyperopt.run import hyperopt
from ludwig.schema.model_config import ModelConfig
from ludwig.types import ModelConfigDict
@@ -101,10 +101,10 @@ def best_model(self) -> Optional[LudwigModel]:
# Read remote URIs using Ludwig's internal remote file loading APIs, as
# Ray's do not handle custom credentials at the moment.
with use_credentials(self._creds):
return LudwigModel.load(os.path.join(ckpt_path, "model"))
return LudwigModel.load(os.path.join(ckpt_path, MODEL_FILE_NAME))
else:
with checkpoint.as_directory() as ckpt_path:
return LudwigModel.load(os.path.join(ckpt_path, "model"))
return LudwigModel.load(os.path.join(ckpt_path, MODEL_FILE_NAME))


@PublicAPI
11 changes: 8 additions & 3 deletions ludwig/benchmarking/artifacts.py
@@ -2,6 +2,7 @@
from dataclasses import dataclass
from typing import Any, Dict

from ludwig.globals import MODEL_FILE_NAME
from ludwig.types import ModelConfigDict, TrainingSetMetadataDict
from ludwig.utils.data_utils import load_json, load_yaml

@@ -55,7 +56,11 @@ def build_benchmarking_result(benchmarking_config: dict, experiment_idx: int):
description=load_json(os.path.join(experiment_run_path, "description.json")),
test_statistics=load_json(os.path.join(experiment_run_path, "test_statistics.json")),
training_statistics=load_json(os.path.join(experiment_run_path, "training_statistics.json")),
model_hyperparameters=load_json(os.path.join(experiment_run_path, "model", "model_hyperparameters.json")),
training_progress=load_json(os.path.join(experiment_run_path, "model", "training_progress.json")),
training_set_metadata=load_json(os.path.join(experiment_run_path, "model", "training_set_metadata.json")),
model_hyperparameters=load_json(
os.path.join(experiment_run_path, MODEL_FILE_NAME, "model_hyperparameters.json")
),
training_progress=load_json(os.path.join(experiment_run_path, MODEL_FILE_NAME, "training_progress.json")),
training_set_metadata=load_json(
os.path.join(experiment_run_path, MODEL_FILE_NAME, "training_set_metadata.json")
),
)
4 changes: 2 additions & 2 deletions ludwig/benchmarking/summary_dataclasses.py
@@ -7,7 +7,7 @@

import ludwig.modules.metric_modules # noqa: F401
from ludwig.benchmarking.utils import format_memory, format_time
from ludwig.globals import MODEL_HYPERPARAMETERS_FILE_NAME
from ludwig.globals import MODEL_FILE_NAME, MODEL_HYPERPARAMETERS_FILE_NAME
from ludwig.modules.metric_registry import get_metric_classes, metric_feature_type_registry # noqa: F401
from ludwig.types import ModelConfigDict
from ludwig.utils.data_utils import load_json
@@ -209,7 +209,7 @@ def build_metrics_summary(experiment_local_directory: str) -> MetricsSummary:
e.g. local_experiment_repo/ames_housing/some_experiment/
"""
config = load_json(
os.path.join(experiment_local_directory, "experiment_run", "model", MODEL_HYPERPARAMETERS_FILE_NAME)
os.path.join(experiment_local_directory, "experiment_run", MODEL_FILE_NAME, MODEL_HYPERPARAMETERS_FILE_NAME)
)
report = load_json(os.path.join(experiment_local_directory, "experiment_run", "test_statistics.json"))
output_feature_type: str = config["output_features"][0]["type"]
8 changes: 4 additions & 4 deletions ludwig/benchmarking/utils.py
@@ -16,7 +16,7 @@
from ludwig.constants import BINARY, CATEGORY
from ludwig.datasets import model_configs_for_dataset
from ludwig.datasets.loaders.dataset_loader import DatasetLoader
from ludwig.globals import CONFIG_YAML
from ludwig.globals import CONFIG_YAML, MODEL_FILE_NAME, MODEL_WEIGHTS_FILE_NAME
from ludwig.utils.data_utils import load_yaml
from ludwig.utils.dataset_utils import get_repeatable_train_val_test_split
from ludwig.utils.defaults import default_random_seed
@@ -251,9 +251,9 @@ def delete_model_checkpoints(output_directory: str):
Args:
output_directory: output directory of the hyperopt run.
"""
shutil.rmtree(os.path.join(output_directory, "model", "training_checkpoints"), ignore_errors=True)
if os.path.isfile(os.path.join(output_directory, "model", "model_weights")):
os.remove(os.path.join(output_directory, "model", "model_weights"))
shutil.rmtree(os.path.join(output_directory, MODEL_FILE_NAME, "training_checkpoints"), ignore_errors=True)
if os.path.isfile(os.path.join(output_directory, MODEL_FILE_NAME, MODEL_WEIGHTS_FILE_NAME)):
os.remove(os.path.join(output_directory, MODEL_FILE_NAME, MODEL_WEIGHTS_FILE_NAME))


def delete_hyperopt_outputs(output_directory: str):
4 changes: 2 additions & 2 deletions ludwig/contribs/mlflow/__init__.py
@@ -6,7 +6,7 @@
from ludwig.api_annotations import DeveloperAPI, PublicAPI
from ludwig.callbacks import Callback
from ludwig.constants import TRAINER
from ludwig.globals import MODEL_HYPERPARAMETERS_FILE_NAME, TRAIN_SET_METADATA_FILE_NAME
from ludwig.globals import MODEL_FILE_NAME, MODEL_HYPERPARAMETERS_FILE_NAME, TRAIN_SET_METADATA_FILE_NAME
from ludwig.types import TrainingSetMetadataDict
from ludwig.utils.data_utils import chunk_dict, flatten_dict, save_json, to_json_dict
from ludwig.utils.package_utils import LazyLoader
@@ -258,7 +258,7 @@ def _log_mlflow(log_metrics, steps, save_path, should_continue, log_artifacts: b
def _log_artifacts(output_directory):
for fname in os.listdir(output_directory):
lpath = os.path.join(output_directory, fname)
if fname == "model":
if fname == MODEL_FILE_NAME:
_log_model(lpath)
else:
mlflow.log_artifact(lpath)
8 changes: 4 additions & 4 deletions ludwig/contribs/mlflow/model.py
@@ -16,7 +16,7 @@
from mlflow.utils.model_utils import _get_flavor_configuration

from ludwig.api_annotations import DeveloperAPI
from ludwig.globals import MODEL_HYPERPARAMETERS_FILE_NAME
from ludwig.globals import MODEL_FILE_NAME, MODEL_HYPERPARAMETERS_FILE_NAME
from ludwig.utils.data_utils import load_json

FLAVOR_NAME = "ludwig"
@@ -97,7 +97,7 @@ def save_model(
path = os.path.abspath(path)
if os.path.exists(path):
raise MlflowException(f"Path '{path}' already exists")
model_data_subpath = "model"
model_data_subpath = MODEL_FILE_NAME
model_data_path = os.path.join(path, model_data_subpath)
os.makedirs(path)
if mlflow_model is None:
@@ -267,7 +267,7 @@ def export_model(model_path, output_path, registered_model_name=None):
if not model_path.startswith("runs:/") or output_path is not None:
# No run specified, so in order to register the model in mlflow, we need
# to create a new run and upload the model as an artifact first
output_path = output_path or "model"
output_path = output_path or MODEL_FILE_NAME
log_model(
_CopyModel(model_path),
artifact_path=output_path,
@@ -295,7 +295,7 @@ def log_saved_model(lpath):
"""
log_model(
_CopyModel(lpath),
artifact_path="model",
artifact_path=MODEL_FILE_NAME,
)


1 change: 1 addition & 0 deletions ludwig/globals.py
@@ -16,6 +16,7 @@

LUDWIG_VERSION = "0.10.2.dev"

MODEL_FILE_NAME = "model"
MODEL_WEIGHTS_FILE_NAME = "model_weights"
MODEL_HYPERPARAMETERS_FILE_NAME = "model_hyperparameters.json"
TRAIN_SET_METADATA_FILE_NAME = "training_set_metadata.json"
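For reference, the on-disk layout these constants name (and that the path checks in api.py above and upload.py below rely on) looks roughly like this; a sketch, with 'results/experiment_run' as a hypothetical experiment path:

import os

from ludwig.globals import MODEL_FILE_NAME, MODEL_HYPERPARAMETERS_FILE_NAME, MODEL_WEIGHTS_FILE_NAME

# <experiment_path>/
#     model/                          <- MODEL_FILE_NAME
#         model_weights               <- MODEL_WEIGHTS_FILE_NAME
#         model_hyperparameters.json  <- MODEL_HYPERPARAMETERS_FILE_NAME
experiment_path = "results/experiment_run"  # hypothetical path
weights = os.path.join(experiment_path, MODEL_FILE_NAME, MODEL_WEIGHTS_FILE_NAME)
hyperparameters = os.path.join(experiment_path, MODEL_FILE_NAME, MODEL_HYPERPARAMETERS_FILE_NAME)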
7 changes: 4 additions & 3 deletions ludwig/hyperopt/execution.py
@@ -35,6 +35,7 @@
from ludwig.backend.ray import initialize_ray
from ludwig.callbacks import Callback
from ludwig.constants import MAXIMIZE, TEST, TRAINER, TRAINING, TYPE, VALIDATION
from ludwig.globals import MODEL_FILE_NAME
from ludwig.hyperopt.results import HyperoptResults, TrialResults
from ludwig.hyperopt.syncer import RemoteSyncer
from ludwig.hyperopt.utils import load_json_values, substitute_parameters
@@ -125,7 +126,7 @@ def ignore_dot_files(src, files):
return [f for f in files if f.startswith(".")]

with tune.checkpoint_dir(step=progress_tracker.tune_checkpoint_num) as checkpoint_dir:
checkpoint_model = os.path.join(checkpoint_dir, "model")
checkpoint_model = os.path.join(checkpoint_dir, MODEL_FILE_NAME)
# Atomic copying of the checkpoints
if not os.path.isdir(checkpoint_model):
copy_id = uuid.uuid4()
@@ -411,7 +412,7 @@ def _evaluate_best_model(
debug,
):
best_model = LudwigModel.load(
os.path.join(best_model_path, "model"),
os.path.join(best_model_path, MODEL_FILE_NAME),
backend=backend,
gpus=gpus,
gpu_memory_limit=gpu_memory_limit,
@@ -549,7 +550,7 @@ def on_trainer_train_setup(self, trainer, save_path, is_coordinator):
os.rename(save_path, tmp_path)

try:
safe_move_file(os.path.join(ckpt_path, "model"), save_path)
safe_move_file(os.path.join(ckpt_path, MODEL_FILE_NAME), save_path)
except Exception:
# Rollback from partial changes. Remove the save_path
# and move the original save_path back.
3 changes: 2 additions & 1 deletion ludwig/trainers/base.py
@@ -1,14 +1,15 @@
from abc import ABC, abstractmethod

from ludwig.data.dataset.base import Dataset
from ludwig.globals import MODEL_FILE_NAME
from ludwig.schema.trainer import BaseTrainerConfig
from ludwig.types import ModelConfigDict
from ludwig.utils.defaults import default_random_seed


class BaseTrainer(ABC):
@abstractmethod
def train(self, training_set, validation_set=None, test_set=None, save_path="model", **kwargs):
def train(self, training_set, validation_set=None, test_set=None, save_path=MODEL_FILE_NAME, **kwargs):
raise NotImplementedError()

@abstractmethod
3 changes: 2 additions & 1 deletion ludwig/trainers/trainer.py
@@ -50,6 +50,7 @@
from ludwig.distributed.base import DistributedStrategy, LocalStrategy
from ludwig.globals import (
is_progressbar_disabled,
MODEL_FILE_NAME,
MODEL_HYPERPARAMETERS_FILE_NAME,
TRAINING_CHECKPOINTS_DIR_PATH,
TRAINING_PROGRESS_TRACKER_FILE_NAME,
@@ -831,7 +832,7 @@ def train(
training_set,
validation_set=None,
test_set=None,
save_path="model",
save_path=MODEL_FILE_NAME,
return_state_dict: bool = False,
**kwargs,
):
9 changes: 7 additions & 2 deletions ludwig/trainers/trainer_lightgbm.py
@@ -15,7 +15,12 @@
from ludwig.distributed import init_dist_strategy
from ludwig.distributed.base import DistributedStrategy, LocalStrategy
from ludwig.features.feature_utils import LudwigFeatureDict
from ludwig.globals import is_progressbar_disabled, TRAINING_CHECKPOINTS_DIR_PATH, TRAINING_PROGRESS_TRACKER_FILE_NAME
from ludwig.globals import (
is_progressbar_disabled,
MODEL_FILE_NAME,
TRAINING_CHECKPOINTS_DIR_PATH,
TRAINING_PROGRESS_TRACKER_FILE_NAME,
)
from ludwig.models.gbm import GBM
from ludwig.models.predictor import Predictor
from ludwig.modules.metric_modules import get_improved_fn, get_initial_validation_value
@@ -562,7 +567,7 @@ def train(
training_set: Union["Dataset", "RayDataset"], # noqa: F821
validation_set: Optional[Union["Dataset", "RayDataset"]], # noqa: F821
test_set: Optional[Union["Dataset", "RayDataset"]], # noqa: F821
save_path="model",
save_path=MODEL_FILE_NAME,
**kwargs,
):
# ====== General setup =======
3 changes: 2 additions & 1 deletion ludwig/trainers/trainer_llm.py
@@ -9,6 +9,7 @@
from ludwig.data.dataset.base import Dataset
from ludwig.distributed.base import DistributedStrategy, LocalStrategy
from ludwig.features.feature_utils import LudwigFeatureDict
from ludwig.globals import MODEL_FILE_NAME
from ludwig.models.llm import LLM
from ludwig.models.predictor import LlmFineTunePredictor, LlmPredictor
from ludwig.modules.metric_modules import get_initial_validation_value
@@ -140,7 +141,7 @@ def train(
training_set: Dataset,
validation_set: Optional[Dataset] = None,
test_set: Optional[Dataset] = None,
save_path: str = "model",
save_path: str = MODEL_FILE_NAME,
return_state_dict: bool = False,
**kwargs,
):
25 changes: 20 additions & 5 deletions ludwig/upload.py
@@ -1,8 +1,10 @@
import argparse
import logging
import os
import sys
from typing import Optional

from ludwig.globals import MODEL_FILE_NAME, MODEL_HYPERPARAMETERS_FILE_NAME, MODEL_WEIGHTS_FILE_NAME
from ludwig.utils.print_utils import get_logging_level_registry
from ludwig.utils.upload_utils import HuggingFaceHub, Predibase

@@ -38,9 +40,9 @@ def upload_cli(
A namespace (user or an organization) and a repo name separated
by a `/`.
model_path (`str`):
The path of the saved model. This is the top level directory where
the models weights as well as other associated training artifacts
are saved.
The path of the saved model. This is the parent-folder of the folder
where the 'model_weights' folder and the 'model_hyperparameters.json' file
are stored.
private (`bool`, *optional*, defaults to `False`):
Whether the model repo should be private.
repo_type (`str`, *optional*):
@@ -60,10 +62,23 @@
`"predibase"`.
"""
model_service = get_upload_registry().get(service, "hf_hub")
hub = model_service()
hub: HuggingFaceHub = model_service()
if os.path.exists(os.path.join(model_path, MODEL_FILE_NAME, MODEL_WEIGHTS_FILE_NAME)) and os.path.exists(
os.path.join(model_path, MODEL_FILE_NAME, MODEL_HYPERPARAMETERS_FILE_NAME)
):
experiment_path = model_path
elif os.path.exists(os.path.join(model_path, MODEL_WEIGHTS_FILE_NAME)) and os.path.exists(
os.path.join(model_path, MODEL_HYPERPARAMETERS_FILE_NAME)
):
experiment_path = os.path.normpath(os.path.join(model_path, ".."))
else:
raise ValueError(
f"Can't find 'model_weights' and '{MODEL_HYPERPARAMETERS_FILE_NAME}' either at "
f"'{model_path}' or at '{model_path}/model'"
)
hub.upload(
repo_id=repo_id,
model_path=model_path,
model_path=experiment_path,
repo_type=repo_type,
private=private,
commit_message=commit_message,
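upload_cli() mirrors the same two-form fallback, so both path shapes should work from the programmatic entry point as well. A hedged sketch; the keyword names are assumed from the parameters shown above, and the repo id and paths are placeholders:

from ludwig.upload import upload_cli

# (a) the experiment directory containing the 'model' subfolder
upload_cli(service="hf_hub", repo_id="my-org/my-ludwig-model", model_path="results/experiment_run")

# (b) the 'model' directory itself; upload_cli resolves it to its parent
upload_cli(service="hf_hub", repo_id="my-org/my-ludwig-model", model_path="results/experiment_run/model")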