Merge branch 'ludwig-ai:master' into fix_adapter_retraining

ludwig-ai · May 3, 2024 · 6fc53d6 · 6fc53d6
2 parents 0399f8e + b6df715
commit 6fc53d6
Show file tree

Hide file tree

Showing 48 changed files with 374 additions and 158 deletions.
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
@@ -5,7 +5,7 @@ way to help the community. Answering questions, helping others, reaching out and
 documentation are immensely valuable contributions as well.
 
 It also helps us if you spread the word: reference the library from blog posts on the awesome
-projects it made possible, shout out on Twitter every time it has helped you, or simply star the
+projects it made possible, shout out on X every time it has helped you, or simply star the
 repo to say "thank you".
 
 Check out the official [ludwig docs](https://ludwig-ai.github.io/ludwig-docs/) to get oriented
@@ -96,8 +96,7 @@ Work on your self-assigned issue and eventually create a Pull Request.
    To do that, edit the file `requirements_extra.txt` and comment out the line that begins with `horovod`.  After that,
    please execute the long `pip install` command given in the previous step.  With these work-around provisions, your
    installation should run to completion successfully.  If you are still having difficulty, please reach out with the
-   specifics of your environment in the Ludwig Community
-   [Slack](https://join.slack.com/t/ludwig-ai/shared_invite/zt-mrxo87w6-DlX5~73T2B4v_g6jj0pJcQ).
+   specifics of your environment in the Ludwig Community [Discord](https://discord.gg/CBgdrGnZjy).
 
 1. Develop features on your branch.
 

diff --git a/README.md b/README.md
@@ -9,14 +9,17 @@
 _Declarative deep learning framework built for scale and efficiency._
 
 [![PyPI version](https://badge.fury.io/py/ludwig.svg)](https://badge.fury.io/py/ludwig)
-[![Slack](https://img.shields.io/badge/slack-chat-green.svg?logo=slack)](https://join.slack.com/t/ludwig-ai/shared_invite/zt-mrxo87w6-DlX5~73T2B4v_g6jj0pJcQ)
+[![Discord](https://dcbadge.vercel.app/api/server/CBgdrGnZjy?style=flat&theme=discord-inverted)](https://discord.gg/CBgdrGnZjy)
 [![DockerHub](https://img.shields.io/docker/pulls/ludwigai/ludwig.svg)](https://hub.docker.com/r/ludwigai)
 [![Downloads](https://pepy.tech/badge/ludwig)](https://pepy.tech/project/ludwig)
 [![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](https://github.com/ludwig-ai/ludwig/blob/master/LICENSE)
-[![Twitter](https://img.shields.io/twitter/follow/ludwig_ai.svg?style=social&logo=twitter)](https://twitter.com/ludwig_ai)
+[![X](https://img.shields.io/twitter/follow/ludwig_ai.svg?style=social&logo=twitter)](https://twitter.com/ludwig_ai)
 
 </div>
 
+> [!IMPORTANT]
+> Our community has moved to [Discord](https://discord.gg/CBgdrGnZjy) -- please join us there!
+
 # 📖 What is Ludwig?
 
 Ludwig is a **low-code** framework for building **custom** AI models like **LLMs** and other deep neural networks.
@@ -193,7 +196,7 @@ ludwig train --config model.yaml --dataset rotten_tomatoes.csv
 
 **Happy modeling**
 
-Try applying Ludwig to your data. [Reach out](https://join.slack.com/t/ludwig-ai/shared_invite/zt-mrxo87w6-DlX5~73T2B4v_g6jj0pJcQ)
+Try applying Ludwig to your data. [Reach out on Discord](https://discord.gg/CBgdrGnZjy)
 if you have any questions.
 
 # ❓ Why you should use Ludwig
@@ -313,7 +316,7 @@ Read our publications on [Ludwig](https://arxiv.org/pdf/1909.07930.pdf), [declar
 Learn more about [how Ludwig works](https://ludwig-ai.github.io/ludwig-docs/latest/user_guide/how_ludwig_works/), [how to get started](https://ludwig-ai.github.io/ludwig-docs/latest/getting_started/), and work through more [examples](https://ludwig-ai.github.io/ludwig-docs/latest/examples).
 
 If you are interested in [contributing](https://github.com/ludwig-ai/ludwig/blob/master/CONTRIBUTING.md), have questions, comments, or thoughts to share, or if you just want to be in the
-know, please consider [joining the Ludwig Slack](https://join.slack.com/t/ludwig-ai/shared_invite/zt-mrxo87w6-DlX5~73T2B4v_g6jj0pJcQ) and follow us on [Twitter](https://twitter.com/ludwig_ai)!
+know, please consider [joining our Community Discord](https://discord.gg/CBgdrGnZjy) and follow us on [X](https://twitter.com/ludwig_ai)!
 
 # 🤝 Join the community to build Ludwig with us
 
@@ -331,7 +334,7 @@ more accessible and feature rich framework for everyone to use!
 
 # 👋 Getting Involved
 
-- [Slack](https://join.slack.com/t/ludwig-ai/shared_invite/zt-mrxo87w6-DlX5~73T2B4v_g6jj0pJcQ)
-- [Twitter](https://twitter.com/ludwig_ai)
+- [Discord](https://discord.gg/CBgdrGnZjy)
+- [X](https://twitter.com/ludwig_ai)
 - [Medium](https://medium.com/ludwig-ai)
 - [GitHub Issues](https://github.com/ludwig-ai/ludwig/issues)
diff --git a/examples/llm_base_model_dequantization/phi_2_dequantization.py b/examples/llm_base_model_dequantization/phi_2_dequantization.py
@@ -1,4 +1,5 @@
 import logging
+import os
 
 import yaml
 from huggingface_hub import whoami
@@ -10,7 +11,7 @@
 base_model_name = "microsoft/phi-2"
 dequantized_path = "microsoft-phi-2-dequantized"
 save_path = "/home/ray/" + dequantized_path
-hfhub_repo_id = hf_username + dequantized_path
+hfhub_repo_id = os.path.join(hf_username, dequantized_path)
 
 
 config = yaml.safe_load(

diff --git a/ludwig/api.py b/ludwig/api.py
@@ -64,9 +64,12 @@
 from ludwig.features.feature_registries import update_config_with_metadata, update_config_with_model
 from ludwig.globals import (
     LUDWIG_VERSION,
+    MODEL_FILE_NAME,
     MODEL_HYPERPARAMETERS_FILE_NAME,
+    MODEL_WEIGHTS_FILE_NAME,
     set_disable_progressbar,
     TRAIN_SET_METADATA_FILE_NAME,
+    TRAINING_CHECKPOINTS_DIR_PATH,
 )
 from ludwig.models.base import BaseModel
 from ludwig.models.calibrator import Calibrator
@@ -109,6 +112,7 @@
 from ludwig.utils.torch_utils import DEVICE
 from ludwig.utils.trainer_utils import get_training_report
 from ludwig.utils.types import DataFrame, TorchDevice
+from ludwig.utils.upload_utils import HuggingFaceHub
 
 logger = logging.getLogger(__name__)
 
@@ -1282,9 +1286,12 @@ def evaluate(
                     self.model.output_features, predictions, dataset, training_set_metadata
                 )
                 eval_stats = {
-                    of_name: {**eval_stats[of_name], **overall_stats[of_name]}
-                    # account for presence of 'combined' key
-                    if of_name in overall_stats else {**eval_stats[of_name]}
+                    of_name: (
+                        {**eval_stats[of_name], **overall_stats[of_name]}
+                        # account for presence of 'combined' key
+                        if of_name in overall_stats
+                        else {**eval_stats[of_name]}
+                    )
                     for of_name in eval_stats
                 }
 
@@ -1765,6 +1772,7 @@ def load(
         gpu_memory_limit: Optional[float] = None,
         allow_parallel_threads: bool = True,
         callbacks: List[Callback] = None,
+        from_checkpoint: bool = False,
     ) -> "LudwigModel":  # return is an instance of ludwig.api.LudwigModel class
         """This function allows for loading pretrained models.
 
@@ -1788,6 +1796,9 @@ def load(
         :param callbacks: (list, default: `None`) a list of
             `ludwig.callbacks.Callback` objects that provide hooks into the
             Ludwig pipeline.
+        :param from_checkpoint: (bool, default: `False`) if `True`, the model
+            will be loaded from the latest checkpoint (training_checkpoints/)
+            instead of the final model weights.
 
         # Return
 
@@ -1834,7 +1845,7 @@ def load(
         ludwig_model.model = LudwigModel.create_model(config_obj)
 
         # load model weights
-        ludwig_model.load_weights(model_dir)
+        ludwig_model.load_weights(model_dir, from_checkpoint)
 
         # The LoRA layers appear to be loaded again (perhaps due to a potential bug); hence, we merge and unload again.
         if ludwig_model.is_merge_and_unload_set():
@@ -1851,12 +1862,16 @@ def load(
     def load_weights(
         self,
         model_dir: str,
+        from_checkpoint: bool = False,
     ) -> None:
         """Loads weights from a pre-trained model.
 
         # Inputs
         :param model_dir: (str) filepath string to location of a pre-trained
             model
+        :param from_checkpoint: (bool, default: `False`) if `True`, the model
+            will be loaded from the latest checkpoint (training_checkpoints/)
+            instead of the final model weights.
 
         # Return
         :return: `None`
@@ -1868,7 +1883,16 @@ def load_weights(
         ```
         """
         if self.backend.is_coordinator():
-            self.model.load(model_dir)
+            if from_checkpoint:
+                with self.backend.create_trainer(
+                    model=self.model,
+                    config=self.config_obj.trainer,
+                ) as trainer:
+                    checkpoint = trainer.create_checkpoint_handle()
+                    training_checkpoints_path = os.path.join(model_dir, TRAINING_CHECKPOINTS_DIR_PATH)
+                    trainer.resume_weights_and_optimizer(training_checkpoints_path, checkpoint)
+            else:
+                self.model.load(model_dir)
 
         self.backend.sync_model(self.model)
 
@@ -1917,35 +1941,48 @@ def upload_to_hf_hub(
 
         # Inputs
 
-        :param repo_id (`str`):
+        :param repo_id: (`str`)
             A namespace (user or an organization) and a repo name separated
             by a `/`.
-        :param model_path (`str`):
-            The path of the saved model. This is the top level directory where
-            the models weights as well as other associated training artifacts
-            are saved.
-        :param private (`bool`, *optional*, defaults to `False`):
+        :param model_path: (`str`)
+            The path of the saved model. This is either (a) the folder where
+            the 'model_weights' folder and the 'model_hyperparameters.json' file
+            are stored, or (b) the parent of that folder.
+        :param private: (`bool`, *optional*, defaults to `False`)
             Whether the model repo should be private.
-        :param repo_type (`str`, *optional*):
+        :param repo_type: (`str`, *optional*)
             Set to `"dataset"` or `"space"` if uploading to a dataset or
             space, `None` or `"model"` if uploading to a model. Default is
             `None`.
-        :param commit_message (`str`, *optional*):
+        :param commit_message: (`str`, *optional*)
             The summary / title / first line of the generated commit. Defaults to:
             `f"Upload {path_in_repo} with huggingface_hub"`
-        :param commit_description (`str` *optional*):
+        :param commit_description: (`str`, *optional*)
             The description of the generated commit
 
         # Returns
 
         :return: (bool) True for success, False for failure.
         """
+        if os.path.exists(os.path.join(model_path, MODEL_FILE_NAME, MODEL_WEIGHTS_FILE_NAME)) and os.path.exists(
+            os.path.join(model_path, MODEL_FILE_NAME, MODEL_HYPERPARAMETERS_FILE_NAME)
+        ):
+            experiment_path = model_path
+        elif os.path.exists(os.path.join(model_path, MODEL_WEIGHTS_FILE_NAME)) and os.path.exists(
+            os.path.join(model_path, MODEL_HYPERPARAMETERS_FILE_NAME)
+        ):
+            experiment_path = os.path.dirname(model_path)
+        else:
+            raise ValueError(
+                f"Can't find 'model_weights' and '{MODEL_HYPERPARAMETERS_FILE_NAME}' either at "
+                f"'{model_path}' or at '{model_path}/model'"
+            )
         model_service = get_upload_registry()["hf_hub"]
-        hub = model_service()
+        hub: HuggingFaceHub = model_service()
         hub.login()
-        upload_status = hub.upload(
+        upload_status: bool = hub.upload(
             repo_id=repo_id,
-            model_path=model_path,
+            model_path=experiment_path,
             repo_type=repo_type,
             private=private,
             commit_message=commit_message,

diff --git a/ludwig/automl/automl.py b/ludwig/automl/automl.py
@@ -50,7 +50,7 @@
 from ludwig.contrib import add_contrib_callback_args
 from ludwig.data.cache.types import CacheableDataset
 from ludwig.datasets import load_dataset_uris
-from ludwig.globals import LUDWIG_VERSION
+from ludwig.globals import LUDWIG_VERSION, MODEL_FILE_NAME
 from ludwig.hyperopt.run import hyperopt
 from ludwig.schema.model_config import ModelConfig
 from ludwig.types import ModelConfigDict
@@ -101,10 +101,10 @@ def best_model(self) -> Optional[LudwigModel]:
             # Read remote URIs using Ludwig's internal remote file loading APIs, as
             # Ray's do not handle custom credentials at the moment.
             with use_credentials(self._creds):
-                return LudwigModel.load(os.path.join(ckpt_path, "model"))
+                return LudwigModel.load(os.path.join(ckpt_path, MODEL_FILE_NAME))
         else:
             with checkpoint.as_directory() as ckpt_path:
-                return LudwigModel.load(os.path.join(ckpt_path, "model"))
+                return LudwigModel.load(os.path.join(ckpt_path, MODEL_FILE_NAME))
 
 
 @PublicAPI

diff --git a/ludwig/benchmarking/artifacts.py b/ludwig/benchmarking/artifacts.py
@@ -2,6 +2,7 @@
 from dataclasses import dataclass
 from typing import Any, Dict
 
+from ludwig.globals import MODEL_FILE_NAME
 from ludwig.types import ModelConfigDict, TrainingSetMetadataDict
 from ludwig.utils.data_utils import load_json, load_yaml
 
@@ -55,7 +56,11 @@ def build_benchmarking_result(benchmarking_config: dict, experiment_idx: int):
         description=load_json(os.path.join(experiment_run_path, "description.json")),
         test_statistics=load_json(os.path.join(experiment_run_path, "test_statistics.json")),
         training_statistics=load_json(os.path.join(experiment_run_path, "training_statistics.json")),
-        model_hyperparameters=load_json(os.path.join(experiment_run_path, "model", "model_hyperparameters.json")),
-        training_progress=load_json(os.path.join(experiment_run_path, "model", "training_progress.json")),
-        training_set_metadata=load_json(os.path.join(experiment_run_path, "model", "training_set_metadata.json")),
+        model_hyperparameters=load_json(
+            os.path.join(experiment_run_path, MODEL_FILE_NAME, "model_hyperparameters.json")
+        ),
+        training_progress=load_json(os.path.join(experiment_run_path, MODEL_FILE_NAME, "training_progress.json")),
+        training_set_metadata=load_json(
+            os.path.join(experiment_run_path, MODEL_FILE_NAME, "training_set_metadata.json")
+        ),
     )
diff --git a/ludwig/benchmarking/summary_dataclasses.py b/ludwig/benchmarking/summary_dataclasses.py
@@ -7,7 +7,7 @@
 
 import ludwig.modules.metric_modules  # noqa: F401
 from ludwig.benchmarking.utils import format_memory, format_time
-from ludwig.globals import MODEL_HYPERPARAMETERS_FILE_NAME
+from ludwig.globals import MODEL_FILE_NAME, MODEL_HYPERPARAMETERS_FILE_NAME
 from ludwig.modules.metric_registry import get_metric_classes, metric_feature_type_registry  # noqa: F401
 from ludwig.types import ModelConfigDict
 from ludwig.utils.data_utils import load_json
@@ -209,7 +209,7 @@ def build_metrics_summary(experiment_local_directory: str) -> MetricsSummary:
         e.g. local_experiment_repo/ames_housing/some_experiment/
     """
     config = load_json(
-        os.path.join(experiment_local_directory, "experiment_run", "model", MODEL_HYPERPARAMETERS_FILE_NAME)
+        os.path.join(experiment_local_directory, "experiment_run", MODEL_FILE_NAME, MODEL_HYPERPARAMETERS_FILE_NAME)
     )
     report = load_json(os.path.join(experiment_local_directory, "experiment_run", "test_statistics.json"))
     output_feature_type: str = config["output_features"][0]["type"]

diff --git a/ludwig/benchmarking/utils.py b/ludwig/benchmarking/utils.py
@@ -16,7 +16,7 @@
 from ludwig.constants import BINARY, CATEGORY
 from ludwig.datasets import model_configs_for_dataset
 from ludwig.datasets.loaders.dataset_loader import DatasetLoader
-from ludwig.globals import CONFIG_YAML
+from ludwig.globals import CONFIG_YAML, MODEL_FILE_NAME, MODEL_WEIGHTS_FILE_NAME
 from ludwig.utils.data_utils import load_yaml
 from ludwig.utils.dataset_utils import get_repeatable_train_val_test_split
 from ludwig.utils.defaults import default_random_seed
@@ -251,9 +251,9 @@ def delete_model_checkpoints(output_directory: str):
     Args:
         output_directory: output directory of the hyperopt run.
     """
-    shutil.rmtree(os.path.join(output_directory, "model", "training_checkpoints"), ignore_errors=True)
-    if os.path.isfile(os.path.join(output_directory, "model", "model_weights")):
-        os.remove(os.path.join(output_directory, "model", "model_weights"))
+    shutil.rmtree(os.path.join(output_directory, MODEL_FILE_NAME, "training_checkpoints"), ignore_errors=True)
+    if os.path.isfile(os.path.join(output_directory, MODEL_FILE_NAME, MODEL_WEIGHTS_FILE_NAME)):
+        os.remove(os.path.join(output_directory, MODEL_FILE_NAME, MODEL_WEIGHTS_FILE_NAME))
 
 
 def delete_hyperopt_outputs(output_directory: str):

diff --git a/ludwig/contribs/mlflow/__init__.py b/ludwig/contribs/mlflow/__init__.py
@@ -6,7 +6,7 @@
 from ludwig.api_annotations import DeveloperAPI, PublicAPI
 from ludwig.callbacks import Callback
 from ludwig.constants import TRAINER
-from ludwig.globals import MODEL_HYPERPARAMETERS_FILE_NAME, TRAIN_SET_METADATA_FILE_NAME
+from ludwig.globals import MODEL_FILE_NAME, MODEL_HYPERPARAMETERS_FILE_NAME, TRAIN_SET_METADATA_FILE_NAME
 from ludwig.types import TrainingSetMetadataDict
 from ludwig.utils.data_utils import chunk_dict, flatten_dict, save_json, to_json_dict
 from ludwig.utils.package_utils import LazyLoader
@@ -258,7 +258,7 @@ def _log_mlflow(log_metrics, steps, save_path, should_continue, log_artifacts: b
 def _log_artifacts(output_directory):
     for fname in os.listdir(output_directory):
         lpath = os.path.join(output_directory, fname)
-        if fname == "model":
+        if fname == MODEL_FILE_NAME:
             _log_model(lpath)
         else:
             mlflow.log_artifact(lpath)

diff --git a/ludwig/contribs/mlflow/model.py b/ludwig/contribs/mlflow/model.py
@@ -16,7 +16,7 @@
 from mlflow.utils.model_utils import _get_flavor_configuration
 
 from ludwig.api_annotations import DeveloperAPI
-from ludwig.globals import MODEL_HYPERPARAMETERS_FILE_NAME
+from ludwig.globals import MODEL_FILE_NAME, MODEL_HYPERPARAMETERS_FILE_NAME
 from ludwig.utils.data_utils import load_json
 
 FLAVOR_NAME = "ludwig"
@@ -97,7 +97,7 @@ def save_model(
     path = os.path.abspath(path)
     if os.path.exists(path):
         raise MlflowException(f"Path '{path}' already exists")
-    model_data_subpath = "model"
+    model_data_subpath = MODEL_FILE_NAME
     model_data_path = os.path.join(path, model_data_subpath)
     os.makedirs(path)
     if mlflow_model is None:
@@ -267,7 +267,7 @@ def export_model(model_path, output_path, registered_model_name=None):
         if not model_path.startswith("runs:/") or output_path is not None:
             # No run specified, so in order to register the model in mlflow, we need
             # to create a new run and upload the model as an artifact first
-            output_path = output_path or "model"
+            output_path = output_path or MODEL_FILE_NAME
             log_model(
                 _CopyModel(model_path),
                 artifact_path=output_path,
@@ -295,7 +295,7 @@ def log_saved_model(lpath):
     """
     log_model(
         _CopyModel(lpath),
-        artifact_path="model",
+        artifact_path=MODEL_FILE_NAME,
     )
 
 

diff --git a/ludwig/features/image_feature.py b/ludwig/features/image_feature.py
@@ -752,7 +752,9 @@ def _finalize_preprocessing_parameters(
         sample_num_bytes = []
         failed_entries = []
         for image_entry in column.head(sample_size):
-            if isinstance(image_entry, str):
+            if isinstance(image_entry, bytes):
+                image = read_image_from_bytes_obj(image_entry)
+            elif isinstance(image_entry, str):
                 # Tries to read image as PNG or numpy file from the path.
                 image, num_bytes = read_image_from_path(image_entry, return_num_bytes=True)
                 if num_bytes is not None: