notebook_documentation #66

Merged
merged 2 commits into from
Jan 20, 2023
Changes from 1 commit
1 change: 1 addition & 0 deletions docs/requirements-docs.txt
Original file line number Diff line number Diff line change
@@ -3,6 +3,7 @@ pandas>=1.5,<2
matplotlib>=3.6.0,<4
nbval
sphinx
sphinx-rtd-theme
Contributor

Are the two packages with similar names both required?
sphinx-rtd-theme
sphinx_rtd_theme

Collaborator Author

This part I'm not exactly sure about. You can see two different installation instructions:

  1. https://sphinx-rtd-theme.readthedocs.io/en/stable/installing.html
  2. https://pypi.org/project/sphinx-rtd-theme/

And I'm seeing slightly different output on my end, so I just wanted to try installing both.

sphinx_rtd_theme
nbsphinx
pandoc
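For what it's worth on the reviewer's question: pip normalizes project names before resolving them (PEP 503), so `sphinx-rtd-theme` and `sphinx_rtd_theme` refer to the same PyPI project and listing both is redundant. A minimal sketch of the normalization rule (the helper name is illustrative, not a pip API):

```python
import re


def normalize(name: str) -> str:
    # PEP 503: runs of '-', '_', and '.' collapse to a single hyphen,
    # and the whole name is lowercased before comparison on PyPI.
    return re.sub(r"[-_.]+", "-", name).lower()


# Both spellings resolve to the same project name.
print(normalize("sphinx-rtd-theme"))
print(normalize("sphinx_rtd_theme"))
```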
1 change: 1 addition & 0 deletions docs/source/conf.py
@@ -52,6 +52,7 @@
"sphinx.ext.viewcode",
"nbsphinx",
"sphinx.ext.todo",
"sphinx_rtd_theme",
]


402 changes: 402 additions & 0 deletions docs/source/examples/demo_ml_commons_integration.ipynb

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions docs/source/examples/index.rst
@@ -10,3 +10,4 @@ Examples
demo_notebook
online_retail_analysis
demo_transformer_model_train_save_upload_to_openSearch
demo_ml_commons_integration
63 changes: 19 additions & 44 deletions opensearch_py_ml/ml_commons/ml_commons_client.py
@@ -32,44 +32,44 @@ def upload_model(
) -> str:
"""
This method uploads model into opensearch cluster using ml-common plugin's api.

first this method creates a model id to store model metadata and then breaks the model zip file into
multiple chunks and then upload chunks into opensearch cluster.
multiple chunks and then upload chunks into opensearch cluster

Parameters
----------
:param model_path: path of the zip file of the model
:type model_path: string
:param model_config_path: filepath of the model metadata. A json file of model metadata is expected
Model metadata format example:
{
"name": "all-MiniLM-L6-v2",
"version": 1,
"model_format": "TORCH_SCRIPT",
"model_config": {
"model_type": "bert",
"embedding_dimension": 384,
"framework_type": "sentence_transformers",
},
}

refer to:
https://opensearch.org/docs/latest/ml-commons-plugin/model-serving-framework/#upload-model-to-opensearch
:type model_config_path: string
:param isVerbose: if isVerbose is true method will print more messages. default False
:type isVerbose: boolean

Returns
-------
:return: returns the model_id so that we can use this for further operation.
:rtype: string

"""

return self._model_uploader._upload_model(
model_path, model_config_path, isVerbose
)
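The docstring above says the upload first registers model metadata and then breaks the model zip into chunks before pushing them to the cluster. A minimal sketch of just the chunking step, assuming an in-memory payload; the helper name and the chunk size are illustrative, not the plugin's actual values:

```python
from typing import Iterator


def iter_chunks(data: bytes, chunk_size: int) -> Iterator[bytes]:
    """Yield fixed-size slices of a model file's bytes (size is illustrative)."""
    for offset in range(0, len(data), chunk_size):
        yield data[offset:offset + chunk_size]


payload = b"x" * 25          # stand-in for the zipped model's bytes
chunks = list(iter_chunks(payload, chunk_size=10))
# 25 bytes at 10 bytes per chunk -> chunks of 10, 10, and 5 bytes
print(len(chunks))
```

Each chunk would then be uploaded against the model id returned by the metadata step.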

def load_model(self, model_id: str) -> object:
"""
This method loads model into opensearch cluster using ml-common plugin's load model api.
This method loads model into opensearch cluster using ml-common plugin's load model api

Parameters
----------
:param model_id: unique id of the model
:type model_id: string

Returns
-------
:return: returns a json object, with task_id and status key.
:rtype: object

"""

API_URL = f"{ML_BASE_URI}/models/{model_id}/_load"
@@ -82,15 +82,10 @@ def load_model(self, model_id: str) -> object:
def get_task_info(self, task_id: str) -> object:
"""
This method return information about a task running into opensearch cluster (using ml commons api)
when we load a model.
when we load a model

Parameters
----------
:param task_id: unique id of the task
:type task_id: string

Returns
-------
:return: returns a json object, with detailed information about the task
:rtype: object
"""
@@ -104,15 +99,10 @@

def get_model_info(self, model_id: str) -> object:
"""
This method return information about a model uploaded into opensearch cluster (using ml commons api).
This method return information about a model uploaded into opensearch cluster (using ml commons api)

Parameters
----------
:param model_id: unique id of the model
:type model_id: string

Returns
-------
:return: returns a json object, with detailed information about the model
:rtype: object
"""
@@ -128,15 +118,10 @@ def generate_embedding(self, model_id: str, sentences: List[str]) -> object:
"""
This method return embedding for given sentences (using ml commons _predict api)

Parameters
----------
:param model_id: unique id of the nlp model
:type model_id: string
:param sentences: List of sentences
:type sentences: list of string

Returns
-------
:return: returns a json object `inference_results` which is a list of embedding results of given sentences
every item has 4 properties: name, data_type, shape, data (embedding value)
:rtype: object
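Given the response shape this docstring describes (a list of results where every item carries `name`, `data_type`, `shape`, and `data`), the embeddings can be unpacked as below. The sample response is fabricated for illustration and much smaller than a real 384- or 768-dimensional embedding:

```python
# Hypothetical response shaped like the docstring describes.
response = {
    "inference_results": [
        {
            "name": "sentence_embedding",
            "data_type": "FLOAT32",
            "shape": [3],
            "data": [0.12, -0.03, 0.88],
        }
    ]
}

# Pull out just the embedding vectors, one per input sentence.
embeddings = [item["data"] for item in response["inference_results"]]
print(embeddings[0])
```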
@@ -156,15 +141,10 @@ def unload_model(self, model_id: str, node_ids: List[str] = []) -> object:
"""
This method unloads a model from all the nodes or from the given list of nodes (using ml commons _unload api)

Parameters
----------
:param model_id: unique id of the nlp model
:type model_id: string
:param node_ids: List of nodes
:type node_ids: list of string

Returns
-------
:return: returns a json object with defining from which nodes the model has unloaded.
:rtype: object
"""
@@ -187,15 +167,10 @@ def unload_model(self, model_id: str, node_ids: List[str] = []) -> object:
def delete_model(self, model_id: str) -> object:

"""
This method deletes a model from opensearch cluster (using ml commons api).
This method deletes a model from opensearch cluster (using ml commons api)

Parameters
----------
:param model_id: unique id of the model
:type model_id: string

Returns
-------
:return: returns a json object, with detailed information about the deleted model
:rtype: object
"""
5 changes: 0 additions & 5 deletions opensearch_py_ml/ml_commons/model_uploader.py
@@ -49,8 +49,6 @@ def _upload_model(
first this method creates a model id to store model metadata and then breaks the model zip file into
multiple chunks and then upload chunks into cluster.

Parameters
----------
:param model_path: path of the zip file of the model
:type model_path: string
:param model_meta_path:
@@ -72,9 +70,6 @@
:type model_meta_path: string
:param isVerbose: if isVerbose is true method will print more messages
:type isVerbose: bool

Returns
-------
:return: returns model id which is created by the model metadata
:rtype: string
"""
51 changes: 5 additions & 46 deletions opensearch_py_ml/ml_models/sentencetransformermodel.py
@@ -45,10 +45,8 @@ def __init__(
"""
Description: Initiate a sentence transformer model class object. The model id will be used to download
pretrained model from the hugging-face and served as the default name for model files, and the folder_path
will be the default location to store files generated in the following functions.
will be the default location to store files generated in the following functions

Parameters
----------
:param model_id: Optional, the huggingface mode id to download sentence transformer model,
default model id: 'sentence-transformers/msmarco-distilbert-base-tas-b'
:type model_id: string
@@ -61,9 +59,6 @@
training. But if the training process get interrupted in between, users can choose to
overwrite = True to restart the process
:type overwrite: bool

Returns
-------
:return: no return value expected


Please change it to "Description: Initiate a sentence transformer model object. The model id will be used to download pre-trained model from Huggingface and serve as the default name for model files, and the folder_path will be the default location to store files generated by the following functions"


--> "overwrite: Optional, choose to overwrite the folder at folder path. Default as false. When training
different sentence transformer models, it's recommended to give designated folder path every time.
Users can choose to overwrite = True to overwrite previous runs"

:rtype: None
"""
@@ -250,8 +245,6 @@ def read_queries(self, read_path: str, overwrite: bool = False) -> pd.DataFrame:
Read the queries generated from the Synthetic Query Generator (SQG) model, unzip files to current directory
within synthetic_queries/ folder, output as a dataframe

Parameters
----------
:param read_path:
required, path to the zipped file that contains generated queries, if None, raise exception
:type read_path: string
Expand All @@ -260,9 +253,6 @@ def read_queries(self, read_path: str, overwrite: bool = False) -> pd.DataFrame:
Default to set overwrite as false and if the folder is not empty, raise exception to recommend users
to either clean up folder or enable overwriting is True
:type overwrite: bool

Returns
-------
:return: The dataframe of queries.
:rtype: panda dataframe
"""
@@ -358,17 +348,12 @@ def load_sentence_transformer_example(
"""
Create input data for training the model

Parameters
----------
:param query_df:
required for loading sentence transformer examples


--> "required for loading training data"

:type query_df: pd.DataFrame
:param use_accelerate:
Optional, use accelerate to fine tune model. Default as false to not use accelerator
:type use_accelerate: bool

Returns
-------
:return: the list of train examples.
:rtype: list
"""
Expand Down Expand Up @@ -408,8 +393,6 @@ def train_model(
Description:
Takes in training data and a sentence transformer url to train a custom semantic search model

Parameters
----------
:param train_examples:
required, input for the sentence transformer model training
:type train_examples: list of strings
Expand All @@ -435,9 +418,6 @@ def train_model(
:param verbose:
optional, use plotting to plot the training progress and printing more logs. Default as false
:type verbose: bool

Returns
-------
:return: the torch script format trained model.
:rtype: .pt file
"""
@@ -669,8 +649,6 @@ def zip_model(
Description:
zip the model file and its tokenizer.json file to prepare to upload to the Open Search cluster

Parameters
----------
:param model_path:
Optional, path to find the model file, if None, default as concatenate model_id and
'.pt' file in current path
Expand All @@ -681,9 +659,6 @@ def zip_model(
:param zip_file_name: str =None
Optional, file name for zip file. if None, default as concatenate model_id and '.zip'
:type zip_file_name: string

Returns
-------
:return: no return value expected
:rtype: None
"""
@@ -742,8 +717,6 @@ def save_as_pt(
download sentence transformer model directly from huggingface, convert model to torch script format,
zip the model file and its tokenizer.json file to prepare to upload to the Open Search cluster

Parameters
----------
:param sentences:
Required, for example sentences = ['today is sunny']
:type sentences: List of string [str]
Expand All @@ -764,9 +737,6 @@ def save_as_pt(
Optional, file name for zip file. e.g, "sample_model.zip". If None, default takes the model_id


What's torch.tensor(1) doing near line 744 below?

and add the extension with ".zip"
:type zip_file_name: string

Returns
-------
:return: the torch script format model
:rtype: .pt model
"""
@@ -828,10 +798,8 @@ def set_up_accelerate_config(
) -> None:
"""
get default config setting based on the number of GPU on the machine
if users require other configs, users can run !acclerate config for more options.
if users require other configs, users can run !acclerate config for more options

Parameters
----------
:param compute_environment:
optional, compute environment type to run model, if None, default using 'LOCAL_MACHINE'
:type compute_environment: string
Expand All @@ -845,9 +813,6 @@ def set_up_accelerate_config(
:param verbose:
optional, use printing more logs. Default as false
:type verbose: bool

Returns
-------
:return: no return value expected
:rtype: None
"""
@@ -929,20 +894,17 @@ def make_model_config_json(
) -> None:
"""
parse from config.json file of pre-trained hugging-face model to generate a ml-commons_model_config.json file. If all required
fields are given by users, use the given parameters and will skip reading the config.json,
fields are given by users, use the given parameters and will skip reading the config.json

Parameters
----------
:param model_name:
Optional, The name of the model. If None, default to parse from model id, for example,
'msmarco-distilbert-base-tas-b'
:type model_name: string
:param version_number:
Optional, The version number of the model. default is 1
:type version_number: string
:param embedding_dimension:
Optional, the embedding_dimension of the model. If None, parse embedding_dimension from the config file of
pre-trained hugging-face model, if not found, default to be 768
:param embedding_dimension: Optional, the embedding_dimension of the model. If None, parse embedding_dimension
from the config file of pre-trained hugging-face model, if not found, default to be 768
:type embedding_dimension: int
:param all_config:
Optional, the all_config of the model. If None, parse all contents from the config file of pre-trained
Expand All @@ -955,9 +917,6 @@ def make_model_config_json(
:param verbose:
optional, use printing more logs. Default as false
:type verbose: bool

Returns
-------
:return: no return value expected
:rtype: None
"""