-
Notifications
You must be signed in to change notification settings - Fork 61
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
notebook_documentation #66
Changes from 1 commit
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -3,6 +3,7 @@ pandas>=1.5,<2 | |
matplotlib>=3.6.0,<4 | ||
nbval | ||
sphinx | ||
sphinx-rtd-theme | ||
sphinx_rtd_theme | ||
nbsphinx | ||
pandoc | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -52,6 +52,7 @@ | |
"sphinx.ext.viewcode", | ||
"nbsphinx", | ||
"sphinx.ext.todo", | ||
"sphinx_rtd_theme", | ||
] | ||
|
||
|
||
|
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -45,10 +45,8 @@ def __init__( | |
""" | ||
Description: Initiate a sentence transformer model class object. The model id will be used to download | ||
pretrained model from the hugging-face and served as the default name for model files, and the folder_path | ||
will be the default location to store files generated in the following functions. | ||
will be the default location to store files generated in the following functions | ||
|
||
Parameters | ||
---------- | ||
:param model_id: Optional, the huggingface model id to download sentence transformer model, | |
default model id: 'sentence-transformers/msmarco-distilbert-base-tas-b' | ||
:type model_id: string | ||
|
@@ -61,9 +59,6 @@ def __init__( | |
training. But if the training process get interrupted in between, users can choose to | ||
overwrite = True to restart the process | ||
:type overwrite: bool | ||
|
||
Returns | ||
------- | ||
:return: no return value expected | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Please change it to "Description: Initiate a sentence transformer model object. The model id will be used to download pre-trained model from Huggingface and serve as the default name for model files, and the folder_path will be the default location to store files generated by the following functions" There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. --> "overwrite: Optional, choose to overwrite the folder at folder path. Default as false. When training |
||
:rtype: None | ||
""" | ||
|
@@ -250,8 +245,6 @@ def read_queries(self, read_path: str, overwrite: bool = False) -> pd.DataFrame: | |
Read the queries generated from the Synthetic Query Generator (SQG) model, unzip files to current directory | ||
within synthetic_queries/ folder, output as a dataframe | ||
|
||
Parameters | ||
---------- | ||
:param read_path: | ||
required, path to the zipped file that contains generated queries, if None, raise exception | ||
:type read_path: string | ||
|
@@ -260,9 +253,6 @@ def read_queries(self, read_path: str, overwrite: bool = False) -> pd.DataFrame: | |
Default to set overwrite as false and if the folder is not empty, raise exception to recommend users | ||
to either clean up folder or enable overwriting is True | ||
:type overwrite: bool | ||
|
||
Returns | ||
------- | ||
:return: The dataframe of queries. | ||
:rtype: pandas DataFrame | |
""" | ||
|
@@ -358,17 +348,12 @@ def load_sentence_transformer_example( | |
""" | ||
Create input data for training the model | ||
|
||
Parameters | ||
---------- | ||
:param query_df: | ||
required for loading sentence transformer examples | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. --> "required for loading training data" |
||
:type query_df: pd.DataFrame | ||
:param use_accelerate: | ||
Optional, use accelerate to fine tune model. Default as false to not use accelerator | ||
:type use_accelerate: bool | ||
|
||
Returns | ||
------- | ||
:return: the list of train examples. | ||
:rtype: list | ||
""" | ||
|
@@ -408,8 +393,6 @@ def train_model( | |
Description: | ||
Takes in training data and a sentence transformer url to train a custom semantic search model | ||
|
||
Parameters | ||
---------- | ||
:param train_examples: | ||
required, input for the sentence transformer model training | ||
:type train_examples: list of strings | ||
|
@@ -435,9 +418,6 @@ def train_model( | |
:param verbose: | ||
optional, use plotting to plot the training progress and printing more logs. Default as false | ||
:type verbose: bool | ||
|
||
Returns | ||
------- | ||
:return: the torch script format trained model. | ||
:rtype: .pt file | ||
""" | ||
|
@@ -669,8 +649,6 @@ def zip_model( | |
Description: | ||
zip the model file and its tokenizer.json file to prepare to upload to the Open Search cluster | ||
|
||
Parameters | ||
---------- | ||
:param model_path: | ||
Optional, path to find the model file, if None, default as concatenate model_id and | ||
'.pt' file in current path | ||
|
@@ -681,9 +659,6 @@ def zip_model( | |
:param zip_file_name: str =None | ||
Optional, file name for zip file. if None, default as concatenate model_id and '.zip' | ||
:type zip_file_name: string | ||
|
||
Returns | ||
------- | ||
:return: no return value expected | ||
:rtype: None | ||
""" | ||
|
@@ -742,8 +717,6 @@ def save_as_pt( | |
download sentence transformer model directly from huggingface, convert model to torch script format, | ||
zip the model file and its tokenizer.json file to prepare to upload to the Open Search cluster | ||
|
||
Parameters | ||
---------- | ||
:param sentences: | ||
Required, for example sentences = ['today is sunny'] | ||
:type sentences: List of string [str] | ||
|
@@ -764,9 +737,6 @@ def save_as_pt( | |
Optional, file name for zip file. e.g, "sample_model.zip". If None, default takes the model_id | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. What's torch.tensor(1) doing near line 744 below? |
||
and add the extension with ".zip" | ||
:type zip_file_name: string | ||
|
||
Returns | ||
------- | ||
:return: the torch script format model | ||
:rtype: .pt model | ||
""" | ||
|
@@ -828,10 +798,8 @@ def set_up_accelerate_config( | |
) -> None: | ||
""" | ||
get default config setting based on the number of GPU on the machine | ||
if users require other configs, users can run !accelerate config for more options. | |
if users require other configs, users can run !accelerate config for more options | |
|
||
Parameters | ||
---------- | ||
:param compute_environment: | ||
optional, compute environment type to run model, if None, default using 'LOCAL_MACHINE' | ||
:type compute_environment: string | ||
|
@@ -845,9 +813,6 @@ def set_up_accelerate_config( | |
:param verbose: | ||
optional, use printing more logs. Default as false | ||
:type verbose: bool | ||
|
||
Returns | ||
------- | ||
:return: no return value expected | ||
:rtype: None | ||
""" | ||
|
@@ -929,20 +894,17 @@ def make_model_config_json( | |
) -> None: | ||
""" | ||
parse from config.json file of pre-trained hugging-face model to generate a ml-commons_model_config.json file. If all required | ||
fields are given by users, use the given parameters and will skip reading the config.json, | ||
fields are given by users, use the given parameters and will skip reading the config.json | ||
|
||
Parameters | ||
---------- | ||
:param model_name: | ||
Optional, The name of the model. If None, default to parse from model id, for example, | ||
'msmarco-distilbert-base-tas-b' | ||
:type model_name: string | ||
:param version_number: | ||
Optional, The version number of the model. default is 1 | ||
:type version_number: string | ||
:param embedding_dimension: | ||
Optional, the embedding_dimension of the model. If None, parse embedding_dimension from the config file of | ||
pre-trained hugging-face model, if not found, default to be 768 | ||
:param embedding_dimension: Optional, the embedding_dimension of the model. If None, parse embedding_dimension | ||
from the config file of pre-trained hugging-face model, if not found, default to be 768 | ||
:type embedding_dimension: int | ||
:param all_config: | ||
Optional, the all_config of the model. If None, parse all contents from the config file of pre-trained | ||
|
@@ -955,9 +917,6 @@ def make_model_config_json( | |
:param verbose: | ||
optional, use printing more logs. Default as false | ||
:type verbose: bool | ||
|
||
Returns | ||
------- | ||
:return: no return value expected | ||
:rtype: None | ||
""" | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Are the two packages with similar names both required?
sphinx-rtd-theme
sphinx_rtd_theme
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'm not exactly sure about this part. You can see two different installation instructions:
And I'm seeing slightly different output on my end, so I just wanted to try installing both.