In [89]:
from sentence_transformers import SentenceTransformer

import mlflow
from mlflow.tracking.client import MlflowClient
from mlflow.pyfunc import load_model

# Constants

In [90]:
# Endpoint of the MLflow tracking server that the client is pointed at.
MLFLOW_TRACKING_URI = "http://127.0.0.1:5000"

# Name of the MLflow experiment under which the fine-tuning run is recorded.
MLFLOW_EXPERIMENT_NAME = "finetuned_sent_bert_v1"

# Pretrained sentence-transformers checkpoint; also reused as the registered model name.
MODEL_NAME = "paraphrase-mpnet-base-v2"

# Set MLflow-related values before training

In [91]:
# Point the MLflow client at the tracking server endpoint.
mlflow.set_tracking_uri(MLFLOW_TRACKING_URI)

# set_experiment() activates the experiment with this name and creates it first
# when it does not exist yet, so no separate lookup/create step is needed.
mlflow_experiment = mlflow.set_experiment(MLFLOW_EXPERIMENT_NAME)

2023/07/12 09:32:50 INFO mlflow.tracking.fluent: Experiment with name 'finetuned_sent_bert_v1' does not exist. Creating a new experiment.


# Track the fine-tuning process for the sentence-transformer model under the experiment

[MLFlow Automatic Logging](https://mlflow.org/docs/latest/tracking.html#automatic-logging:~:text=for%20each%20run-,Automatic%20Logging,-Automatic%20logging%20allows)

In [92]:
# Placeholder for the fine-tuned model; nothing in this cell assigns it yet.
fine_tuned_model = None
mlflow_run_id = None

# Load the pretrained checkpoint that is logged and registered below.
model = SentenceTransformer(MODEL_NAME)

# Autologging records metrics, parameters and datasets without explicit log
# statements (refer to the link mentioned above for the supported libraries).
# log_models=False because the model is logged and registered explicitly below.
mlflow.autolog(
    log_input_examples=False,
    log_model_signatures=True,
    log_models=False,
    log_datasets=True,
    disable=False,
    exclusive=False,
    disable_for_unsupported_versions=False,
    silent=False,
)

with mlflow.start_run(experiment_id=mlflow_experiment.experiment_id) as run:
    # Capture the run id first so it is available even if a later step fails.
    mlflow_run_id = run.info.run_id
    print("Fine-tuning started")

    # TODO: plug in the actual fine-tuning, e.g.
    #   X_train, Y_train = load_train_dataset()
    #   perform_fine_tuning(model, X_train)

    # Placeholder values. Metric values are numeric, so pass numbers rather
    # than strings.
    mlflow.log_metric("RMSE", 0.34)
    mlflow.log_param("lr", 0.1)

    # Log the model as a run artifact and register it in the MLflow Model Registry.
    mlflow.sentence_transformers.log_model(
        model=model,
        artifact_path=f"models/{MODEL_NAME}",
        registered_model_name=MODEL_NAME,
    )
    print("Fine-tuning completed!")


2023/07/12 09:33:07 INFO mlflow.tracking.fluent: Autologging successfully enabled for sklearn.
2023/07/12 09:33:07 INFO mlflow.tracking.fluent: Autologging successfully enabled for transformers.


Fine-tuning started
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Successfully registered model 'paraphrase-mpnet-base-v2'.
2023/07/12 09:33:19 INFO mlflow.tracking._model_registry.client: Waiting up to 300 seconds for model version to finish creation. Model name: paraphrase-mpnet-base-v2, version 1


Fine-tuning completed!


Created version '1' of model 'paraphrase-mpnet-base-v2'.


# Fetch the models from the Model Registry

In [98]:
sentences = ["This is an example sentence", "Each sentence is converted"]

def fetch_registered_model_by_name(name, input_sentences=None):
    """Load registered models whose name starts with ``name`` and print embeddings.

    For every registered model matching the prefix, each entry of its
    ``latest_versions`` is loaded through ``mlflow.sentence_transformers`` and
    used to encode the input sentences; the model info and the resulting
    embeddings are printed.

    Args:
        name: Prefix of the registered model name (matched with ``LIKE '<name>%'``).
        input_sentences: Sentences to encode. Defaults to the module-level
            ``sentences`` list when omitted.

    Returns:
        None. Results are printed.
    """
    if input_sentences is None:
        input_sentences = sentences

    name_filter = f"name LIKE '{name}%'"
    for reg_model_info in mlflow.search_registered_models(filter_string=name_filter):
        print(reg_model_info)
        for model_version in reg_model_info.latest_versions:
            model_uri = f"models:/{reg_model_info.name}/{model_version.version}"
            registered_model = mlflow.sentence_transformers.load_model(model_uri)
            # Encode with the model that was just loaded; the previous code
            # referenced an undefined name and failed on a fresh kernel.
            embeddings = registered_model.encode(input_sentences)
            print(f"Embeddings generated via {reg_model_info.name}:v_{model_version.version}\n{embeddings}")

fetch_registered_model_by_name("para")

<RegisteredModel: aliases={}, creation_timestamp=1689134599584, description='', last_updated_timestamp=1689134599605, latest_versions=[<ModelVersion: aliases=[], creation_timestamp=1689134599605, current_stage='None', description='', last_updated_timestamp=1689134599605, name='paraphrase-mpnet-base-v2', run_id='411153fefc7848ecb07b9738f898dd54', run_link='', source='mlflow-artifacts:/965049582430110854/411153fefc7848ecb07b9738f898dd54/artifacts/models/paraphrase-mpnet-base-v2', status='READY', status_message='', tags={}, user_id='', version='1'>], name='paraphrase-mpnet-base-v2', tags={}>


2023/07/12 09:35:14 INFO mlflow.sentence_transformers: 'models:/paraphrase-mpnet-base-v2/1' resolved as 'mlflow-artifacts:/965049582430110854/411153fefc7848ecb07b9738f898dd54/artifacts/models/paraphrase-mpnet-base-v2'


Embeddings generated via paraphrase-mpnet-base-v2:v_1
[[ 0.14321968 -0.23929456 -0.0517806  ...  0.06134125  0.08304735
  -0.16086945]
 [ 0.08879659 -0.11505436 -0.02012645 ...  0.02235661 -0.1832357
  -0.13690618]]


In [100]:
# Move version 1 of the registered model to the "Staging" stage; the returned
# ModelVersion is the cell's displayed result.
MlflowClient().transition_model_version_stage(
    name=MODEL_NAME,
    version=1,
    stage="Staging",
)

<ModelVersion: aliases=[], creation_timestamp=1689134599605, current_stage='Staging', description='', last_updated_timestamp=1689134749272, name='paraphrase-mpnet-base-v2', run_id='411153fefc7848ecb07b9738f898dd54', run_link='', source='mlflow-artifacts:/965049582430110854/411153fefc7848ecb07b9738f898dd54/artifacts/models/paraphrase-mpnet-base-v2', status='READY', status_message='', tags={}, user_id='', version='1'>

In [101]:
# Delete version 1 of the registered model from the registry.
MlflowClient().delete_model_version(name=MODEL_NAME, version=1)