# In [0]:
# Create and register the sentence-transformer model (can be skipped if the model already exists).

import mlflow
import mlflow.pyfunc
import pandas as pd
from sentence_transformers import SentenceTransformer
import numpy as np

class SentenceTransformerModel(mlflow.pyfunc.PythonModel):
    """MLflow pyfunc wrapper that embeds text with a sentence-transformers model.

    ``predict`` treats the first column of the input DataFrame as text and
    returns a DataFrame with one row per input row and one column per
    embedding component.
    """

    # Identifier handed to ``SentenceTransformer`` in ``load_context``.
    MODEL_NAME = 'sentence-transformers/all-MiniLM-L6-v2'

    # Width of the zero-vector placeholder used only when the loaded model
    # does not report its own embedding size (384 was the value assumed by
    # the original code for this model).
    FALLBACK_EMBEDDING_DIM = 384

    def load_context(self, context):
        """Instantiate the sentence-transformers model named by ``MODEL_NAME``.

        Args:
            context: Context object supplied by MLflow; not used here.
        """
        self.model = SentenceTransformer(self.MODEL_NAME)

    def _placeholder_dim(self):
        """Return the length to use for a zero-vector placeholder embedding."""
        dim = self.model.get_sentence_embedding_dimension()
        return self.FALLBACK_EMBEDDING_DIM if dim is None else dim

    def _encode_one(self, text):
        """Encode a single value, substituting a zero vector if encoding fails."""
        try:
            return self.model.encode(text, show_progress_bar=False)
        except Exception as e:
            # Deliberate best effort: one bad row must not fail the whole
            # request, so report it and emit a placeholder instead.
            print(f"Error encoding text: {text}. Error: {str(e)}")
            return np.zeros(self._placeholder_dim())

    def predict(self, context, model_input):
        """Embed the first column of ``model_input``.

        Args:
            context: Context object supplied by MLflow; not used here.
            model_input: DataFrame whose first column holds the text to embed.

        Returns:
            A ``pd.DataFrame`` with one row per input row. Rows that could
            not be encoded are filled with zeros.
        """
        texts = model_input.iloc[:, 0].tolist()  # Assuming the first column is the text
        if not texts:
            return pd.DataFrame([])

        try:
            # Encode the whole column in one call so the encoder can batch
            # the inputs instead of being invoked once per row.
            vectors = self.model.encode(texts, show_progress_bar=False)
        except Exception:
            # At least one value could not be encoded as part of the batch;
            # retry row by row so only the offending rows get a placeholder.
            vectors = [self._encode_one(text) for text in texts]

        # Going through Python lists keeps the output layout the same as the
        # per-row implementation (one plain-float column per component).
        return pd.DataFrame([np.asarray(vec).tolist() for vec in vectors])

# Build an example input/output pair, infer the MLflow signature from it,
# then log and register the wrapped model.
from mlflow.models.signature import infer_signature

# One-row example input and the embedding produced for that same sentence.
sample_input = pd.DataFrame({'text': ["This is a sample sentence"]})
model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
sample_output = pd.DataFrame([model.encode("This is a sample sentence").tolist()])

# Signature describing the example input and output frames.
signature = infer_signature(sample_input, sample_output)

run_name = "all-MiniLM-L6-v2-run"

# Log the pyfunc model inside a named run; registered_model_name is the
# name used for it in the model registry.
with mlflow.start_run(run_name=run_name) as run:
    mlflow.pyfunc.log_model(
        python_model=SentenceTransformerModel(),
        artifact_path="hugging_face_sentence_transformer_model",
        registered_model_name="hugging_face_sentence_transformer_model",
        signature=signature,
        input_example=sample_input,
    )