## MLflow and Transformers

In [1]:
import warnings
# Suppress every UserWarning for the rest of the session so that the cell
# outputs in this notebook stay readable.
warnings.filterwarnings(action="ignore", category=UserWarning)

In [2]:
import transformers
import mlflow

# Hugging Face pipeline task used to build the generation pipeline.
task = "text2text-generation"

# Build the pipeline from the "declare-lab/flan-alpaca-large" checkpoint.
generation_pipeline = transformers.pipeline(
    task=task,
    model="declare-lab/flan-alpaca-large",
)

# Placeholder prompts that serve as the example model input.
input_example = ["prompt 1", "prompt 2", "prompt 3"]

# Inference parameters and the values used as their defaults.
parameters = {"max_length": 512, "do_sample": True, "temperature": 0.4}




## Introduction to Model Signatures in MLflow

In [3]:
# Infer the model signature (input schema, output schema, and inference
# parameters with their defaults) from example data.
signature = mlflow.models.infer_signature(
    model_input=input_example,
    # Example output produced from the pipeline for the example prompts.
    model_output=mlflow.transformers.generate_signature_output(
        generation_pipeline, input_example
    ),
    params=parameters,
)

# Leave the signature as the last expression so the notebook displays it.
signature

inputs: 
  [string (required)]
outputs: 
  [string (required)]
params: 
  ['max_length': integer (default: 512), 'do_sample': boolean (default: True), 'temperature': double (default: 0.4)]

## Creating an experiment

In [4]:
# Make this experiment the active one; the returned Experiment object is
# displayed as the cell result.
mlflow.set_experiment(experiment_name="Transformers Introduction_1")

<Experiment: artifact_location='file:///e:/MLFlow/mlruns/127244647858896591', creation_time=1731907276037, experiment_id='127244647858896591', last_update_time=1731907276037, lifecycle_stage='active', name='Transformers Introduction_1', tags={}>

In [5]:
# Log the pipeline, its signature and the example input inside a new MLflow run.
with mlflow.start_run():
    model_info = mlflow.transformers.log_model(
        transformers_model=generation_pipeline,
        artifact_path="text_generator",
        signature=signature,
        input_example=input_example,
        # The pipeline takes plain strings rather than a Pandas DataFrame, so
        # the internal input-example type conversion is skipped.
        example_no_conversion=True,
        # 'Reference-only' mode: the pretrained weights are not written to
        # disk; a reference to the HuggingFace Hub repository is logged
        # instead. Remove this argument to store the weights with the model.
        save_pretrained=False,
    )

  _save_example(mlflow_model, input_example, str(path), example_no_conversion)
2024/11/18 10:52:50 INFO mlflow.transformers: Skipping saving pretrained model weights to disk as the save_pretrained argumentis set to False. The reference to the HuggingFace Hub repository declare-lab/flan-alpaca-large will be logged instead.


Downloading artifacts:   0%|          | 0/9 [00:00<?, ?it/s]



In [6]:
# Reload the logged pipeline through MLflow's generic python-function
# (pyfunc) interface, using the URI recorded when the model was logged.
sentence_generator = mlflow.pyfunc.load_model(model_uri=model_info.model_uri)

## Formatting Predictions for Tutorial Readability

In [7]:
def format_predictions(predictions):
    """
    Format model outputs for readability in a Jupyter Notebook.

    Each prediction is split into sentences on the literal separator ". " and
    re-joined with one sentence per line. A trailing period is added only to
    sentences that do not already end with terminal punctuation (".", "!" or
    "?"), optionally followed by closing quotes or brackets.

    Note: the split is deliberately simple and is NOT abbreviation-aware, so
    text such as "Dr. Smith" is broken after "Dr.".

    Args:
        predictions: Iterable of prediction strings.

    Returns:
        A list with one formatted string per prediction, in the same order.
    """
    terminal_punctuation = (".", "!", "?")
    # Characters that may legitimately follow the terminal punctuation,
    # e.g. the closing quote in: He said "stop."
    closing_marks = "\"')]}\u201d\u2019"

    formatted_predictions = []

    for prediction in predictions:
        sentences = []
        for fragment in prediction.split(". "):
            # Strip first so that both the emptiness check and the punctuation
            # check see the text that is actually emitted.
            sentence = fragment.strip()
            if not sentence:
                continue

            # Look past closing quotes/brackets when checking how the
            # sentence ends, so '..."catch it."' is not turned into '...".'.
            if not sentence.rstrip(closing_marks).endswith(terminal_punctuation):
                sentence += "."

            sentences.append(sentence)

        # One sentence per line.
        formatted_predictions.append("\n".join(sentences))

    return formatted_predictions

## Output Formatting

In [8]:
# Prompts used to check that the reloaded pyfunc model produces sensible text.
prompts = [
    "I can't decide whether to go hiking or kayaking this weekend. Can you help me decide?",
    "Please tell me a joke about hiking.",
]

# Run inference, passing a temperature of 0.7 for this call.
predictions = sentence_generator.predict(data=prompts, params={"temperature": 0.7})

# Reformat every prediction for notebook readability.
formatted_predictions = format_predictions(predictions)

# Print the responses, numbered from 1.
for i, formatted_text in enumerate(formatted_predictions):
    print(f"Response to prompt {i+1}:\n{formatted_text}\n")




Response to prompt 1:
Hiking is a great way to get outdoors and enjoy nature.
You can go down a trail, find a spot to sit and enjoy the scenery, or take a kayak and get in some fresh air.
Both activities offer different perspectives and can be enjoyed together.

Response to prompt 2:
What did the hiker say to the bird? "I'm going to have to catch it.".

