In [2]:
from mlflow.tracking import MlflowClient
from transformers import AutoModel, AutoModelForCausalLM, AutoTokenizer, pipeline
import mlflow.transformers


# Input arguments
# ---------------
# Name under which the model is registered in the MLflow model registry.
model_id = "TinyLlama_TinyLlama-1.1B-Chat-v1.0"
# Revision string; recorded later as a tag on the registered model version.
revision = "fe8a4ea1ffedaf415f4da2f062534de366a451e6"
# Selects the loader class: "LLM" -> AutoModelForCausalLM, anything else -> AutoModel.
model_type = "LLM"
# Local directory the model and tokenizer are loaded from.
model_dir = (
    "/mnt/data/test-fe8a4e/models--TinyLlama--TinyLlama-1.1B-Chat-v1.0"
    "/snapshots/fe8a4ea1ffedaf415f4da2f062534de366a451e6"
)


# Load the model and tokenizer from model_dir.
# The loader class depends on model_type: "LLM" uses AutoModelForCausalLM,
# every other value falls back to the generic AutoModel.
loader_cls = AutoModelForCausalLM if model_type == "LLM" else AutoModel
model = loader_cls.from_pretrained(model_dir)

# The tokenizer is read from the same directory regardless of model_type.
tokenizer = AutoTokenizer.from_pretrained(model_dir)
print("Loaded model and tokenizer")

# Choose the pipeline task that matches how the model was loaded:
# model_type "LLM" is loaded with AutoModelForCausalLM and can generate text,
# whereas any other model_type is loaded with the bare AutoModel, which has no
# generation head. Logging such a model as "text-generation" would break the
# pipeline MLflow builds from these components, so fall back to
# "feature-extraction" for that case.
pipeline_task = "text-generation" if model_type == "LLM" else "feature-extraction"

with mlflow.start_run() as run:
    # Log the model and tokenizer together as a transformers-flavor model under
    # the run's "model" artifact path. The input example is passed so MLflow
    # can attach an example (and infer a signature) for the logged model.
    mlflow.transformers.log_model(
        transformers_model={"model": model, "tokenizer": tokenizer},
        artifact_path="model",
        task=pipeline_task,
        input_example="Hello, how are you?",
    )
    print("Logged model")

    # TODO Check for metadata conflicts if the registered model already exists.
    # For example, the model_id must match the existing tag.

    # Register the artifact logged above under the name model_id.
    # `result` is used after the run closes to tag the created version.
    result = mlflow.register_model(
        model_uri=f"runs:/{run.info.run_id}/model",
        name=model_id,
    )
    print(f"Registered model={model_id}, version={result.version}")
    
client = MlflowClient()

# Registered-model-level tags, applied in this order: model ID, then model type.
for tag_key, tag_value in (
    ("mlflow.domino.model_id", model_id),
    ("mlflow.domino.model_type", model_type),
):
    client.set_registered_model_tag(name=model_id, key=tag_key, value=tag_value)

# Version-level tag: attach the revision string to the version that was
# just registered.
client.set_model_version_tag(
    name=model_id,
    version=result.version,
    key="mlflow.domino.model_version",
    value=revision,
)

# Update the registered model's description.
model_description = f"{model_id} from Hugging Face"
client.update_registered_model(name=model_id, description=model_description)


Device set to use cuda:0


Loaded model and tokenizer


2025/07/26 08:44:55 INFO mlflow.transformers.signature: Running model prediction to infer the model output signature with a timeout of 180 seconds. You can specify a different timeout by setting the environment variable MLFLOW_INPUT_EXAMPLE_INFERENCE_TIMEOUT.
  prediction = generate_signature_output(


Loading checkpoint shards:   0%|          | 0/10 [00:00<?, ?it/s]

Device set to use cuda:0


Logged model


Registered model 'TinyLlama_TinyLlama-1.1B-Chat-v1.0' already exists. Creating a new version of this model...
2025/07/26 08:47:52 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: TinyLlama_TinyLlama-1.1B-Chat-v1.0, version 2
Created version '2' of model 'TinyLlama_TinyLlama-1.1B-Chat-v1.0'.


Registered model=TinyLlama_TinyLlama-1.1B-Chat-v1.0, version=2
🏃 View run whimsical-colt-607 at: http://127.0.0.1:8768/#/experiments/17/runs/846fa217aca349e780d00fcfb1b2992f
🧪 View experiment at: http://127.0.0.1:8768/#/experiments/17
Tagged mlflow.domino.model_id={model_id}
Tagged mlflow.domino.model_type=LLM
Tagged mlflow.domino.model_version={revision}


NameError: name 'description' is not defined