In [1]:
import mlflow
from datasets import load_dataset
from mlflow import MlflowClient
from mlflow.exceptions import RestException
from peft import LoraConfig, get_peft_model
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    DataCollatorForLanguageModeling,
    TrainingArguments,
    Trainer,
    pipeline,
)

BASE_MODEL_ID = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
BASE_MODEL_NAME = "TinyLlama-1.1B-Chat-v1.0"
MERGED_MODEL_NAME = BASE_MODEL_NAME + "-finetuned"
ADAPTER_OUTPUT_DIR = "./adapter_weights"
LOG_MERGED_MODEL = True

client = MlflowClient()

mlflow.set_experiment("TinyLlama-fine-tuning")


<Experiment: artifact_location='mlflow-artifacts:/mlflow', creation_time=1752871141895, experiment_id='12', last_update_time=1752871141895, lifecycle_stage='active', name='TinyLlama-fine-tuning', tags={'mlflow.domino.dataset_info': '68788688a685c05b1700ea8c-68788688a685c05b1700ea8b',
 'mlflow.domino.environment_id': '687895e6a685c05b1700eab5',
 'mlflow.domino.environment_revision_id': '6879b872da87d040dba4627b',
 'mlflow.domino.hardware_tier': 'gpu-small-k8s',
 'mlflow.domino.project_id': '68788685a685c05b1700ea86',
 'mlflow.domino.project_name': 'LLM',
 'mlflow.domino.run_id': '687a7ff3b2eee2648e0ace71',
 'mlflow.domino.run_number': '17',
 'mlflow.domino.user': 'integration-test',
 'mlflow.domino.user_id': '68788292fc17b3228539ea3e',
 'mlflow.source.type': 'NOTEBOOK',
 'mlflow.user': 'integration-test'}>

In [3]:
def is_model_registered(model_name: str) -> bool:
    try:
        client.get_registered_model(model_name)
        return True
    except RestException:
        return False


def tokenize(example):
    tokens = tokenizer(example["quote"], truncation=True, padding="max_length", max_length=128)
    tokens["labels"] = tokens["input_ids"].copy()
    return tokens


In [4]:
# Step 1: Retrieve the base model and tokenizer

base_model = AutoModelForCausalLM.from_pretrained(BASE_MODEL_ID)
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL_ID)
print("Downloaded base model and tokenizer")

if not is_model_registered(BASE_MODEL_NAME):
    print(f"Registering {BASE_MODEL_ID}...")

    with mlflow.start_run(run_name="log-base-model") as base_run:
        model_info = mlflow.transformers.log_model(
            transformers_model=pipeline("text-generation", model=base_model, tokenizer=tokenizer),
            tokenizer=tokenizer,
            artifact_path="base_model",
            input_example="What's the capital of France?"
        )
        mlflow.register_model(model_info.model_uri, BASE_MODEL_NAME)
        print(f"Registered base model: {BASE_MODEL_NAME}")

Downloaded base model and tokenizer
Registering TinyLlama/TinyLlama-1.1B-Chat-v1.0...


Device set to use cuda:0
2025/07/18 20:41:04 INFO mlflow.transformers.signature: Running model prediction to infer the model output signature with a timeout of 180 seconds. You can specify a different timeout by setting the environment variable MLFLOW_INPUT_EXAMPLE_INFERENCE_TIMEOUT.
  prediction = generate_signature_output(


Loading checkpoint shards:   0%|          | 0/10 [00:00<?, ?it/s]

Device set to use cuda:0
Successfully registered model 'TinyLlama-1.1B-Chat-v1.0'.
2025/07/18 20:44:21 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: TinyLlama-1.1B-Chat-v1.0, version 1
Created version '1' of model 'TinyLlama-1.1B-Chat-v1.0'.


Registered base model: TinyLlama-1.1B-Chat-v1.0
🏃 View run log-base-model at: http://127.0.0.1:8768/#/experiments/12/runs/21e1e80f342a4fc5988c6da29b36971f
🧪 View experiment at: http://127.0.0.1:8768/#/experiments/12


In [5]:
# Step 2: Apply LoRA adapter

lora_config = LoraConfig(
    r=8,
    lora_alpha=16,
    lora_dropout=0.1,
    bias="none",
    task_type="CAUSAL_LM",
    target_modules=["q_proj", "v_proj"]
)

model = get_peft_model(base_model, lora_config)

print("LoRA adapter applied")

LoRA adapter applied


In [6]:
# Step 3: Prepare dataset and trainer

dataset = load_dataset("Abirate/english_quotes")['train'].train_test_split(test_size=0.1)
tokenized = dataset.map(tokenize)
data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)

training_args = TrainingArguments(
    output_dir="./results",
    per_device_train_batch_size=4,
    per_device_eval_batch_size=4,
    num_train_epochs=1,
    learning_rate=5e-5,
    eval_strategy="epoch",
    save_strategy="epoch",
    logging_strategy="epoch",
    report_to="none",
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized["train"],
    eval_dataset=tokenized["test"],
    tokenizer=tokenizer,
    data_collator=data_collator,
)

print("Prepared dataset and trainer")

Map:   0%|          | 0/2257 [00:00<?, ? examples/s]

Map:   0%|          | 0/251 [00:00<?, ? examples/s]

  trainer = Trainer(
No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Prepared dataset and trainer


In [7]:
# Step 4: Fine-tuning

with mlflow.start_run(run_name="adapter-finetune") as run:
    mlflow.log_params({
        "registered_base_model": BASE_MODEL_ID,
        "adapter_type": "LoRA",
        "learning_rate": training_args.learning_rate,
        "epochs": training_args.num_train_epochs
    })

    trainer.train()

    # Log adapter weights only
    model.save_pretrained(ADAPTER_OUTPUT_DIR)
    mlflow.log_artifacts(ADAPTER_OUTPUT_DIR, artifact_path="adapters")
    print("Logged adapter weights")

    # Optionally merge and register final model
    if LOG_MERGED_MODEL:
        merged_model = model.merge_and_unload()
        merged_info = mlflow.transformers.log_model(
            transformers_model=pipeline("text-generation", model=merged_model, tokenizer=tokenizer),
            tokenizer=tokenizer,
            artifact_path="merged_model",
            input_example="What's the capital of France?"
        )
        mlflow.register_model(merged_info.model_uri, MERGED_MODEL_NAME)
        print(f"Registered merged model: {MERGED_MODEL_NAME}")


Epoch,Training Loss,Validation Loss
1,2.2553,2.350652


Device set to use cuda:0


Logged adapter weights


2025/07/18 20:50:40 INFO mlflow.transformers.signature: Running model prediction to infer the model output signature with a timeout of 180 seconds. You can specify a different timeout by setting the environment variable MLFLOW_INPUT_EXAMPLE_INFERENCE_TIMEOUT.
  prediction = generate_signature_output(


Loading checkpoint shards:   0%|          | 0/10 [00:00<?, ?it/s]

Device set to use cuda:0
Successfully registered model 'TinyLlama-1.1B-Chat-v1.0-finetuned'.
2025/07/18 20:53:10 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: TinyLlama-1.1B-Chat-v1.0-finetuned, version 1
Created version '1' of model 'TinyLlama-1.1B-Chat-v1.0-finetuned'.


Registered merged model: TinyLlama-1.1B-Chat-v1.0-finetuned
🏃 View run adapter-finetune at: http://127.0.0.1:8768/#/experiments/12/runs/f44274eed8fb41119dc390dfb0c0bfd1
🧪 View experiment at: http://127.0.0.1:8768/#/experiments/12


In [2]:
# Clean up - delete registered models

for model_name in [BASE_MODEL_NAME, MERGED_MODEL_NAME]:
    try:
        client.delete_registered_model(name=model_name)
        print(f"Deleted registered model: {model_name}")
    except RestException as e:
        print(e)


RESOURCE_DOES_NOT_EXIST: Registered Model with name=TinyLlama-1.1B-Chat-v1.0 not found
RESOURCE_DOES_NOT_EXIST: Registered Model with name=TinyLlama-1.1B-Chat-v1.0-finetuned not found


In [10]:
# Clean up - delete experiment by id

client.delete_experiment(11)

In [11]:
# Clean up - delete experiment run by id

client.delete_run("ee2880594f034b988c41db2dfbbd8b44")