In [1]:
import mlflow
from datasets import load_dataset
from mlflow import MlflowClient
from mlflow.exceptions import RestException
from peft import LoraConfig, get_peft_model
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    DataCollatorForLanguageModeling,
    TrainingArguments,
    Trainer,
    pipeline,
)

# --- Model identifiers ---------------------------------------------------
# BASE_MODEL_ID is what gets passed to `from_pretrained`; the *_NAME values
# are the names used in the MLflow model registry.
BASE_MODEL_ID = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
BASE_MODEL_NAME = "TinyLlama-1.1B-Chat-v1.0"
ADAPTER_MODEL_NAME = f"{BASE_MODEL_NAME}-adapter"
MERGED_MODEL_NAME = f"{BASE_MODEL_NAME}-merged"

# --- Adapter locations ---------------------------------------------------
# Local directory the adapter is saved into, and the artifact path it is
# logged under inside the MLflow run.
ADAPTER_OUTPUT_DIR = "adapter"
ADAPTER_ARTIFACT_PATH = "adapter"

# --- Registration toggles ------------------------------------------------
# Choose which fine-tuning outputs are added to the model registry.
REGISTER_ADAPTER_MODEL = True
REGISTER_MERGED_MODEL = False

# Shared client used by the registry helpers and the clean-up cells.
client = MlflowClient()

# Left as the last expression so the notebook displays the active experiment.
mlflow.set_experiment("TinyLlama-fine-tuning")


<Experiment: artifact_location='mlflow-artifacts:/mlflow', creation_time=1752871141895, experiment_id='12', last_update_time=1752871141895, lifecycle_stage='active', name='TinyLlama-fine-tuning', tags={'mlflow.domino.dataset_info': '68788688a685c05b1700ea8c-68788688a685c05b1700ea8b',
 'mlflow.domino.environment_id': '687895e6a685c05b1700eab5',
 'mlflow.domino.environment_revision_id': '6879b872da87d040dba4627b',
 'mlflow.domino.hardware_tier': 'gpu-small-k8s',
 'mlflow.domino.project_id': '68788685a685c05b1700ea86',
 'mlflow.domino.project_name': 'LLM',
 'mlflow.domino.run_id': '687a7ff3b2eee2648e0ace71',
 'mlflow.domino.run_number': '17',
 'mlflow.domino.user': 'integration-test',
 'mlflow.domino.user_id': '68788292fc17b3228539ea3e',
 'mlflow.source.type': 'NOTEBOOK',
 'mlflow.user': 'integration-test'}>

In [2]:
def is_model_registered(model_name: str) -> bool:
    """Return whether ``model_name`` exists in the MLflow model registry.

    The lookup goes through the module-level ``client``.

    Args:
        model_name: Registered-model name to look up.

    Returns:
        ``True`` when the lookup succeeds, ``False`` when the registry
        answers with the ``RESOURCE_DOES_NOT_EXIST`` error code.

    Raises:
        RestException: For any other REST failure (for example a permission
            or server error), so that such a failure is not mistaken for
            "model is not registered yet".
    """
    try:
        client.get_registered_model(model_name)
    except RestException as exc:
        # Only an explicit "not found" answer means the model is absent;
        # every other error is a real failure the caller needs to see.
        if getattr(exc, "error_code", None) == "RESOURCE_DOES_NOT_EXIST":
            return False
        raise
    return True


def tokenize(example):
    """Encode one dataset record's quote and attach training labels.

    Args:
        example: Mapping with a ``"quote"`` entry holding the text to encode.

    Returns:
        The result of calling the module-level ``tokenizer`` on the quote
        (truncation enabled, padded to ``max_length=128``), with an extra
        ``"labels"`` entry that is a copy of ``"input_ids"``.
    """
    encoded = tokenizer(
        example["quote"],
        truncation=True,
        padding="max_length",
        max_length=128,
    )
    # Labels start out as a separate copy of the input ids.
    encoded["labels"] = encoded["input_ids"].copy()
    return encoded


In [3]:
# Step 1: Retrieve the base model and tokenizer

# Model and tokenizer are both loaded from the same BASE_MODEL_ID.
base_model = AutoModelForCausalLM.from_pretrained(BASE_MODEL_ID)
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL_ID)
print("Downloaded base model and tokenizer")

# Log and register the base model only when no registered model with this
# name exists yet; otherwise this whole branch is skipped.
if not is_model_registered(BASE_MODEL_NAME):
    print(f"Registering {BASE_MODEL_ID}...")

    with mlflow.start_run(run_name="log-base-model") as base_run:
        # Wrap model + tokenizer in a text-generation pipeline for logging.
        base_pipeline = pipeline("text-generation", model=base_model, tokenizer=tokenizer)
        model_info = mlflow.transformers.log_model(
            transformers_model=base_pipeline,
            tokenizer=tokenizer,
            artifact_path="base_model",
            input_example="What's the capital of France?",
        )
        mlflow.register_model(model_uri=model_info.model_uri, name=BASE_MODEL_NAME)
        print(f"Registered base model: {BASE_MODEL_NAME}")

config.json:   0%|          | 0.00/608 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/2.20G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

tokenizer_config.json: 0.00B [00:00, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/551 [00:00<?, ?B/s]

Downloaded base model and tokenizer


In [4]:
# Step 2: Apply LoRA adapter

# LoRA is attached only to the modules listed in `target_modules`.
lora_config = LoraConfig(
    task_type="CAUSAL_LM",
    target_modules=["q_proj", "v_proj"],
    r=8,
    lora_alpha=16,
    lora_dropout=0.1,
    bias="none",
)

# Wrap the already-loaded base model with the adapter configuration.
peft_model = get_peft_model(model=base_model, peft_config=lora_config)

print("LoRA adapter applied")

LoRA adapter applied


In [5]:
# Step 3: Prepare dataset and trainer

# Fixed seed so the train/test split is the same on every run; without it
# the held-out examples (and therefore the validation loss) change each time.
SPLIT_SEED = 42

dataset = load_dataset("Abirate/english_quotes")["train"].train_test_split(
    test_size=0.1,
    seed=SPLIT_SEED,
)
tokenized = dataset.map(tokenize)
# mlm=False: collate for causal language modelling rather than masked LM.
data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)

training_args = TrainingArguments(
    output_dir="./results",
    per_device_train_batch_size=4,
    per_device_eval_batch_size=4,
    num_train_epochs=1,
    learning_rate=5e-5,
    # Evaluate, checkpoint and log once per epoch.
    eval_strategy="epoch",
    save_strategy="epoch",
    logging_strategy="epoch",
    # Disable the Trainer's built-in reporting integrations.
    report_to="none",
)

trainer = Trainer(
    model=peft_model,
    args=training_args,
    train_dataset=tokenized["train"],
    eval_dataset=tokenized["test"],
    # `tokenizer=` is deprecated on Trainer; `processing_class=` replaces it.
    processing_class=tokenizer,
    data_collator=data_collator,
)

print("Prepared dataset and trainer")

README.md: 0.00B [00:00, ?B/s]

quotes.jsonl:   0%|          | 0.00/647k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/2508 [00:00<?, ? examples/s]

Map:   0%|          | 0/2257 [00:00<?, ? examples/s]

Map:   0%|          | 0/251 [00:00<?, ? examples/s]

  trainer = Trainer(
No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Prepared dataset and trainer


In [7]:
# Step 4: Fine-tuning

# Everything below happens inside a single MLflow run so the parameters,
# adapter artifacts and registered versions all point at the same run id.
with mlflow.start_run(run_name="adapter-finetune") as run:
    mlflow.log_params({
        "registered_base_model": BASE_MODEL_ID,
        "adapter_type": "LoRA",
        "learning_rate": training_args.learning_rate,
        "epochs": training_args.num_train_epochs
    })

    trainer.train()

    # Log adapter weights only
    peft_model.save_pretrained(ADAPTER_OUTPUT_DIR)
    mlflow.log_artifacts(local_dir=ADAPTER_OUTPUT_DIR, artifact_path=ADAPTER_ARTIFACT_PATH)
    print("Logged adapter weights")

    if REGISTER_ADAPTER_MODEL:
        source = f"runs:/{run.info.run_id}/{ADAPTER_ARTIFACT_PATH}"
        # Creating a registered model whose name already exists fails, so
        # only create it the first time; later runs just add a new version.
        if not is_model_registered(ADAPTER_MODEL_NAME):
            client.create_registered_model(ADAPTER_MODEL_NAME)
        client.create_model_version(ADAPTER_MODEL_NAME, source, run.info.run_id)
        print("Registered adapter weights")

    # Optionally merge and register final model
    if REGISTER_MERGED_MODEL:
        # Fold the adapter into the base weights before logging a pipeline.
        merged_model = peft_model.merge_and_unload()
        merged_info = mlflow.transformers.log_model(
            transformers_model=pipeline("text-generation", model=merged_model, tokenizer=tokenizer),
            tokenizer=tokenizer,
            artifact_path="merged_model",
            input_example="What's the capital of France?"
        )
        mlflow.register_model(merged_info.model_uri, MERGED_MODEL_NAME)
        print(f"Registered merged model: {MERGED_MODEL_NAME}")


Epoch,Training Loss,Validation Loss
1,2.1813,2.290818


2025/07/19 03:25:54 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: TinyLlama-1.1B-Chat-v1.0-adapter, version 1


Logged adapter weights
Registered adapter weights
🏃 View run adapter-finetune at: http://127.0.0.1:8768/#/experiments/12/runs/44a7aa84144749efb21ec713f4b8fefc
🧪 View experiment at: http://127.0.0.1:8768/#/experiments/12


In [None]:
# Clean up - delete registered models

# Try each registry name in turn; a failure on one name is printed and does
# not stop the remaining deletions.
for model_name in (BASE_MODEL_NAME, ADAPTER_MODEL_NAME, MERGED_MODEL_NAME):
    try:
        client.delete_registered_model(name=model_name)
    except RestException as err:
        print(err)
    else:
        print(f"Deleted registered model: {model_name}")


In [None]:
# Clean up - delete this notebook's experiment (resolved by name)

# Experiment ids are assigned by the tracking server, so a hard-coded id goes
# stale between sessions and may fail or point at an unrelated experiment.
# Resolve the experiment by the name passed to `mlflow.set_experiment`.
experiment = client.get_experiment_by_name("TinyLlama-fine-tuning")

# Only delete when the experiment exists and has not been deleted already.
if experiment is not None and experiment.lifecycle_stage == "active":
    client.delete_experiment(experiment.experiment_id)
    print(f"Deleted experiment: {experiment.name} (id={experiment.experiment_id})")
else:
    print("No active experiment named 'TinyLlama-fine-tuning' to delete")

In [None]:
# Clean up - delete experiment run by id

# NOTE(review): the id below is a fixed literal and differs from the run id
# shown in the fine-tuning output - confirm it still names the intended run.
client.delete_run(run_id="ee2880594f034b988c41db2dfbbd8b44")