In [1]:
from unsloth_mlx import (
    FastLanguageModel,
    SFTTrainer,
    TrainingArguments,    
    prepare_dataset,    
    get_chat_template,
    format_chat_template,
    create_training_data,
    save_model_hf_format,
    export_to_gguf,
    get_training_config,
)



  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Token budget per sequence; this same value is handed to the trainer further down.
max_seq_length = 7096

# Fetch the model named below together with its tokenizer, requesting 4-bit loading.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name="mlx-community/Llama-3.2-1B-Instruct-4bit",
    max_seq_length=max_seq_length,
    load_in_4bit=True,
)


Fetching 6 files: 100%|██████████| 6/6 [00:00<00:00, 133152.51it/s]


In [3]:
# Register the LoRA settings on the loaded model (rank 16, alpha 16, no dropout).
# The targeted modules are the attention projections (q/k/v/o) and the MLP
# projections (gate/up/down).
# NOTE(review): the keyword set mirrors the upstream Unsloth template; confirm
# which of use_gradient_checkpointing / use_rslora / loftq_config this MLX port
# actually honours.
model = FastLanguageModel.get_peft_model(
    model,
    r=16,
    target_modules=[
        "q_proj",
        "k_proj",
        "v_proj",
        "o_proj",
        "gate_proj",
        "up_proj",
        "down_proj",
    ],
    lora_alpha=16,
    lora_dropout=0,
    bias="none",
    use_gradient_checkpointing="unsloth",
    random_state=3407,
    use_rslora=False,
    loftq_config=None,
)

LoRA configuration set: rank=16, alpha=16, modules=['q_proj', 'k_proj', 'v_proj', 'o_proj', 'gate_proj', 'up_proj', 'down_proj'], dropout=0


In [4]:
# Rebind the tokenizer to the version returned by get_chat_template for the
# "llama-3.2" template; the inference cell later calls apply_chat_template on it.
tokenizer = get_chat_template(
    tokenizer,
    chat_template = "llama-3.2",
)

# Alpaca-style prompt with three positional slots, filled in order with the
# instruction, the optional input/context, and the expected response.
alpaca_prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
{}

### Input:
{}

### Response:
{}"""

# End-of-sequence marker appended to every rendered training example.
EOS_TOKEN = tokenizer.eos_token
def formatting_prompts_func(examples):
    """Render a column-oriented batch of Alpaca records into training strings.

    Args:
        examples: Mapping with parallel "instruction", "input" and "output"
            sequences (one entry per record).

    Returns:
        A dict with a single "text" key holding one rendered prompt per record.
        Each prompt is `alpaca_prompt` filled with the record's fields and
        terminated with `EOS_TOKEN` so generation learns where to stop.
    """
    rows = zip(examples["instruction"], examples["input"], examples["output"])
    return {
        "text": [
            alpaca_prompt.format(instruction, context, answer) + EOS_TOKEN
            for instruction, context, answer in rows
        ],
    }

In [6]:
# Load dataset
from datasets import (
    load_dataset,
    Split,
)


# Training records: a local JSONL file loaded as a single `Dataset`.
dataset = load_dataset(
    "json",
    data_files="datasets/frends-bpmn-dataset.jsonl",
    split=Split.TRAIN,
)

# Validation records. When `data_files` is a plain path the "json" loader puts
# every row under the "train" split, so that split is requested explicitly here.
# Without `split`, load_dataset returns a DatasetDict keyed by split name rather
# than a `Dataset`, which is not interchangeable with `dataset` above.
dataset_val = load_dataset(
    "json",
    data_files="datasets/frends-bpmn-dataset-val.jsonl",
    split=Split.TRAIN,
)

# The raw instruction/input/output columns are passed on unchanged. To
# pre-render a "text" column instead, map formatting_prompts_func over each
# dataset with batched=True.

In [7]:
# Show the record at index 1 to eyeball its instruction / input / output fields.
dataset[1]

{'instruction': 'Create a Frends process that assigns a variable and returns it as output.',
 'input': '',
 'output': 'Here\'s a simple Frends process definition that assigns a variable and returns it:\n\n```json\n{\n "Processes": [{\n "Name": "Fine Tuning: simple",\n "Version": 1,\n "ElementParameters": [\n {\n "Id": "StartEvent_1",\n "Type": 0,\n "SelectedTypeId": "ManualTrigger",\n "Name": "Manual Trigger"\n },\n {\n "Id": "Activity_0zi7vwm",\n "Type": 12,\n "Parameters": {\n "variableName": "result",\n "variableExpression": {\n "mode": "text",\n "value": "ok"\n },\n "shouldAssignVariable": {\n "mode": "toggle",\n "value": true\n }\n },\n "Name": "Assign result"\n },\n {\n "Id": "Event_19glogw",\n "Type": 5,\n "Parameters": {\n "expression": {\n "mode": "csharp",\n "value": "#var.result"\n }\n }\n }\n ],\n "TriggersJson": "[{\\"$type\\":\\"ManualTrigger\\",\\"name\\":\\"Manual Trigger\\",\\"id\\":\\"StartEvent_1\\"}]"\n }]\n}```\n\nThis process uses a Manual Trigger to start, assign

In [8]:
# Build the supervised fine-tuning trainer for the LoRA-configured model.
# NOTE(review): the training log prints "Created validation set (copied from
# train)", which suggests `val_dataset` is not being consumed. Confirm the keyword
# this SFTTrainer expects for held-out data (TRL's SFTTrainer names it
# `eval_dataset`).
# NOTE(review): dataset_num_proc, packing and optim are carried over from the
# Unsloth template; verify they have an effect on the MLX backend.
trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=dataset,
    val_dataset=dataset_val,
    max_seq_length=max_seq_length,
    dataset_num_proc=2,
    packing=False,
    args=TrainingArguments(
        per_device_train_batch_size=2,
        gradient_accumulation_steps=4,
        warmup_steps=5,
        max_steps=60,  # short run: 60 iterations in total
        learning_rate=2e-4,
        logging_steps=1,  # report the training loss on every iteration
        optim="adamw_8bit",
        weight_decay=0.01,
        lr_scheduler_type="linear",
        seed=3407,
        output_dir="outputs",
    ),
)

Trainer initialized:
  Output dir: outputs
  Adapter path: outputs/adapters
  Learning rate: 0.0002
  Iterations: 60
  Batch size: 2
  LoRA r=16, alpha=16
  Native training: True
  LR scheduler: linear
  Grad checkpoint: False


In [9]:
# Run the fine-tuning loop and keep whatever train() returns for later inspection.
# Per the log below, the adapters end up under outputs/adapters.
trainer_stats = trainer.train()

mx.metal.device_info is deprecated and will be removed in a future version. Use mx.device_info instead.


Starting Fine-Tuning

[Using Native MLX Training]

Applying LoRA adapters...
Applying LoRA to 16 layers: {'rank': 16, 'scale': 1.0, 'dropout': 0, 'keys': ['self_attn.q_proj', 'self_attn.k_proj', 'self_attn.v_proj', 'self_attn.o_proj', 'mlp.gate_proj', 'mlp.up_proj', 'mlp.down_proj']}
✓ LoRA applied successfully to 16 layers
  Trainable LoRA parameters: 224
Preparing training data...
  Detected format: alpaca
✓ Prepared 4 training samples
  Saved to: outputs/train.jsonl
✓ Created validation set (copied from train)

Training configuration:
  Iterations: 60
  Batch size: 2
  Learning rate: 0.0002
  LR scheduler: linear
  Grad checkpoint: True
  Adapter file: outputs/adapters/adapters.safetensors

Loaded 4 training samples, 4 validation samples
Starting training loop...
Starting training..., iters: 60


Calculating loss...: 100%|██████████| 2/2 [00:00<00:00,  2.01it/s]

Iter 1: Val loss 2.631, Val took 0.997s





Iter 1: Train loss 2.803, Learning Rate 2.000e-04, It/sec 0.698, Tokens/sec 532.861, Trained Tokens 763, Peak mem 2.618 GB
Iter 2: Train loss 2.203, Learning Rate 1.967e-04, It/sec 0.725, Tokens/sec 497.095, Trained Tokens 1449, Peak mem 2.618 GB
Iter 3: Train loss 1.703, Learning Rate 1.933e-04, It/sec 0.790, Tokens/sec 541.727, Trained Tokens 2135, Peak mem 2.618 GB
Iter 4: Train loss 1.518, Learning Rate 1.900e-04, It/sec 0.720, Tokens/sec 549.054, Trained Tokens 2898, Peak mem 2.708 GB
Iter 5: Train loss 0.990, Learning Rate 1.867e-04, It/sec 0.740, Tokens/sec 564.480, Trained Tokens 3661, Peak mem 2.708 GB
Iter 6: Train loss 0.697, Learning Rate 1.833e-04, It/sec 0.795, Tokens/sec 545.583, Trained Tokens 4347, Peak mem 2.708 GB
Iter 7: Train loss 0.550, Learning Rate 1.800e-04, It/sec 0.735, Tokens/sec 561.128, Trained Tokens 5110, Peak mem 2.708 GB
Iter 8: Train loss 0.289, Learning Rate 1.767e-04, It/sec 0.798, Tokens/sec 547.724, Trained Tokens 5796, Peak mem 2.708 GB
Iter 9: T

Calculating loss...: 100%|██████████| 2/2 [00:00<00:00,  2.14it/s]

Iter 60: Val loss 0.004, Val took 0.941s





Iter 60: Train loss 0.004, Learning Rate 3.333e-06, It/sec 0.778, Tokens/sec 533.714, Trained Tokens 43470, Peak mem 2.708 GB
Saved final weights to outputs/adapters/adapters.safetensors.
  Adapter config saved to: outputs/adapters/adapter_config.json

Training Complete!
  Adapters saved to: outputs/adapters


In [10]:
from mlx_lm import generate

# Put the fine-tuned model into inference mode before sampling from it.
FastLanguageModel.for_inference(model)

# Express the request as a single user turn and render it through the
# tokenizer's chat template as plain text (not token ids), with the generation
# prompt appended.
prompt = "Generate sample Frends BPMN process. Give it a name FirstProcess. Output in JSON only."
messages = [
    {"role": "user", "content": prompt},
]
formatted_prompt = tokenizer.apply_chat_template(
    messages,
    tokenize=False,
    add_generation_prompt=True,
)

# mlx_lm's generate is given the wrapped model object (`model.model`), not the
# FastLanguageModel wrapper itself.
response = generate(
    model.model,
    tokenizer,
    prompt=formatted_prompt,
    max_tokens=10000,
    verbose=False,
)
print(response)

Inference mode enabled with KV caching
Here's a simple Frends process definition that creates a simple flow:

```json
{
 "Processes": [{
 "Name": "FirstProcess",
 "Version": 1,
 "ElementParameters": [
 {
 "Id": "StartEvent_1",
 "Type": 0,
 "SelectedTypeId": "ManualTrigger",
 "Name": "Manual Trigger"
 },
 {
 "Id": "Activity_0zi7vwm",
 "Type": 12,
 "Parameters": {
 "variableName": "result",
 "variableExpression": {
 "mode": "text",
 "value": "ok"
 },
 "shouldAssignVariable": {
 "mode": "toggle",
 "value": true
 }
 },
 "Name": "Assign result"
 },
 {
 "Id": "Event_19glogw",
 "Type": 5,
 "Parameters": {
 "expression": {
 "mode": "csharp",
 "value": "#var.result"
 }
 }
 }
 ],
 "TriggersJson": "[{\"$type\":\"ManualTrigger\",\"name\":\"Manual Trigger\",\"id\":\"StartEvent_1\"}]"
 }]
}```
