In [None]:
# Bootstrap cell: make the repo importable and pick the compute device (GPU/MPS/CPU)
import sys
from pathlib import Path

try:
    from utils.path_helpers import add_repo_root_to_sys_path
except Exception:
    # `utils` is not importable from the current working directory.
    # Search cwd and then each ancestor for the first directory carrying a
    # repo marker (requirements.txt or .git) and put it at the front of
    # sys.path before retrying the import.
    start_dir = Path.cwd()
    repo_root = next(
        (
            candidate
            for candidate in (start_dir, *start_dir.parents)
            if (candidate / "requirements.txt").exists() or (candidate / ".git").exists()
        ),
        None,
    )
    if repo_root is not None:
        sys.path.insert(0, str(repo_root))
    # If no marker was found this retry raises, surfacing the problem here.
    from utils.path_helpers import add_repo_root_to_sys_path

add_repo_root_to_sys_path()

from utils.device import get_device, backend_info, backend_name, ensure_seed, move_to
print(f"Using backend: {backend_info()}")
ensure_seed(42)

# If the active backend is a torch accelerator, make it torch's default device
# (PyTorch 2.x convenience). This step is best-effort: it must never stop the
# notebook, but failures other than "torch is not installed" are reported
# instead of being silently discarded.
try:
    import torch  # noqa: F401
    active_backend = backend_name()
    if active_backend in ("torch_cuda", "torch_mps") and hasattr(torch, "set_default_device"):
        target_device = "cuda" if active_backend == "torch_cuda" else "mps"
        torch.set_default_device(target_device)
        # get_default_device is not guaranteed to exist on every torch build
        # that has set_default_device, so fall back to the requested name
        # rather than losing the confirmation message to an AttributeError.
        if hasattr(torch, "get_default_device"):
            reported_device = torch.get_default_device()
        else:
            reported_device = target_device
        print(f"torch default device set to {reported_device}")
except ImportError:
    # torch is optional for this setup cell; nothing to configure without it.
    pass
except Exception as exc:
    print(f"Warning: could not set torch default device: {exc!r}")

# Project 16: Instruction Tuning Mistral 7B

## Goal
Fine-tune the full Mistral 7B model using MLX on your M4 Mac.

## Learning Objectives
- LoRA parameter-efficient fine-tuning
- Instruction dataset preparation
- Training loop for LLMs
- Before/after evaluation

## Configuration
```
Model: Mistral 7B
Method: LoRA (rank 32)
Batch size: 32-64
Dataset: ~10k instruction examples
Memory usage: ~20-30GB
Training time: Hours per run
```

In [1]:
# Setup: imports used by this notebook, grouped stdlib -> third-party -> MLX
import json

import numpy as np

import mlx.core as mx
import mlx.nn as nn
from mlx_lm import generate, load

# Sanity check: report the installed MLX version and MLX's default device.
print(f"MLX version: {mx.__version__}")
print(f"Device: {mx.default_device()}")

MLX version: 0.29.3
Device: Device(gpu, 0)
