We’re on an OpenShift AI workbench.

We’ll fine-tune Llama 3.1 8B Instruct using Ray (as in the Red Hat article).

This notebook is independent: you don’t need modules 1–4.

In [None]:
import os
import sys
from pathlib import Path
from dataclasses import dataclass

# Locate the project root so the local `mlforeng` package becomes importable
# (works in container or local). When the kernel's working directory is this
# notebook's own folder, the root is taken to be three levels up; in every
# other case the working directory itself is used as the root.
NOTEBOOK_DIR = Path.cwd()
PROJECT_ROOT = (
    NOTEBOOK_DIR.parents[2]  # expected to be .../MLforEng
    if NOTEBOOK_DIR.name == "05_llama3_openshift_ai"
    else NOTEBOOK_DIR
)

print("NOTEBOOK_DIR:", NOTEBOOK_DIR)
print("PROJECT_ROOT:", PROJECT_ROOT)

# Append only when missing, so re-running the cell does not add duplicates.
if str(PROJECT_ROOT) not in sys.path:
    sys.path.append(str(PROJECT_ROOT))

from mlforeng.models import get_model_spec


@dataclass
class LlamaFineTuneConfig:
    """Settings for one Llama fine-tuning run, gathered in a single object.

    Attributes:
        model_name: Key handed to ``get_model_spec`` to look up the model.
        dataset_uri: Location of the training data (the default is an example).
        output_dir: Where the fine-tuned model is written; a PVC or S3 path.
        num_gpus: Number of GPUs for the run. Not read by any code visible in
            this notebook yet.
        num_epochs: Number of training epochs. Not read by any code visible in
            this notebook yet.
        per_device_batch_size: Batch size per device. Not read by any code
            visible in this notebook yet.
    """

    model_name: str = "llama3_8b_instruct"
    # example
    dataset_uri: str = "s3://mybucket/llama3-dataset.jsonl"
    # PVC or S3 path
    output_dir: str = "/mnt/models/llama3-finetuned"
    num_gpus: int = 4
    num_epochs: int = 3
    per_device_batch_size: int = 4


# Build the run configuration with every field left at its default. The bare
# name on the last line makes the notebook show the config as the cell output.
cfg = LlamaFineTuneConfig()
cfg


In [None]:
# Look up the model spec for cfg.model_name, then display the spec together
# with its Hugging Face model id and its `extra` field for a quick sanity check.
spec = get_model_spec(cfg.model_name)
spec, spec.hf_model_id, spec.extra


In [None]:
# The real run is opt-in: only the value "true" (in any letter case) in
# RUN_ON_OPENSHIFT_AI enables it; an unset variable counts as "false".
RUN_ON_OPENSHIFT = os.environ.get("RUN_ON_OPENSHIFT_AI", "false").lower() == "true"

# Probe for the LLM dependencies without letting a failed import stop the
# notebook. Any exception raised while importing is kept for the report below.
try:
    import ray
    from transformers import AutoTokenizer, AutoModelForCausalLM
    from datasets import load_dataset
except Exception as import_failure:
    HAVE_LLM_STACK, LLM_IMPORT_ERROR = False, import_failure
else:
    HAVE_LLM_STACK, LLM_IMPORT_ERROR = True, None

# Report what was detected so later cells' skip/run decisions are explainable.
print("RUN_ON_OPENSHIFT:", RUN_ON_OPENSHIFT)
print("HAVE_LLM_STACK:", HAVE_LLM_STACK)
if not HAVE_LLM_STACK:
    print("LLM stack not available here. Import error:", repr(LLM_IMPORT_ERROR))


In [None]:
def dry_run_llama_pipeline(cfg: LlamaFineTuneConfig):
    """Print the steps a real fine-tuning run would take, without doing them.

    The function's own code only prints; its single external call is
    ``get_model_spec``, used to show the Hugging Face model id for
    ``cfg.model_name``.

    Args:
        cfg: Run settings. ``model_name``, ``dataset_uri`` and ``output_dir``
            are the fields read here.

    Returns:
        None. The plan is written to standard output.
    """
    print("DRY RUN ONLY (no Llama, no Ray).")
    print("If this was OpenShift AI, we would:")
    print("- spin up Ray cluster")

    # Resolved only once the header lines are out, right before it is shown.
    hf_model_id = get_model_spec(cfg.model_name).hf_model_id
    print(f"- load HF model: {hf_model_id}")

    print(f"- fine-tune on dataset: {cfg.dataset_uri}")
    print(f"- save to: {cfg.output_dir}")

# Print the dry-run plan for the config created earlier in the notebook.
dry_run_llama_pipeline(cfg)


In [None]:
# Real fine-tuning path. It runs only when RUN_ON_OPENSHIFT is set (driven by
# the RUN_ON_OPENSHIFT_AI environment variable) AND the ray/transformers/
# datasets imports succeeded; otherwise the cell only reports why it skips.
if not RUN_ON_OPENSHIFT:
    print("Not running on OpenShift AI (RUN_ON_OPENSHIFT_AI != 'true'). Skipping real fine-tuning.")
elif not HAVE_LLM_STACK:
    print("LLM stack (ray/transformers/datasets) not available. Skipping real fine-tuning.")
else:
    print("OK: running real Llama 3.1 fine-tuning on OpenShift AI...")
    import time

    # === This is where you paste/adapt the Ray + HF code from the article ===
    # Example sketch (NOT full code):

    # Connect to the existing Ray cluster. ignore_reinit_error=True keeps this
    # cell re-runnable: without it, a second ray.init() in the same kernel
    # raises because Ray is already initialized.
    ray.init(address="auto", ignore_reinit_error=True)
    print("Ray cluster resources:", ray.cluster_resources())

    tokenizer = AutoTokenizer.from_pretrained(spec.hf_model_id)
    model = AutoModelForCausalLM.from_pretrained(spec.hf_model_id)

    # NOTE(review): cfg.dataset_uri defaults to an s3:// URI; confirm this
    # environment has the filesystem support and credentials that `datasets`
    # needs to read it.
    dataset = load_dataset("json", data_files={"train": cfg.dataset_uri})

    # ... tokenization, collator, Ray Trainer or HF Trainer setup ...
    # trainer = TransformersTrainer(...)
    # result = trainer.fit()

    # Simulate long run for demo
    time.sleep(1)
    print("Fine-tuning completed (placeholder). Checkpoints saved to:", cfg.output_dir)


In [None]:
from pathlib import Path
from mlforeng.registry import register_external_model  # you create this

# Name under which the model is registered.
logical_name = "llama3_8b_instruct_supportbot"

# Use the configured output directory only when the real fine-tuning branch
# was eligible to run (OpenShift flag set and LLM stack importable). Otherwise
# fall back to a placeholder path so this code path stays testable locally.
model_path = (
    cfg.output_dir
    if RUN_ON_OPENSHIFT and HAVE_LLM_STACK
    else "/mnt/models/llama3-finetuned-FAKE"
)

print("Registering logical model name:", logical_name, "->", model_path)
registered_path = register_external_model(logical_name, model_path)

# Last expression of the cell: shows whatever the registry returned.
registered_path
