In [3]:
# Set your Metaflow profile if you have multiple Metaflow config files
# %env METAFLOW_PROFILE=valayg

## Extract the Metaflow Related Tasks

In [4]:
from metaflow import load_model, Flow, Checkpoint, namespace, Run
import json
# Switch to the global namespace so the run lookup is not restricted to the
# current user's namespace.
namespace(None)

# Resolve the run to inspect. To pin a specific run instead of the latest one,
# use Run("FinetuneLlama3LoRA/<runid>").
latest_run = Flow("FinetuneLlama3LoRA").latest_successful_run
if latest_run is None:
    # `latest_successful_run` is None when the flow has no successful run yet;
    # fail here with a clear message instead of a TypeError on the next line.
    raise RuntimeError(
        "No successful run found for flow 'FinetuneLlama3LoRA'; "
        "pass an explicit run via Run('FinetuneLlama3LoRA/<runid>') instead."
    )

# Checkpoints recorded by the task of the `sft` step.
sft_task = latest_run["sft"].task
checkpoints = list(Checkpoint().list(task=sft_task))

# Reference to the base model on top of which the LoRA adapter was trained.
base_model_reference = latest_run.data.model_reference

In [5]:
# Report which run was resolved and how many checkpoints were listed for it.
print(f"checkpoints found for the Run: {latest_run!s} {len(checkpoints)}")

checkpoints found for the Run: Run('FinetuneLlama3LoRA/argo-finetunellama3lora-jx59g') 1


In [6]:
# Pretty-print the metadata attached to the base model reference.
print(
    "Base model used in the Run : ",
    json.dumps(base_model_reference["metadata"], indent=2),
    sep="\n",
)

Base model used in the Run : 
{
  "repo_id": "unsloth/Llama-3.2-1B-Instruct",
  "registry": "huggingface",
  "repo_type": "model"
}


## Load the Base Model Used in the Flow

In [7]:
# Local directories (relative to the notebook's working directory) that the
# base model and the LoRA checkpoint are loaded into.
base_model_name = base_model_reference["name"]
model_path = f"checkpoints/{base_model_name}"
checkpoint_path = f"checkpoints/{latest_run.id}"

In [8]:
# Load the base model behind `base_model_reference` into the local `model_path`
# directory; that directory is later passed to `AutoModelForCausalLM.from_pretrained`.
# NOTE(review): presumably this downloads the files from the Metaflow datastore — confirm.
load_model(base_model_reference, model_path)

## Load the LoRA Checkpoint

In [9]:
# Fail with an actionable message when no checkpoints were listed, instead of
# an opaque "list index out of range" on `checkpoints[0]`.
if not checkpoints:
    raise IndexError(
        f"No checkpoints were recorded for {latest_run}; "
        f"nothing to load into {checkpoint_path!r}."
    )

# Load the first listed checkpoint into the local `checkpoint_path` directory.
# NOTE(review): assumes `Checkpoint().list` yields the most recent checkpoint first — confirm.
load_model(checkpoints[0], checkpoint_path)

## Instantiate the Model With the LoRA Adapter From Checkpoints Saved During Training

In [10]:
from peft import PeftModel, PeftConfig
from transformers import AutoModelForCausalLM

# Read the adapter configuration stored alongside the LoRA checkpoint.
config = PeftConfig.from_pretrained(checkpoint_path)

# Load the base weights, then attach the checkpoint's LoRA adapter with
# training disabled (`is_trainable=False`).
# Note: PEFT injects the LoRA layers into `model` itself, so after this call
# `model` is no longer the pristine base model.
model = AutoModelForCausalLM.from_pretrained(model_path)
checkpoint_model = PeftModel.from_pretrained(
    model=model,
    model_id=checkpoint_path,
    is_trainable=False,
)

  from .autonotebook import tqdm as notebook_tqdm


In [11]:
# Inspect `model`: the repr shows `lora.Linear` wrappers, i.e. the base model
# object itself now carries the adapter layers injected by PEFT.
model

LlamaForCausalLM(
  (model): LlamaModel(
    (embed_tokens): Embedding(128256, 2048)
    (layers): ModuleList(
      (0-15): 16 x LlamaDecoderLayer(
        (self_attn): LlamaSdpaAttention(
          (q_proj): lora.Linear(
            (base_layer): Linear(in_features=2048, out_features=2048, bias=False)
            (lora_dropout): ModuleDict(
              (default): Identity()
            )
            (lora_A): ModuleDict(
              (default): Linear(in_features=2048, out_features=32, bias=False)
            )
            (lora_B): ModuleDict(
              (default): Linear(in_features=32, out_features=2048, bias=False)
            )
            (lora_embedding_A): ParameterDict()
            (lora_embedding_B): ParameterDict()
            (lora_magnitude_vector): ModuleDict()
          )
          (k_proj): Linear(in_features=2048, out_features=512, bias=False)
          (v_proj): lora.Linear(
            (base_layer): Linear(in_features=2048, out_features=512, bias=False)
    

## Instantiate the Model With the LoRA Adapter From Checkpoints After Completion of Training

In [12]:
# The flow stores the final model as a run artifact
# (`self.model = current.model.save(...)`), so it is exposed on the run as
# `latest_run.data.model`.
final_model_reference = latest_run.data.model

# Load it into its own directory, separate from the intermediate checkpoint.
final_model_path = f"checkpoints/final_models/{latest_run.id}"
load_model(final_model_reference, final_model_path)


In [13]:
# Load a fresh copy of the base model for the final adapter. The `model` object
# used for `checkpoint_model` already has that checkpoint's LoRA layers injected
# into it, so reusing it would make both PEFT wrappers share the same modules
# (and the same "default" adapter slot) instead of being independent models.
final_base_model = AutoModelForCausalLM.from_pretrained(model_path)

# Read the adapter configuration stored with the final model, then attach the
# final LoRA adapter with training disabled.
config = PeftConfig.from_pretrained(final_model_path)
final_model = PeftModel.from_pretrained(
    final_base_model,
    final_model_path,
    is_trainable=False,
)

In [14]:
# Display the PEFT-wrapped model built from the final adapter.
final_model

PeftModelForCausalLM(
  (base_model): LoraModel(
    (model): LlamaForCausalLM(
      (model): LlamaModel(
        (embed_tokens): Embedding(128256, 2048)
        (layers): ModuleList(
          (0-15): 16 x LlamaDecoderLayer(
            (self_attn): LlamaSdpaAttention(
              (q_proj): lora.Linear(
                (base_layer): Linear(in_features=2048, out_features=2048, bias=False)
                (lora_dropout): ModuleDict(
                  (default): Identity()
                )
                (lora_A): ModuleDict(
                  (default): Linear(in_features=2048, out_features=32, bias=False)
                )
                (lora_B): ModuleDict(
                  (default): Linear(in_features=32, out_features=2048, bias=False)
                )
                (lora_embedding_A): ParameterDict()
                (lora_embedding_B): ParameterDict()
                (lora_magnitude_vector): ModuleDict()
              )
              (k_proj): Linear(in_features=2048, ou