In [1]:
# Expose only physical GPU 1 to this kernel. This is expected to take effect only
# if it runs before any library initializes CUDA, so keep it as the first cell.
%env CUDA_VISIBLE_DEVICES=1

env: CUDA_VISIBLE_DEVICES=1


In [2]:
from models.modeling_xvla import XVLA
from models.processing_xvla import XVLAProcessor

# The model weights and the processor are loaded from the same pretrained
# identifier; naming it once keeps the two calls from drifting apart.
CHECKPOINT = "2toINF/X-VLA-SoftFold"

model = XVLA.from_pretrained(CHECKPOINT)
processor = XVLAProcessor.from_pretrained(CHECKPOINT, use_fast=True)

Florence2ForConditionalGeneration has generative capabilities, as `prepare_inputs_for_generation` is explicitly overwritten. However, it doesn't directly inherit from `GenerationMixin`. From 👉v4.50👈 onwards, `PreTrainedModel` will NOT inherit from `GenerationMixin`, and this model will lose the ability to call `generate` and other related functions.
  - If you are the owner of the model architecture code, please modify your model class such that it inherits from `GenerationMixin` (after `PreTrainedModel`, otherwise you'll get an exception).
  - If you are not the owner of the model architecture class, please contact the model code owner to update it.


In [3]:
from peft import LoraConfig, get_peft_model

# LoRA setup: rank-8 adapters (alpha 16, no bias terms) requested for every
# linear layer via "all-linear". The sub-modules listed in `modules_to_save`
# are handed to PEFT to be kept trainable and saved alongside the adapters.
lora_config = LoraConfig(
    r=8,
    lora_alpha=16,
    bias="none",
    target_modules="all-linear",
    modules_to_save=[
        "transformer.soft_prompt_hub",
        "transformer.action_encoder",
        "transformer.action_decoder",
    ],
)

# NOTE: this rebinds `model` to the PEFT wrapper, so re-running the cell without
# reloading the base model would wrap an already-wrapped model.
model = get_peft_model(model, lora_config)

In [4]:
# Move the PEFT-wrapped model onto the current CUDA device.
model = model.to("cuda")

In [5]:
from todo import Observation, Action, Trainer

# Build the trainer around the (PEFT-wrapped, on-GPU) model and its processor.
trainer = Trainer(model=model, processor=processor)

In [None]:
import torch
# "high" allows float32 matmuls to use faster reduced-precision internals
# (e.g. TF32) on hardware that supports them.
torch.set_float32_matmul_precision("high")

# Wrap `trainer.fit` with torch.compile. "max-autotune" trades longer compile
# time for more aggressively tuned kernels; all other options stay at defaults.
fit = torch.compile(trainer.fit, mode="max-autotune")

In [16]:
# Run the compiled `fit` once on freshly sampled inputs; the returned value is
# shown as the cell output. The first call after compiling also pays the
# compilation cost.
fit(observation=Observation.sample(), action=Action.sample())

tensor(1155.5436, device='cuda:0', grad_fn=<AddBackward0>)

In [20]:
import torch

# Record one call of the compiled `fit` under the PyTorch profiler (default
# settings) so the time spent in individual ops can be inspected.
with torch.profiler.profile() as prof:
    fit(
        observation=Observation.sample(),
        action=Action.sample(),
    )
# Write the recorded events as a Chrome-trace JSON file in the working directory.
prof.export_chrome_trace("trace-train.json")


In [None]:
%%timeit -n 1

fit(
    observation=Observation.sample(),
    action=Action.sample(),
)

In [24]:
# Check which device the model lives on. With CUDA_VISIBLE_DEVICES=1 only one
# GPU is visible to the process, and it is re-indexed as device 0 here.
model.device

device(type='cuda', index=0)

In [2]:
from todo import compute_actions

# Compiled variant of `compute_actions`, using the same "max-autotune" mode as
# the training path; the trailing underscore distinguishes it from the eager one.
compute_actions_ = torch.compile(compute_actions, mode="max-autotune")


KeyboardInterrupt



In [28]:
%%timeit -n 1

compute_actions_(
    model=model,
    processor=processor,
    observation=Observation.sample(),
)

The slowest run took 4.85 times longer than the fastest. This could mean that an intermediate result is being cached.
115 ms ± 47.5 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
