In [14]:
import json

# Load existing scenes.json
with open("/content/scenes.json") as f:
    dataset = json.load(f)

sft_data = []

for item in dataset:
    sft_data.append({
        "input": item["prompt"] + "\nHTML_START\n",
        "output": item["html"] + "\nHTML_END"
    })


In [15]:
from huggingface_hub import login

# Uncomment and add your Hugging Face token.
# login(token="INSERT_TOKEN_HERE") 

In [16]:
# Base model performance.

from transformers import AutoTokenizer, AutoModelForCausalLM
import torch

model_name = "Qwen/Qwen3-1.7B-Base"

# Load model
model = AutoModelForCausalLM.from_pretrained(model_name)
model = model.to("cuda")  # move model to GPU

# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Prepare input
prompt = "A scene with trees and light snow\nHTML_START\n"
inputs = tokenizer(prompt, return_tensors="pt").to("cuda")

# Generate
outputs = model.generate(**inputs, max_new_tokens=200)
print(tokenizer.decode(outputs[0]))

Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


A scene with trees and light snow
HTML_START
<svg width="100%" height="100%" viewBox="0 0 1000 1000" xmlns="http://www.w3.org/2000/svg">
  <defs>
    <linearGradient id="gradient" x1="0%" y1="0%" x2="100%" y2="100%">
      <stop offset="0%" style="stop-color:rgb(255,255,255);stop-opacity:1" />
      <stop offset="100%" style="stop-color:rgb(255,255,255);stop-opacity:0" />
    </linearGradient>
  </defs>
  <g transform="translate(0,0)">
    <path d="M 0 0 L 1000 0 L 1000 1000 L 0 


In [17]:
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import LoraConfig, get_peft_model, TaskType, prepare_model_for_kbit_training
import torch

# Load tokenizer and model
model_name = "Qwen/Qwen3-1.7B-Base"  # adjust if your HF repo differs
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",
    torch_dtype=torch.float16
)

# Prepare for LoRA
model = prepare_model_for_kbit_training(model)  # for 4-bit/8-bit training

# LoRA config
lora_config = LoraConfig(
    r=16,
    lora_alpha=32,
    target_modules=["q_proj", "v_proj"],  # typical for Qwen
    lora_dropout=0.05,
    bias="none",
    task_type=TaskType.CAUSAL_LM
)

model = get_peft_model(model, lora_config)

In [18]:
import json
from datasets import Dataset
from transformers import AutoTokenizer

# Load your original scenes JSON
with open("/content/scenes.json") as f:
    raw_dataset = json.load(f)

# Convert to SFT format with HTML start/end tokens
sft_list = []
for item in raw_dataset:
    sft_list.append({
        "input": item["prompt"] + "\nHTML_START\n",
        "output": item["html"] + "\nHTML_END"
    })

# Create Hugging Face Dataset object
dataset = Dataset.from_list(sft_list)

model_name = "Qwen/Qwen3-1.7B-Base"  # replace with your model
tokenizer = AutoTokenizer.from_pretrained(model_name, use_auth_token=True)

def tokenize(example):
    # Combine input + output as one sequence for causal LM training
    return tokenizer(
        example["input"] + example["output"],
        truncation=True,
        max_length=1024
    )

tokenized_dataset = dataset.map(tokenize)

Map:   0%|          | 0/208 [00:00<?, ? examples/s]

In [21]:
# Split 80% train, 20% validation
split_dataset = tokenized_dataset.train_test_split(test_size=0.2)

def add_labels(batch):
    batch["labels"] = batch["input_ids"].copy()  # for causal LM
    return batch

split_dataset = split_dataset.map(add_labels)

Map:   0%|          | 0/166 [00:00<?, ? examples/s]

Map:   0%|          | 0/42 [00:00<?, ? examples/s]

In [22]:
from transformers import Trainer, TrainingArguments

training_args = TrainingArguments(
    output_dir="./qwen_sft_vr",
    per_device_train_batch_size=1,
    gradient_accumulation_steps=4,
    learning_rate=2e-4,
    num_train_epochs=3,
    fp16=True,
    save_steps=500,
    logging_steps=50,
    save_total_limit=2,
    report_to="none"
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=split_dataset["train"],
    eval_dataset=split_dataset["test"]
)

trainer.train()

The model is already on multiple devices. Skipping the move to device specified in `args`.


Step,Training Loss
50,0.3741
100,0.0737


TrainOutput(global_step=126, training_loss=0.18868177939975073, metrics={'train_runtime': 87.2327, 'train_samples_per_second': 5.709, 'train_steps_per_second': 1.444, 'total_flos': 936279755108352.0, 'train_loss': 0.18868177939975073, 'epoch': 3.0})

In [24]:
import shutil
from google.colab import files

# Zip the folder
shutil.make_archive("qwen_sft_vr", 'zip', "./qwen_sft_vr")

# Download
files.download("qwen_sft_vr.zip")

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [23]:
prompt = "A scene with pyramids at sunset and dust storm\nHTML_START\n"
inputs = tokenizer(prompt, return_tensors="pt").to("cuda")
outputs = model.generate(**inputs, max_new_tokens=1200)
print(tokenizer.decode(outputs[0]))

Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


A scene with pyramids at sunset and dust storm
HTML_START
<!DOCTYPE html>
<html>
  <head>
    <script src="https://aframe.io/releases/1.7.0/aframe.min.js"></script>
    <script src="https://unpkg.com/aframe-environment-component@1.5.x/dist/aframe-environment-component.min.js"></script>
    <script src="https://cdn.jsdelivr.net/gh/c-frame/aframe-particle-system-component@1.2.0/dist/aframe-particle-system-component.min.js"></script>
    <script src="https://cdn.jsdelivr.net/gh/c-frame/aframe-extras@7.5.0/dist/aframe-extras.min.js"></script>
  </head>
  <body>
    <a-scene>
      <a-entity environment="preset: egypt"></a-entity>
      <a-entity position="0 2.25 -15" particle-system="preset: dust"></a-entity>
    </a-scene>
  </body>
</html>

HTML_END

Assistant: <html>
  <head>
    <script src="https://aframe.io/releases/1.7.0/aframe.min.js"></script>
    <script src="https://unpkg.com/aframe-environment-component@1.5.x/dist/aframe-environment-component.min.js"></script>
    <script src="