# Qwen finetune

## Information

Ignore augmentations with these names:
- _colorpatch
- _sam

In [20]:
from pathlib import Path
import shutil

# STAGE_DESCRIPTIONS = {
# 	 "stage_0": "I can see the base plate, therefore the object must be in stage_0",
# 	 "stage_1": "I can see the cylinder being on top of the base plate, therefore the object must be in stage_1",
# 	 "stage_2": "I can see the big metal piece being on top of the cylinder, therefore the object must be in stage_2",
# 	 "stage_3": "I can see the small metal piece being placed in the middle of the big metal piece, therefore the object must be in stage_3",
# 	 "stage_4": "I can see the small metal ring being placed in the center of the small metal piece, therefore the object must be in stage_4",
# 	 "stage_5": "I can see the 3 screws being screwed on the metal plate, therefore the object must be in stage_5",
# 	 "stage_6": "I can see the darker metal plate being placed on top of the object, therefore the object must be in stage_6",
# 	 "stage_7": "I can see 5 screws being screwed on the metal plate, therefore the object must be in stage_7",
# }

# Annotation text written for every image of a stage (see process_stage).
# The keys double as the stage sub-folder names.
# NOTE(review): stage_5 and stage_7 name no stage, unlike the other entries;
# left as-is because it may be deliberate - confirm.
STAGE_DESCRIPTIONS = {
	"stage_0": "I can see the base plate, which is the main piece of stage_0",
	"stage_1": "I can see the cylinder the main piece of stage_1",
	# Fixed: a missing space had fused "piece" and "the" into one word.
	"stage_2": "I can see the big metal piece the main piece of stage_2",
	"stage_3": "I can see the smaller metal thinner piece the main piece of stage_3",
	# Fixed: this label used to name stage_5, teaching the wrong stage for the ring.
	"stage_4": "I can see the small metal ring the main piece of stage_4",
	"stage_5": "I can see 3 screws",
	"stage_6": "I can see the darker metal plate the main piece of stage_6",
	"stage_7": "I can see 5 screws",
}


def process_stage(stage: str, src_root, dst_root) -> None:
	"""Copy the images of one stage and write one annotation file per image.

	Every entry of ``{src_root}/images/{stage}`` that is not a directory and
	whose name contains neither ``_colorpatch`` nor ``_sam`` is copied to
	``{dst_root}/images/{stage}/{stem}_solo{suffix}``. The text
	``STAGE_DESCRIPTIONS[stage]`` is written alongside as
	``{dst_root}/anno/{stage}/{stem}_solo.txt``.

	A missing source folder is not an error: a warning is printed and nothing
	is written.

	Args:
		stage: Sub-folder name and key into ``STAGE_DESCRIPTIONS``.
		src_root: Root that contains ``images/<stage>``; ``Path`` or ``str``.
		dst_root: Root that receives ``images/<stage>`` and ``anno/<stage>``;
			``Path`` or ``str``. Missing folders are created.

	Raises:
		KeyError: If the source folder exists but ``stage`` has no entry in
			``STAGE_DESCRIPTIONS``.
	"""
	src_root = Path(src_root)
	dst_root = Path(dst_root)

	src_dir = src_root / "images" / stage
	if not src_dir.is_dir():
		print(f"Warning: {src_dir} is missing, skipping.")
		return

	dst_img_dir = dst_root / "images" / stage
	dst_ann_dir = dst_root / "anno" / stage
	dst_img_dir.mkdir(parents=True, exist_ok=True)
	dst_ann_dir.mkdir(parents=True, exist_ok=True)

	desc_text = STAGE_DESCRIPTIONS[stage]

	for img_path in src_dir.iterdir():
		if img_path.is_dir():
			continue
		name = img_path.name
		# Augmented variants are excluded from this export.
		if "_colorpatch" in name or "_sam" in name:
			continue

		# Build both output names from the same stem so the image and its
		# annotation always share a basename. A textual replace of ".jpg"
		# left other extensions (".png", ".JPG") without the "_solo" tag
		# and doubled it for names that contain ".jpg" more than once.
		solo_stem = f"{img_path.stem}_solo"
		shutil.copy2(img_path, dst_img_dir / f"{solo_stem}{img_path.suffix}")
		(dst_ann_dir / f"{solo_stem}.txt").write_text(desc_text, encoding="utf-8")


# Example manual call (string paths are accepted):
# every stage is exported from the `rendered_single` tree first and from the
# `rendered` tree second. Both exports share one destination, so a file name
# present in both trees ends up as a single file written by the later call.
for stage in STAGE_DESCRIPTIONS:
	for src_root in (
		"/Users/georgye/Documents/repos/ethz/dslab25/assets/vacuum_pump/rendered_single",
		"/Users/georgye/Documents/repos/ethz/dslab25/assets/vacuum_pump/rendered",
	):
		process_stage(
			stage,
			src_root,
			"/Users/georgye/Documents/repos/ethz/dslab25/training/qwen",
		)

# Converting whole folder

In [26]:
#!/usr/bin/env python3
"""
prepare_qwen25vl_data.py
------------------------
Build the folder hierarchy and JSONL annotation files required to fine-tune
Qwen-2.5-VL on your assembly-stage images.

Images:  {images_root}/stage_k/XXXX.jpg
Labels:  {labels_root}/stage_k/XXXX.txt   (same relative path & basename)
"""

import json
import random
import shutil
from pathlib import Path
from typing import List, Tuple

# ────────────────────────────────────────────────────────────────────────────────
# System prompt saved next to the datasets as system_message.txt.
# The stage identifiers are spelled `stage_k` throughout. The previous text
# listed them as `state_k` while its own cross-references used `stage_k`,
# so the prompt named two different identifier sets for the same thing.
SYSTEM_MESSAGE = (
	"Your job is to tell me what you see in the image, and if possible what stage "
	"the object is currently in.\nHere are the possible stages:\n"
	"\t'stage_0': 'First part of the object: Base block metal piece',\n"
	"\t'stage_1': 'Second part of the object: Cylinder metal piece which gets stuck "
	"on the base block of stage_0',\n"
	"\t'stage_2': 'Third part of the object: A big metal piece which gets stuck on "
	"the cylinder piece of stage_1',\n"
	"\t'stage_3': 'Fourth part of the object: A smaller thin metal piece which gets "
	"put onto the center of the big metal piece of stage_2',\n"
	"\t'stage_4': 'Fifth part of the object: A tiny metal ring which gets placed "
	"onto the center of the thin metal piece of stage_3',\n"
	"\t'stage_5': 'Sixth part of the object: 3 screws now get screwed onto the piece',\n"
	"\t'stage_6': 'Seventh part of the object: A darker metal plate now gets placed "
	"on top of the piece',\n"
	"\t'stage_7': 'Eighth part of the object: 5 screws now get screwed onto the piece'"
)

# User turn stored as the "prefix" of every JSONL record; it uses the same
# `stage_k` identifiers as the system prompt.
USER_PREFIX = (
	"Describe the object and, if you can, tell me which stage (stage_0 … stage_7) "
	"it is currently in."
)
# ────────────────────────────────────────────────────────────────────────────────


def collect_pairs(img_root: Path, lbl_root: Path) -> List[Tuple[Path, str]]:
	"""Pair every ``*.jpg`` below *img_root* with the text of its label file.

	The label of ``{img_root}/<rel>.jpg`` is read from ``{lbl_root}/<rel>.txt``
	and stripped of surrounding whitespace. Images are visited in sorted path
	order so that the returned list, and any seeded shuffle applied to it,
	does not depend on the order in which the filesystem lists entries.

	Args:
		img_root: Root of the image tree; ``Path`` or ``str``.
		lbl_root: Root of the label tree; ``Path`` or ``str``.

	Returns:
		List of ``(image_path, label_text)`` tuples, sorted by image path.

	Raises:
		FileNotFoundError: If an image has no label file.
		RuntimeError: If no ``*.jpg`` file is found at all.
	"""
	img_root = Path(img_root)
	lbl_root = Path(lbl_root)

	pairs: List[Tuple[Path, str]] = []
	for img_path in sorted(img_root.rglob("*.jpg")):
		# Same relative path and basename, with the extension swapped for .txt.
		lbl_path = lbl_root / img_path.relative_to(img_root).with_suffix(".txt")
		if not lbl_path.exists():
			raise FileNotFoundError(f"Missing label file for image: {img_path}")
		pairs.append((img_path, lbl_path.read_text(encoding="utf-8").strip()))

	if not pairs:
		raise RuntimeError("No image/label pairs found - check your paths.")
	return pairs


def split_pairs(
	pairs: List[Tuple[Path, str]], train_split: float, seed: int
) -> Tuple[List[Tuple[Path, str]], List[Tuple[Path, str]]]:
	"""Shuffle *pairs* with a fixed seed and cut them into train and val lists.

	The shuffle runs on a copy, so the caller's list keeps its order. The
	resulting split is the same one an in-place shuffle with this seed gives.

	Args:
		pairs: ``(image_path, label_text)`` tuples to distribute.
		train_split: Fraction of the pairs assigned to the training list; the
			cut index is ``int(len(pairs) * train_split)``.
		seed: Seed for the private ``random.Random`` instance used to shuffle.

	Returns:
		``(train_pairs, val_pairs)``: the first ``cut`` shuffled pairs and
		the remaining ones.
	"""
	shuffled = list(pairs)
	random.Random(seed).shuffle(shuffled)
	cut = int(len(shuffled) * train_split)
	return shuffled[:cut], shuffled[cut:]


def write_jsonl(
	pairs: List[Tuple[Path, str]], out_dir: Path, img_root: Path
) -> None:
	"""Copy the images of *pairs* into *out_dir* and write ``annotations.jsonl``.

	Each image keeps its path relative to *img_root*, so sub-folders such as
	``stage_*/`` are recreated below *out_dir*. One JSON object per image is
	written to ``{out_dir}/annotations.jsonl`` with the keys ``image``
	(relative path with forward slashes), ``prefix`` (``USER_PREFIX``) and
	``suffix`` (the label text).

	Args:
		pairs: ``(image_path, label_text)`` tuples; every image path must lie
			below *img_root*.
		out_dir: Destination folder, created if missing; ``Path`` or ``str``.
		img_root: Root the image paths are made relative to; ``Path`` or ``str``.

	Raises:
		ValueError: If an image path is not located below *img_root*
			(raised by ``Path.relative_to``).
	"""
	out_dir = Path(out_dir)
	img_root = Path(img_root)
	anno_path = out_dir / "annotations.jsonl"
	out_dir.mkdir(parents=True, exist_ok=True)

	with anno_path.open("w", encoding="utf-8") as f:
		for img_path, label in pairs:
			rel_img = img_path.relative_to(img_root)
			dest_img = out_dir / rel_img
			dest_img.parent.mkdir(parents=True, exist_ok=True)
			shutil.copy2(img_path, dest_img)

			record = {
				# as_posix() gives forward slashes on every platform and,
				# unlike a textual replace, leaves a backslash that is part
				# of a POSIX file name untouched.
				"image": rel_img.as_posix(),
				"prefix": USER_PREFIX,
				"suffix": label,
			}
			f.write(json.dumps(record, ensure_ascii=False) + "\n")


# ─── USER‑SPECIFIC PATHS ────────────────────────────────────────────────────────
# Image tree, label tree, and the folder that receives the train/val datasets.
IMG_ROOT = Path("/Users/georgye/Documents/repos/ethz/dslab25/training/qwen/images/augmented")
LBL_ROOT = Path("/Users/georgye/Documents/repos/ethz/dslab25/training/qwen/annotation/augmented")
DATA_ROOT = Path("/Users/georgye/Documents/repos/ethz/dslab25/training/qwen/data")
TRAIN_SPLIT = 0.95  # fraction of the pairs handed to the training set
SEED = 32  # seed passed to split_pairs for the shuffle
# ────────────────────────────────────────────────────────────────────────────────

# Steps 1 and 2: gather (image, label) pairs, then divide them into train / val.
pairs = collect_pairs(IMG_ROOT, LBL_ROOT)
train_pairs, val_pairs = split_pairs(pairs, TRAIN_SPLIT, SEED)

# Step 3: export both datasets. Earlier exports are removed first so that no
# file from a previous run survives in the new dataset.
train_dir, val_dir = DATA_ROOT / "train", DATA_ROOT / "val"

for dirpath in (train_dir, val_dir):
	if dirpath.exists():
		shutil.rmtree(dirpath)

for subset, dirpath in ((train_pairs, train_dir), (val_pairs, val_dir)):
	write_jsonl(subset, dirpath, IMG_ROOT)

# Step 4: keep the system prompt next to the datasets.
DATA_ROOT.joinpath("system_message.txt").write_text(SYSTEM_MESSAGE, encoding="utf-8")

# Step 5: summary of what was written and where.
print(
	f"✅  Done.\n"
	f"• Train images: {len(train_pairs)} ➜ {train_dir}\n"
	f"• Val   images: {len(val_pairs)} ➜ {val_dir}\n"
	f"• JSONL files:  {train_dir/'annotations.jsonl'}, {val_dir/'annotations.jsonl'}"
)



✅  Done.
• Train images: 2675 ➜ /Users/georgye/Documents/repos/ethz/dslab25/training/qwen/data/train
• Val   images: 669 ➜ /Users/georgye/Documents/repos/ethz/dslab25/training/qwen/data/val
• JSONL files:  /Users/georgye/Documents/repos/ethz/dslab25/training/qwen/data/train/annotations.jsonl, /Users/georgye/Documents/repos/ethz/dslab25/training/qwen/data/val/annotations.jsonl
