# catnip
## Basic setup

### Colab only

In [None]:
import sys
import os

# clone repo
if not os.path.exists("/content/catnip"):
    !git clone -b yolo-bbo --recurse-submodules https://github.com/rifusaki/catnip.git
    %cd /content/catnip
else:
    %cd /content/catnip
    !git pull

# google auth
from google.colab import auth
auth.authenticate_user()

# gcsfuse
!echo "deb http://packages.cloud.google.com/apt gcsfuse-bionic main" > /etc/apt/sources.list.d/gcsfuse.list
!curl https://packages.cloud.google.com/apt/doc/apt-key.gpg | apt-key add -
!apt -qq update
!apt -qq install gcsfuse

# mount bucket
!mkdir -p /content/gcs
!gcsfuse --implicit-dirs catnip-data /content/gcs

# link bucket to expected data path in pipeline.yaml
# pipeline.yaml expects 'catnip-data/' in the repo root
!ln -s /content/gcs /content/catnip/catnip-data

# install packages if needed
%pip install ultralytics pydantic pydantic-settings omegaconf

In [None]:
# path configuration
from pathlib import Path

# absolute location of the Colab checkout
repo_root = Path("/content/catnip").resolve()
root_entry = str(repo_root)

# register the repo on sys.path only once, so re-running the cell
# does not add duplicate entries
if root_entry not in sys.path:
    sys.path.append(root_entry)

# make the repo root the working directory and report it
os.chdir(repo_root)
print(f"Working directory set to: {os.getcwd()}")

### Local only

In [1]:
import os
import sys
from pathlib import Path

# after cloning rifusaki/catnip:
# the repository root is taken to be the parent of the current directory
repo_root = Path("..").resolve()
root_entry = str(repo_root)

# register the repo on sys.path only once, so re-running the cell
# does not add duplicate entries
if root_entry not in sys.path:
    sys.path.append(root_entry)

# make the repo root the working directory and report it
os.chdir(repo_root)
print(f"Working directory set to: {os.getcwd()}")

Working directory set to: C:\Users\rifusaki\Desktop\catnip


## Data prep

### Paths and run config

In [11]:
# load configuration
from src.config import load_settings

settings = load_settings()

# print every configured location, one per line, so a wrong data root
# is visible before any later cell touches the filesystem
for _label, _attr in (
    ("Data Root", "data"),
    ("Manga Dir", "manga_dir"),
    ("Annotations Dir", "annotations_dir"),
    ("Models Dir", "model_dir"),
    ("Runs Dir", "runs_dir"),
    ("Output Dir", "output_dir"),
    ("Label Studio export Dir", "ls_exports_dir"),
):
    print(f"{_label}: {getattr(settings.paths, _attr)}")

# identifier of this run; later cells use it for the run folder and the
# exported weights file name
run_name = "0.11.0"

Data Root: catnip-data
Manga Dir: catnip-data\data\manga
Annotations Dir: catnip-data\data\annotations
Models Dir: catnip-data\models
Runs Dir: catnip-data\runs
Output Dir: catnip-data\results
Label Studio export Dir: catnip-data\data\ls-exports


In [5]:
# prepare dataset for YOLO
#
# a local 'yolo_dataset' folder holds symlinks to the real data, giving the
# standard images/labels layout whatever the source layout looks like
# (here the source folders are manga/annotations instead of images/labels)
from src.training.preparation import safe_symlink

dataset_root = repo_root / "yolo_dataset"
dataset_root.mkdir(exist_ok=True)

images_link, labels_link = dataset_root / "images", dataset_root / "labels"

# link each real data directory to its slot in the local structure
for _source, _link in (
    (settings.paths.manga_dir, images_link),
    (settings.paths.annotations_dir, labels_link),
):
    safe_symlink(_source, _link)

Created symlink: C:\Users\rifusaki\Desktop\catnip\yolo_dataset\images -> C:\Users\rifusaki\Desktop\catnip\catnip-data\data\manga
Created symlink: C:\Users\rifusaki\Desktop\catnip\yolo_dataset\labels -> C:\Users\rifusaki\Desktop\catnip\catnip-data\data\annotations


### Training prep

In [6]:
# the YOLO export from Label Studio does not include the file paths, and the
# images are not stored in a flat structure, so it cannot be used directly;
# instead the JSON export (which does include paths) is converted to YOLO
# format here
from src.convert_labels import convert_label_studio_to_yolo

# directory holding the Label Studio exports
ls_export_dir = settings.paths.ls_exports_dir
json_files = list(ls_export_dir.glob("*.json"))

if not json_files:
    print(f"No Label Studio JSON export found in {ls_export_dir}")
else:
    # take the export that sorts last by path; with date-prefixed file names
    # that is the most recent one (assign a specific file here if needed)
    json_file = max(json_files)
    print(f"Found Label Studio export: {json_file}")

    # label name -> YOLO class index
    class_ids = {"izutsumi": 0, "izutsumi_face": 1, "thistle": 2, "kabru": 3}
    convert_label_studio_to_yolo(json_file, settings.paths.annotations_dir, class_ids)

Found Label Studio export: catnip-data\data\ls-exports\251228_v1.json
Processed 76 tasks. Labels saved to catnip-data\data\annotations


In [7]:
# generate training list
from src.training.preparation import generate_training_list

# YOLO config artifacts live in <data root>/yoloConfig
config_dir = settings.paths.data / "yoloConfig"
config_dir.mkdir(parents=True, exist_ok=True)

# the list is named after the Label Studio export, so stop early when the
# conversion cell did not find (or did not define) one
if 'json_file' not in locals() or not json_file.exists():
    raise RuntimeError("No Label Studio JSON export found; cannot determine training list name.")
train_list_name = f"{json_file.stem}_train.txt"

train_list_path = config_dir / train_list_name

# build the list from the *symlinked* images/labels directories so the paths
# written to the txt file match what YOLO sees under 'yolo_dataset'
train_list_path = generate_training_list(
    images_link,
    labels_link,
    train_list_path,
    force_regenerate=True,
)

generating new training list: catnip-data\yoloConfig\251228_v1_train.txt
found 2752 total images in 'images' directory.
generated catnip-data\yoloConfig\251228_v1_train.txt
   - labeled images (subset): 92
   - unlabeled images (skipped): 2660


In [13]:
# create dataset.yaml
# the preparation module is reloaded first so edits made to it during this
# session are picked up without restarting the kernel
import importlib
import src.training.preparation
importlib.reload(src.training.preparation)
from src.training.preparation import create_dataset_yaml

# YOLO class index -> label name
class_names = {0: 'izutsumi', 1: 'izutsumi_face', 2: 'thistle', 3: 'kabru'}

# NOTE: val_path is the same list as train_path, so validation metrics are
# computed on the training images, not on a held-out split
dataset_yaml_path = create_dataset_yaml(
    path=dataset_root,  # local dataset root containing images/ and labels/
    train_path=train_list_path,
    val_path=train_list_path,
    names=class_names,
)

# print the generated file for a quick visual check
with open(dataset_yaml_path, 'r') as yaml_file:
    yaml_text = yaml_file.read()
print(yaml_text)

created dataset.yaml
names:
  0: izutsumi
  1: izutsumi_face
  2: thistle
  3: kabru
path: C:\Users\rifusaki\Desktop\catnip\yolo_dataset
train: C:\Users\rifusaki\Desktop\catnip\catnip-data\yoloConfig\251228_v1_train.txt
val: C:\Users\rifusaki\Desktop\catnip\catnip-data\yoloConfig\251228_v1_train.txt



## Model training

In [None]:
from ultralytics import YOLO
import torch

# Intel Extension for PyTorch is optional: report whether it could be
# imported and carry on either way
try:
    import intel_extension_for_pytorch as ipex
except ImportError:
    print("Intel Extension for PyTorch not found")
else:
    print("Intel Extension for PyTorch loaded")

# use the Intel XPU backend when torch exposes one and it is available;
# otherwise use the device from the settings
xpu_ready = hasattr(torch, 'xpu') and torch.xpu.is_available()
device = 'xpu' if xpu_ready else settings.params.device

print(f"Using device: {device}")

# start from the yolo11s.pt weights
model = YOLO("yolo11s.pt")

# exist_ok=True together with the fixed run_name means a repeated run reuses
# the same run folder under runs_dir
train_options = dict(
    data=str(dataset_yaml_path),
    epochs=100,
    imgsz=settings.params.img_size,
    project=str(settings.paths.runs_dir),
    name=run_name,
    device=device,
    cache=False,
    exist_ok=True,
)
results = model.train(**train_options)

Ultralytics 8.3.241  Python-3.13.11 torch-2.6.0 CPU (Intel Core i5-8250U 1.60GHz)
[34m[1mengine\trainer: [0magnostic_nms=False, amp=True, augment=False, auto_augment=randaugment, batch=16, bgr=0.0, box=7.5, cache=False, cfg=None, classes=None, close_mosaic=10, cls=0.5, compile=False, conf=None, copy_paste=0.0, copy_paste_mode=flip, cos_lr=False, cutmix=0.0, data=dataset.yaml, degrees=0.0, deterministic=True, device=cpu, dfl=1.5, dnn=False, dropout=0.0, dynamic=False, embed=None, epochs=100, erasing=0.4, exist_ok=True, fliplr=0.5, flipud=0.0, format=torchscript, fraction=1.0, freeze=None, half=False, hsv_h=0.015, hsv_s=0.7, hsv_v=0.4, imgsz=640, int8=False, iou=0.7, keras=False, kobj=1.0, line_width=None, lr0=0.01, lrf=0.01, mask_ratio=4, max_det=300, mixup=0.0, mode=train, model=yolo11s.pt, momentum=0.937, mosaic=1.0, multi_scale=False, name=0.11.0, nbs=64, nms=False, opset=None, optimize=False, optimizer=auto, overlap_mask=True, patience=100, perspective=0.0, plots=True, pose=12.0,

KeyboardInterrupt: 

In [None]:
# save best model
# hands the run location and the target location to the project helper; the
# target file name includes run_name so weights of different runs do not share
# a name (what exactly is copied is decided by save_best_model)
from src.training.preparation import save_best_model

best_weights_name = f"yolo11_izutsumi_{run_name}.pt"

save_best_model(
    project_dir=settings.paths.runs_dir,
    run_name=run_name,
    target_dir=settings.paths.model_dir,
    target_name=best_weights_name,
)

## Inference

In [None]:
from ultralytics import YOLO
from src.output.output import save_inference_results
import torch

# Intel Extension for PyTorch is optional; a missing package is ignored here
try:
    import intel_extension_for_pytorch as ipex
except ImportError:
    pass

# use the Intel XPU backend when torch exposes one and it is available;
# otherwise fall back to the device from the settings
xpu_ready = hasattr(torch, 'xpu') and torch.xpu.is_available()
device = 'xpu' if xpu_ready else settings.params.device

print(f"Using device: {device}")

# weights written by the training run with the same run_name
model_path = settings.paths.runs_dir / run_name / "weights" / "best.pt"

if model_path.exists():
    model = YOLO(model_path)

    # make sure the results directory exists
    output_dir = settings.paths.output_dir
    output_dir.mkdir(parents=True, exist_ok=True)

    inference_source = settings.paths.manga_dir
    print(f"Running inference on {inference_source}...")

    # recursive glob over the manga directory; nothing is saved by predict
    # itself (save=False, save_txt=False) - the results are passed on to
    # save_inference_results below
    predict_options = dict(
        source=f"{inference_source}/**/*.*",
        project=str(output_dir),
        name=run_name,
        save=False,
        save_txt=False,
        conf=0.25,
        stream=True,
        device=device,
    )
    results = model.predict(**predict_options)

    save_inference_results(results, output_dir, inference_source)
else:
    print(f"Model not found at {model_path}")