# Catnip

## Setup

### Colab

In [None]:
# --- Scripts for Colab ---
# Clone repo
!git clone -b yolo-finetune --recurse-submodules https://github.com/rifusaki/catnip.git
%cd /content/catnip

# Authenticate with Google
from google.colab import auth
auth.authenticate_user()

# Install gcsfuse
!echo "deb http://packages.cloud.google.com/apt gcsfuse-bionic main" > /etc/apt/sources.list.d/gcsfuse.list
!curl https://packages.cloud.google.com/apt/doc/apt-key.gpg | apt-key add -
!apt -qq update
!apt -qq install gcsfuse

# Mount bucket
!mkdir -p /content/catnip/data
!gcsfuse --implicit-dirs catnip-data /content/catnip/data

# Install packages not included in Colab
%pip install ultralytics pydantic pydantic-settings omegaconf

import os
from pathlib import Path
os.chdir(str(Path.cwd())+'/catnip')

### Local

In [1]:
# Autoreload for debugging
%load_ext autoreload
%autoreload 2

import os
from pathlib import Path
os.chdir(Path.cwd().parent)

### General

In [2]:
# Dependencies and configuration
from src.config import settings, setup_dirs

print("Working directory set to:", Path.cwd())
izutsumiPaths, notIzutsumiPaths = setup_dirs()

Working directory set to: /Users/rifusaki/repos/catnip


## Pre-processing

### Panel extraction

In [None]:
from modules.coreMPE.src.adenzu_panel.image_processing import panel


_ = panel.extract_panels_for_images_in_folder_recursive(
    input_dir=str(settings.paths.pages_dir),
    output_dir=str(settings.paths.panels_dir),
    split_joint_panels=False,   # maps to --split-joint-panels
    fallback=True              # maps to --fallback
)

### Head crops

In [None]:
from src.preprocess.headExtraction import anime_extraction_recursive


valid_exts = {".jpg", ".jpeg", ".png"}
panel_paths = sorted(
    [p for p in settings.paths.panels_dir.iterdir() if p.suffix.lower() in valid_exts]
)
num_crops = anime_extraction_recursive()

print(f"Extracted {num_crops} faces")

## Catnip core

### Dataset preparation

In [14]:
from src.training.preparation import prepare_data

prepare_data(izutsumiPaths, notIzutsumiPaths, version=11)

Izutsumi: 181 | Not Izutsumi: 63
not implemented xd
Data prepared in data/recognition/izutsumiTraining


### Training

In [8]:
# For Apple Silicon
%env PYTORCH_ENABLE_MPS_FALLBACK=1
from ultralytics import YOLO

model = YOLO(settings.paths.model_dir/'yolo11x-cls.pt')
model.info(detailed=True)

env: PYTORCH_ENABLE_MPS_FALLBACK=1
layer                                    name                type  gradient  parameters               shape        mu     sigma
    0                     model.0.conv.weight              Conv2d     False        2592       [96, 3, 3, 3] -0.000392     0.151        float32
    1                       model.0.bn.weight         BatchNorm2d     False          96                [96]      1.42      1.48        float32
    1                         model.0.bn.bias         BatchNorm2d     False          96                [96]    -0.407      2.61        float32
    2                             model.0.act                SiLU     False           0                  []         -         -              -
    3                     model.1.conv.weight              Conv2d     False      165888     [192, 96, 3, 3] -0.000697    0.0306        float32
    4                       model.1.bn.weight         BatchNorm2d     False         192               [192]      4.73     

(176, 29637064, 0, 112.0110592)

In [20]:
# YOLOv8 data="config/izutsumiTraining11.yaml",
model.train(
        data="data/recognition/izutsumiTraining",
        epochs=200,
        imgsz=settings.params.img_size,
        batch=16,
        lr0=1e-24,       # lower LR for finetuning
        freeze=2,      # freeze backbone layers
        project="runs/izutsumi",
        name="v11",
        device='cpu', # "mps", "cuda", "cpu"
        workers=8,
        resume=False,
        cache=False     # True for Colab
    )

New https://pypi.org/project/ultralytics/8.3.218 available üòÉ Update with 'pip install -U ultralytics'
Ultralytics 8.3.203 üöÄ Python-3.12.11 torch-2.6.0 CPU (Apple M3)
[34m[1mengine/trainer: [0magnostic_nms=False, amp=True, augment=False, auto_augment=randaugment, batch=16, bgr=0.0, box=7.5, cache=False, cfg=None, classes=None, close_mosaic=10, cls=0.5, compile=False, conf=None, copy_paste=0.0, copy_paste_mode=flip, cos_lr=False, cutmix=0.0, data=data/recognition/izutsumiTraining, degrees=0.0, deterministic=True, device=cpu, dfl=1.5, dnn=False, dropout=0.0, dynamic=False, embed=None, epochs=200, erasing=0.4, exist_ok=False, fliplr=0.5, flipud=0.0, format=torchscript, fraction=1.0, freeze=2, half=False, hsv_h=0.015, hsv_s=0.7, hsv_v=0.4, imgsz=128, int8=False, iou=0.7, keras=False, kobj=1.0, line_width=None, lr0=1e-24, lrf=0.01, mask_ratio=4, max_det=300, mixup=0.0, mode=train, model=data/models/yolo11x-cls.pt, momentum=0.937, mosaic=1.0, multi_scale=False, name=v117, nbs=64, nms

KeyboardInterrupt: 

### Evaluation

In [None]:
# Evaluate
metrics = model.val()
print(metrics)  

### Prediction

In [None]:
# Predict on unseen images
model.predict(
    source="data/recognition/izutsumiTraining/val/images",
    save=True,
    conf=0.5
)