# Manga Colorizer — LoRA Training on Kaggle (2x T4)

This notebook trains a style-specific LoRA on Kaggle using `kohya-ss/sd-scripts`.
Upload your preprocessed dataset (from `python main.py preprocess`) before running.

In [None]:
# Install kohya_ss training scripts and dependencies
!pip install -q torch torchvision --index-url https://download.pytorch.org/whl/cu118
!pip install -q xformers
!git clone https://github.com/kohya-ss/sd-scripts.git /kaggle/working/sd-scripts
!pip install -q -r /kaggle/working/sd-scripts/requirements.txt
!pip install -q accelerate safetensors

In [None]:
# === CONFIGURATION ===
# Everything a user is expected to edit lives in this cell.

# --- Model and naming ---
LORA_NAME = "manga_style_lora"          # base name of the saved LoRA file
BASE_MODEL = "gsdf/Counterfeit-V3.0"    # Hugging Face repo of the base checkpoint

# --- Paths (adjust DATASET_DIR to your uploaded Kaggle dataset) ---
DATASET_DIR = "/kaggle/input/your-dataset"
OUTPUT_DIR = "/kaggle/working/lora_output"

# --- LoRA network shape ---
NETWORK_DIM = 32     # LoRA rank
NETWORK_ALPHA = 16

# --- Training schedule (tuned for 2x T4, 200 images) ---
EPOCHS = 10
BATCH_SIZE = 1
LEARNING_RATE = 1e-4
RESOLUTION = 512     # training images are square: RESOLUTION x RESOLUTION

In [None]:
import json
import os
import shutil
from pathlib import Path

# Extensions treated as training images; shared by staging, captioning and the
# final count so the three can never disagree.
IMAGE_SUFFIXES = {".png", ".jpg", ".jpeg", ".webp"}

os.makedirs(OUTPUT_DIR, exist_ok=True)

source_color_dir = Path(DATASET_DIR) / "train_color"
bw_dir = Path(DATASET_DIR) / "train_bw"

# Collect the training images once (sorted for a stable order across runs).
train_images = []
if source_color_dir.is_dir():
    train_images = sorted(
        p for p in source_color_dir.iterdir() if p.suffix.lower() in IMAGE_SUFFIXES
    )

# Caption files must sit next to the images, but Kaggle mounts /kaggle/input
# read-only. When the source folder is not writable, stage a writable mirror
# (symlinks to the images plus copies of any existing captions) and train from it.
color_dir = source_color_dir
if train_images and not os.access(source_color_dir, os.W_OK):
    color_dir = Path(OUTPUT_DIR).parent / "dataset_staged" / "train_color"
    color_dir.mkdir(parents=True, exist_ok=True)
    for img in train_images:
        staged_img = color_dir / img.name
        # is_symlink() also catches a dangling link left over from a previous run.
        if not (staged_img.exists() or staged_img.is_symlink()):
            staged_img.symlink_to(img)
        source_caption = img.with_suffix(".txt")
        staged_caption = staged_img.with_suffix(".txt")
        if source_caption.exists() and not staged_caption.exists():
            shutil.copyfile(source_caption, staged_caption)

# Create blank caption files (no text prompts needed); existing captions are kept.
for img in train_images:
    caption_file = (color_dir / img.name).with_suffix(".txt")
    if not caption_file.exists():
        caption_file.write_text("")

# Create dataset metadata for kohya training.
# NOTE(review): "conditioning_data_dir" looks like a ControlNet-style subset
# option — confirm that train_network.py accepts it in this sd-scripts version.
dataset_config = {
    "general": {
        "resolution": RESOLUTION,
        "shuffle_caption": False,
        "keep_tokens": 0,
    },
    "datasets": [
        {
            "subsets": [
                {
                    "image_dir": str(color_dir),
                    "conditioning_data_dir": str(bw_dir),
                    "caption_extension": ".txt",
                    "num_repeats": 5,
                }
            ]
        }
    ],
}

config_path = f"{OUTPUT_DIR}/dataset_config.json"
with open(config_path, "w") as f:
    json.dump(dataset_config, f, indent=2)

print(f"Config saved to: {config_path}")
if source_color_dir.is_dir():
    # Count every supported image type, not just PNG.
    print(f"Found {len(train_images)} training images")
else:
    print(f"WARNING: {source_color_dir} does not exist - check DATASET_DIR")

In [None]:
from huggingface_hub import hf_hub_download

# Download the base checkpoint from the repo chosen in the configuration cell
# (BASE_MODEL), so changing the config actually changes the model that is used.
# The filename below is specific to Counterfeit-V3.0; update it together with
# BASE_MODEL when switching to another repo.
model_path = hf_hub_download(
    repo_id=BASE_MODEL,
    filename="Counterfeit-V3.0_fix_fp16.safetensors",
    cache_dir="/kaggle/working/models",
)
print(f"Base model: {model_path}")

In [None]:
!accelerate launch \
    --num_processes=1 \
    --mixed_precision="fp16" \
    /kaggle/working/sd-scripts/train_network.py \
    --pretrained_model_name_or_path="{model_path}" \
    --dataset_config="{config_path}" \
    --output_dir="{OUTPUT_DIR}" \
    --output_name="{LORA_NAME}" \
    --save_model_as="safetensors" \
    --max_train_epochs={EPOCHS} \
    --learning_rate={LEARNING_RATE} \
    --optimizer_type="AdamW8bit" \
    --network_module="networks.lora" \
    --network_dim={NETWORK_DIM} \
    --network_alpha={NETWORK_ALPHA} \
    --train_batch_size={BATCH_SIZE} \
    --resolution="{RESOLUTION},{RESOLUTION}" \
    --mixed_precision="fp16" \
    --save_precision="fp16" \
    --xformers \
    --cache_latents \
    --gradient_checkpointing \
    --seed=42

In [None]:
import os

# Check that training produced the expected LoRA file and report the result.
lora_path = f"{OUTPUT_DIR}/{LORA_NAME}.safetensors"
if not os.path.exists(lora_path):
    # Training did not write the file: list the output directory to help debugging.
    print("ERROR: LoRA file not found. Check training logs above.")
    for entry in os.listdir(OUTPUT_DIR):
        print(f"  {entry}")
else:
    size_mb = os.path.getsize(lora_path) / 2**20  # bytes -> MiB
    print(f"LoRA saved: {lora_path} ({size_mb:.1f} MB)")
    print("Download this file and use with:")
    print("  python main.py colorize panel.png ref.png --lora path/to/lora.safetensors")