# Dungeon Meshi — character retrieval (TensorFlow)

A starter notebook that extracts panel crops, proposes head candidates with simple heuristics, computes embeddings with TensorFlow (MobileNetV2), and performs nearest-neighbor retrieval using scikit-learn.

Place your page images (`.jpg` or `.png`) in a folder and set the `PAGES_DIR` variable in the first code cell to that folder's path.

In [None]:
# Dependencies and configuration
import os

# Locations used by the later cells; relative paths resolve against the
# notebook's working directory.
PAGES_DIR = '.data/pages'  # change this to where your JPGs are
PANELS_DIR = '.data/panels'
CROPS_DIR = '.data/crops'

# Only the two output folders are created here; PAGES_DIR itself is not.
for _out_dir in (PANELS_DIR, CROPS_DIR):
    os.makedirs(_out_dir, exist_ok=True)

print('Setup done. Update PAGES_DIR if needed.')

OSError: [Errno 30] Read-only file system: '/mnt'

In [None]:
# Simple panel extraction and head-candidate extraction (OpenCV required)
import cv2, numpy as np, glob
from pathlib import Path

def extract_panels_from_page(page_path, out_dir, min_area=20000):
    """Cut panel-sized regions out of one page image and save them as JPEGs.

    Every pixel with a grayscale value of 240 or below is treated as
    foreground, gaps are bridged with a 9x9 morphological closing, and the
    bounding box of each sufficiently large outer contour is written to
    ``out_dir`` as ``<page stem>_panel_<contour index>.jpg``.

    Args:
        page_path: Path of the page image to read.
        out_dir: Directory that receives the panel crops.
        min_area: Contours whose area is below this many pixels are skipped.

    Returns:
        The number of panel crops successfully written, as an int. ``0`` is
        returned when the page cannot be read.
    """
    img = cv2.imread(page_path)
    if img is None:
        # Must be an int (not a list): callers add the result to a running total.
        return 0

    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    # Inverted threshold: anything at or below 240 becomes white foreground.
    _, th = cv2.threshold(gray, 240, 255, cv2.THRESH_BINARY_INV)
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (9, 9))
    closed = cv2.morphologyEx(th, cv2.MORPH_CLOSE, kernel)
    # findContours returns (contours, hierarchy) in OpenCV 4.x but
    # (image, contours, hierarchy) in 3.x; index -2 is the contour list in both.
    contours = cv2.findContours(closed, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[-2]

    base = Path(page_path).stem
    saved = 0
    for i, cnt in enumerate(contours):
        if cv2.contourArea(cnt) < min_area:
            continue
        x, y, w, h = cv2.boundingRect(cnt)
        crop = img[y:y + h, x:x + w]
        out_path = os.path.join(out_dir, f"{base}_panel_{i}.jpg")
        # imwrite reports failure through its return value; count real files only.
        if cv2.imwrite(out_path, crop):
            saved += 1
    return saved

# Collect every JPG and PNG page under PAGES_DIR (sorted by path), cut each
# one into panels, and report the total number of panels saved.
page_paths = sorted(
    path
    for pattern in ('*.jpg', '*.png')
    for path in glob.glob(os.path.join(PAGES_DIR, pattern))
)
count = sum(extract_panels_from_page(path, PANELS_DIR) for path in page_paths)
print('Saved', count, 'panels to', PANELS_DIR)


In [None]:
# Heuristic head candidate extraction
import glob
# For every saved panel, find connected dark regions whose size and aspect
# ratio pass a simple filter, and save a padded crop around each one as a
# head candidate.
panel_paths = sorted(glob.glob(os.path.join(PANELS_DIR, '*.jpg')))
count = 0
for p in panel_paths:
    img = cv2.imread(p)
    if img is None:
        # Unreadable or corrupt panel file: skip it rather than crash in cvtColor.
        continue
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    # Inverted threshold: pixels at or below 240 become foreground.
    _, th = cv2.threshold(gray, 240, 255, cv2.THRESH_BINARY_INV)
    num_labels, labels, stats, _ = cv2.connectedComponentsWithStats(th, connectivity=8)
    base = Path(p).stem
    panel_h, panel_w = img.shape[:2]
    # Label 0 is skipped; iteration starts at the first labelled component.
    for i in range(1, num_labels):
        x, y, w, h, area = stats[i]
        if area < 800:
            continue
        aspect = w / float(h)
        if not (0.35 < aspect < 1.8):
            continue
        # Grow the box by 40% of its width and 60% of its height on each
        # side, clamped to the panel bounds.
        pad_w = int(w * 0.4)
        pad_h = int(h * 0.6)
        x0 = max(0, x - pad_w)
        y0 = max(0, y - pad_h)
        x1 = min(panel_w, x + w + pad_w)
        y1 = min(panel_h, y + h + pad_h)
        crop = img[y0:y1, x0:x1]
        outp = os.path.join(CROPS_DIR, f"{base}_crop_{i}.jpg")
        # Count only crops that were actually written to disk.
        if cv2.imwrite(outp, crop):
            count += 1
print('Saved', count, 'candidate crops to', CROPS_DIR)


In [None]:
# Build embedding model (TensorFlow MobileNetV2) and compute embeddings
import tensorflow as tf
from tensorflow.keras import layers, models
from PIL import Image
import numpy as np, json

# Side length (pixels) every crop is resized to before being embedded.
IMG_SIZE = 128

# Backbone: ImageNet-pretrained MobileNetV2 without its classification head,
# followed by global average pooling, a linear 256-d projection and L2
# normalisation of each embedding vector.
base = tf.keras.applications.MobileNetV2(
    input_shape=(IMG_SIZE, IMG_SIZE, 3), include_top=False, weights='imagenet')
x = base.output
x = layers.GlobalAveragePooling2D()(x)
# The projection layer is never trained in this notebook. Its initializer is
# seeded so that rebuilding the model (e.g. after a kernel restart) is meant to
# reproduce the same weights, keeping previously saved embeddings comparable
# with freshly computed query vectors.
x = layers.Dense(
    256,
    activation=None,
    kernel_initializer=tf.keras.initializers.GlorotUniform(seed=0),
)(x)
x = layers.Lambda(lambda t: tf.math.l2_normalize(t, axis=1))(x)
embed_model = models.Model(inputs=base.input, outputs=x)


def load_img(path):
    """Load an image file as a model-ready float32 array.

    The image is converted to RGB, resized to ``IMG_SIZE`` x ``IMG_SIZE`` with
    bicubic resampling, and scaled from [0, 255] to [-1, 1], which is the
    input range the pretrained MobileNetV2 weights expect.

    Args:
        path: Path of the image file to load.

    Returns:
        A ``(IMG_SIZE, IMG_SIZE, 3)`` float32 NumPy array with values in [-1, 1].
    """
    # The context manager closes the file handle once the pixels are copied out.
    with Image.open(path) as im:
        img = im.convert('RGB').resize((IMG_SIZE, IMG_SIZE), Image.BICUBIC)
    return np.asarray(img, dtype=np.float32) / 127.5 - 1.0


crop_paths = sorted(glob.glob(os.path.join(CROPS_DIR, '*.jpg')))
print('Found', len(crop_paths), 'crops')
if crop_paths:
    X = np.stack([load_img(p) for p in crop_paths], axis=0)
    embs = embed_model.predict(X, batch_size=64)
    # NOTE(review): these output paths are hard-coded and independent of the
    # directories configured in the first cell; the nearest-neighbor cell
    # reads the same two files, so change both places together.
    np.save('/mnt/data/embeddings.npy', embs)
    with open('/mnt/data/crop_paths.json', 'w') as f:
        json.dump(crop_paths, f)
    print('Saved embeddings and paths')
else:
    print('No crops found; run earlier cells to produce crops.')


In [None]:
# Nearest-neighbor search with scikit-learn
from sklearn.neighbors import NearestNeighbors
# Load the embeddings and the matching crop paths written by the embedding
# cell; row i of `embs` corresponds to crop_paths[i].
embs = np.load('/mnt/data/embeddings.npy')
with open('/mnt/data/crop_paths.json', 'r') as f:
    crop_paths = json.load(f)

# kneighbors() raises ValueError when asked for more neighbours than there
# are indexed samples, so both neighbour counts are clamped to the index size.
nn = NearestNeighbors(n_neighbors=max(1, min(50, len(embs))), metric='cosine')
nn.fit(embs)

# Example: set seed_path to one seed Izutsumi crop on your machine
seed_path = None
if seed_path:
    # load_img and embed_model are defined in the embedding cell, so the query
    # goes through the same preprocessing and model as the indexed crops.
    seed = load_img(seed_path)
    vec = embed_model.predict(seed[np.newaxis, ...])
    n_results = max(1, min(30, len(embs)))
    dists, idxs = nn.kneighbors(vec, n_neighbors=n_results)
    import matplotlib.pyplot as plt
    from PIL import Image
    plt.figure(figsize=(12, 8))
    # A 5x6 grid holds up to 30 results; fewer results leave trailing cells empty.
    for i, idx in enumerate(idxs[0]):
        # Close each file after copying its pixels to avoid leaking handles.
        with Image.open(crop_paths[idx]) as src:
            im = src.convert('RGB')
        plt.subplot(5, 6, i + 1)
        plt.imshow(im.resize((128, 128)))
        # Title is the cosine distance from the seed to this neighbour.
        plt.title(f"{dists[0,i]:.3f}")
        plt.axis('off')
    plt.show()
else:
    print('Set seed_path to run a sample query (path to one Izutsumi crop)')
