1. 3Dメガネの色指定を画像から読み取る

In [None]:
!pip -q install opencv-python pillow numpy

In [1]:
import os, json, time
import numpy as np
import cv2
from PIL import Image
from google.colab import files

# Output locations for the Phase 1 artefacts.
SPEC_DEBUG = "/content/debug_filter_swatches.png"
SPEC_JSON = "/content/filter_spec.json"

# Ask for the colour-reference photo; exactly one uploaded file is accepted.
uploaded = files.upload()
if len(uploaded) != 1:
    raise RuntimeError("フェーズ1は『色指定用画像』を1枚だけアップロードしてください。")
# The only key of the upload mapping is used as the image path.
FILTER_IMG = next(iter(uploaded))

def read_rgb(path):
    """Read an image file with OpenCV and return it in RGB channel order.

    Args:
        path: Path of the image file handed to ``cv2.imread``.

    Returns:
        The decoded image after a BGR -> RGB conversion.

    Raises:
        RuntimeError: If ``cv2.imread`` returns ``None`` for ``path``.
    """
    image_bgr = cv2.imread(path, cv2.IMREAD_COLOR)
    if image_bgr is not None:
        return cv2.cvtColor(image_bgr, cv2.COLOR_BGR2RGB)
    raise RuntimeError(f"画像を読めません: {path}")

def find_swatches(img_rgb):
    """Find two saturated, roughly square colour patches in an RGB image.

    Two search windows are examined in turn: the bottom-left part of the image
    and then the whole image. In each window a saturation/brightness mask is
    built in HSV space, cleaned with morphological open/close, and the two
    largest non-overlapping bounding boxes that pass the size and aspect-ratio
    filters are taken as the swatches (ordered left to right).

    Args:
        img_rgb: RGB image array indexed as ``[row, col, channel]``.

    Returns:
        ``None`` when no window yields two swatches. Otherwise a dict with
        ``score``, ``roi_y0y1x0x1``, ``left_rect_full_xywh``,
        ``right_rect_full_xywh`` (both in full-image coordinates),
        ``left_mean_rgb`` and ``right_mean_rgb`` for the highest-scoring window.
    """
    img_h, img_w = img_rgb.shape[:2]
    # Search windows as (y0, y1, x0, x1).
    search_windows = [
        (int(img_h*0.55), img_h, 0, int(img_w*0.55)),  # bottom-left
        (0, img_h, 0, img_w),                          # fallback
    ]
    kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (5,5))

    def boxes_touch(box_a, box_b):
        # Axis-aligned overlap test on (area, x, y, w, h) tuples.
        _, ax, ay, aw, ah = box_a
        _, bx, by, bw, bh = box_b
        return not (ax+aw < bx or bx+bw < ax or ay+ah < by or by+bh < ay)

    def inset_mean(region, x, y, w, h):
        # Average colour of the box interior; a 15% margin keeps edge pixels out.
        margin = max(1, int(min(w,h)*0.15))
        left = max(0, x+margin)
        top = max(0, y+margin)
        right = min(region.shape[1], x+w-margin)
        bottom = min(region.shape[0], y+h-margin)
        patch = region[top:bottom, left:right]
        if patch.size == 0:
            # Box too small for the margin: fall back to the whole box.
            patch = region[y:y+h, x:x+w]
        return patch.reshape(-1,3).mean(axis=0)

    best = None
    for (y0, y1, x0, x1) in search_windows:
        region = img_rgb[y0:y1, x0:x1].copy()
        hsv = cv2.cvtColor(region, cv2.COLOR_RGB2HSV)
        sat = hsv[:,:,1]
        val = hsv[:,:,2]
        # Keep pixels that are saturated and neither very dark nor near-white.
        mask = ((sat > 80) & (val > 60) & (val < 250)).astype(np.uint8) * 255
        mask = cv2.morphologyEx(mask, cv2.MORPH_OPEN, kernel, iterations=1)
        mask = cv2.morphologyEx(mask, cv2.MORPH_CLOSE, kernel, iterations=2)

        contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
        boxes = []
        for contour in contours:
            bx, by, bw, bh = cv2.boundingRect(contour)
            box_area = bw*bh
            if box_area < 300:
                continue
            aspect = bw / max(bh,1)
            if not (0.5 <= aspect <= 2.5):
                continue
            boxes.append((box_area, bx, by, bw, bh))
        boxes.sort(reverse=True, key=lambda t: t[0])

        # Greedily take the largest boxes (top 12 only) that do not overlap.
        chosen = []
        for box in boxes[:12]:
            if any(boxes_touch(box, prev) for prev in chosen):
                continue
            chosen.append(box)
            if len(chosen) >= 2:
                break
        if len(chosen) < 2:
            continue

        chosen = sorted(chosen, key=lambda t: t[1])  # left->right
        (_, lx, ly, lw, lh), (_, rx, ry, rw, rh) = chosen[:2]

        left_rgb = inset_mean(region, lx, ly, lw, lh)
        right_rgb = inset_mean(region, rx, ry, rw, rh)

        # NOTE(review): the score grows with the masked pixel count, so the
        # whole-image window will usually beat the bottom-left window even
        # though it is labelled "fallback" — confirm this is intended.
        score = (lw*lh + rw*rh) + int(mask.sum()/255)
        candidate = {
            "score": score,
            "roi_y0y1x0x1": (y0,y1,x0,x1),
            "left_rect_full_xywh": (lx+x0, ly+y0, lw, lh),
            "right_rect_full_xywh": (rx+x0, ry+y0, rw, rh),
            "left_mean_rgb": left_rgb,
            "right_mean_rgb": right_rgb,
        }
        if best is None or candidate["score"] > best["score"]:
            best = candidate
    return best

def classify_filter(rgb):
    """Label a mean swatch colour as a ``"red"`` or ``"cyan"`` filter.

    Args:
        rgb: Object with a ``tolist()`` method yielding three numbers (R, G, B),
            e.g. a length-3 NumPy array.

    Returns:
        ``"red"`` when red exceeds both other channels by more than 15%;
        ``"cyan"`` when green+blue exceeds 1.25x red or blue is the largest
        channel; ``"red"`` otherwise.
    """
    red, green, blue = rgb.tolist()
    if red > green*1.15 and red > blue*1.15:
        return "red"
    cyan_heavy = (green + blue) > red*1.25
    blue_is_max = blue >= red and blue >= green
    return "cyan" if (cyan_heavy or blue_is_max) else "red"

# --- Phase 1 driver: detect the swatches, classify them, write the spec ---
img_rgb = read_rgb(FILTER_IMG)
sw = find_swatches(img_rgb)
if sw is None:
    raise RuntimeError("色見本（2つの色付き四角）が検出できません。もっとはっきり写っている画像で試してください。")

left_filter  = classify_filter(sw["left_mean_rgb"])
right_filter = classify_filter(sw["right_mean_rgb"])
# Both eyes getting the same label means the classification is unreliable.
# Keep the best-effort default, but say so instead of reporting a clean
# success, and record the fact in the spec for later inspection.
classification_fallback = left_filter == right_filter
if classification_fallback:
    print(f"⚠️ 左右の色見本が同じ色（{left_filter}）に分類されました。"
          "既定値 left=red / right=cyan を使用します。debug画像を確認してください。")
    left_filter, right_filter = "red", "cyan"  # safe fallback

# Debug image: left swatch outlined in red, right swatch in green (RGB order).
dbg = img_rgb.copy()
x,y,w,h = sw["left_rect_full_xywh"]
cv2.rectangle(dbg, (x,y), (x+w,y+h), (255,0,0), 3)
x,y,w,h = sw["right_rect_full_xywh"]
cv2.rectangle(dbg, (x,y), (x+w,y+h), (0,255,0), 3)
Image.fromarray(dbg).save(SPEC_DEBUG)

# NumPy values are converted to plain float/int so json.dump can serialise them.
spec = {
    "filter_spec_version": 1,
    "source_image": FILTER_IMG,
    "timestamp_unix": int(time.time()),
    "left_eye_filter": left_filter,
    "right_eye_filter": right_filter,
    "classification_fallback": classification_fallback,
    "left_swatch": {"mean_rgb": [float(x) for x in sw["left_mean_rgb"]],
                    "rect_xywh": [int(x) for x in sw["left_rect_full_xywh"]]},
    "right_swatch":{"mean_rgb": [float(x) for x in sw["right_mean_rgb"]],
                    "rect_xywh": [int(x) for x in sw["right_rect_full_xywh"]]},
    "roi_used_y0y1x0x1": [int(x) for x in sw["roi_y0y1x0x1"]],
    "detection_method": "HSV saturation swatch detection (no OCR)"
}
with open(SPEC_JSON, "w", encoding="utf-8") as f:
    json.dump(spec, f, ensure_ascii=False, indent=2)

print("✅ Phase1 OK")
print(" saved:", SPEC_JSON)
print(" debug:", SPEC_DEBUG)
print(" filters:", left_filter, right_filter)

Saving IMG_2697.jpeg to IMG_2697 (1).jpeg
✅ Phase1 OK
 saved: /content/filter_spec.json
 debug: /content/debug_filter_swatches.png
 filters: red cyan


2. インタラクティブ画像を生成する

In [2]:
!pip -q install opencv-python pillow numpy transformers

In [11]:
import os, json, shutil, zipfile
import numpy as np
import cv2
from PIL import Image
from transformers import pipeline
from google.colab import files

# Phase 2 reads the filter spec written by Phase 1.
SPEC_JSON = "/content/filter_spec.json"
if not os.path.exists(SPEC_JSON):
    raise RuntimeError("filter_spec.json が見つかりません。先にPhase1を実行してください。")

with open(SPEC_JSON, "r", encoding="utf-8") as f:
    spec = json.load(f)
left_filter, right_filter = spec["left_eye_filter"], spec["right_eye_filter"]

# Ask for the images to process; at least one file is required.
uploaded = files.upload()
target_paths = list(uploaded)
if not target_paths:
    raise RuntimeError("処理対象画像を1枚以上アップロードしてください。")

# Scratch directory for generated views and output directory for the bundle.
os.makedirs("/content/work2", exist_ok=True)
os.makedirs("/content/out_batch", exist_ok=True)

import numpy as np, cv2
from PIL import Image

# 1) Reshape the depth so it has a visible effect (clip + gamma)
def boost_depth(d01, clip_lo=0.08, clip_hi=0.92, gamma=0.7):
    """Stretch a depth map between two clip levels, then apply a gamma curve.

    Args:
        d01: Depth values; the processing loop passes a 0..1 array.
        clip_lo: Value mapped to 0 (anything below is clamped to 0).
        clip_hi: Value mapped to roughly 1 (anything above is clamped to 1).
        gamma: Exponent applied to the stretched values.

    Returns:
        ``clip((d01 - clip_lo) / (clip_hi - clip_lo + 1e-6), 0, 1) ** gamma``.
    """
    span = clip_hi - clip_lo + 1e-6  # epsilon avoids dividing by zero
    stretched = np.clip((d01 - clip_lo) / span, 0, 1)
    return np.power(stretched, gamma)

# 2) Smooth the depth while damaging edges as little as possible (cheap and effective)
def smooth_depth_bilateral(d01, d=7, sigmaColor=30, sigmaSpace=7):
    """Bilateral-filter a depth map via an 8-bit intermediate.

    Args:
        d01: Depth array; it is scaled by 255 and cast to ``uint8`` before
            filtering (the processing loop passes values in 0..1).
        d, sigmaColor, sigmaSpace: Forwarded unchanged to ``cv2.bilateralFilter``.

    Returns:
        The filtered map as ``float32``, divided by 255.
    """
    depth_u8 = (d01 * 255).astype(np.uint8)
    filtered_u8 = cv2.bilateralFilter(
        depth_u8, d=d, sigmaColor=sigmaColor, sigmaSpace=sigmaSpace
    )
    return filtered_u8.astype(np.float32) / 255.0

# 3) Multi-plane (8 layers by default) look-around views: quantising the depth
#    into a few planes keeps holes down while still giving a sense of depth.
def generate_views_multiplane(color_u8, depth01, outdir, nx=31, max_shift_ratio=0.03, planes=8):
    """Render ``nx`` horizontally shifted views using depth quantised into planes.

    Every output pixel samples ``color_u8`` on the same row at a horizontal
    offset that depends on the pixel's depth plane: plane 0 does not move and
    plane ``planes - 1`` moves by the full per-view shift. Source columns are
    clamped to the image width. Each view is written to
    ``{outdir}/v_{i:03d}.jpg``.

    Changes from the previous implementation:
      * ``nx == 1`` renders the single centre view instead of raising
        ``ZeroDivisionError``.
      * The front plane now reaches the full shift. The old ``1e-6`` guard in
        the scale denominator combined with ``int()`` truncation left it one
        pixel short.
      * The per-row Python loop is replaced by one vectorised gather per view.

    Args:
        color_u8: RGB uint8 image indexed ``[row, col, channel]``.
        depth01: 2-D float depth map; values are expected in 0..1.
        outdir: Directory that receives the JPEG views (created if missing).
        nx: Number of views to render.
        max_shift_ratio: Maximum shift as a fraction of the image width; the
            resulting pixel shift is clamped to the range 12..120.
        planes: Number of depth planes (must be >= 1).

    Raises:
        ValueError: If ``planes`` is smaller than 1.
    """
    if planes < 1:
        raise ValueError("planes must be >= 1")
    os.makedirs(outdir, exist_ok=True)

    H, W = depth01.shape
    rows = np.arange(H)[:, None]
    cols = np.arange(W, dtype=np.int32)

    # Safety clamp so the shift neither vanishes nor gets out of hand.
    max_shift = max(12, min(int(W * max_shift_ratio), 120))

    # Quantise depth in [0, 1] to a plane index 0..planes-1.
    q = np.clip((depth01 * (planes - 1)).round().astype(np.int32), 0, planes - 1)
    plane_ids = np.arange(planes)
    denom = max(planes - 1, 1)

    for i in range(nx):
        t = i / (nx - 1) if nx > 1 else 0.5
        dx = int((t - 0.5) * 2 * max_shift)  # no left/right inversion

        # Shift per plane, truncated toward zero. Multiplying before dividing
        # keeps exact multiples exact (e.g. 28 * 3 / 7 == 12.0).
        plane_shift = np.trunc(dx * plane_ids / denom).astype(np.int32)

        # Per-pixel source column, looked up from the pixel's plane.
        xs = np.clip(cols[None, :] + plane_shift[q], 0, W - 1)
        out = color_u8[rows, xs]

        Image.fromarray(out).save(f"{outdir}/v_{i:03d}.jpg", quality=92)

# (Optional) A slight contrast boost before view generation makes the result easier to see
def clahe_on_luma(rgb_u8, clip=2.0, grid=(8,8)):
    """Apply CLAHE to the L channel of the image's LAB form and return RGB.

    Args:
        rgb_u8: RGB image passed to ``cv2.cvtColor`` (uint8 in this notebook).
        clip: CLAHE clip limit (converted with ``float``).
        grid: CLAHE tile grid size.

    Returns:
        The image converted back to RGB with only the L channel equalised.
    """
    lightness, chroma_a, chroma_b = cv2.split(cv2.cvtColor(rgb_u8, cv2.COLOR_RGB2LAB))
    equalizer = cv2.createCLAHE(clipLimit=float(clip), tileGridSize=grid)
    lab_equalized = cv2.merge([equalizer.apply(lightness), chroma_a, chroma_b])
    return cv2.cvtColor(lab_equalized, cv2.COLOR_LAB2RGB)

def read_rgb(path):
    """Read an image file with OpenCV and return it in RGB channel order.

    Args:
        path: Path of the image file handed to ``cv2.imread``.

    Returns:
        The decoded image after a BGR -> RGB conversion.

    Raises:
        RuntimeError: If ``cv2.imread`` returns ``None`` for ``path``.
    """
    image_bgr = cv2.imread(path, cv2.IMREAD_COLOR)
    if image_bgr is not None:
        return cv2.cvtColor(image_bgr, cv2.COLOR_BGR2RGB)
    raise RuntimeError(f"画像を読めません: {path}")

def anaglyph_split(target_rgb_u8, left_filter, right_filter):
    """Split an anaglyph into per-eye grey images stored as 3-channel uint8.

    A ``"red"`` filter selects the R channel; any other value selects the mean
    of G and B. The chosen plane is replicated into all three channels.

    Args:
        target_rgb_u8: RGB image indexed ``[row, col, channel]``.
        left_filter: Filter name for the left eye.
        right_filter: Filter name for the right eye.

    Returns:
        ``(left, right)`` uint8 arrays with the same height and width as the input.
    """
    pixels = target_rgb_u8.astype(np.float32)
    red_plane = pixels[:,:,0]
    cyan_plane = (pixels[:,:,1] + pixels[:,:,2]) / 2.0

    def as_gray_rgb(filter_name):
        plane = red_plane if filter_name == "red" else cyan_plane
        return np.clip(np.stack([plane, plane, plane], -1), 0, 255).astype(np.uint8)

    return as_gray_rgb(left_filter), as_gray_rgb(right_filter)

# Monocular depth estimator built with the `transformers` pipeline factory.
# It is created once here and called for every target image in the
# processing loop further down.
depth_pipe = pipeline("depth-estimation", model="LiheYoung/depth-anything-large-hf")

def normalize01(a):
    """Min-max scale an array to roughly 0..1 as ``float32``.

    Args:
        a: Array exposing ``astype``, ``min`` and ``max`` (e.g. a NumPy array).

    Returns:
        ``(a - min) / (max - min + 1e-6)``; a constant input yields all zeros
        because the epsilon keeps the denominator non-zero.
    """
    values = a.astype(np.float32)
    lowest = values.min()
    spread = values.max() - lowest + 1e-6
    return (values - lowest) / spread

def stereo_sgbm_depth01(left_u8, right_u8):
    """Compute a 0..1 disparity map from a left/right pair with OpenCV SGBM.

    Both inputs are converted from RGB to grey before matching. The matcher
    output is divided by 16 (this code treats it as a x16 fixed-point value).
    Pixels equal to the global minimum are treated as invalid and left out of
    the min/max used for scaling; they end up clipped to 0.

    Args:
        left_u8: Left-eye RGB image.
        right_u8: Right-eye RGB image.

    Returns:
        Float array clipped to 0..1, or all zeros when no pixel is above the
        minimum disparity.
    """
    block = 7
    matcher = cv2.StereoSGBM_create(
        minDisparity=0, numDisparities=96, blockSize=block,
        P1=8*3*block*block, P2=32*3*block*block,
        disp12MaxDiff=1, uniquenessRatio=10,
        speckleWindowSize=80, speckleRange=2,
        preFilterCap=31, mode=cv2.STEREO_SGBM_MODE_SGBM_3WAY
    )
    gray_left = cv2.cvtColor(left_u8, cv2.COLOR_RGB2GRAY)
    gray_right = cv2.cvtColor(right_u8, cv2.COLOR_RGB2GRAY)
    disparity = matcher.compute(gray_left, gray_right).astype(np.float32) / 16.0

    usable = disparity > disparity.min()
    if not usable.any():
        return np.zeros_like(disparity)
    lowest, highest = disparity[usable].min(), disparity[usable].max()
    return np.clip((disparity - lowest)/(highest - lowest + 1e-6), 0, 1)

def generate_views(color_u8, depth01, outdir, nx=31, max_shift=28):
    """Render ``nx`` horizontally shifted views with a per-pixel depth shift.

    Every output pixel samples ``color_u8`` on the same row at column
    ``x + int(depth * dx)``, clamped to the image width. ``dx`` sweeps from
    ``-max_shift`` to ``+max_shift`` across the views. Each view is written to
    ``{outdir}/v_{i:03d}.jpg``.

    Changes from the previous implementation: ``nx == 1`` renders the single
    centre view instead of raising ``ZeroDivisionError``, and the per-row
    Python loop is replaced by an equivalent vectorised gather (same pixels).

    Args:
        color_u8: RGB uint8 image indexed ``[row, col, channel]``.
        depth01: 2-D float depth map; values are expected in 0..1.
        outdir: Directory that receives the JPEG views (created if missing).
        nx: Number of views to render.
        max_shift: Maximum shift in pixels at the extreme views.
    """
    os.makedirs(outdir, exist_ok=True)
    h, w = depth01.shape
    rows = np.arange(h)[:, None]
    cols = np.arange(w, dtype=np.int32)
    for i in range(nx):
        t = i/(nx-1) if nx > 1 else 0.5
        dx = int((t - 0.5) * 2 * max_shift)   # no left/right inversion
        # astype(int32) truncates toward zero, exactly as the row-wise version did.
        shift = (depth01 * dx).astype(np.int32)
        xs = np.clip(cols + shift, 0, w-1)
        warped = color_u8[rows, xs]
        Image.fromarray(warped).save(f"{outdir}/v_{i:03d}.jpg", quality=92)

# Viewer page template, filled in with str.format (fields: title, meta, nx, mid).
# Literal braces in the CSS/JS are doubled for that reason.
# Fixes relative to the earlier template:
#   * enable() is guarded by `started`, so repeated clicks no longer stack
#     extra listeners and animation loops.
#   * the <img> src is only reassigned when the view index actually changes.
#   * DeviceOrientationEvent is checked with `typeof ... !== "undefined"`
#     because optional chaining on an undeclared global throws.
#   * a rejected permission request re-arms the button instead of failing.
HTML = """<!doctype html><html><head>
<meta charset="utf-8"/><meta name="viewport" content="width=device-width,initial-scale=1"/>
<title>{title}</title>
<style>
body{{margin:0;background:#000;color:#fff;font-family:system-ui,-apple-system,sans-serif}}
#top{{position:fixed;left:10px;top:10px;z-index:10;display:flex;gap:10px;align-items:center}}
button{{font-size:16px;padding:8px 10px}}
#meta{{font-size:12px;opacity:.85}}
img{{width:100vw;height:auto;display:block}}
</style></head><body>
<div id="top"><button onclick="enable()">Enable motion</button><div id="meta">{meta}</div></div>
<img id="view" src="views/v_{mid:03d}.jpg"/>
<script>
const nx={nx}; const yawMin=-6, yawMax=6; let yawSm=0; const alpha=0.15;
let shownIdx={mid}; let started=false;
const view=document.getElementById("view");
function clamp(x,a,b){{return Math.max(a,Math.min(b,x));}}
function idx(){{const t=clamp((yawSm-yawMin)/(yawMax-yawMin),0,1); return Math.round(t*(nx-1));}}
function tick(){{
  const i=idx();
  if(i!==shownIdx){{shownIdx=i; view.src="views/v_"+String(i).padStart(3,"0")+".jpg";}}
  requestAnimationFrame(tick);
}}
async function enable(){{
  if(started) return;
  started=true;
  if(typeof DeviceOrientationEvent!=="undefined" && typeof DeviceOrientationEvent.requestPermission==="function"){{
    let r;
    try{{ r=await DeviceOrientationEvent.requestPermission(); }}catch(err){{ r="denied"; }}
    if(r!=="granted"){{started=false; return;}}
  }}
  window.addEventListener("deviceorientation",(e)=>{{
    const g=(e.gamma??0);
    const y=clamp(g*0.1,yawMin,yawMax);
    yawSm = yawSm + alpha*(y - yawSm);
  }},true);
  tick();
}}
</script></body></html>"""

def write_viewer(outdir, title, meta, views_dir, nx=31, mid=15):
    """Assemble a self-contained viewer directory: ``index.html`` plus ``views/``.

    Every entry of ``views_dir`` is copied to ``{outdir}/views`` and the HTML
    template is rendered next to it.

    ``title`` and ``meta`` are HTML-escaped before insertion because they are
    built from uploaded file names, which may contain ``&``, ``<`` or quotes.
    ``mid`` is clamped to ``0..nx-1`` so the initial ``<img>`` always points at
    a view that exists.

    Args:
        outdir: Destination directory (created if missing).
        title: Page title text.
        meta: Short description shown beside the button.
        views_dir: Directory holding the rendered ``v_XXX.jpg`` views.
        nx: Number of views, passed to the page script.
        mid: Index of the view shown initially.
    """
    import html  # local import: only this function needs it

    views_out = os.path.join(outdir, "views")
    os.makedirs(views_out, exist_ok=True)  # also creates outdir itself
    for fn in sorted(os.listdir(views_dir)):
        shutil.copy2(os.path.join(views_dir, fn), os.path.join(views_out, fn))

    mid = max(0, min(int(mid), max(int(nx) - 1, 0)))
    page = HTML.format(
        title=html.escape(str(title)),
        meta=html.escape(str(meta)),
        nx=nx,
        mid=mid,
    )
    with open(os.path.join(outdir, "index.html"), "w", encoding="utf-8") as f:
        f.write(page)

# --- Phase 2 driver: per uploaded image, build the mono and stereo viewers ---
# Fixes relative to the earlier loop:
#   * `mono_views` was never defined in this cell (NameError on a fresh
#     kernel); the mono viewer now uses `mono_views_dir`.
#   * the leftover generate_views(...) call on the mono depth is removed, since
#     it overwrote the multi-plane views.
#   * mono_depth_raw.png now stores the depth before boost/smoothing instead of
#     a second copy of the tuned depth.
for tp in target_paths:
    base = os.path.splitext(os.path.basename(tp))[0]
    print("Processing:", tp)
    rgb = read_rgb(tp)
    left_u8, right_u8 = anaglyph_split(rgb, left_filter, right_filter)
    # left_u8 is the left-eye image after the anaglyph split (RGB uint8).
    left_u8 = clahe_on_luma(left_u8, clip=2.0)

    # Mono depth: run the depth model, resize to the image size, scale to 0..1.
    depth_mono = depth_pipe(Image.fromarray(left_u8))["depth"].resize((left_u8.shape[1], left_u8.shape[0]))
    depth_mono_raw01 = normalize01(np.array(depth_mono))     # 0..1
    depth_mono01 = boost_depth(depth_mono_raw01, 0.08, 0.92, 0.7)
    depth_mono01 = smooth_depth_bilateral(depth_mono01)

    # Stereo depth (baseline)
    depth_stereo01 = stereo_sgbm_depth01(left_u8, right_u8)

    # Views
    mono_views_dir = f"/content/work2/{base}_mono_views"
    generate_views_multiplane(
        color_u8=left_u8,
        depth01=depth_mono01,
        outdir=mono_views_dir,
        nx=31,
        max_shift_ratio=0.035,  # 3.5% (use 0.04 for a stronger effect)
        planes=8
    )
    st_views = f"/content/work2/{base}_stereo_views"
    generate_views(left_u8, depth_stereo01, st_views)

    meta_common = f"filters(spec): L={left_filter}/R={right_filter}"
    write_viewer(f"/content/out_batch/{base}/mono",   f"{base} mono",   meta_common+" | mono(DepthAnything)", mono_views_dir)
    write_viewer(f"/content/out_batch/{base}/stereo", f"{base} stereo", meta_common+" | stereo(SGBM)",       st_views)

    # Depth maps are saved too, so the effect of the tuning can be checked by eye.
    Image.fromarray((depth_stereo01*255).astype(np.uint8)).save(f"/content/out_batch/{base}/stereo_depth_raw.png")
    Image.fromarray((depth_mono_raw01*255).astype(np.uint8)).save(f"/content/out_batch/{base}/mono_depth_raw.png")
    Image.fromarray((depth_mono01*255).astype(np.uint8)).save(f"/content/out_batch/{base}/mono_depth_tuned.png")

# Bundle everything under /content/out_batch into one archive and hand it to
# the browser. Archive names are the file paths with "/content/" removed.
zip_path = "/content/out_batch_bundle.zip"
with zipfile.ZipFile(zip_path, "w", zipfile.ZIP_DEFLATED) as bundle:
    for folder, _, names in os.walk("/content/out_batch"):
        for full_path in (os.path.join(folder, name) for name in names):
            bundle.write(full_path, arcname=full_path.replace("/content/",""))

print("✅ Phase2 OK ->", zip_path)
files.download(zip_path)

Saving IMG_2692.jpeg to IMG_2692 (3).jpeg


Device set to use cpu


Processing: IMG_2692 (3).jpeg
✅ Phase2 OK -> /content/out_batch_bundle.zip


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>