# Week 4 – Virtual Tour / Three.js Export
Run the incremental SfM, export a lightweight bundle for the web, and generate a minimal Three.js viewer.

In [1]:
from pathlib import Path
import sys

# Resolve the project root: start from the working directory and step up one
# level only when the assets folder is missing here but present in the parent.
PROJECT_ROOT = Path.cwd().resolve()
if not PROJECT_ROOT.joinpath("assets").exists():
    if PROJECT_ROOT.parent.joinpath("assets").exists():
        PROJECT_ROOT = PROJECT_ROOT.parent

# Make the project importable (needed for the `src.*` imports in later cells)
# without adding a duplicate entry when the cell is re-run.
if str(PROJECT_ROOT) not in sys.path:
    sys.path.append(str(PROJECT_ROOT))

# Input images for this run and the folder every export is written to.
ASSETS_DIR = PROJECT_ROOT.joinpath("assets", "wall_b")
OUTPUT_DIR = PROJECT_ROOT.joinpath("outputs", "reconstruction")
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

ASSETS_DIR, OUTPUT_DIR

(PosixPath('/Users/reyyan/cv-project/assets/wall_b'),
 PosixPath('/Users/reyyan/cv-project/outputs/reconstruction'))

In [2]:
from src.multi_view_sfm import run_incremental_sfm

# Reconstruction settings gathered in one place so they are easy to tweak
# between runs; they are forwarded unchanged to run_incremental_sfm.
sfm_options = dict(
    asset_dir=ASSETS_DIR,
    detector="SIFT",
    ratio_thresh=0.75,
    refine=True,
    min_correspondences=12,
    return_tracks=True,
)
result = run_incremental_sfm(**sfm_options)

# Display the run summary as the cell output.
result.stats

{'images': 7,
 'registered': 7,
 'skipped': [],
 'points': 908,
 'pose_inliers': {0: 0, 1: 0, 2: 22, 3: 56, 4: 36, 5: 71, 6: 89},
 'retriangulated': 486}

In [3]:
import json
import numpy as np
from src.interpolation import rotation_to_quaternion


def camera_center(R: np.ndarray, t: np.ndarray) -> list[float]:
    """Return ``-R^T t`` as a flat list of floats.

    For a pose written as ``x_cam = R @ X_world + t`` this is the camera
    centre in world coordinates. ``t`` may be a flat 3-vector or a 3x1
    column; the result is flattened either way.
    """
    negated_rotation_t = -R.T
    center = negated_rotation_t @ t
    return center.ravel().tolist()


# Skeleton of the export; the camera and point lists are filled in below.
bundle = {
    "intrinsics": result.K.tolist(),
    "images": [p.name for p in result.image_paths or []],
    "cameras": [],
    "points": [],
}

# Only registered views are exported. Each entry carries the pose (R, t), the
# derived camera centre, and the rotation as returned by rotation_to_quaternion.
bundle["cameras"].extend(
    {
        "index": view_idx,
        "image": pose.image_path.name if pose.image_path else None,
        "R": pose.R.tolist(),
        "t": pose.t.ravel().tolist(),
        "center": camera_center(pose.R, pose.t),
        "quat": rotation_to_quaternion(pose.R).tolist(),
    }
    for view_idx, pose in enumerate(result.poses)
    if pose.registered
)

# One record per 3D point; values are converted to plain Python floats/ints
# so that json can serialise them.
bundle["points"].extend(
    {"xyz": list(map(float, xyz.tolist())), "rgb": list(map(int, color.tolist()))}
    for xyz, color in zip(result.points_3d, result.colors_rgb.astype(int))
)

json_path = OUTPUT_DIR / "virtual_tour_data.json"
with open(json_path, "w", encoding="utf-8") as f:
    json.dump(bundle, f, indent=2)
print(f"Wrote {len(bundle['cameras'])} cameras and {len(bundle['points'])} points -> {json_path}")

# Minimal Three.js viewer. The page is static: it fetches
# virtual_tour_data.json from its own folder at load time, so the HTML itself
# contains no reconstruction data and does not need regenerating when only
# the JSON changes.
html_path = OUTPUT_DIR / "virtual_tour_viewer.html"
html = """<!doctype html>
<html lang='en'>
<head>
  <meta charset='UTF-8'>
  <meta name='viewport' content='width=device-width, initial-scale=1.0'>
  <title>Virtual Tour Viewer</title>
  <style>
    body, html { margin: 0; padding: 0; overflow: hidden; background: #0a0a0f; }
    #info { position: absolute; top: 12px; left: 12px; color: #f0f0f0; font-family: monospace; z-index: 1; }
  </style>
  <link rel="icon" href="data:,">
  <script type="importmap">{
    "imports": {
      "three": "https://cdn.jsdelivr.net/npm/three@0.160.0/build/three.module.js",
      "three/examples/jsm/": "https://cdn.jsdelivr.net/npm/three@0.160.0/examples/jsm/"
    }
  }</script>
</head>
<body>
  <div id='info'>Drag: orbit, Scroll: zoom</div>
  <canvas id='c'></canvas>
  <script type='module'>
    import * as THREE from 'three';
    import { OrbitControls } from 'three/examples/jsm/controls/OrbitControls.js';

    const canvas = document.getElementById('c');
    const renderer = new THREE.WebGLRenderer({canvas, antialias: true});
    const scene = new THREE.Scene();
    scene.background = new THREE.Color(0x0a0a0f);
    const camera = new THREE.PerspectiveCamera(60, 2, 0.1, 5000);
    const controls = new OrbitControls(camera, renderer.domElement);

    fetch('virtual_tour_data.json').then(r => {
      // Surface HTTP errors (e.g. 404) instead of failing later inside r.json().
      if (!r.ok) throw new Error('HTTP ' + r.status);
      return r.json();
    }).then(data => {
      const pts = data.points;
      const positions = new Float32Array(pts.length * 3);
      const colors = new Float32Array(pts.length * 3);
      let i = 0;
      for (const p of pts) {
        positions[3*i] = p.xyz[0];
        positions[3*i+1] = p.xyz[1];
        positions[3*i+2] = p.xyz[2];
        colors[3*i] = p.rgb[0] / 255;
        colors[3*i+1] = p.rgb[1] / 255;
        colors[3*i+2] = p.rgb[2] / 255;
        i++;
      }
      const geom = new THREE.BufferGeometry();
      geom.setAttribute('position', new THREE.BufferAttribute(positions, 3));
      geom.setAttribute('color', new THREE.BufferAttribute(colors, 3));
      const mat = new THREE.PointsMaterial({size: 2.2, vertexColors: true, sizeAttenuation: true});
      scene.add(new THREE.Points(geom, mat));

      const camGeom = new THREE.SphereGeometry(4, 10, 10);
      const camMat = new THREE.MeshBasicMaterial({color: 0xffffff});
      for (const c of data.cameras) {
        const m = new THREE.Mesh(camGeom, camMat);
        m.position.set(c.center[0], c.center[1], c.center[2]);
        scene.add(m);
      }

      geom.computeBoundingBox();
      const bb = geom.boundingBox;
      const center = new THREE.Vector3();
      bb.getCenter(center);
      const size = new THREE.Vector3();
      bb.getSize(size);
      const radius = Math.max(size.x, size.y, size.z) * 0.7;
      camera.position.set(center.x, center.y, center.z + radius * 1.5);
      controls.target.copy(center);
      controls.update();
    }).catch(err => {
      // Without this the page stays blank when the JSON is missing or the
      // file was opened directly from disk instead of through a local server.
      console.error(err);
      document.getElementById('info').textContent =
        'Failed to load virtual_tour_data.json (' + err.message + '). Serve this folder over HTTP and reload.';
    });

    function resizeRenderer() {
      const w = window.innerWidth;
      const h = window.innerHeight;
      renderer.setSize(w, h, false);
      camera.aspect = w / h;
      camera.updateProjectionMatrix();
    }
    window.addEventListener('resize', resizeRenderer);
    resizeRenderer();

    function render() {
      renderer.render(scene, camera);
      requestAnimationFrame(render);
    }
    render();
  </script>
</body>
</html>"""

with open(html_path, "w", encoding="utf-8") as f:
    f.write(html)
print(f"Wrote {html_path}")


Wrote 7 cameras and 908 points -> /Users/reyyan/cv-project/outputs/reconstruction/virtual_tour_data.json
Wrote /Users/reyyan/cv-project/outputs/reconstruction/virtual_tour_viewer.html


## How to view the Three.js app
1. Run the notebook cells above.
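2. Start a local server from the reconstruction output folder (paths below are relative to the project root; a server is needed because the viewer loads `virtual_tour_data.json` with `fetch`, which browsers block for pages opened directly from `file://`):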
   ```bash
   cd outputs/reconstruction
   python -m http.server 8000
   ```
3. Open `http://localhost:8000/virtual_tour_viewer.html` in your browser.

Controls: drag to orbit, scroll to zoom; the point cloud is colored by the source images and camera centers are shown as white spheres.

In [None]:
# Reyyan//

In [4]:
# Launch the desktop (matplotlib-based) virtual tour in a separate window.
import matplotlib
# Select the Tk backend so the viewer opens in its own interactive window
# instead of rendering inline. This is done before importing the viewer module
# so the backend is already in effect when that module loads.
# NOTE(review): requires a Tk-enabled Python and a display - confirm on the target machine.
matplotlib.use('TkAgg')  # Required for the interactive window
from src.virtual_tour import VirtualTourViewer

# `result` is the SfM output produced by run_incremental_sfm in an earlier cell;
# `ASSETS_DIR` is the folder holding the source images.
viewer = VirtualTourViewer(result, ASSETS_DIR)
viewer.run()

: 

In [8]:
#!pip3 install --pre open3d
!pip install trimesh

Collecting trimesh
  Downloading trimesh-4.10.1-py3-none-any.whl.metadata (13 kB)
Downloading trimesh-4.10.1-py3-none-any.whl (737 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m737.0/737.0 kB[0m [31m1.2 MB/s[0m  [33m0:00:00[0m eta [36m0:00:01[0m
[?25hInstalling collected packages: trimesh
Successfully installed trimesh-4.10.1


In [46]:
import json
import numpy as np
import trimesh
from copy import deepcopy
from pathlib import Path

# ================= CONFIGURATION =================
# Resolve the project root the same way as the notebook's first cell: use the
# working directory, stepping up one level only when `assets/` is missing here
# but present in the parent. This keeps OUTPUT_DIR correct whether the notebook
# is started from the project root or from a sub-folder.
_project_root = Path.cwd().resolve()
if not (_project_root / "assets").exists() and (_project_root.parent / "assets").exists():
    _project_root = _project_root.parent
OUTPUT_DIR = _project_root / "outputs" / "reconstruction"

# 1. SCALE: uniform factor applied to wall B (points and camera centres)
# before it is rotated/translated. Increase this to push Wall B further away.
# If it's too close, try 2.0, 3.0, or even 5.0.
SCALE_B = 2.0

# 2. POSITION: offset added to wall B after rotation. Use [0, 0, 0] if both
# walls were captured from the same spot.
# NOTE(review): the values are in reconstruction units; they are only metres
# if the reconstructions are metrically scaled - confirm.
TRANSLATION_METERS = [7.0, 0.0, -2.0]

# 3. ROTATION: rotation of wall B about the Y axis, in degrees (the L-shaped corner).
ROTATION_Y_DEGREES = -90.0

# 4. SPIN: extra rotation about Y, in degrees, applied to wall B camera
# orientations only. 180 was found empirically to fix the viewing direction.
CAMERA_SPIN_CORRECTION = 180.0

# Per-wall inputs: dense point clouds and the matching camera exports.
INPUT_FILES = {
    "ply_a": OUTPUT_DIR / "wall_a_dense.ply",
    "ply_b": OUTPUT_DIR / "wall_b_dense.ply",
    "json_a": OUTPUT_DIR / "cameras_wall_a.json",
    "json_b": OUTPUT_DIR / "cameras_wall_b.json",
}
# Merged outputs written by main().
OUTPUT_FILES = {
    "ply_merged": OUTPUT_DIR / "merged_scene.ply",
    "json_merged": OUTPUT_DIR / "merged_cameras.json",
}
# =================================================

def get_transform_matrix(degrees, translation):
    """Build a 4x4 homogeneous transform: rotation about Y plus a translation.

    Args:
        degrees: Rotation angle about the Y axis, in degrees.
        translation: Three values written into the last column.

    Returns:
        A 4x4 float array whose upper-left 3x3 block is the Y rotation and
        whose last column holds ``translation``.
    """
    angle = np.radians(degrees)
    cos_a = np.cos(angle)
    sin_a = np.sin(angle)

    transform = np.eye(4)
    transform[:3, :3] = [
        [cos_a, 0, sin_a],
        [0, 1, 0],
        [-sin_a, 0, cos_a],
    ]
    transform[:3, 3] = translation
    return transform

def robust_load_and_merge(path):
    """Load a geometry file and flatten everything in it into one point cloud.

    Whatever ``trimesh.load`` returns (a scene, a list, or a single geometry)
    is walked depth-first; the vertices of every object that has them are
    collected together with a colour array (its ``colors``, else its
    ``visual.vertex_colors``, else opaque white).

    Args:
        path: Location of the file to load.

    Returns:
        A ``trimesh.PointCloud`` with all collected vertices, or ``None`` when
        loading fails (the error is printed) or no vertices are found.
    """
    try:
        loaded = trimesh.load(str(path))
    except Exception as e:
        # Best effort: report the failure and let the caller decide what to do.
        print(f"FAILED to load {path}: {e}")
        return None

    vertex_chunks = []
    color_chunks = []

    def collect(node):
        # Containers: recurse into their children in order.
        if isinstance(node, trimesh.Scene):
            for child in node.geometry.values():
                collect(child)
            return
        if isinstance(node, list):
            for child in node:
                collect(child)
            return
        # Anything without vertices contributes nothing.
        if not hasattr(node, 'vertices'):
            return

        vertex_chunks.append(np.array(node.vertices))
        if hasattr(node, 'colors') and len(node.colors) > 0:
            color_chunks.append(np.array(node.colors))
        elif hasattr(node, 'visual') and hasattr(node.visual, 'vertex_colors') and len(node.visual.vertex_colors) > 0:
            color_chunks.append(np.array(node.visual.vertex_colors))
        else:
            # No colour information: fall back to white with full alpha.
            color_chunks.append(np.ones((len(node.vertices), 4)) * 255)

    collect(loaded)
    if not vertex_chunks:
        return None

    stacked_vertices = np.vstack(vertex_chunks)
    if color_chunks and len(color_chunks) == len(vertex_chunks):
        stacked_colors = np.vstack(color_chunks)
    else:
        stacked_colors = None
    return trimesh.PointCloud(vertices=stacked_vertices, colors=stacked_colors)

def transform_cameras(cameras_list, T, spin_deg=0.0, scale=1.0):
    """Re-express camera poses after scaling and moving the scene they belong to.

    For every camera the centre is recovered as ``C = -R^T t``, multiplied by
    ``scale``, and mapped through the rotation and translation of ``T``. The
    orientation is left-multiplied by the rotation of ``T`` and, when
    ``spin_deg`` is non-zero, right-multiplied by a rotation about Y. ``t`` is
    then recomputed as ``-R_new @ C_new`` so that R, t and center stay
    mutually consistent.

    Args:
        cameras_list: Iterable of dicts with at least ``'R'`` (3x3) and
            ``'t'`` (three values, given flat or as a 3x1 column).
        T: 4x4 homogeneous transform; only its upper-left 3x3 block and the
            first three entries of its last column are used.
        spin_deg: Extra rotation about the Y axis, in degrees, applied to the
            orientation only. Values with magnitude <= 0.001 are ignored.
        scale: Factor applied to each camera centre before ``T``.

    Returns:
        A new list of deep-copied camera dicts whose ``'R'``, ``'t'`` and
        ``'center'`` entries are replaced by plain lists. The input dicts are
        not modified.
    """
    T = np.asarray(T, dtype=float)
    R_transform = T[:3, :3]
    t_transform = T[:3, 3]

    # The spin is the same for every camera, so build it once up front.
    apply_spin = abs(spin_deg) > 0.001
    rads_spin = np.radians(spin_deg)
    c_s, s_s = np.cos(rads_spin), np.sin(rads_spin)
    R_spin = np.array([[c_s, 0, s_s], [0, 1, 0], [-s_s, 0, c_s]])

    transformed = []
    for cam in cameras_list:
        R_old = np.asarray(cam['R'], dtype=float).reshape(3, 3)
        # Flatten t: a 3x1 column would otherwise broadcast against the
        # translation below and silently yield a 3x3 "centre".
        t_old = np.asarray(cam['t'], dtype=float).reshape(3)

        # 1. Camera centre of the original pose.
        C_old = -R_old.T @ t_old

        # 2. Scale the centre about the origin (pushes it away for scale > 1).
        C_old = C_old * scale

        # 3. Move the centre into the target frame.
        C_new = (R_transform @ C_old) + t_transform

        # 4. Transform the orientation.
        # NOTE(review): for a world-to-camera R (the convention implied by
        # C = -R^T t) moving the world by T would normally give
        # R_old @ R_transform.T. The left-multiplication is kept because
        # CAMERA_SPIN_CORRECTION appears to have been tuned against it -
        # confirm with the viewer before changing.
        R_new = R_transform @ R_old
        if apply_spin:
            R_new = R_new @ R_spin

        # 5. Recompute t so it agrees with the new R and centre.
        t_new = -R_new @ C_new

        new_cam = deepcopy(cam)
        new_cam['R'] = R_new.tolist()
        new_cam['t'] = t_new.tolist()
        new_cam['center'] = C_new.tolist()
        transformed.append(new_cam)

    return transformed

def main():
    """Merge wall A and wall B into one point cloud and one camera file.

    Wall B is scaled by ``SCALE_B`` and moved by the transform built from
    ``ROTATION_Y_DEGREES`` and ``TRANSLATION_METERS``; wall A is left untouched.
    The merged cloud is written to ``OUTPUT_FILES["ply_merged"]`` (skipped with
    a warning when either input cloud cannot be loaded). The merged camera
    list, with wall B cameras re-indexed after wall A's, is written to
    ``OUTPUT_FILES["json_merged"]`` using wall A's intrinsics and an empty
    ``points`` list.
    """
    print(f"--- Merging with SCALE: {SCALE_B} ---")

    T = get_transform_matrix(ROTATION_Y_DEGREES, TRANSLATION_METERS)

    # Load
    pcd_a = robust_load_and_merge(INPUT_FILES["ply_a"])
    pcd_b = robust_load_and_merge(INPUT_FILES["ply_b"])

    # The loader signals failure with None; test for that explicitly instead
    # of relying on the truthiness of a point-cloud object.
    if pcd_a is not None and pcd_b is not None:
        # 1. Scale Point Cloud B
        pcd_b.vertices *= SCALE_B

        # 2. Transform Point Cloud B
        pcd_b.apply_transform(T)

        merged_v = np.vstack([pcd_a.vertices, pcd_b.vertices])
        merged_c = None
        if pcd_a.colors is not None and pcd_b.colors is not None:
            merged_c = np.vstack([pcd_a.colors, pcd_b.colors])

        final_pcd = trimesh.PointCloud(vertices=merged_v, colors=merged_c)
        final_pcd.export(str(OUTPUT_FILES["ply_merged"]))
        print("Saved merged point cloud.")
    else:
        # Make the skipped step visible rather than silently writing cameras only.
        print("WARNING: point cloud merge skipped because an input PLY could not be loaded.")

    # Merge Cameras
    with open(INPUT_FILES["json_a"], "r", encoding="utf-8") as f:
        data_a = json.load(f)
    with open(INPUT_FILES["json_b"], "r", encoding="utf-8") as f:
        data_b = json.load(f)

    cams_a = data_a.get('cameras', [])
    cams_b_raw = data_b.get('cameras', [])

    # Wall B cameras get the same scale and transform as its point cloud.
    cams_b_transformed = transform_cameras(cams_b_raw, T, spin_deg=CAMERA_SPIN_CORRECTION, scale=SCALE_B)

    # Re-index wall B cameras so they continue after wall A's highest index.
    max_idx = max((c['index'] for c in cams_a), default=-1)
    for i, cam in enumerate(cams_b_transformed):
        cam['index'] = max_idx + 1 + i

    merged_data = {
        "intrinsics": data_a.get("intrinsics"),
        "points": [],
        "cameras": cams_a + cams_b_transformed,
    }

    with open(OUTPUT_FILES["json_merged"], "w", encoding="utf-8") as f:
        json.dump(merged_data, f, indent=2)
    print("Saved merged cameras.")


if __name__ == "__main__":
    main()

--- Merging with SCALE: 2.0 ---
Saved merged point cloud.
Saved merged cameras.
