18/06/2025 - Nora Cécile Rosel Zaballos

Code for camera calibration and triangulation (through EasyMocap), to test multi-view with MMPose.

In [None]:
# Clone EasyMocap once. The explicit target plus the guard keep the cell re-runnable:
# after the %cd below, a bare `git clone` would create /content/EasyMocap/EasyMocap.
!test -d /content/EasyMocap/.git || git clone https://github.com/zju3dv/EasyMocap.git /content/EasyMocap
%cd /content/EasyMocap

# Install dependencies (%pip installs into the environment of the running kernel)
%pip install -r requirements.txt
%pip install ipdb joblib tqdm opencv-python yacs tabulate termcolor
%pip install mediapipe==0.10.5
%pip install func_timeout
!apt-get -qq install ffmpeg

In [None]:
# Make the EasyMocap checkout importable from this kernel
import sys

sys.path.extend(['/content/EasyMocap'])

In [None]:
# Upload the calibration recordings through the Colab file picker.
# (This cell only uploads; the videos are split and converted in a later cell.)
from google.colab import files
uploaded = files.upload()  # Select cam1.mkv, cam2.mkv, unified.mkv

Saving cam1.mkv to cam1.mkv
Saving cam2.mkv to cam2.mkv
Saving unified.mkv to unified.mkv


In [None]:
# Create the images/ and videos/ folders of both calibration datasets (no-op if they already exist)
!mkdir -p /content/EasyMocap/intri_data/images /content/EasyMocap/extri_data/images /content/EasyMocap/intri_data/videos /content/EasyMocap/extri_data/videos

In [None]:
# Crop each recording to the half that belongs to one camera:
#   crop=iw/2:ih:iw/2:0 keeps the RIGHT half, crop=iw/2:ih:0:0 keeps the LEFT half.
# -y overwrites existing outputs so re-running the cell does not stall on ffmpeg's overwrite prompt.

# === cam1.mkv (RIGHT half) -> intri_data/videos/cam1.mp4
!ffmpeg -y -i cam1.mkv -filter:v "crop=iw/2:ih:iw/2:0" -c:a copy /content/EasyMocap/intri_data/videos/cam1.mp4

# === cam2.mkv (LEFT half) -> intri_data/videos/cam2.mp4
!ffmpeg -y -i cam2.mkv -filter:v "crop=iw/2:ih:0:0" -c:a copy /content/EasyMocap/intri_data/videos/cam2.mp4

# === unified.mkv (RIGHT half) -> extri_data/videos/cam1.mp4
!ffmpeg -y -i unified.mkv -filter:v "crop=iw/2:ih:iw/2:0" -c:a copy /content/EasyMocap/extri_data/videos/cam1.mp4

# === unified.mkv (LEFT half) -> extri_data/videos/cam2.mp4
!ffmpeg -y -i unified.mkv -filter:v "crop=iw/2:ih:0:0" -c:a copy /content/EasyMocap/extri_data/videos/cam2.mp4

In [None]:
# Extract frames from the videos of both datasets with EasyMocap's extract_video.py.
# No --num limit is passed here; the number of frames is trimmed in a later cell.
# NOTE(review): --no2d presumably skips 2D keypoint detection — confirm against the script's --help.
!python3 /content/EasyMocap/scripts/preprocess/extract_video.py /content/EasyMocap/intri_data --no2d
!python3 /content/EasyMocap/scripts/preprocess/extract_video.py /content/EasyMocap/extri_data --no2d

cam1.mp4  : 100% 458/458 [00:02<00:00, 206.50it/s]
cam2.mp4  : 100% 949/949 [00:04<00:00, 198.67it/s]
cameras:  cam1 cam2
cam1.mp4  : 100% 1621/1621 [00:07<00:00, 207.94it/s]
cam2.mp4  : 100% 1621/1621 [00:08<00:00, 193.31it/s]
cameras:  cam1 cam2


In [None]:
%%bash
# Keep only the first KEEP entries (sorted by name) of every camera folder in both datasets.
# NUL-delimited find/sort/tail/xargs replaces parsing of `ls` output, so names containing
# whitespace or quotes are handled; `! -name '.*'` mirrors ls by ignoring dotfiles and
# `xargs -r` skips rm entirely when there is nothing to delete.
KEEP=200
for d in /content/EasyMocap/intri_data/images/* /content/EasyMocap/extri_data/images/*; do
  find "$d" -mindepth 1 -maxdepth 1 ! -name '.*' -print0 \
    | sort -z \
    | tail -z -n "+$((KEEP + 1))" \
    | xargs -0 -r rm -f
done


In [None]:
# Run EasyMocap's chessboard detection on both datasets (same --pattern and --grid for each).
# PYTHONPATH is set inline for the subprocess started by `!`.
# NOTE(review): --grid 0.024 is presumably the square size in metres (24 mm) — confirm against the printed board.

# INTRINSICS
!PYTHONPATH=/content/EasyMocap python3 /content/EasyMocap/apps/calibration/detect_chessboard.py /content/EasyMocap/intri_data --out /content/EasyMocap/intri_data/output --pattern 8,6 --grid 0.024

# EXTRINSICS
!PYTHONPATH=/content/EasyMocap python3 /content/EasyMocap/apps/calibration/detect_chessboard.py /content/EasyMocap/extri_data --out /content/EasyMocap/extri_data/output --pattern 8,6 --grid 0.024


Create chessboard (8, 6)
create template chessboard:   0% 0/400 [00:00<?, ?it/s]create template chessboard:  53% 212/400 [00:00<00:00, 2111.62it/s]create template chessboard: 100% 400/400 [00:00<00:00, 2129.67it/s]
- Load data from /content/EasyMocap/intri_data
- Try to find image names...
  -> find 400 images
- Try to find annot names...
  -> find 400 annots
Create chessboard (8, 6)
create template chessboard: 100% 400/400 [00:00<00:00, 2145.95it/s]
- Load data from /content/EasyMocap/extri_data
- Try to find image names...
  -> find 400 images
- Try to find annot names...
  -> find 400 annots


In [None]:
# === Intrinsic Calibration ===
# Runs EasyMocap's calib_intri.py on the intri_data dataset.
# NOTE(review): the extrinsic step reads intri_data/output/intri.yml, which is presumably written here — confirm.
!PYTHONPATH=/content/EasyMocap python3 /content/EasyMocap/apps/calibration/calib_intri.py /content/EasyMocap/intri_data

read:   0% 0/200 [00:00<?, ?it/s]read: 100% 200/200 [00:00<00:00, 5665.95it/s]
[32m[calibration] Load 200 images[0m
>> Camera cam1: 200 frames
-> [calibrate           ]:  64.0s
read: 100% 200/200 [00:00<00:00, 5737.45it/s]
[32m[calibration] Load 200 images[0m
>> Camera cam2: 200 frames
-> [calibrate           ]:  58.7s


In [None]:
# === Extrinsic Calibration ===
# Runs EasyMocap's calib_extri.py on the extri_data dataset, passing the intrinsics file
# from intri_data/output/intri.yml via --intri.
!PYTHONPATH=/content/EasyMocap python3 /content/EasyMocap/apps/calibration/calib_extri.py /content/EasyMocap/extri_data --intri /content/EasyMocap/intri_data/output/intri.yml

cam1 center => [0.16149051 0.06480517 0.30780421], err = 0.322
cam2 center => [-0.02143001 -0.26360351  0.22161923], err = 0.607


In [None]:
# === Check Calibration Results ===
# Disabled alternative, kept for reference (--vis --show variant):
# !PYTHONPATH=/content/EasyMocap python3 /content/EasyMocap/apps/calibration/check_calib.py /content/EasyMocap/extri_data --out /content/EasyMocap/intri_data/output --vis --show
# Active check: --mode cube with --write on extri_data.
# NOTE(review): the disabled command is presumably the chessboard-based check this cell's title referred to — confirm.
!PYTHONPATH=/content/EasyMocap python3 /content/EasyMocap/apps/calibration/check_calib.py /content/EasyMocap/extri_data --out /content/EasyMocap/extri_data --mode cube --write

Check (8, 4) points
[check] cameras:  ['cam1', 'cam2']
 - cam1: center = [[0.161 0.065 0.308]], look at = [[ 0.064  0.132 -0.685]]
 - cam2: center = [[-0.021 -0.264  0.222]], look at = [[ 0.021  0.546 -0.364]]


In [None]:
# Upload the recording to analyse (e.g. patient_1.mp4) through the Colab file picker.
# This cell only uploads; the left/right split happens in a later cell.
from google.colab import files
uploaded = files.upload()

Saving patient_1.mp4 to patient_1.mp4


In [None]:
# Move the uploaded recording out of the EasyMocap checkout into /content,
# where the split step expects it (/content/patient_1.mp4).
!mv /content/EasyMocap/patient_1.mp4 /content/

In [None]:
# Split the recording into left and right halves, one video per camera view.
# -y overwrites existing outputs so re-running the cell does not stall on ffmpeg's overwrite prompt.
!ffmpeg -y -i /content/patient_1.mp4 -filter:v "crop=iw/2:ih:0:0" -c:a copy /content/hand2.mp4 #Left half
!ffmpeg -y -i /content/patient_1.mp4 -filter:v "crop=iw/2:ih:iw/2:0" -c:a copy /content/hand1.mp4 #Right half

ffmpeg version 4.4.2-0ubuntu0.22.04.1 Copyright (c) 2000-2021 the FFmpeg developers
  built with gcc 11 (Ubuntu 11.2.0-19ubuntu1)
  configuration: --prefix=/usr --extra-version=0ubuntu0.22.04.1 --toolchain=hardened --libdir=/usr/lib/x86_64-linux-gnu --incdir=/usr/include/x86_64-linux-gnu --arch=amd64 --enable-gpl --disable-stripping --enable-gnutls --enable-ladspa --enable-libaom --enable-libass --enable-libbluray --enable-libbs2b --enable-libcaca --enable-libcdio --enable-libcodec2 --enable-libdav1d --enable-libflite --enable-libfontconfig --enable-libfreetype --enable-libfribidi --enable-libgme --enable-libgsm --enable-libjack --enable-libmp3lame --enable-libmysofa --enable-libopenjpeg --enable-libopenmpt --enable-libopus --enable-libpulse --enable-librabbitmq --enable-librubberband --enable-libshine --enable-libsnappy --enable-libsoxr --enable-libspeex --enable-libsrt --enable-libssh --enable-libtheora --enable-libtwolame --enable-libvidstab --enable-libvorbis --enable-libvpx --enab

In [None]:
# Extract the first 200 frames of the videos
import cv2
import os

def extract_frames(video_path, out_dir, max_frames=200):
    """Write the first frames of a video to ``out_dir`` as numbered JPEG files.

    Frames are saved as ``frame_00000.jpg``, ``frame_00001.jpg``, ... in the
    order they are read from the video.

    Args:
        video_path: Path of the video to read.
        out_dir: Directory receiving the images; created if it does not exist.
        max_frames: Upper bound on the number of frames to write.

    Returns:
        The number of frames actually written (fewer than ``max_frames`` when
        the video ends earlier).

    Raises:
        OSError: If the video cannot be opened or a frame cannot be written.
    """
    os.makedirs(out_dir, exist_ok=True)
    cap = cv2.VideoCapture(video_path)
    saved = 0
    try:
        # A wrong path or unsupported file used to yield zero frames without any
        # message; fail loudly so later steps do not run on an empty folder.
        if not cap.isOpened():
            raise OSError(f"Could not open video: {video_path}")
        while saved < max_frames:
            ok, frame = cap.read()
            if not ok:
                break
            out_path = os.path.join(out_dir, f"frame_{saved:05d}.jpg")
            # cv2.imwrite reports failure through its return value, not an exception.
            if not cv2.imwrite(out_path, frame):
                raise OSError(f"Could not write frame: {out_path}")
            saved += 1
    finally:
        # Release the capture handle even when reading or writing fails.
        cap.release()
    return saved

# Extract up to 200 frames from each view.
# The split videos were written to /content, while the working directory was changed to
# /content/EasyMocap by the setup cell, so the inputs are addressed by absolute path.
extract_frames('/content/hand1.mp4', 'frames/hand1', max_frames=200)
extract_frames('/content/hand2.mp4', 'frames/hand2', max_frames=200)

In [None]:
# Install MMCV from the OpenMMLab wheel index (%pip installs into the running kernel's environment).
# NOTE(review): the index path (cu118 / torch2.7.1) must match the runtime's CUDA and torch versions — confirm.
%pip install 'mmcv>=2.0.1' -f https://download.openmmlab.com/mmcv/dist/cu118/torch2.7.1/index.html

In [None]:
# Install MMPose and MMDetection through OpenMIM
# (%pip installs openmim into the running kernel's environment)
%pip install openmim
!mim install mmengine
!mim install "mmdet>=3.1.0"
!mim install "mmpose>=1.1.0"

In [None]:
# Report the installed OpenMMLab package versions (mmpose, mmengine, mmcv — in that order)
import mmpose
import mmengine
import mmcv

for package in (mmpose, mmengine, mmcv):
    print(package.__version__)

1.3.2
0.10.7
2.2.0


In [None]:
# Download the td-hm_hrnet-w48 COCO 256x192 config (.py) and checkpoint (.pth) into the current directory.
# NOTE(review): this looks like a COCO body-keypoint model while the videos are named hand1/hand2 —
# confirm this is the intended model.
!mim download mmpose --config td-hm_hrnet-w48_8xb32-210e_coco-256x192  --dest .

processing td-hm_hrnet-w48_8xb32-210e_coco-256x192...
[2Kdownloading [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m256.7/256.7 MiB[0m [31m23.2 MB/s[0m eta [36m0:00:00[0m
[?25h[32mSuccessfully downloaded td-hm_hrnet-w48_8xb32-210e_coco-256x192-0e67c616_20220913.pth to /content[0m
[32mSuccessfully dumped td-hm_hrnet-w48_8xb32-210e_coco-256x192.py to /content[0m


In [None]:
import os

from mmpose.apis import inference_topdown, init_model
from mmpose.utils import register_all_modules  # was called below without being imported (NameError)
import numpy as np

# Register MMPose's modules before building the model
register_all_modules()

config_file = 'td-hm_hrnet-w48_8xb32-210e_coco-256x192.py'
checkpoint_file = 'td-hm_hrnet-w48_8xb32-210e_coco-256x192-0e67c616_20220913.pth'
model = init_model(config_file, checkpoint_file, device='cpu')  # or device='cuda:0'

# Optional smoke test on a sample image containing a person.
# It is skipped when demo.jpg is absent, so a full "Run all" does not stop here.
if os.path.isfile('demo.jpg'):
    results = inference_topdown(model, 'demo.jpg')
else:
    results = None
    print("demo.jpg not found - skipping the smoke test")


In [None]:
#Run inference on frames and obtain 2D keypoints

import glob
from tqdm import tqdm

def extract_keypoints(img_dir, out_dir):
    """Run the pose model on every JPEG of a folder and save the 2D keypoints.

    For each ``*.jpg`` in ``img_dir`` (processed in sorted order) the global
    ``model`` is applied through ``inference_topdown`` and the keypoints of the
    first returned instance are stored as ``<image stem>.npy`` in ``out_dir``.
    Images that cannot be read, or for which nothing is returned, are skipped.

    Args:
        img_dir: Folder containing the input ``.jpg`` frames.
        out_dir: Folder receiving the ``.npy`` files; created if missing.

    Returns:
        The number of keypoint files written.
    """
    os.makedirs(out_dir, exist_ok=True)
    img_paths = sorted(glob.glob(f'{img_dir}/*.jpg'))
    saved = 0
    for img_path in tqdm(img_paths):
        img = cv2.imread(img_path)
        if img is None:
            # cv2.imread returns None for unreadable files; skip instead of
            # passing None to the model.
            continue
        result = inference_topdown(model, img)
        if not result:
            continue
        keypoints = result[0].pred_instances.keypoints
        if keypoints is None:
            continue
        # Swap only the extension: str.replace would rewrite every '.jpg'
        # occurring anywhere in the file name.
        stem = os.path.splitext(os.path.basename(img_path))[0]
        np.save(os.path.join(out_dir, stem + '.npy'), keypoints)
        saved += 1
    return saved

# Run MMPose on both camera views (hand1 first, then hand2)
for view in ('hand1', 'hand2'):
    extract_keypoints(f'frames/{view}', f'keypoints/{view}')
