## Clone PIFuHD repository

In [None]:
# Clone PIFuHD only when it is not already present, so re-running this cell
# does not fail with "destination path 'pifuhd' already exists".
!test -d pifuhd || git clone https://github.com/facebookresearch/pifuhd

Cloning into 'pifuhd'...
remote: Enumerating objects: 222, done.[K
remote: Counting objects: 100% (126/126), done.[K
remote: Compressing objects: 100% (44/44), done.[K
remote: Total 222 (delta 92), reused 82 (delta 82), pack-reused 96 (from 1)[K
Receiving objects: 100% (222/222), 399.35 KiB | 2.12 MiB/s, done.
Resolving deltas: 100% (114/114), done.


## Capture the image

In [None]:
######## A* #########

In [None]:
cd /content/pifuhd/sample_images

/content/pifuhd/sample_images


In [None]:
######## A* #########
# from google.colab import files
# filename = list(files.upload().keys())[0]
from IPython.display import Javascript
from google.colab.output import eval_js
from base64 import b64decode
import cv2
import numpy as np

def take_photo(filename='captured_image.png', quality=0.8):
    """Capture one webcam frame in the browser and save it to ``filename``.

    A button and a live video preview are added to the page; when the button
    is clicked the current frame is drawn onto a canvas, encoded as a data URL
    and sent back to Python, where it is decoded and written to disk.

    Args:
        filename: Destination path. A name ending in ``.png`` is encoded as
            PNG; any other name is encoded as JPEG, so the bytes on disk
            match what the extension promises.
        quality: Encoder quality passed to ``canvas.toDataURL``. It only
            affects JPEG output.

    Returns:
        ``filename``, unchanged.

    Raises:
        RuntimeError: If the browser returns no image data.
    """
    # The file used to be JPEG-encoded even for the default ``.png`` name.
    mime_type = 'image/png' if str(filename).lower().endswith('.png') else 'image/jpeg'

    js = f"""
    async function takePhoto() {{
      const div = document.createElement('div');
      const capture = document.createElement('button');
      capture.textContent = '📷 Capture Photo';
      div.appendChild(capture);

      const video = document.createElement('video');
      video.style.display = 'block';
      div.appendChild(video);
      document.body.appendChild(div);

      let stream = null;
      try {{
        stream = await navigator.mediaDevices.getUserMedia({{ video: true }});
        video.srcObject = stream;
        await video.play();

        await new Promise((resolve) => capture.onclick = resolve);

        const canvas = document.createElement('canvas');
        canvas.width = video.videoWidth;
        canvas.height = video.videoHeight;
        canvas.getContext('2d').drawImage(video, 0, 0);

        return canvas.toDataURL('{mime_type}', {quality});
      }} finally {{
        // Always release the camera and remove the widget, even on failure.
        if (stream) {{
          stream.getTracks().forEach(track => track.stop());
        }}
        div.remove();
      }}
    }}
    takePhoto();
    """

    data = eval_js(js)  # Runs the JS in the browser and returns the data URL
    # A data URL looks like "data:<mime>;base64,<payload>"; keep only the payload.
    _header, separator, payload = str(data).partition(',')
    if not separator or not payload:
        raise RuntimeError('The browser returned no image data; no frame was captured.')

    binary = b64decode(payload)
    with open(filename, 'wb') as f:
        f.write(binary)
    return filename

# Grab one frame from the webcam and remember where it was written
filename = take_photo()
print("Image captured and saved as:", filename)

# Decode the saved file with OpenCV to confirm that it is readable
image = cv2.imread(filename)
if image is None:
    print("Failed to load image into OpenCV.")
else:
    print("Image successfully loaded into OpenCV format. Shape:", image.shape)


Image captured and saved as: captured_image.png
Image successfully loaded into OpenCV format. Shape: (480, 640, 3)


## Patch `apps/recon.py` (load the checkpoint with `weights_only=False`)

In [None]:
file_path = "/content/pifuhd/apps/recon.py"

with open(file_path, "r") as file:
    lines = file.readlines()

# Locate the checkpoint-loading statement by its content instead of trusting a
# fixed line number, so a changed upstream file is never silently corrupted.
# NOTE: weights_only=False lets torch.load unpickle arbitrary objects; only use
# it with checkpoints from a source you trust.
patched_line_numbers = []
for index, line in enumerate(lines):
    if "= torch.load(state_dict_path" in line and "weights_only" not in line:
        indent = line[:len(line) - len(line.lstrip())]  # keep the original indentation
        lines[index] = indent + "state_dict = torch.load(state_dict_path, map_location=cuda, weights_only=False)\n"
        patched_line_numbers.append(index + 1)

if patched_line_numbers:
    # Write the modified lines back to the file
    with open(file_path, "w") as file:
        file.writelines(lines)
    print("✅ Line %s updated successfully." % ", ".join(str(number) for number in patched_line_numbers))
elif any("= torch.load(state_dict_path" in line and "weights_only" in line for line in lines):
    # The cell was run before; nothing left to change.
    print("recon.py is already patched; nothing to do.")
else:
    raise RuntimeError(
        "Could not find the 'torch.load(state_dict_path, ...)' statement in %s; "
        "the file was left unchanged." % file_path
    )


✅ Line 150 updated successfully.


## Remove background of person

In [None]:
!pip install rembg
!pip install onnxruntime


Collecting rembg
  Downloading rembg-2.0.65-py3-none-any.whl.metadata (20 kB)
Collecting pymatting (from rembg)
  Downloading PyMatting-1.1.13-py3-none-any.whl.metadata (7.5 kB)
Downloading rembg-2.0.65-py3-none-any.whl (41 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m41.2/41.2 kB[0m [31m885.0 kB/s[0m eta [36m0:00:00[0m
[?25hDownloading PyMatting-1.1.13-py3-none-any.whl (54 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m54.5/54.5 kB[0m [31m5.2 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pymatting, rembg
Successfully installed pymatting-1.1.13 rembg-2.0.65
Collecting onnxruntime
  Downloading onnxruntime-1.21.1-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (4.5 kB)
Collecting coloredlogs (from onnxruntime)
  Downloading coloredlogs-15.0.1-py2.py3-none-any.whl.metadata (12 kB)
Collecting humanfriendly>=9.1 (from coloredlogs->onnxruntime)
  Downloading humanfriendly-10.0-py2.py3-none-any.whl.meta

In [None]:
from rembg import remove
from PIL import Image

In [None]:
######## A* #########

import os

# Load the captured photo with PIL
input_image = Image.open(filename)

# Remove background
output_image = remove(input_image)

# Save result. Only the base name is prefixed (prefixing a path such as
# "dir/photo.png" would yield "no_bg_dir/photo.png"), and the result is always
# written as PNG: the cut-out presumably carries an alpha channel, which a
# ".jpg" input name could not store.
input_stem = os.path.splitext(os.path.basename(filename))[0]
output_filename = "no_bg_" + input_stem + ".png"
output_image.save(output_filename)

print("Background removed. Saved as:", output_filename)


Background removed. Saved as: no_bg_captured_image.png


## Preprocess (for cropping image)

In [None]:
######## A* #########

import os

try:
    image_path = '/content/pifuhd/sample_images/%s' % output_filename
except NameError:
    # ``output_filename`` is only defined once the background-removal cell has
    # run; fall back to the example image in that case. Any other error
    # (e.g. a wrongly typed value) is no longer swallowed.
    image_path = '/content/pifuhd/sample_images/test.png'  # example image
image_dir = os.path.dirname(image_path)
file_name = os.path.splitext(os.path.basename(image_path))[0]

# output paths
obj_path = '/content/pifuhd/results/pifuhd_final/recon/result_%s_256.obj' % file_name
out_img_path = '/content/pifuhd/results/pifuhd_final/recon/result_%s_256.png' % file_name
video_path = '/content/pifuhd/results/pifuhd_final/recon/result_%s_256.mp4' % file_name
video_display_path = '/content/pifuhd/results/pifuhd_final/result_%s_256_display.mp4' % file_name

In [None]:
# Show the path of the image that the following cells will process
print(image_path)

/content/pifuhd/sample_images/no_bg_captured_image.png


In [None]:
cd /content

/content


## Pose Estimation setup

In [None]:
# Clone the pose-estimation repository only when it is not already present,
# so re-running this cell does not fail on the existing directory.
!test -d lightweight-human-pose-estimation.pytorch || git clone https://github.com/Daniil-Osokin/lightweight-human-pose-estimation.pytorch.git

Cloning into 'lightweight-human-pose-estimation.pytorch'...
remote: Enumerating objects: 124, done.[K
remote: Counting objects: 100% (34/34), done.[K
remote: Compressing objects: 100% (16/16), done.[K
remote: Total 124 (delta 21), reused 19 (delta 18), pack-reused 90 (from 1)[K
Receiving objects: 100% (124/124), 230.29 KiB | 7.68 MiB/s, done.
Resolving deltas: 100% (53/53), done.


In [None]:
######## A* #########

In [None]:
cd /content/lightweight-human-pose-estimation.pytorch/

/content/lightweight-human-pose-estimation.pytorch


In [None]:
# -nc (no-clobber) skips the download when the checkpoint already exists, so a
# re-run does not fetch a duplicate "checkpoint_iter_370000.pth.1".
!wget -nc https://download.01.org/opencv/openvino_training_extensions/models/human_pose_estimation/checkpoint_iter_370000.pth

--2025-05-09 08:48:45--  https://download.01.org/opencv/openvino_training_extensions/models/human_pose_estimation/checkpoint_iter_370000.pth
Resolving download.01.org (download.01.org)... 92.122.14.20, 2600:1409:9800:168c::a87, 2600:1409:9800:1689::a87
Connecting to download.01.org (download.01.org)|92.122.14.20|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 87959810 (84M) [application/octet-stream]
Saving to: ‘checkpoint_iter_370000.pth’


2025-05-09 08:48:46 (233 MB/s) - ‘checkpoint_iter_370000.pth’ saved [87959810/87959810]



In [None]:
import torch
import cv2
import numpy as np
from models.with_mobilenet import PoseEstimationWithMobileNet
from modules.keypoints import extract_keypoints, group_keypoints
from modules.load_state import load_state
from modules.pose import Pose, track_poses
import demo

def get_rect(net, images, height_size, cpu=False):
    """Write a square crop rectangle for every pose detected in each image.

    For each path in ``images`` the pose network is run, one square box
    ``[x, y, width, height]`` is derived per detected pose, and all boxes are
    saved as integers (one box per line) to ``<image path without
    extension>_rect.txt``.

    Args:
        net: Pose-estimation network; it is switched to eval mode here.
        images: Iterable of image file paths.
        height_size: Forwarded unchanged to ``demo.infer_fast``.
        cpu: Forwarded to ``demo.infer_fast``. Defaults to ``False``, the
            value that used to be hard-coded.

    Raises:
        OSError: If OpenCV cannot read one of the images.
    """
    import os  # local import keeps this cell self-contained

    net = net.eval()

    stride = 8
    upsample_ratio = 4
    num_keypoints = Pose.num_kpts
    for image in images:
        # splitext only strips a real extension. The previous str.replace made
        # rect_path equal to the image path when the name had no extension, so
        # savetxt would overwrite the image.
        rect_path = os.path.splitext(image)[0] + '_rect.txt'
        img = cv2.imread(image, cv2.IMREAD_COLOR)
        if img is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise OSError('Could not read image: %s' % image)
        heatmaps, pafs, scale, pad = demo.infer_fast(net, img, height_size, stride, upsample_ratio, cpu=cpu)

        total_keypoints_num = 0
        all_keypoints_by_type = []
        for kpt_idx in range(num_keypoints):  # 19th for bg
            total_keypoints_num += extract_keypoints(heatmaps[:, :, kpt_idx], all_keypoints_by_type, total_keypoints_num)

        pose_entries, all_keypoints = group_keypoints(all_keypoints_by_type, pafs)
        # Undo the network stride/upsampling, padding and scaling so that the
        # keypoints are expressed in the coordinates of the input image.
        for kpt_id in range(all_keypoints.shape[0]):
            all_keypoints[kpt_id, 0] = (all_keypoints[kpt_id, 0] * stride / upsample_ratio - pad[1]) / scale
            all_keypoints[kpt_id, 1] = (all_keypoints[kpt_id, 1] * stride / upsample_ratio - pad[0]) / scale

        rects = []
        for n in range(len(pose_entries)):
            if len(pose_entries[n]) == 0:
                continue
            # -1 marks a keypoint that was not found for this pose.
            pose_keypoints = np.ones((num_keypoints, 2), dtype=np.int32) * -1
            valid_keypoints = []
            for kpt_id in range(num_keypoints):
                if pose_entries[n][kpt_id] != -1.0:  # keypoint was found
                    pose_keypoints[kpt_id, 0] = int(all_keypoints[int(pose_entries[n][kpt_id]), 0])
                    pose_keypoints[kpt_id, 1] = int(all_keypoints[int(pose_entries[n][kpt_id]), 1])
                    valid_keypoints.append([pose_keypoints[kpt_id, 0], pose_keypoints[kpt_id, 1]])
            valid_keypoints = np.array(valid_keypoints)

            if pose_entries[n][10] != -1.0 or pose_entries[n][13] != -1.0:
                # Keypoint 10 or 13 was found (presumably the legs, given the
                # fallback below): box around the extent of all keypoints.
                pmin = valid_keypoints.min(0)
                pmax = valid_keypoints.max(0)

                center = (0.5 * (pmax[:2] + pmin[:2])).astype(np.int_)
                radius = int(0.65 * max(pmax[0]-pmin[0], pmax[1]-pmin[1]))
            elif pose_entries[n][8] != -1.0 and pose_entries[n][11] != -1.0:
                # if leg is missing, use pelvis to get cropping
                # (keypoints 10 and 13 are both missing in this branch)
                center = (0.5 * (pose_keypoints[8] + pose_keypoints[11])).astype(np.int_)
                radius = int(1.45*np.sqrt(((center[None,:] - valid_keypoints)**2).sum(1)).max(0))
                center[1] += int(0.05*radius)
            else:
                # Neither reference is available: fall back to the whole image.
                center = np.array([img.shape[1]//2,img.shape[0]//2])
                radius = max(img.shape[1]//2,img.shape[0]//2)

            x1 = center[0] - radius
            y1 = center[1] - radius

            rects.append([x1, y1, 2*radius, 2*radius])

        np.savetxt(rect_path, np.array(rects), fmt='%d')

In [None]:
######## A* #########

# Build the pose network and restore the downloaded checkpoint; the weights
# are mapped to the CPU while loading.
net = PoseEstimationWithMobileNet()
checkpoint = torch.load('checkpoint_iter_370000.pth', map_location='cpu')
load_state(net, checkpoint)

# Move the network to the GPU and write the crop rectangle file for the image.
get_rect(net.cuda(), [image_path], 512)

## Download the Pretrained Model

In [None]:
######## A* #########

In [None]:
cd /content/pifuhd/

/content/pifuhd


In [None]:
# Skip the 1.4G download when the checkpoint is already in place. If an earlier
# download was interrupted, delete checkpoints/pifuhd.pt before re-running.
!test -f checkpoints/pifuhd.pt || sh ./scripts/download_trained_model.sh

+ mkdir -p checkpoints
+ cd checkpoints
+ wget https://dl.fbaipublicfiles.com/pifuhd/checkpoints/pifuhd.pt pifuhd.pt
--2025-05-09 08:48:58--  https://dl.fbaipublicfiles.com/pifuhd/checkpoints/pifuhd.pt
Resolving dl.fbaipublicfiles.com (dl.fbaipublicfiles.com)... 3.163.189.51, 3.163.189.14, 3.163.189.108, ...
Connecting to dl.fbaipublicfiles.com (dl.fbaipublicfiles.com)|3.163.189.51|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 1548375177 (1.4G) [application/octet-stream]
Saving to: ‘pifuhd.pt’


2025-05-09 08:49:08 (161 MB/s) - ‘pifuhd.pt’ saved [1548375177/1548375177]

--2025-05-09 08:49:08--  http://pifuhd.pt/
Resolving pifuhd.pt (pifuhd.pt)... failed: Name or service not known.
wget: unable to resolve host address ‘pifuhd.pt’
FINISHED --2025-05-09 08:49:08--
Total wall clock time: 9.5s
Downloaded: 1 files, 1.4G in 9.2s (161 MB/s)


## Run PIFuHD


In [None]:
######## A* #########

# Warning: all images with the corresponding rectangle files under -i will be processed.
!python -m apps.simple_test -r 256 --use_rect -i $image_dir

# seems that 256 is the maximum resolution that can fit into Google Colab.
# If you want to reconstruct a higher-resolution mesh, please try with your own machine.

Resuming from  ./checkpoints/pifuhd.pt
test data size:  1
initialize network with normal
initialize network with normal
generate mesh (test) ...
  0% 0/1 [00:00<?, ?it/s]./results/pifuhd_final/recon/result_no_bg_captured_image_256.obj
[ WARN:0@12.141] global loadsave.cpp:848 imwrite_ Unsupported depth image for selected encoder is fallbacked to CV_8U.
100% 1/1 [00:05<00:00,  5.77s/it]


# To run again, re-run the cells marked with
`######## A* #########`

---

