At a high level, the steps are:

1. Run [COLMAP — COLMAP 3.8 documentation](https://colmap.github.io/) to generate the sparse point cloud and the camera poses for custom datasets.

2. Run [yenchenlin/nerf-pytorch: A PyTorch implementation of NeRF (Neural Radiance Fields) that reproduces the results.](https://github.com/yenchenlin/nerf-pytorch) to generate the 3D reconstruction.

For reproducibility, I demonstrate some of the problems that might occur and the solutions to them.

`LLFF` related problems when creating custom data:

When converting COLMAP results (`.bin` files) into the LLFF format (`.npy` files) using [LLFF/imgs2poses.py at master · Fyusion/LLFF](https://github.com/Fyusion/LLFF/blob/master/imgs2poses.py), the current code in the repository has problems, which can be solved using the code from this pull request:

[Update pose_utils.py by starhiking · Pull Request #60 · Fyusion/LLFF](https://github.com/Fyusion/LLFF/pull/60)

Note that, because some images are of low quality, you may also need to delete images according to the output `view_imgs.txt` file.

`NeRF` related problems:

The problem may be caused by a compatibility issue when running the `run_nerf.py` file. I solved it by changing the `load_llff.py` file as follows:

1. Make our own folder, e.g. `images_8` (12.5% of the original size), where the images are rescaled using our own code below.

2. Comment out the line `_minify(basedir, factors=[factor])` in the `load_llff.py` file, so that our own rescaled images from step 1 are loaded instead.


In [1]:
import cv2
import os

# Downscale every image in the COLMAP image folder and write the result,
# under the same file name, into the `images_8` folder.
source_dir = 'data/nerf_llff_data/colmap/images'
export_path = 'data/nerf_llff_data/colmap/images_8'
scale_percent = 12.5  # percent of original size

# makedirs also creates missing parent folders, and exist_ok avoids the
# check-then-create race of `if not exists: mkdir`.
os.makedirs(export_path, exist_ok=True)

images = os.listdir(source_dir)

for image in images:
    img = cv2.imread(os.path.join(source_dir, image))
    if img is None:
        # cv2.imread signals failure by returning None (sub-folders,
        # non-image files, corrupt images); skip instead of crashing on
        # `img.shape`.
        print(f'skipping {image}: could not be read as an image')
        continue
    # keep at least one pixel per side so tiny inputs do not produce a
    # zero-sized target, which cv2.resize rejects
    width = max(1, int(img.shape[1] * scale_percent / 100))
    height = max(1, int(img.shape[0] * scale_percent / 100))
    # INTER_AREA is the interpolation OpenCV recommends for shrinking; the
    # default (bilinear) aliases noticeably at this reduction factor.
    resized = cv2.resize(img, (width, height), interpolation=cv2.INTER_AREA)
    cv2.imwrite(os.path.join(export_path, image), resized)

In [36]:
import numpy as np

# Read the pose/bounds table that the COLMAP -> LLFF conversion produced.
# Fewer rows than input images are expected here: 3 images were dropped by
# COLMAP (check `view_imgs.txt` - the first 3 images are the ones dropped).
poses_bounds_file = 'data/nerf_llff_data/colmap/poses_bounds.npy'
poses_arr = np.load(poses_bounds_file)
# show (number of rows, values per row)
poses_arr.shape

(104, 17)

In [31]:
# Display the last row of the loaded poses_bounds table as a sanity check.
poses_arr[-1]

array([-1.19424518e-04,  9.99985718e-01, -5.34317956e-03,  8.51247692e-02,
        2.16000000e+02,  9.99964594e-01,  7.44606569e-05, -8.41459574e-03,
       -9.31933576e-01,  2.70000000e+02, -8.41407771e-03, -5.34399529e-03,
       -9.99950321e-01, -6.29752099e+00,  1.00959705e+02,  2.91582581e+00,
        1.34089396e+01])

In [None]:
# ready to run NeRF - for better results, use 1x downsampled images (no downsampling in this example)
# !python3 run_nerf.py --config configs/colon_paper.txt

Using results from SLAM instead of COLMAP:


ref: [Fyusion/LLFF: Code release for Local Light Field Fusion at SIGGRAPH 2019](https://github.com/Fyusion/LLFF#using-your-own-poses-without-running-colmap)

In [38]:
# pose results

import os
import numpy as np
path = '/playpen2/luchaoqi/results/031/result/colon_norm_preall_abs_nosm'
file = 'pose_result.txt'

# Parse the SLAM pose file: one pose per non-blank line, with the values
# separated by whitespace.
poses = []
with open(os.path.join(path, file), 'r') as f:
    for line in f:
        # split() without an argument tolerates tabs and repeated spaces;
        # split(' ') would turn those into empty fields that cannot be
        # converted to float.
        fields = line.split()
        if not fields:
            # blank line
            continue
        # remove last element; the remaining values are kept as the pose
        poses.append(np.array(fields[:-1], dtype=np.float64))
poses = np.array(poses)
# here from the results we can see 1 image was dropped from SLAM
# check the index and we find that the first image was dropped
poses.shape

(106, 12)

In [39]:
import numpy as np

# Scan the SLAM depth dumps and report the overall closest / farthest depth.
path = '/playpen2/luchaoqi/results/031/result/colon_norm_preall_abs_nosm/depth/'

# only the `.bin` files whose name does not contain 'pose' are depth maps
depth_names = [
    name for name in os.listdir(path)
    if name.endswith('.bin') and 'pose' not in name
]

mins, maxs = [], []
for name in depth_names:
    # each file is read as a flat array of raw float32 values
    depth_values = np.fromfile(os.path.join(path, name), dtype=np.float32)
    mins.append(depth_values.min())
    maxs.append(depth_values.max())

print(min(mins), max(maxs))

0.19576538 1.5596371


In [34]:
# Constants applied to every SLAM frame.
near = 0.1
far = 2.0
img_width = 270
img_height = 216
focal = 145.4410

# The two pieces appended to each pose are the same for all frames.
hwf_column = np.array([img_height, img_width, focal]).reshape(3, 1)
depth_bounds = np.array([near, far])

# NOTE(review): the LLFF README describes an unusual axis order for the
# rotation part of each pose - confirm the SLAM poses already follow it.
adjusted_poses = []
for flat_pose in poses:
    # 12 values -> 3x4 matrix, plus the [height, width, focal] column -> 3x5
    pose_3x5 = np.hstack((flat_pose.reshape(3, 4), hwf_column))
    # 15 flattened values followed by the near and far depth -> 17 values
    row = np.append(pose_3x5.ravel(), depth_bounds)
    adjusted_poses.append(row.astype(np.float64))

adjusted_poses = np.array(adjusted_poses)
adjusted_poses.shape

(106, 17)

In [28]:
# save adjusted poses
export_dir = 'data/nerf_llff_data/slam'
# makedirs creates any missing parent folders as well, and exist_ok=True
# removes the race between checking for the folder and creating it.
os.makedirs(export_dir, exist_ok=True)
path = os.path.join(export_dir, 'poses_bounds.npy')
np.save(path, adjusted_poses)

In [37]:
# # resize images

# import cv2
# import os

# images = os.listdir('data/nerf_llff_data/slam/images')

# export_path = 'data/nerf_llff_data/slam/images_8'

# if not os.path.exists(export_path):
#     os.mkdir(export_path)

# for image in images:
#     img = cv2.imread('data/nerf_llff_data/slam/images/' + image)
#     scale_percent = 12.5 # percent of original size
#     width = int(img.shape[1] * scale_percent / 100)
#     height = int(img.shape[0] * scale_percent / 100)
#     dim = (width, height)
#     # resize image
#     resized = cv2.resize(img, dim)
#     cv2.imwrite(export_path + '/' + image, resized)

In [None]:
# ready to run NeRF - for better results, use 1x downsampled images (no downsampling in this example)
# !python3 run_nerf.py --config configs/slam.txt

Using JHU data. These sequences are more oblique/en face and have relatively more geometric structures for these kinds of views.

In [1]:
# close / far depth
import os
import numpy as np
# Walk the JHU depth maps and report the overall smallest / largest depth.
path = 'data/nerf_llff_data/jhu/GeoDepth2/'
mins, maxs = [], []
for name in os.listdir(path):
    if not name.endswith('.npy'):
        # anything that is not a saved numpy array is ignored
        continue
    depth_map = np.load(os.path.join(path, name))
    mins.append(depth_map.min())
    maxs.append(depth_map.max())
print(min(mins), max(maxs))

-0.16404422 5.5122375


In [20]:
import numpy as np
import os  # used for the path handling below; this cell otherwise only imports numpy

# NOTE(review): `near` is negative here, apparently chosen to cover the
# negative minimum depth printed by the previous cell. NeRF samples points
# between the near and far bounds along each ray, and the nerf-pytorch LLFF
# loader presumably rescales the scene by the minimum bound, so a
# non-positive near bound is likely to break training. Confirm whether the
# negative depths are invalid pixels and, if so, use a small positive value.
near = -0.2
far = 6.0
img_width = 270
img_height = 216
focal = 154.058

path = "data/nerf_llff_data/jhu"
pose_file = "pose.txt"

# These two pieces are identical for every frame, so build them once.
hwf_column = np.array([img_height, img_width, focal]).reshape(3, 1)
depth_bounds = np.array([near, far])

poses = []
with open(os.path.join(path, pose_file), 'r') as f:
    for line_number, line in enumerate(f, start=1):
        line = line.strip()
        if not line:
            continue
        # drop the last four comma-separated fields; what is left must be
        # the 12 entries of a 3x4 pose matrix
        values = [float(value) for value in line.split(',')[:-4]]
        if len(values) != 12:
            raise ValueError(
                f"{pose_file} line {line_number}: expected 12 pose values "
                f"after dropping the last 4 fields, got {len(values)}"
            )
        # 3x4 pose + [height, width, focal] column -> 3x5 matrix
        pose = np.concatenate((np.reshape(values, (3, 4)), hwf_column), axis=1)
        # 15 flattened values followed by the near and far depth -> 17 values
        pose = np.concatenate((pose.flatten(), depth_bounds))
        poses.append(np.array(pose, dtype=np.float64))

if not poses:
    # refuse to write an empty poses_bounds.npy that would only fail later
    raise ValueError(f"no poses found in {os.path.join(path, pose_file)}")

export_path = os.path.join(path, 'poses_bounds.npy')
np.save(export_path, np.array(poses))

In [None]:
# ready to run NeRF - for better results, use 1x downsampled images (no downsampling in this example)
# !python3 run_nerf.py --config configs/jhu.txt