# NeRF 실습자료

8월 22일 AI Expert 과정을 위해 제작된 colab입니다.
본 자료에서는 NeRF의 기초 및 활용에 대해 탐구합니다. NeRF에 관한 이론적 배경은 다음 논문을 참고해주세요. [NeRF: Neural Radiance Field in 3D Vision, A Comprehensive Review](https://arxiv.org/pdf/2210.00379.pdf).

본 실습은 다음의 순서로 구성되어 있습니다.
* TinyNeRF
* NeRFStudio
* ThreeStudio

**본 코랩 파일을 본인 계정의 구글 드라이브에 저장하시면 수정 후 저장이 가능합니다: 메뉴바의 File --> Save a copy in Drive**

**GPU를 활성화하였는지 확인해주세요: 메뉴바의 Edit --> Notebook settings --> T4 GPU 선택**

# 1. TinyNeRF

빠른 학습 및 시각화를 위해 성능을 낮춘 NeRF 모델
* 기존 NeRF에 비해 파라미터 수가 약 20배 적음
* 입력에 view direction을 포함하지 않음 (5D 입력 대신 3D 위치만 사용)
* Hierarchical Sampling을 진행하지 않음

In [None]:
import os,sys
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
import matplotlib.pyplot as plt

# Download the tiny NeRF dataset only when it is not already in the working directory
if not os.path.exists('tiny_nerf_data.npz'):
    !wget http://cseweb.ucsd.edu/~viscomp/projects/LF/papers/ECCV20/nerf/tiny_nerf_data.npz

--2023-08-22 02:13:58--  http://cseweb.ucsd.edu/~viscomp/projects/LF/papers/ECCV20/nerf/tiny_nerf_data.npz
Resolving cseweb.ucsd.edu (cseweb.ucsd.edu)... 132.239.8.30
Connecting to cseweb.ucsd.edu (cseweb.ucsd.edu)|132.239.8.30|:80... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://cseweb.ucsd.edu//~viscomp/projects/LF/papers/ECCV20/nerf/tiny_nerf_data.npz [following]
--2023-08-22 02:13:59--  https://cseweb.ucsd.edu//~viscomp/projects/LF/papers/ECCV20/nerf/tiny_nerf_data.npz
Connecting to cseweb.ucsd.edu (cseweb.ucsd.edu)|132.239.8.30|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 12727482 (12M)
Saving to: ‘tiny_nerf_data.npz’


2023-08-22 02:14:00 (17.4 MB/s) - ‘tiny_nerf_data.npz’ saved [12727482/12727482]



In [None]:
# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Pull the image stack, the camera poses and the focal length out of the archive
rawData = np.load("tiny_nerf_data.npz")
images, poses, focal = rawData["images"], rawData["poses"], rawData["focal"]
# Axes 1 and 2 of the image stack are height and width
H, W = (int(side) for side in images.shape[1:3])
print(images.shape, poses.shape, focal)

# View 99 is the reference view that train() re-renders to report progress; show it once
testimg = images[99]
testpose = poses[99]
plt.imshow(testimg)
plt.show()

# Convert everything to torch tensors on the compute device
images, poses, testimg, testpose = (
    torch.Tensor(arr).to(device) for arr in (images, poses, testimg, testpose)
)

In [None]:
def get_rays(H, W, focal, pose):
  """Build one camera ray per pixel of an H x W image.

  Args:
    H: image height in pixels.
    W: image width in pixels.
    focal: focal length; any value accepted by float() (Python number,
      NumPy 0-d array, 0-d tensor).
    pose: camera pose tensor. Only pose[:3, :3] (applied to the ray
      directions) and pose[:3, -1] (used as the ray origin) are read.

  Returns:
    (rays_o, rays_d): two tensors of shape (H, W, 3) on pose's device.
    rays_o repeats the camera origin for every pixel; rays_d holds the
    rotated, un-normalised ray directions.
  """
  # The notebook loads focal as a NumPy 0-d array. Dividing a tensor by such an
  # array falls back to NumPy's reflected operator, which only works for CPU
  # tensors and lets NumPy decide the result dtype. A plain float keeps the
  # whole computation inside PyTorch in float32.
  focal = float(focal)
  # indexing='xy' directly yields (H, W) grids: i is the column index and
  # j is the row index of each pixel.
  i, j = torch.meshgrid(
      torch.arange(W, dtype=torch.float32, device=pose.device),
      torch.arange(H, dtype=torch.float32, device=pose.device),
      indexing='xy')
  # Pixel offsets from the image centre scaled by the focal length;
  # y is flipped and the camera looks along -z.
  dirs = torch.stack(
      [(i - W*0.5)/focal,
       -(j - H*0.5)/focal,
       -torch.ones_like(i)], -1)
  # Equivalent to pose[:3, :3] @ dir for every pixel
  rays_d = torch.sum(dirs[..., None, :] * pose[:3, :3], -1)
  rays_o = pose[:3, -1].expand(rays_d.shape)
  return rays_o, rays_d

In [None]:
def positional_encoder(x, L_embed=6):
  """Append sin/cos features of x at L_embed power-of-two frequencies.

  The output concatenates, along the last axis, x itself followed by
  sin(2^k * x) and cos(2^k * x) for k = 0 .. L_embed-1, so the last
  dimension grows by a factor of (1 + 2 * L_embed).
  """
  bands = [
      trig(2.**level * x)
      for level in range(L_embed)
      for trig in (torch.sin, torch.cos)
  ]
  return torch.cat([x, *bands], -1)

def cumprod_exclusive(tensor: torch.Tensor) -> torch.Tensor:
  """Exclusive cumulative product along the last axis.

  Element k of the result is the product of elements 0 .. k-1 of the
  input; element 0 is 1 (the empty product).
  """
  # Shift the inclusive product one slot to the right ...
  shifted = torch.roll(torch.cumprod(tensor, dim=-1), shifts=1, dims=-1)
  # ... and overwrite the value that wrapped around with the empty product
  shifted[..., 0] = 1.
  return shifted

def render(model, rays_o, rays_d, near, far, n_samples, rand=False):
  """Volume-render a bundle of rays through `model`.

  Args:
    model: callable applied to positionally encoded sample points; its
      output is reshaped to 4 values per sample (3 colour + 1 density).
    rays_o: ray origins, last axis of size 3.
    rays_d: ray directions, same shape as rays_o.
    near, far: ends of the depth interval that is sampled.
    n_samples: number of depth samples per ray.
    rand: when True, each depth sample is jittered inside its own bin;
      the same jittered depths are shared by every ray.

  Returns:
    (rgb_map, depth_map, acc_map): per-ray colour, weighted depth and
    summed weights.
  """
  def batchify(fn, chunk=1024*32):
    # Apply fn to slices of at most `chunk` rows and concatenate the results
    def run_in_chunks(inputs):
      outputs = [fn(inputs[start:start+chunk])
                 for start in range(0, inputs.shape[0], chunk)]
      return torch.cat(outputs, 0)
    return run_in_chunks

  # Evenly spaced depths between near and far
  z = torch.linspace(near, far, n_samples).to(device)
  if rand:
    # Stratified sampling: draw one uniform sample inside each depth bin
    mids = 0.5 * (z[..., 1:] + z[..., :-1])
    upper = torch.cat([mids, z[..., -1:]], -1)
    lower = torch.cat([z[..., :1], mids], -1)
    t_rand = torch.rand(z.shape).to(device)
    z = lower + (upper - lower) * t_rand

  # 3D sample positions: origin + direction * depth
  points = rays_o[..., None, :] + rays_d[..., None, :] * z[..., :, None]

  # Flatten, encode, query the model in chunks, then restore the ray layout
  encoded = positional_encoder(points.reshape(-1, points.shape[-1]))
  raw = batchify(model)(encoded).reshape(*points.shape[:-1], 4)

  # Compute opacities and colour
  density = F.relu(raw[..., 3])
  colour = torch.sigmoid(raw[..., :3])

  # Volume rendering: the last sample gets a very large (1e10) trailing interval
  far_gap = torch.tensor([1e10], dtype=rays_o.dtype).to(device).expand(z[..., :1].shape)
  dists = torch.cat((z[..., 1:] - z[..., :-1], far_gap), dim=-1)
  alpha = 1. - torch.exp(-density * dists)
  # Weight = opacity of this sample times the transmittance in front of it
  weights = alpha * cumprod_exclusive(1. - alpha + 1e-10)

  rgb_map = torch.sum(weights[..., None] * colour, dim=-2)
  depth_map = torch.sum(weights * z, dim=-1)
  acc_map = torch.sum(weights, dim=-1)
  return rgb_map, depth_map, acc_map


In [None]:
#helper functions
def mse2psnr(x):
  """Convert an MSE tensor to PSNR: -10 * ln(x) / ln(10)."""
  ln10 = torch.log(torch.Tensor([10.])).to(device)
  return -10. * torch.log(x) / ln10

def train(model, optimizer, n_iters = 3001):
  """Optimise `model` with an MSE image loss, one random training view per step.

  Every 500 iterations the reference view (testpose / testimg) is rendered
  without gradients, its PSNR is appended to the history, and the render is
  plotted next to the PSNR curve.

  Args:
    model: network handed to render().
    optimizer: optimizer that is zeroed and stepped once per iteration.
    n_iters: number of optimisation steps.
  """
  plot_step = 500
  n_samples = 64
  # History used to draw the PSNR curve
  iternums, psnrs = [], []

  def show_progress(step):
    # Render the reference view and plot it beside the PSNR history
    with torch.no_grad():
      ref_o, ref_d = get_rays(H, W, focal, testpose)
      rgb, depth, acc = render(model, ref_o, ref_d, near=2., far=6., n_samples=n_samples)
      loss = torch.nn.functional.mse_loss(rgb, testimg)
      psnrs.append(mse2psnr(loss).cpu())
      iternums.append(step)

      plt.figure(figsize=(10,5))
      plt.subplot(121)
      # Copy the image from the compute device to the CPU before plotting
      plt.imshow(rgb.cpu())
      plt.title(f'Iterations: {step}')
      plt.subplot(122)
      plt.plot(iternums, psnrs)
      plt.title('PSNR')
      plt.show()

  for i in range(n_iters):
    # Pick one training view uniformly at random
    images_idx = np.random.randint(images.shape[0])

    # Forward pass with jittered depth samples, then one optimiser step
    rays_o, rays_d = get_rays(H, W, focal, poses[images_idx])
    rgb, disp, acc = render(model, rays_o, rays_d, near=2., far=6., n_samples=n_samples, rand=True)
    optimizer.zero_grad()
    image_loss = torch.nn.functional.mse_loss(rgb, images[images_idx])
    image_loss.backward()
    optimizer.step()

    if i % plot_step == 0:
      show_progress(i)

In [None]:
class VeryTinyNerfModel(torch.nn.Module):
  """Three-layer ReLU MLP mapping an encoded 3D point to 4 raw outputs.

  Args:
    filter_size: width of the two hidden layers.
    num_encoding_functions: number of positional-encoding frequencies; the
      expected input width is 3 + 3 * 2 * num_encoding_functions.
  """

  def __init__(self, filter_size=128, num_encoding_functions=6):
    super().__init__()
    # xyz plus one sin and one cos term per axis and frequency (default: 39)
    encoded_dim = 3 + 3 * 2 * num_encoding_functions
    # default: 39 -> 128
    self.layer1 = torch.nn.Linear(encoded_dim, filter_size)
    # default: 128 -> 128
    self.layer2 = torch.nn.Linear(filter_size, filter_size)
    # default: 128 -> 4
    self.layer3 = torch.nn.Linear(filter_size, 4)
    # Activation applied after the first two layers
    self.relu = torch.nn.functional.relu

  def forward(self, x):
    hidden = self.relu(self.layer1(x))
    hidden = self.relu(self.layer2(hidden))
    # The last layer is left linear; render() applies the output activations
    return self.layer3(hidden)

In [None]:
#Run all the actual code
# Build the model, wrap it in DataParallel on the compute device, and train it with Adam
nerf = nn.DataParallel(VeryTinyNerfModel()).to(device)
optimizer = torch.optim.Adam(nerf.parameters(), lr=5e-3, eps = 1e-7)
train(nerf, optimizer)

In [None]:
%matplotlib inline
from ipywidgets import interactive, widgets


def trans_t(t):
    """4x4 homogeneous matrix that translates by t along the z axis."""
    return torch.tensor([
        [1, 0, 0, 0],
        [0, 1, 0, 0],
        [0, 0, 1, t],
        [0, 0, 0, 1],
    ], dtype=torch.float32)


def rot_phi(phi):
    """4x4 homogeneous rotation by phi radians about the x axis."""
    c, s = np.cos(phi), np.sin(phi)
    return torch.tensor([
        [1, 0, 0, 0],
        [0, c, -s, 0],
        [0, s, c, 0],
        [0, 0, 0, 1],
    ], dtype=torch.float32)


def rot_theta(th):
    """4x4 homogeneous rotation by th radians in the x-z plane (about the y axis)."""
    c, s = np.cos(th), np.sin(th)
    return torch.tensor([
        [c, 0, -s, 0],
        [0, 1, 0, 0],
        [s, 0, c, 0],
        [0, 0, 0, 1],
    ], dtype=torch.float32)


def pose_spherical(theta, phi, radius):
    """Build a 4x4 camera pose from spherical coordinates.

    theta and phi are given in degrees; radius is the translation passed to
    trans_t. The pose is composed by left-multiplying, in order: the
    translation, the phi rotation, the theta rotation, and a fixed matrix
    that negates x and swaps the y and z axes.
    """
    axis_swap = torch.tensor(
        [[-1,0,0,0],[0,0,1,0],[0,1,0,0],[0,0,0,1]], dtype=torch.float32)
    c2w = trans_t(radius)
    # Each transform is applied on the left of everything accumulated so far
    for left in (rot_phi(phi/180.*np.pi), rot_theta(theta/180.*np.pi), axis_swap):
        c2w = left @ c2w
    return c2w


def f(**kwargs):
    """Render and display the view selected by the sliders.

    kwargs are forwarded unchanged to pose_spherical (theta, phi, radius).
    """
    # Use the notebook-wide `device` instead of a hard-coded .cuda() so the
    # pose lives where render() places its tensors (and so this works on CPU).
    c2w = pose_spherical(**kwargs).to(device)
    # Inference only: no autograd graph is needed for a preview render
    with torch.no_grad():
        rays_o, rays_d = get_rays(H, W, focal, c2w[:3,:4])
        rgb, depth, acc = render(nerf, rays_o, rays_d, near=2., far=6., n_samples=64)
    img = np.clip(rgb.cpu().detach().numpy(),0,1)

    plt.figure(2, figsize=(20,6))
    plt.imshow(img)
    plt.show()


def sldr(v, mi, ma):
    """Create a float slider with initial value v, range [mi, ma] and step 0.01."""
    return widgets.FloatSlider(value=v, min=mi, max=ma, step=.01)


# One entry per keyword argument of f: [name, [initial value, min, max]]
names = [
    ['theta', [100., 0., 360]],
    ['phi', [-30., -90, 0]],
    ['radius', [4., 3., 5.]],
]

# Bind one slider to each argument and re-run f whenever a slider moves
interactive_plot = interactive(f, **{label: sldr(*spec) for label, spec in names})
# The last child of the widget is its output area; give it a fixed height
output = interactive_plot.children[-1]
output.layout.height = '350px'
interactive_plot

# 2. NeRFStudio

다양한 NeRF 모델을 쉽게 활용할 수 있도록 제작된 플랫폼.

* 여러 종류의 NeRF 모델 (dynamic nerf, editing nerf, 3d diffusion model, fast nerf)을 포함
* 강력한 Visualizer를 지원하여, 원하는 뷰 이미지를 간편하게 렌더링 가능
* Dataloader, ray sampler, encoder 등, nerf의 각 모듈을 손쉽게 수정할 수 있어서 새로운 모델 개발 용이

In [None]:
#@markdown Install Nerfstudio and Dependencies (~8 min)
%cd /content/
!pip install --upgrade pip
!pip install torch==2.0.1+cu118 torchvision==0.15.2+cu118 --extra-index-url https://download.pytorch.org/whl/cu118

# Installing TinyCuda
%cd /content/
!gdown "https://drive.google.com/u/1/uc?id=1-7x7qQfB7bIw2zV4Lr6-yhvMpjXC84Q5&confirm=t"
!pip install tinycudann-1.7-cp310-cp310-linux_x86_64.whl

# Installing COLMAP
%cd /content/
!gdown "https://drive.google.com/u/0/uc?id=15WngFRNar_b8CaPR5R-hvQ3eAnlyk_SL&confirm=t"
!sudo apt-get install \
    build-essential \
    libboost-program-options-dev \
    libboost-filesystem-dev \
    libboost-graph-dev \
    libboost-system-dev \
    libboost-test-dev \
    libeigen3-dev \
    libflann-dev \
    libfreeimage-dev \
    libmetis-dev \
    libgoogle-glog-dev \
    libgflags-dev \
    libsqlite3-dev \
    libglew-dev \
    qtbase5-dev \
    libqt5opengl5-dev \
    libcgal-dev \
    libceres-dev
!unzip -o local.zip -d /usr/
!chmod +x /usr/local/bin/colmap

# Install nerfstudio
%cd /content/
!pip install tensorboard<2.13
!pip install git+https://github.com/nerfstudio-project/nerfstudio.git

In [None]:
#@markdown <h1> Downloading and Processing Data</h1>
#@markdown <h3>Pick the preset scene or upload your own images/video</h3>
import os
import glob
from google.colab import files
from IPython.core.display import display, HTML

scene = '\uD83D\uDE9C dozer' #@param ['🖼 poster', '🚜 dozer', '🌄 desolation', '📤 upload your images' , '🎥 upload your own video', '🔺 upload Polycam data', '💽 upload your own Record3D data']
scene = ' '.join(scene.split(' ')[1:])

if scene == "upload Polycam data":
    %cd /content/
    !mkdir -p /content/data/nerfstudio/custom_data
    %cd /content/data/nerfstudio/custom_data/
    uploaded = files.upload()
    dir = os.getcwd()
    if len(uploaded.keys()) > 1:
        print("ERROR, upload a single .zip file when processing Polycam data")
    dataset_dir = [os.path.join(dir, f) for f in uploaded.keys()][0]
    !ns-process-data polycam --data $dataset_dir --output-dir /content/data/nerfstudio/custom_data/
    scene = "custom_data"
elif scene == 'upload your own Record3D data':
    display(HTML('<h3>Zip your Record3D folder, and upload.</h3>'))
    display(HTML('<h3>More information on Record3D can be found <a href="https://docs.nerf.studio/en/latest/quickstart/custom_dataset.html#record3d-capture" target="_blank">here</a>.</h3>'))
    %cd /content/
    !mkdir -p /content/data/nerfstudio/custom_data
    %cd /content/data/nerfstudio/custom_data/
    uploaded = files.upload()
    dir = os.getcwd()
    preupload_datasets = [os.path.join(dir, f) for f in uploaded.keys()]
    record_3d_zipfile = preupload_datasets[0]
    !unzip $record_3d_zipfile -d /content/data/nerfstudio/custom_data
    custom_data_directory = glob.glob('/content/data/nerfstudio/custom_data/*')[0]
    !ns-process-data record3d --data $custom_data_directory --output-dir /content/data/nerfstudio/custom_data/
    scene = "custom_data"
elif scene in ['upload your images', 'upload your own video']:
    display(HTML('<h3>Select your custom data</h3>'))
    display(HTML('<p/>You can select multiple images by pressing ctrl, cmd or shift and click.<p>'))
    display(HTML('<p/>Note: This may take time, especially on higher resolution inputs, so we recommend to download dataset after creation.<p>'))
    !mkdir -p /content/data/nerfstudio/custom_data
    if scene == 'upload your images':
        !mkdir -p /content/data/nerfstudio/custom_data/raw_images
        %cd /content/data/nerfstudio/custom_data/raw_images
        uploaded = files.upload()
        dir = os.getcwd()
    else:
        %cd /content/data/nerfstudio/custom_data/
        uploaded = files.upload()
        dir = os.getcwd()
    preupload_datasets = [os.path.join(dir, f) for f in uploaded.keys()]
    del uploaded
    %cd /content/

    if scene == 'upload your images':
        !ns-process-data images --data /content/data/nerfstudio/custom_data/raw_images --output-dir /content/data/nerfstudio/custom_data/
    else:
        video_path = preupload_datasets[0]
        !ns-process-data video --data $video_path --output-dir /content/data/nerfstudio/custom_data/

    scene = "custom_data"
else:
    %cd /content/
    !ns-download-data nerfstudio --capture-name=$scene

print("Data Processing Succeeded!")

In [None]:
#@markdown <h1>Set up and Start Viewer</h1>

%cd /content

# Install localtunnel
# We are using localtunnel https://github.com/localtunnel/localtunnel but ngrok could also be used
!npm install -g localtunnel

# Tunnel port 7007, the default for
!rm url.txt 2> /dev/null
get_ipython().system_raw('lt --port 7007 >> url.txt 2>&1 &')

import time
time.sleep(3) # the previous command needs time to write to url.txt


with open('url.txt') as f:
  lines = f.readlines()
websocket_url = lines[0].split(": ")[1].strip().replace("https", "wss")
# from nerfstudio.utils.io import load_from_json
# from pathlib import Path
# json_filename = "nerfstudio/nerfstudio/viewer/app/package.json"
# version = load_from_json(Path(json_filename))["version"]
url = f"https://viewer.nerf.studio/?websocket_url={websocket_url}"
print(url)
print("You may need to click Refresh Page after you start training!")
from IPython import display
display.IFrame(src=url, height=800, width="100%")

In [None]:
#@markdown <h1>Start Training</h1>

%cd /content
if os.path.exists(f"data/nerfstudio/{scene}/transforms.json"):
    !ns-train nerfacto --viewer.websocket-port 7007 nerfstudio-data --data data/nerfstudio/$scene --downscale-factor 4
else:
    from IPython.core.display import display, HTML
    display(HTML('<h3 style="color:red">Error: Data processing did not complete</h3>'))
    display(HTML('<h3>Please re-run `Downloading and Processing Data`, or view the FAQ for more info.</h3>'))

In [None]:
# Example invocation: the run timestamp 2023-08-21_125010 is hard-coded in the config,
# camera-path and output paths, so adjust all three to your own training run.
!ns-render camera-path --load-config outputs/unnamed/nerfacto/2023-08-21_125010/config.yml --camera-path-filename outputs/unnamed/nerfacto/2023-08-21_125010/camera_paths/2023-08-21_125010.json --output-path renders/2023-08-21_125010/2023-08-21_125010.mp4

In [None]:
#@title # Render Video { vertical-output: true }
#@markdown <h3>Export the camera path from within the viewer, then run this cell.</h3>
#@markdown <h5>The rendered video should be at renders/output.mp4!</h5>


base_dir = "/content/outputs/unnamed/nerfacto/"
training_run_dir = base_dir + os.listdir(base_dir)[0]

from IPython.core.display import display, HTML
display(HTML('<h3>Upload the camera path JSON.</h3>'))
%cd $training_run_dir
uploaded = files.upload()
uploaded_camera_path_filename = list(uploaded.keys())[0]

config_filename = training_run_dir + "/config.yml"
camera_path_filename = training_run_dir + "/" + uploaded_camera_path_filename
camera_path_filename = camera_path_filename.replace(" ", "\\ ").replace("(", "\\(").replace(")", "\\)")

%cd /content/
!ns-render camera-path --load-config $config_filename --camera-path-filename $camera_path_filename --output-path renders/output.mp4

[2;36m[19:48:48][0m[2;36m [0mSkipping [1;36m0[0m files in dataset split train.                                          ]8;id=527413;file:///content/nerfstudio/nerfstudio/data/dataparsers/nerfstudio_dataparser.py\[2mnerfstudio_dataparser.py[0m]8;;\[2m:[0m]8;id=243595;file:///content/nerfstudio/nerfstudio/data/dataparsers/nerfstudio_dataparser.py#91\[2m91[0m]8;;\
[2;36m          [0m[2;36m [0mSkipping [1;36m0[0m files in dataset split test.                                           ]8;id=109270;file:///content/nerfstudio/nerfstudio/data/dataparsers/nerfstudio_dataparser.py\[2mnerfstudio_dataparser.py[0m]8;;\[2m:[0m]8;id=464675;file:///content/nerfstudio/nerfstudio/data/dataparsers/nerfstudio_dataparser.py#91\[2m91[0m]8;;\
[2KLoading data batch [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [35m100%[0m [36m0:00:00[0m
  cpuset_checked))
[2KLoading data batch [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [35m100%[0m [36m0:00:00[0m
  f

In [None]:
#@title # 중지된 training 재개
base_dir = "/content/outputs/unnamed/nerfacto/"
training_run_dir = base_dir + os.listdir(base_dir)[0] + '/nerfstudio_models'

%cd /content/
!ns-train nerfacto --load-dir {training_run_dir} --viewer.websocket-port 7007 nerfstudio-data --data data/nerfstudio/$scene --downscale-factor 4

### NeRFStudio를 이용한 다양한 모델의 활용
* nerfacto는 여러 논문에서 component를 조합하여 만든 pipeline임
* nerfstudio는 nerfacto 이외에 다양한 모델들을 지원
  - Instant-NGP
  - [Instruct-NeRF2NeRF](https://docs.nerf.studio/en/latest/nerfology/methods/in2n.html)
  - K-Planes
  - [LERF](https://docs.nerf.studio/en/latest/nerfology/methods/lerf.html)
  - Mip-NeRF
  - NeRF
  - Nerfacto
  - Nerfbusters
  - NeRFPlayer
  - Tetra-NeRF
  - TensoRF
  - [Generfacto](https://docs.nerf.studio/en/latest/nerfology/methods/generfacto.html)

#### Training TensoRF

In [None]:
# Same data and viewer port as the nerfacto run above; only the method name (tensorf) differs
!ns-train tensorf --viewer.websocket-port 7007 nerfstudio-data --data data/nerfstudio/$scene --downscale-factor 4

# 3. ThreeStudio

#### Clone threestudio repo

In [None]:
# Clone the repository and make it the working directory; the launch.py and configs/
# paths used in later cells are relative to it
!git clone https://github.com/threestudio-project/threestudio.git
%cd threestudio

#### Install Dependencies

In [None]:
# Python dependencies for threestudio; lightning and omegaconf are pinned, the rest are unpinned
!pip install ninja
!pip install lightning==2.0.0 omegaconf==2.3.0 jaxtyping typeguard diffusers transformers accelerate opencv-python tensorboard matplotlib imageio imageio[ffmpeg] trimesh bitsandbytes sentencepiece safetensors huggingface_hub libigl xatlas networkx pysdf PyMCubes wandb torchmetrics controlnet_aux
!pip install einops kornia taming-transformers-rom1504 git+https://github.com/openai/CLIP.git # zero123
!pip install open3d plotly # mesh visualization
# The remaining packages are installed straight from their git repositories
!pip install git+https://github.com/ashawkey/envlight.git
!pip install git+https://github.com/KAIR-BAIR/nerfacc.git@v0.5.2
!pip install git+https://github.com/NVlabs/nvdiffrast.git
!pip install git+https://github.com/NVlabs/tiny-cuda-nn/#subdirectory=bindings/torch

#### Login to HuggingFace

In [None]:
from huggingface_hub import interpreter_login

# Start the Hugging Face login flow inside the notebook
interpreter_login()

#### Generate 3D Model

In [None]:
# Text prompt interpolated into the command below as system.prompt_processor.prompt
prompt = "a zoomed out DSLR photo of a baby bunny sitting on top of a stack of pancakes"
# Train with the dreamfusion-sd config on GPU 0 for 1000 steps
!python launch.py --config configs/dreamfusion-sd.yaml --train --gpu 0 system.prompt_processor.prompt="$prompt" trainer.max_steps=1000 system.prompt_processor.spawn=false

#### Display the rendered Video

In [None]:
from IPython.display import HTML
from base64 import b64encode
def display_video(video_path):
  """Return an HTML <video> element with the MP4 at `video_path` embedded inline.

  The whole file is read into memory and embedded as a base64 data URL.
  """
  # The context manager closes the file handle once the bytes are read
  with open(video_path, 'rb') as video_file:
    mp4 = video_file.read()
  data_url = "data:video/mp4;base64," + b64encode(mp4).decode()
  return HTML("""
  <video width=1000 controls>
    <source src="%s" type="video/mp4">
  </video>
  """ % data_url)

In [None]:
# you will see the path to the saving directory at the end of the training logs
# replace save_dir below with that path
save_dir = 'path/to/save/dir'

import os
import glob
# Fail with an actionable message instead of a bare IndexError when save_dir
# is still the placeholder or contains no test video
matches = glob.glob(os.path.join(save_dir, "*-test.mp4"))
if not matches:
    raise FileNotFoundError(
        f"No '*-test.mp4' file found in {save_dir!r}; set save_dir to the "
        "saving directory printed at the end of the training logs")
video_path = matches[0]
display_video(video_path)

#### Extract the object mesh.

Here we use an empirical threshold value. You can also first try system.geometry.isosurface_threshold=auto and visualize it. Then you can manually adjust the threshold according to the automatically determined value shown in the logs. Increase it if there are too many floaters and decrease it if the geometry is incomplete.

The extraction process takes around 2 mins on T4.

In [None]:
# Export a mesh from the last checkpoint; the config and checkpoint paths are resolved
# relative to save_dir's parent directory, and the isosurface threshold is fixed at 15.0
!python launch.py --config $save_dir/../configs/parsed.yaml --export --gpu 0 resume=$save_dir/../ckpts/last.ckpt system.exporter_type=mesh-exporter system.exporter.context_type=cuda system.geometry.isosurface_threshold=15.0