# Code Practice : NeRF

## Importing Libraries

In [6]:
import os
from PIL import Image
import numpy as np 
import torch
import matplotlib.pyplot as plt 

## Preprocessing Data

- 데이터셋은 tiny_nerf_data를 씁니다.


- 데이터셋은 아래의 링크에서 다운로드 받을 수 있습니다.


- [tiny_nerf_data](http://cseweb.ucsd.edu/~viscomp/projects/LF/papers/ECCV20/nerf/tiny_nerf_data.npz)




In [33]:
# Specify the environment path
PATH = 'C:/Users/user/anaconda3/envs/NeRF' # check your development environment path

# Load tiny_nerf_data
tiny_nerf_data = np.load(os.path.join(PATH, 'tiny_nerf_data.npz'))

# Check the array name of tiny_nerf_data
print('The array name of tiny_nerf_data: ', tiny_nerf_data.files)

# Check the array shape of tiny_nerf_data
for name in tiny_nerf_data.files:
    print(name,':', tiny_nerf_data[name].shape)

# Set the device
device = torch.device('cuda'if torch.cuda.is_available else 'cpu')

# Define variables of dataset
images = tiny_nerf_data['images']
poses = tiny_nerf_data['poses']
focal = tiny_nerf_data['focal']

# Get the number of images, and the length of height and width
num_images, height, width = images.shape[:-1]

# Split the dataset into training set and test set 
test_idx = 101
test_image = images[test_idx]
test_pose = poses[test_idx]

# Move training variables to the device
images = torch.from_numpy(images[:100, ... , :3]).to(device)
poses = torch.from_numpy(poses).to(device)
focal = torch.from_numpy(focal).to(device)

The array name of tiny_nerf_data:  ['images', 'poses', 'focal']
images : (106, 100, 100, 3)
poses : (106, 4, 4)
focal : ()


AssertionError: Torch not compiled with CUDA enabled

## Utility Functions

In [35]:
# Compute the origin and direction vectors 
def get_rays(height : int, width : int, focal : float, pose : torch.Tensor) : 
    '''
    Inputs:
        height - Int. the height of an image
        width - Int. the width of an image
        focal - Float. focal length of the camera
        pose - torch.Tensor. the pose of an image 
        
    Outputs:
        rays_o - torch.Tensor. origin vector of the ray
        rays_d - torch.Tensor. direction vector of the ray
    '''
    
    # Use torch.meshgrid to build a meshgrid of size (height X width)
    i, j = torch.meshgrid(torch.arange(width, dtype = torch.float32).to(pose), 
                          torch.arange(height, dtype = torch.float32).to(pose), 
                          indexing = 'ij')
    
    # Use .transpose method to reshape the meshgrid
    i, j = i.transpose(-1, -2), j.transpose(-1, -2)
    
    # Calculate the x,y,z coordinates of a direction vector, ray_d
    # Measure the distance between current pixel coordinates and the center of the image on x,y axis.
    # Normalize x,y coordinates and set a z coordinate to 1. 
    rays_d = torch.stack([i - weight * .5 / focal, -(j - height * .5) / focal, torch.ones_like(i)], dim = -1)
    
    # Multiply rays_d with the camera pose to rotate w.r.t. world coordinates
    rays_d = torch.sum(rays_d[..., None, :] * pose[:3, :3], dim = -1) # pose[:3, :3] is the rotation part.
    
    rays_o = pose[:3, -1].expand(rays_d.shape)
    
    return rays_o, rays_d

In [20]:
# Positional Encoding
def positional_encoding(p : torch.tensor, L : int) -> torch.tensor:
    '''
    Inputs:
        p - Tensor. p can be 3 coordinate values in vector x or Catersian viewing direction unit vector d.
            p lies in [-1, 1].
        L - Int. Dimensionality of positional encoding.
    
    Output:
        gamma_p - Tensor. The positional encoding of p.
    '''
    gamma_p =[]
    
    frequency = 2.0 ** torch.linspace(0, L-1, L, dtype = p.dtype, device = p.device)
    
    for freq in frequency:
        gamma_p.append(freq * torch.pi * p)
        gamma_p.append(freq * torch.pi * p)
        
    gamma_p = torch.cat(gamma_p, dim = -1)
    
    return gamma_p

In [22]:
p = torch.Tensor([-0.51, 0.125, 0.68])
print(positional_encoding(p, L = 4))

tensor([ -1.6022,   0.3927,   2.1363,  -1.6022,   0.3927,   2.1363,  -3.2044,
          0.7854,   4.2726,  -3.2044,   0.7854,   4.2726,  -6.4088,   1.5708,
          8.5451,  -6.4088,   1.5708,   8.5451, -12.8177,   3.1416,  17.0903,
        -12.8177,   3.1416,  17.0903])


In [None]:
# Classical Volume Rendering
def classical_volume_rendering(t_n, t_f, N):
    '''
    Inputs:
        t_n - the nearest boundary point of a camera ray. 
        t_f - the farthest boundary point of a camera ray. 
        N - 
    
    '''
    
    
    
    return 

In [None]:
# Stratified Sampling
def stratified_sampling()