<a href="https://colab.research.google.com/github/ameya1252/SeniorDesign/blob/main/fastnerfy.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
from IPython.display import display, Javascript
from google.colab.output import eval_js
import base64
from google.colab import drive

# Mount Google Drive at /content/drive; later cells copy the recording and the
# rendered video into "/content/drive/My Drive/".
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
def record_video(filename='video.mp4', duration=5):
    """Record a clip from the browser webcam and write it to ``filename``.

    A JavaScript helper is injected into the notebook output and run through
    ``eval_js``; it returns the recording as a base64 data URL, which is
    decoded and written to disk.

    Args:
        filename: Path of the file the recorded bytes are written to.
        duration: Recording length in seconds; must be a positive, finite number.

    Returns:
        ``filename``, unchanged.

    Raises:
        ValueError: If ``duration`` is not a positive, finite number
            (``TypeError`` if it cannot be converted to ``float`` at all).
    """
    # The value is interpolated into JavaScript source below, so coerce it to a
    # plain number first instead of formatting an arbitrary object.
    seconds = float(duration)
    if not 0 < seconds < float('inf'):
        raise ValueError('duration must be a positive, finite number of seconds')

    js = Javascript("""
    async function recordVideo(duration) {
        // Create a video stream and a media recorder to record
        const stream = await navigator.mediaDevices.getUserMedia({video: true});
        const mediaRecorder = new MediaRecorder(stream);
        const chunks = [];

        // When data is available, push it to the chunks array
        mediaRecorder.ondataavailable = event => chunks.push(event.data);

        // Subscribe to the stop event BEFORE stop() can be called so it cannot be missed
        const stopped = new Promise(resolve => mediaRecorder.onstop = resolve);

        // Create a video element to show the video stream
        const video = document.createElement('video');
        video.style.display = 'block';
        video.srcObject = stream;
        video.autoplay = true;
        video.muted = true;
        video.width = 224;
        document.body.appendChild(video);

        try {
            // Record for the specified duration, then wait for the recorder to flush
            mediaRecorder.start();
            await new Promise(resolve => setTimeout(resolve, duration * 1000));
            mediaRecorder.stop();
            await stopped;
        } finally {
            // Always remove the preview and release the camera, even on failure
            document.body.removeChild(video);
            stream.getTracks().forEach(track => track.stop());
        }

        // Convert the chunks to a blob. The type only labels the data URL prefix,
        // which the Python side discards.
        // NOTE(review): the container actually recorded is chosen by the browser's
        // MediaRecorder and may not be MP4 - confirm.
        const blob = new Blob(chunks, {type: 'video/mp4'});
        const reader = new FileReader();

        // Read the blob as base64 data and return it
        const loaded = new Promise(resolve => reader.onloadend = resolve);
        reader.readAsDataURL(blob);
        await loaded;
        return reader.result;
    }
    """)
    display(js)
    video_base64 = eval_js('recordVideo({})'.format(seconds))
    # A data URL looks like "data:<mime>;base64,<payload>"; keep only the payload.
    video_data = base64.b64decode(video_base64.split(',')[1])
    with open(filename, 'wb') as f:
        f.write(video_data)
    return filename

# Specify the duration of the recording in seconds
duration = 5  # Change this to your desired recording length
# Record from the webcam; the clip is written under record_video's default filename.
video_filename = record_video(duration=duration)

# After recording, save the video to your Google Drive
# ({video_filename} is interpolated by IPython into the shell command).
!cp "{video_filename}" "/content/drive/My Drive/"


<IPython.core.display.Javascript object>

In [None]:
# Optional: use an existing video for the demo instead of recording a new one
#video_filename = 'vid2.mp4'

In [None]:
# Step 2: Specify the path to your video file on Google Drive
# (the Drive folder the recording was copied to, plus its filename).
video_path = '/content/drive/My Drive/'+video_filename  # Update this path
print(video_filename)
# Step 3: Install opencv-python if not already installed
!pip install opencv-python

# Import necessary libraries
import cv2
import numpy as np
import os
from PIL import Image
import glob

# Function to extract frames
def extract_frames(video_path, num_frames=100):
    """Save up to ``num_frames`` evenly spaced frames of a video as JPEG files.

    Frames are written to ``ExtractedFrames/frame_000.jpg``,
    ``frame_001.jpg``, ... relative to the current working directory.

    Args:
        video_path: Path of the video file to read.
        num_frames: Maximum number of frames to save (at least 1).

    Returns:
        The number of frames actually written.

    Raises:
        ValueError: If ``num_frames`` is smaller than 1.
        OSError: If the video cannot be opened.
    """
    if num_frames < 1:
        raise ValueError('num_frames must be at least 1')

    cap = cv2.VideoCapture(video_path)
    try:
        if not cap.isOpened():
            raise OSError(f'Could not open video: {video_path}')

        frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        # A reported count below num_frames (including 0) would make the integer
        # division yield 0 and the modulo below raise ZeroDivisionError, so fall
        # back to keeping every frame.
        interval = max(1, frame_count // num_frames)

        os.makedirs('ExtractedFrames', exist_ok=True)
        count = 0      # frames read so far
        frame_idx = 0  # frames written so far
        while frame_idx < num_frames:
            ret, frame = cap.read()
            if not ret:
                break
            if count % interval == 0:
                frame_path = os.path.join('ExtractedFrames', f'frame_{frame_idx:03d}.jpg')
                cv2.imwrite(frame_path, frame)
                frame_idx += 1
            count += 1
    finally:
        # Release the capture handle even if reading or writing failed.
        cap.release()

    return frame_idx

# Sample frames from the recorded video into ExtractedFrames/
extract_frames(video_path)

# Dataset settings
image_directory = 'ExtractedFrames'  # Directory where frames are saved
output_file = 'tiny_nerf_data.npz'  # Output file name
H, W = 120, 156  # Desired image height and width, adjust as needed
focal_length = 112  # Example focal length, adjust based on your needs

# Read every extracted frame as RGB, resize it to (W, H) and scale to [0, 1]
image_files = sorted(glob.glob(f'{image_directory}/*.jpg'))
images = np.array(
    [np.array(Image.open(path).convert('RGB').resize((W, H))) for path in image_files],
    dtype=np.float32,
) / 255.0

# Placeholder poses: one 4x4 identity matrix per image (replace with actual poses data)
poses = np.tile(np.eye(4, dtype=np.float32), (len(images), 1, 1))

# Bundle images, poses and focal length into a single .npz archive
np.savez(output_file, images=images, poses=poses, focal=focal_length)

print(f'Dataset saved to {output_file} with {len(images)} images.')


vid2.mp4
Dataset saved to tiny_nerf_data.npz with 100 images.


In [None]:
# Detect whether the notebook is running on Google Colab.
try:
    import google.colab  # imported only to probe availability
    IN_COLAB = True
except ImportError:
    # Only a missing package means "not on Colab"; other errors should surface.
    IN_COLAB = False

# The former `%tensorflow_version 2.x` magic was dropped: Colab reports that it
# only includes TensorFlow 2.x and that the magic has no effect.

import os, sys
import tensorflow as tf
tf.compat.v1.enable_eager_execution()

# `tqdm.tqdm_notebook` is deprecated in favour of `tqdm.notebook.tqdm`.
from tqdm.notebook import tqdm
import numpy as np
import matplotlib.pyplot as plt

Colab only includes TensorFlow 2.x; %tensorflow_version has no effect.


In [None]:
import numpy as np
import matplotlib.pyplot as plt

# Load the dataset written by the frame-extraction cell; the context manager
# closes the archive once the arrays have been read.
with np.load('tiny_nerf_data.npz') as data:
    images = data['images']
    poses = data['poses']
    focal = data['focal']

# Determine image and pose dimensions
H, W = images.shape[1:3]
print(images.shape, poses.shape, focal)

if len(images) < 2:
    raise ValueError('Need at least two images: one to hold out and one to train on.')

# Hold out the LAST image as the test view and train on all the others, so the
# test view is never part of the training set and any number of frames works.
testimg, testpose = images[-1, ..., :3], poses[-1]
images = images[:-1, ..., :3]  # Every image except the held-out one
poses = poses[:-1]  # Corresponding poses for the training images

# Display test image
plt.imshow(testimg)
plt.show()


In [None]:
import tensorflow as tf

def posenc(x):
    """Concatenate ``x`` with sin/cos of ``x`` scaled by powers of two.

    For each ``k`` in ``range(L_embed)`` the terms ``sin(2**k * x)`` and
    ``cos(2**k * x)`` are appended (in that order) after ``x`` along the last
    axis, so the last dimension grows by a factor of ``1 + 2 * L_embed``.
    """
    encoded = [x]
    for octave in range(L_embed):
        scaled = 2.**octave * x
        encoded.append(tf.sin(scaled))
        encoded.append(tf.cos(scaled))
    return tf.concat(encoded, -1)

# Number of frequency octaves used by posenc (read at call time).
L_embed = 6
# Encoder that render_rays applies to the flattened sample points.
embed_fn = posenc

def init_model(D=8, W=256, L=6):
    """Build the fully connected network that maps encoded points to RGB + density.

    Args:
        D: Number of hidden ReLU ``Dense`` layers.
        W: Width (units) of each hidden layer.
        L: Number of positional-encoding octaves the input was built with; the
            input size is ``3 + 3 * 2 * L``. This is independent of the global
            ``L_embed`` used by ``posenc`` and must be kept equal to it.

    Returns:
        A ``tf.keras.Model`` whose output has 4 channels with no activation.
    """
    # `shape` must be a tuple: `(n)` is just the int n, which newer Keras
    # versions do not accept as a shape.
    inputs = tf.keras.Input(shape=(3 + 3 * 2 * L,))
    outputs = inputs
    for i in range(D):
        outputs = tf.keras.layers.Dense(W, activation='relu')(outputs)
        # Skip connection: re-append the network input after layers 4, 8, ...
        if i % 4 == 0 and i > 0:
            # Use a Keras layer rather than a raw tf op so the concatenation is
            # valid on symbolic functional-API tensors.
            outputs = tf.keras.layers.Concatenate(axis=-1)([outputs, inputs])
    outputs = tf.keras.layers.Dense(4, activation=None)(outputs)

    model = tf.keras.Model(inputs=inputs, outputs=outputs)
    return model


def get_rays(H, W, focal, c2w):
    """Compute one ray origin and direction per pixel of an ``H`` x ``W`` image.

    Args:
        H: Image height in pixels.
        W: Image width in pixels.
        focal: Focal length used to scale pixel offsets from the image centre.
        c2w: Matrix whose ``[:3, :3]`` block rotates the per-pixel directions
            and whose ``[:3, -1]`` column is used as the origin of every ray.

    Returns:
        ``(rays_o, rays_d)``, both of shape ``(H, W, 3)``.
    """
    cols = tf.range(W, dtype=tf.float32)
    rows = tf.range(H, dtype=tf.float32)
    px, py = tf.meshgrid(cols, rows, indexing='xy')

    # Per-pixel direction before rotation: x grows with the column index,
    # y is negated relative to the row index, z is fixed at -1.
    x_cam = (px - W * .5) / focal
    y_cam = -(py - H * .5) / focal
    z_cam = -tf.ones_like(px)
    dirs = tf.stack([x_cam, y_cam, z_cam], -1)

    rotation = c2w[:3, :3]
    translation = c2w[:3, -1]
    # Broadcasted multiply + sum over the last axis applies `rotation` to every direction.
    rays_d = tf.reduce_sum(dirs[..., np.newaxis, :] * rotation, -1)
    rays_o = tf.broadcast_to(translation, tf.shape(rays_d))
    return rays_o, rays_d

def render_rays(network_fn, rays_o, rays_d, near, far, N_samples, rand=False):
    """Volume-render a bundle of rays with ``network_fn``.

    Args:
        network_fn: Callable mapping encoded sample points to 4 raw channels
            (the first three feed the colour, the fourth the density).
        rays_o: Ray origins; the last dimension is the 3-D position.
        rays_d: Ray directions, same shape as ``rays_o``.
        near: Depth of the first sample along each ray.
        far: Depth of the last sample along each ray.
        N_samples: Number of samples taken along each ray.
        rand: If true, add uniform random jitter of up to
            ``(far - near) / N_samples`` to every sample depth.

    Returns:
        ``(rgb_map, depth_map, acc_map)``: the weighted colour, the weighted
        sample depth and the sum of the weights for every ray.
    """

    def run_in_chunks(flat_inputs, chunk=1024 * 32):
        # Evaluate the network on slices of at most `chunk` rows and stitch
        # the results back together along the first axis.
        pieces = []
        for start in range(0, flat_inputs.shape[0], chunk):
            pieces.append(network_fn(flat_inputs[start:start + chunk]))
        return tf.concat(pieces, 0)

    # Sample depths and the corresponding 3D query points
    depths = tf.linspace(near, far, N_samples)
    if rand:
        jitter_shape = list(rays_o.shape[:-1]) + [N_samples]
        depths += tf.random.uniform(jitter_shape) * (far - near) / N_samples
    origins = rays_o[..., None, :]
    directions = rays_d[..., None, :]
    pts = origins + directions * depths[..., :, None]

    # Encode the flattened points, query the network, restore the ray layout
    encoded = embed_fn(tf.reshape(pts, [-1, 3]))
    raw = tf.reshape(run_in_chunks(encoded), list(pts.shape[:-1]) + [4])

    # Colours in [0, 1] and non-negative densities
    rgb = tf.math.sigmoid(raw[..., :3])
    sigma_a = tf.nn.relu(raw[..., 3])

    # Distance between consecutive samples; the last sample gets a huge distance
    gaps = depths[..., 1:] - depths[..., :-1]
    last_gap = tf.broadcast_to([1e10], depths[..., :1].shape)
    dists = tf.concat([gaps, last_gap], -1)

    # Per-sample opacity, weighted by the exclusive cumulative product of
    # (1 - alpha) over the samples before it
    alpha = 1. - tf.exp(-sigma_a * dists)
    transmittance = tf.math.cumprod(1. - alpha + 1e-10, -1, exclusive=True)
    weights = alpha * transmittance

    rgb_map = tf.reduce_sum(weights[..., None] * rgb, -2)
    depth_map = tf.reduce_sum(weights * depths, -1)
    acc_map = tf.reduce_sum(weights, -1)

    return rgb_map, depth_map, acc_map


In [None]:
model = init_model()
# Constant learning rate; a decaying schedule such as
# tf.keras.optimizers.schedules.ExponentialDecay could be passed instead.
optimizer = tf.keras.optimizers.Adam(5e-4)

# Depth range sampled along every ray. The same bounds are used for training,
# for the hold-out render below and by the later rendering cells.
near = 5.
far = 6.

N_samples = 64   # samples per ray
N_iters = 1000   # optimisation steps
psnrs = []       # hold-out PSNR at each logging step
iternums = []    # iteration index of each logged PSNR
i_plot = 25      # log / plot every i_plot iterations

import time
t = time.time()
for i in range(N_iters+1):

    # One optimisation step on a randomly chosen training image
    img_i = np.random.randint(images.shape[0])
    target = images[img_i]
    pose = poses[img_i]
    rays_o, rays_d = get_rays(H, W, focal, pose)
    with tf.GradientTape() as tape:
        rgb, depth, acc = render_rays(model, rays_o, rays_d, near=near, far=far, N_samples=N_samples, rand=True)
        loss = tf.reduce_mean(tf.square(rgb - target))
    gradients = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(gradients, model.trainable_variables))

    if i%i_plot==0:
        # Only a single step has run when i == 0, so do not divide by i_plot there.
        steps_timed = i_plot if i else 1
        print(i, (time.time() - t) / steps_timed, 'secs per iter')
        t = time.time()

        # Render the holdout view for logging, sampling the same depth range
        # the network is trained on.
        rays_o, rays_d = get_rays(H, W, focal, testpose)
        rgb, depth, acc = render_rays(model, rays_o, rays_d, near=near, far=far, N_samples=N_samples)
        loss = tf.reduce_mean(tf.square(rgb - testimg))
        psnr = -10. * tf.math.log(loss) / tf.math.log(10.)

        psnrs.append(psnr.numpy())
        iternums.append(i)

        plt.figure(figsize=(10,4))
        plt.subplot(121)
        plt.imshow(rgb)
        plt.title(f'Iteration: {i}')
        plt.subplot(122)
        plt.plot(iternums, psnrs)
        plt.title('PSNR')
        plt.show()

print('Done')

In [None]:
%matplotlib inline
from ipywidgets import interactive, widgets


def trans_t(t):
    """Return a 4x4 homogeneous matrix translating by ``t`` along the z axis."""
    return tf.convert_to_tensor([
        [1,0,0,0],
        [0,1,0,0],
        [0,0,1,t],
        [0,0,0,1],
    ], dtype=tf.float32)


def rot_phi(phi):
    """Return a 4x4 homogeneous rotation about the x axis by ``phi`` radians."""
    return tf.convert_to_tensor([
        [1,0,0,0],
        [0,tf.cos(phi),-tf.sin(phi),0],
        [0,tf.sin(phi), tf.cos(phi),0],
        [0,0,0,1],
    ], dtype=tf.float32)


def rot_theta(th):
    """Return a 4x4 homogeneous rotation about the y axis by ``th`` radians.

    The sign convention is the one given by the matrix below.
    """
    return tf.convert_to_tensor([
        [tf.cos(th),0,-tf.sin(th),0],
        [0,1,0,0],
        [tf.sin(th),0, tf.cos(th),0],
        [0,0,0,1],
    ], dtype=tf.float32)


def pose_spherical(theta, phi, radius):
    """Build a 4x4 camera-to-world matrix from two angles and a radius.

    Args:
        theta: Angle in degrees passed to ``rot_theta``.
        phi: Angle in degrees passed to ``rot_phi``.
        radius: Translation along z applied before the rotations.

    Returns:
        The product ``axis_swap @ rot_theta @ rot_phi @ trans_t``.
    """
    theta_rad = theta/180.*np.pi
    phi_rad = phi/180.*np.pi
    # Negates x and exchanges the y and z rows of the pose.
    axis_swap = np.array([[-1,0,0,0],[0,0,1,0],[0,1,0,0],[0,0,0,1]])
    c2w = rot_theta(theta_rad) @ (rot_phi(phi_rad) @ trans_t(radius))
    return axis_swap @ c2w

def f(**kwargs):
    """Render and show the view for the given ``pose_spherical`` arguments.

    Used as the slider callback: the keyword arguments are forwarded
    unchanged to ``pose_spherical``.
    """
    camera = pose_spherical(**kwargs)
    origins, directions = get_rays(H, W, focal, camera[:3,:4])
    rendered = render_rays(model, origins, directions, near=near, far=far, N_samples=N_samples)
    # Only the colour map (first element) is displayed; clamp it to [0, 1].
    img = np.clip(rendered[0],0,1)

    plt.figure(2, figsize=(20,6))
    plt.imshow(img)
    plt.show()


def sldr(v, mi, ma):
    """Create a float slider starting at ``v`` and ranging over ``[mi, ma]`` in steps of 0.01."""
    return widgets.FloatSlider(value=v, min=mi, max=ma, step=.01)

# One entry per keyword argument of the callback: [name, [initial, minimum, maximum]]
names = [
    ['theta', [100., 0., 360]],
    ['phi', [-30., -90, 0]],
    ['radius', [4., 3., 5.]],
]

# Build one slider per entry and hook them all up to the render callback
interactive_plot = interactive(f, **{name: sldr(*spec) for name, spec in names})
# The last child of the widget is its output area; give it a fixed height
output = interactive_plot.children[-1]
output.layout.height = '350px'
interactive_plot

In [None]:
# Render 120 views on a full 360-degree sweep of theta (phi = -30, radius = 4).
frames = []
for th in tqdm(np.linspace(0., 360., 120, endpoint=False)):
    c2w = pose_spherical(th, -30., 4.)
    rays_o, rays_d = get_rays(H, W, focal, c2w[:3,:4])
    rgb, depth, acc = render_rays(model, rays_o, rays_d, near=near, far=far, N_samples=N_samples)
    # Clamp to [0, 1] and convert to 8-bit pixels for the video writer.
    frames.append((255*np.clip(rgb,0,1)).astype(np.uint8))

import imageio
# Use a dedicated name for the output path: `f` is the slider callback of the
# interactive viewer and must not be rebound to a string.
# NOTE(review): 'video.mp4' is also record_video's default filename, so this
# overwrites the local recording - confirm that is intended.
turntable_path = 'video.mp4'
imageio.mimwrite(turntable_path, frames, fps=30, quality=10)

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  for th in tqdm(np.linspace(0., 360., 120, endpoint=False)):


  0%|          | 0/120 [00:00<?, ?it/s]

  self.pid = _posixsubprocess.fork_exec(


In [None]:
from IPython.display import HTML
from base64 import b64encode
# Read the rendered video; the context manager closes the file handle.
with open('video.mp4', 'rb') as video_file:
    mp4 = video_file.read()
# Embed the bytes as a base64 data URL so the player needs no separate file.
data_url = "data:video/mp4;base64," + b64encode(mp4).decode()
HTML("""
<video width=400 controls autoplay loop>
      <source src="%s" type="video/mp4">
</video>
""" % data_url)

In [None]:
# Copy the rendered video to Google Drive.
# NOTE(review): record_video() defaults to the same filename and its output is
# copied into this Drive folder too, so this may overwrite the recording - confirm.
!cp video.mp4 "/content/drive/My Drive/video.mp4"
