# **Geometry-biased Transformers for Novel View Synthesis**

### In this notebook, you can evaluate our pre-trained models on sample data from the CO3D dataset.

Install the necessary libraries (this may take a few minutes because torch is re-installed).

In [None]:
# Install torch==1.12
!pip install torch==1.12.1+cu113 torchvision==0.13.1+cu113 torchaudio==0.12.1 --extra-index-url https://download.pytorch.org/whl/cu113

# Install PyTorch3D
import sys
import torch
assert torch.__version__.startswith("1.12.") and sys.platform.startswith("linux")
pyt_version_str=torch.__version__.split("+")[0].replace(".", "")
version_str="".join([f"py3{sys.version_info.minor}_cu", torch.version.cuda.replace(".",""), f"_pyt{pyt_version_str}"])
!pip install fvcore iopath
!pip install --no-index --no-cache-dir pytorch3d -f https://dl.fbaipublicfiles.com/pytorch3d/packaging/wheels/{version_str}/download.html

# Install other libraries
!pip install omegaconf hydra-core accelerate matplotlib plotly
!pip install --upgrade --no-cache-dir gdown

# Install co3d dataset
!pip install "git+https://github.com/facebookresearch/co3d.git"

# Setup GBT 
!git clone https://github.com/mayankgrwl97/gbt.git
%cd gbt

import os
import pickle
import numpy as np
from omegaconf import OmegaConf
from IPython.display import Image
from gbt.model.model import GeometryBiasedTransformer
from gbt.utils.image import (convert_tensor_to_images, save_images_as_gif,
                             stitch_images)

Download pre-trained models ([link](https://drive.google.com/file/d/1eHeNba_qlsM-7iEiIlZw9XH9-VXqem7T/view?usp=sharing)) and sample data ([link](https://drive.google.com/file/d/1cvvS3nYatHVO6S_7EC3pE07Jt8DArJqP/view?usp=sharing)). Alternatively, run the following cell to download using `gdown`.

In [None]:
!gdown 1eHeNba_qlsM-7iEiIlZw9XH9-VXqem7T
!unzip runs.zip
!rm runs.zip

!gdown 1cvvS3nYatHVO6S_7EC3pE07Jt8DArJqP
!unzip data.zip
!rm data.zip

Load pre-trained model

In [None]:
# Build the model from configs/cat_agnostic_gbt.yaml and restore the pre-trained weights.
cfg = OmegaConf.load('configs/cat_agnostic_gbt.yaml')
model = GeometryBiasedTransformer(cfg.model)
device = cfg.infer.device
# map_location="cpu": deserialize the checkpoint onto the CPU so loading does not depend on
# the device it was saved from; the weights are moved to `device` on the next line.
model.load_state_dict(torch.load(cfg.infer.load_path, map_location="cpu"))
# eval() switches the module to inference mode (e.g. disables dropout, if the model uses any).
model = model.to(device).eval()

Load sample data

In [None]:
# Read one pre-packaged sample batch from disk.
# NOTE: pickle can execute arbitrary code while loading; only open files you trust.
with open('data/134_15451_31119.pkl', 'rb') as pkl_file:
    batch = pickle.load(pkl_file)

# Unpack the batch. Image tensors are moved to the model's device; the camera
# entries are used exactly as stored in the pickle.
#   input_views   : (B, num_input_views, C, H, W)
#   input_cameras : 2-d list of cameras of shape (B, num_input_views)
#   query_img     : (B, num_query_views, C, H, W)
#   query_cameras : 2-d list of cameras of shape (B, num_query_views)
input_views, input_cameras, query_img, query_cameras = (
    batch["sparse_input_images"].to(device),
    batch["sparse_input_cameras"],
    batch["sparse_query_images"].to(device),
    batch["sparse_query_cameras"],
)

Synthesize novel views for multiple query cameras and visualize the results as a revolving GIF

In [None]:
# Render every query view of the first sample in the batch, stitch each prediction next to
# the input views and the ground truth, and collect the frames for an animated GIF.
num_query_views = query_img.shape[1]
stitched_images = []
image_size = tuple(query_img.shape[-2:])  # (H, W) of the query images

# Inference only: disable autograd so no computation graph is built or kept per frame.
with torch.no_grad():
    for q_idx in range(num_query_views):
        # Slicing with [:1] keeps the batch dimension while selecting the first sample.
        pred_img = model.infer(
            input_views=input_views[:1],
            input_cameras=input_cameras[:1],
            query_cameras=[query_cameras[0][q_idx]],
            image_size=image_size,
        )  # (1, H*W, 3)
        pred_img = pred_img.reshape(1, *image_size, 3)  # (1, H, W, 3)

        # (N, H, W, 3) - [0, 1]
        input_image, query_image, pred_image = convert_tensor_to_images(
            input_views[0], query_img[:1, q_idx], pred_img)

        stitched_image = stitch_images(input_image, query_image, pred_image)
        # Scale from [0, 1] to 8-bit pixel values for the GIF writer.
        stitched_image = (stitched_image * 255).astype(np.uint8)
        stitched_images.append(stitched_image)

save_images_as_gif(save_path="predicted.gif", images=stitched_images, fps=15)

print("        Input View 1                  Input View 2                  Input View 3                  Ground Truth                   Predicted          ")
# Read the GIF through a context manager so the file handle is closed deterministically.
with open("predicted.gif", 'rb') as gif_file:
    gif_bytes = gif_file.read()
# Last expression of the cell: rendered inline by the notebook.
Image(gif_bytes)