### Setup

Run `setup.sh` once before executing this notebook.

In [None]:
# Setup torch and other prerequisites
%cd ..
!python -m pip install --upgrade pip
!python -m pip install gdown ipykernel ipywidgets
!python -m pip install torch torchvision torchaudio torchtext torchdata --index-url https://download.pytorch.org/whl/cu118

# Update submodules
!git submodule update --init --recursive

# Create directories
!mkdir data/checkpoints
!mkdir data/dataset/nvdiffrec/original
!mkdir data/dataset/nvdiffrec/upscaled
!mkdir data/dataset/nvdiffrec/train

# Install requirements
!python -m pip install -r requirements.txt

# Setup RealESRGAN
%cd ext/Real-ESRGAN
!python -m pip install -r requirements.txt
!python setup.py develop
%cd ../..

# Setup GroundingDINO + SegmentAnything
%cd ext/Grounded-Segment-Anything
!python -m pip install -q -r requirements.txt
%cd GroundingDINO
!python -m pip install -q .
%cd ../segment_anything
!python -m pip install -q .
%cd ../../../..

### Imports

In [None]:
# Enter the scripts directory. The path starts with the repository folder name
# because the setup cell's final `%cd ../../../..` leaves the kernel one level
# above the directory it worked in.
%cd marching-waifu-x/scripts

In [None]:
import sys

sys.path.append("..")

import gc
import glob
import json
import os

import cv2
import PIL.Image
from IPython.display import Image

from src.utils.image_wrapper import *

### ControlVideo

In [None]:
def save_gif(fps, images, scale, gif_path):
    """Write `images` to `gif_path` as an animated GIF played at `fps`.

    Args:
        fps: Playback rate in frames per second; must be positive.
        images: Non-empty sequence of frames accepted by `image_wrapper`.
        scale: Factor passed to `image_wrapper(...).scale()` for every frame.
        gif_path: Destination path of the GIF file.

    Raises:
        ValueError: If `fps` is not positive or `images` is empty.
    """
    if fps <= 0:
        raise ValueError(f"fps must be positive, got {fps!r}")

    frames = [image_wrapper(image).scale(scale).to_pil() for image in images]
    if not frames:
        raise ValueError("save_gif needs at least one image")

    frames[0].save(
        gif_path,
        save_all=True,
        append_images=frames[1:],
        optimize=False,
        # Pillow expects the display time of EACH frame in milliseconds,
        # so the value depends only on the frame rate, not the frame count.
        duration=1000.0 / fps,
    )


def display_gif(gif_path):
    """Show the image file stored at `gif_path` in the notebook output.

    Args:
        gif_path: Path of the GIF file to read and display.
    """
    # Read through a context manager so the file handle is closed right away
    # instead of lingering until garbage collection.
    with open(gif_path, "rb") as gif_file:
        gif_bytes = gif_file.read()

    # NOTE(review): the bytes are a GIF but the declared format is "png";
    # kept as-is — presumably intentional for frontend compatibility, confirm
    # before switching to "gif".
    display(Image(data=gif_bytes, format="png"))

In [None]:
# Parse the ControlVideo settings that the inference script is also given
with open("inference_controlvideo.json", "r") as controlvideo_file:
    controlvideo_conf = json.loads(controlvideo_file.read())

# Parse the Real-ESRGAN settings; later cells read its paths.out_path entry
with open("inference_realesrgan.json", "r") as realesrgan_file:
    realesrgan_conf = json.loads(realesrgan_file.read())

In [None]:
# Run the ControlVideo inference script, pointing it at the same settings file
# that was parsed into `controlvideo_conf`.
!python inference_controlvideo.py --settings_path "inference_controlvideo.json" 

In [None]:
# Run the Real-ESRGAN inference script, pointing it at the same settings file
# that was parsed into `realesrgan_conf`.
!python inference_realesrgan.py --settings_path "inference_realesrgan.json"

In [None]:
# Collect the PNG frames from the Real-ESRGAN output directory in filename order
frame_paths = glob.glob(os.path.join(realesrgan_conf["paths"]["out_path"], "*.png"))
frame_paths.sort()
images = []
for frame_path in frame_paths:
    images.append(PIL.Image.open(frame_path))

# Write a 0.25x-scaled preview (first argument is the fps), then free the frames
controlvideo_gif = "../ipynb/controlvideo_0.25x.gif"
save_gif(10.0, images, 0.25, controlvideo_gif)
del images
gc.collect()

display_gif(controlvideo_gif)

### GroundingDINO + SegmentAnything

In [None]:
# Parse the GroundedSAM settings. json.load reads from the open file object;
# json.loads only accepts str/bytes and raises TypeError when given a file.
with open("inference_groundedsam.json", "r") as f:
    groundedsam_conf = json.load(f)

In [None]:
# Run the GroundedSAM inference script on its settings file.
# NOTE(review): the other inference scripts in this notebook are invoked with
# `--settings_path`; confirm `--settings_file` is the flag this script expects.
!python inference_groundedsam.py --settings_file inference_groundedsam.json

In [None]:
# Frames from the Real-ESRGAN output directory, in filename order
frame_paths = sorted(
    glob.glob(os.path.join(realesrgan_conf["paths"]["out_path"], "*.png"))
)
images = [PIL.Image.open(frame_path) for frame_path in frame_paths]

# Masked frames: PNGs in the GroundedSAM output directory whose names start
# with the configured file prefix, in filename order
masked_pattern = os.path.join(
    groundedsam_conf["paths"]["out_path"],
    f"{groundedsam_conf['paths']['file_prefix']}*.png",
)
masked_paths = sorted(glob.glob(masked_pattern))
masked_images = [PIL.Image.open(masked_path) for masked_path in masked_paths]

# Pair every frame with the masked frame at the same index
image_strips = []
for index in range(len(images)):
    strip = image_wrapper(images[index], "pil")
    mask = image_wrapper(masked_images[index], "pil")
    # NOTE(review): the return value is discarded, so this assumes
    # concatenate() modifies `strip` in place — confirm in image_wrapper.
    strip.concatenate(mask)
    image_strips.append(strip.to_pil())

# Write a 0.25x-scaled preview (first argument is the fps)
groundedsam_gif = "../ipynb/groundedsam_0.25x.gif"
save_gif(10.0, image_strips, 0.25, groundedsam_gif)

# Drop the loaded frames before displaying the result
del images
del masked_images
del image_strips
gc.collect()

display_gif(groundedsam_gif)