# ControlVideo

### Installation

Only for Google Colab

In [None]:
# Clone the repository together with its git submodules, then make the fresh
# checkout the working directory for the cells that follow.
!git clone --recurse-submodules https://github.com/rossiyareich/marching-waifu-x.git
%cd marching-waifu-x

Only for local install (run `setup.sh` or `setup.bat` and `huggingface-cli login` beforehand!)

In [None]:
# Step up one directory level (local installs only).
# NOTE(review): presumably the notebook is launched from a subdirectory of the
# repository root, so this lands in the root -- confirm.
%cd ../

Project setup

In [None]:
!nvidia-smi

# Install requirements
!python -m pip install -r requirements.txt
!python -m pip install -r ext/Real-ESRGAN/requirements.txt

# Create directories
!mkdir data/dataset/nerf/original/
!mkdir data/dataset/nerf/train/

# Install local packages
%cd ext/Real-ESRGAN/
!python setup.py develop
%cd ../../

In [None]:
# Work from scripts/: the "../data/..." paths in the configuration cell and the
# inference scripts launched with `!python` below are resolved relative to it.
%cd scripts

In [None]:
import sys

# Make the parent directory importable (the `src` package used further down is
# imported as `src....`). Guarded so re-running the cell does not keep adding
# duplicate entries to sys.path.
if ".." not in sys.path:
    sys.path.append("..")

In [None]:
# Interactive Hugging Face Hub login for this notebook session.
# NOTE(review): presumably required to download the model repositories named in
# the configuration cell -- confirm whether they are gated/private.
from huggingface_hub import notebook_login
notebook_login()

### Configuration

In [None]:
#@title Inference configuration

#@markdown ###**ControlVideo paths**
controlvideo_out_path = "../data/dataset/nerf/original/"  #@param {type:"string"}
controlvideo_sd_repo = "rossiyareich/Nabylon-v1.0-fp16"  #@param {type:"string"}
controlvideo_vae_repo = "stabilityai/sd-vae-ft-mse"  #@param {type:"string"}
controlvideo_controlnet_repo = "lllyasviel/control_v11p_sd15_openpose"  #@param {type:"string"}
controlvideo_ifnet_path = "../data/checkpoints/flownet.pkl"  #@param {type:"string"}
controlvideo_cache_dir = "../data/checkpoints/"  #@param {type:"string"}
#@markdown ###**ControlVideo**
controlvideo_prompt = "(masterpiece, best quality)+, 1girl, white hoodie, earmuffs, leggings, white scarf, black gloves, white socks, short blue hair, blue eyes, bangs"  #@param {type:"string"}
controlvideo_negative_prompt = "EasyNegative, (worst quality, low quality, logo, text, watermark, username, nsfw), inaccurate hands and fingers"  #@param {type:"string"}
controlvideo_textual_inversion_path = "../data/embeddings/"  #@param {type:"string"}
controlvideo_controlnet_conditioning_path = "../data/dataset/conditioning/"  #@param {type:"string"}
controlvideo_video_length = 15  #@param {type:"slider", min:1, max:250, step:1}
controlvideo_num_inference_steps = 50  #@param {type:"slider", min:1, max:200, step:1}
controlvideo_guidance_scale = 10  #@param {type:"slider", min:1, max:50, step:0.5}
# Name must match the `$controlvideo_smoother_steps` reference in the ControlVideo inference cell.
controlvideo_smoother_steps = "19,20"  #@param {type:"string"}
controlvideo_window_size = None  #@param {type:"raw"}
controlvideo_controlnet_conditioning_scale = 1.0  #@param {type:"slider", min:0, max:1, step:0.05}
controlvideo_seed = None  #@param {type:"raw"}

#@markdown ###**RealESRGAN**
# RealESRGAN reads its input from the directory ControlVideo writes its output to.
realesrgan_in_path = controlvideo_out_path
realesrgan_out_path = "../data/dataset/nerf/train/"  #@param {type:"string"}
realesrgan_outscale = 4.0  #@param {type:"slider", min:1, max:100, step:0.01}
realesrgan_tile = 192  #@param {type:"slider", min:0, max:10000, step:1}
realesrgan_tile_pad = 16  #@param {type:"slider", min:0, max:10000, step:1}
realesrgan_pre_pad = 16  #@param {type:"slider", min:0, max:10000, step:1}
realesrgan_face_enhance = True  #@param {type:"boolean"}
realesrgan_fp32 = False  #@param {type:"boolean"}
realesrgan_gpu_id = 0  #@param {type:"integer"}

### Inference

In [None]:
import glob
import os
import pathlib
import PIL.Image
from src.utils.image_wrapper import *

def load_images():
    """Load the generated frames and their ControlNet conditioning images.

    Every ``*.png`` file in ``controlvideo_controlnet_conditioning_path`` is
    paired with the PNG of the same file name in ``controlvideo_out_path``.
    Pairs are collected in sorted (lexicographic) path order.

    Returns:
        tuple: ``(video_frames, controlnet_conditions)`` -- two lists of
        ``PIL.Image.Image`` of equal length, where index ``i`` of both lists
        refers to the same file name.

    Raises:
        FileNotFoundError: If a conditioning image has no same-named frame in
            ``controlvideo_out_path``.
    """
    video_frames = []
    controlnet_conditions = []
    pattern = os.path.join(controlvideo_controlnet_conditioning_path, "*.png")
    for filepath in sorted(glob.glob(pattern)):
        # The extension belongs to the file name itself; it must not be passed
        # to os.path.join as a separate path component.
        frame_name = pathlib.Path(filepath).stem + ".png"
        video_frame_path = os.path.join(controlvideo_out_path, frame_name)
        video_frames.append(PIL.Image.open(video_frame_path))
        controlnet_conditions.append(PIL.Image.open(filepath))
    return (video_frames, controlnet_conditions)

def display_at_index(index, video_frames, controlnet_conditions):
    """Show one generated frame joined with its conditioning image.

    Both images at ``index`` are wrapped with ``image_wrapper(..., "pil")``,
    the frame is concatenated with the conditioning image, and the result is
    converted back to PIL and rendered with ``display``.

    Args:
        index: Position of the pair to show in both lists.
        video_frames: Sequence of generated frames (PIL images).
        controlnet_conditions: Sequence of conditioning images (PIL images),
            aligned with ``video_frames``.
    """
    frame = image_wrapper(video_frames[index], "pil")
    condition = image_wrapper(controlnet_conditions[index], "pil")
    # NOTE(review): the layout produced by concatenate() (side by side vs.
    # stacked) is defined in src.utils.image_wrapper -- not visible here.
    combined = frame.concatenate(condition)
    display(combined.to_pil())

In [None]:
!python inference_controlvideo_py --out_path $controlvideo_out_path --sd_repo $controlvideo_sd_repo --vae_repo $controlvideo_vae_repo --controlnet_repo $controlvideo_controlnet_repo --ifnet_path $controlvideo_ifnet_path --cache_dir $controlvideo_cache_dir --prompt $controlvideo_prompt --negative_prompt $controlvideo_negative_prompt --textual_inversion_path $controlvideo_textual_inversion_path --controlnet_conditioning_path $controlvideo_controlnet_conditioning_path --video_length $controlvideo_video_length --num_inference_steps $controlvideo_num_inference_steps --guidance_scale $controlvideo_guidance_scale --smoother_steps $controlvideo_smoother_steps --window_size $controlvideo_window_size --controlnet_conditioning_scale $controlvideo_controlnet_conditioning_scale --seed $controlvideo_seed

In [None]:
import time
from IPython.display import clear_output

# Seconds each frame/conditioning pair stays on screen
display_interval = 1

video_frames, controlnet_conditions = load_images()
length = len(video_frames)

# Play the pairs back one after another as a simple slideshow.
for i in range(length):
    is_last_frame = i >= length - 1
    display_at_index(i, video_frames, controlnet_conditions)
    time.sleep(display_interval)
    if not is_last_frame:
        # wait=True defers clearing until the next output arrives; the final
        # frame is never cleared, so it stays visible when the loop ends.
        clear_output(wait=True)

In [None]:
!python inference_realesrgan_py --in_path $realesrgan_in_path --out_path $realesrgan_out_path --outscale $realesrgan_outscale --tile_pad $realesrgan_tile_pad --pre_pad $realesrgan_pre_pad --face_enhance $realesrgan_face_enhance --fp32 $realesrgan_fp32 --gpu_id $realesrgan_gpu_id