In [1]:
#@markdown Check type of GPU and VRAM available.
# Queries the GPU name, total memory and free memory and prints them as header-less CSV.
!nvidia-smi --query-gpu=name,memory.total,memory.free --format=csv,noheader

Tesla T4, 15360 MiB, 15101 MiB



# FateZero: Fusing Attentions for Zero-shot Text-based Video Editing


[![Project Website](https://img.shields.io/badge/Project-Website-orange)](https://fate-zero-edit.github.io/)


In [1]:
#@title Install requirements

!git clone https://github.com/ChenyangQiQi/FateZero /content/FateZero

!apt install ffmpeg
!mkdir /content/FateZero/data/corgi/
!ffmpeg -hide_banner -loglevel error -i corgi.mp4 -vf scale="512:512" -vf fps=25 /content/FateZero/data/corgi/%05d.png

%cd /content/FateZero
# %pip install -r requirements.txt
%pip install -q -U --pre triton
%pip install -q diffusers[torch]==0.11.1 transformers==4.26.0 bitsandbytes==0.35.4 \
decord accelerate omegaconf einops ftfy gradio imageio-ffmpeg xformers

Cloning into '/content/FateZero'...
remote: Enumerating objects: 1534, done.[K
remote: Counting objects: 100% (438/438), done.[K
remote: Compressing objects: 100% (200/200), done.[K
remote: Total 1534 (delta 317), reused 341 (delta 235), pack-reused 1096[K
Receiving objects: 100% (1534/1534), 197.52 MiB | 20.99 MiB/s, done.
Resolving deltas: 100% (491/491), done.
Reading package lists... Done
Building dependency tree       
Reading state information... Done
ffmpeg is already the newest version (7:4.2.7-0ubuntu0.1).
0 upgraded, 0 newly installed, 0 to remove and 24 not upgraded.
[1;31mOnly '-vf fps=25' read, ignoring remaining -vf options: Use ',' to separate filters
[0m/content/FateZero


In [2]:
#@title Download pretrained model

#@markdown Name/Path of the initial model.
MODEL_NAME = "stabilityai/stable-diffusion-2-base" #@param {type:"string"}

#@markdown If model should be download from a remote repo. Untick it if the model is loaded from a local path.
download_pretrained_model = True #@param {type:"boolean"}
if download_pretrained_model:
    !git lfs install
    !git clone https://huggingface.co/$MODEL_NAME ckpt/$MODEL_NAME
    MODEL_NAME = f"./ckpt/{MODEL_NAME}"
print(f"[*] MODEL_NAME={MODEL_NAME}")

Updated git hooks.
Git LFS initialized.
Cloning into 'ckpt/stabilityai/stable-diffusion-2-base'...
remote: Enumerating objects: 134, done.[K
remote: Counting objects: 100% (125/125), done.[K
remote: Compressing objects: 100% (71/71), done.[K
remote: Total 134 (delta 48), reused 125 (delta 48), pack-reused 9[K
Receiving objects: 100% (134/134), 531.92 KiB | 1.02 MiB/s, done.
Resolving deltas: 100% (48/48), done.
Filtering content: 100% (8/8), 11.32 GiB | 36.23 MiB/s, done.
Encountered 2 file(s) that may not have been copied correctly on Windows:
	512-base-ema.safetensors
	512-base-ema.ckpt

See: `git lfs help smudge` for more details.
[*] MODEL_NAME=./ckpt/stabilityai/stable-diffusion-2-base


# **Usage**


## FateZero Edit with low resource cost


In [3]:
#@markdown Edit config

#@markdown More details of the configuration will be given soon.

from omegaconf import OmegaConf

VIDEO_DIR = 'data/corgi' #@param {type:"string"}

# The video id is the last path component of VIDEO_DIR. Trailing slashes are stripped
# first so that e.g. 'data/corgi/' does not produce an empty id (and 'config/.yaml').
VIDEO_ID = VIDEO_DIR.rstrip('/').split('/')[-1]

RESULT_DIR = 'result/' + VIDEO_ID

CONFIG_NAME = "config/" + VIDEO_ID + ".yaml"

source_prompt = "A corgi dog standing on back legs" #@param {type:"string"}
edit_prompt = "A lion standing on back legs"  #@param {type:"string"}
EMPHYSIS_WORD = "" #@param {type:"string"}
EMPHYSIS_VALUE = 10 #@param {type:"number"}
video_length = 8 #@param {type:"number"}
INVERSION_STEP = 8 #@param {type:"number"}
REPLACE_STRENGTH = 0.8 #@param {type:"slider", min:0, max:1, step:0.1}
STORE_ATTENTION_ON_disk = True #@param {type:"boolean"}
# NOTE(review): width/height are not referenced by the config built below.
width = 512
height = 512

# Configuration written to CONFIG_NAME and later passed to test_fatezero.py via --config.
config = {
    "pretrained_model_path": MODEL_NAME,
    "logdir": RESULT_DIR,
    "dataset_config": {
        "path": VIDEO_DIR,
        "prompt": source_prompt,
        "n_sample_frame": video_length,
        "sampling_rate": 1,
        "stride": 80,
        "offset": {
            "left": 0,
            "right": 0,
            "top": 0,
            "bottom": 0,
        },
    },
    "editing_config": {
        "use_invertion_latents": True,
        "use_inversion_attention": True,
        "guidance_scale": 7.5,
        "editing_prompts": [
            source_prompt,
            edit_prompt,
        ],
        # One entry per editing prompt, in the same order as "editing_prompts".
        # The replace steps are driven by the REPLACE_STRENGTH slider (previously
        # hard-coded to 0.8, which left the slider without any effect).
        "p2p_config": [
            {
                "cross_replace_steps": {
                    "default_": REPLACE_STRENGTH,
                },
                "self_replace_steps": REPLACE_STRENGTH,
                "blend_self_attention": True,
                "blend_th": [2, 2],
                "is_replace_controller": False,
            },
            {
                "cross_replace_steps": {
                    "default_": REPLACE_STRENGTH,
                },
                "self_replace_steps": REPLACE_STRENGTH,
                "eq_params": {
                    "words": [EMPHYSIS_WORD],
                    "values": [EMPHYSIS_VALUE],
                },
                "use_inversion_attention": True,
                "is_replace_controller": False,
            },
        ],
        # OmegaConf relative interpolation: resolves to dataset_config.n_sample_frame.
        "clip_length": "${..dataset_config.n_sample_frame}",
        "sample_seeds": [0],
        "num_inference_steps": INVERSION_STEP,
        "prompt2prompt_edit": True,
    },
    "disk_store": STORE_ATTENTION_ON_disk,
    "model_config": {
        "lora": 160,
        "SparseCausalAttention_index": ['mid'],
        "least_sc_channel": 640,
    },
    "test_pipeline_config": {
        "target": "video_diffusion.pipelines.p2p_ddim_spatial_temporal.P2pDDIMSpatioTemporalPipeline",
        # NOTE(review): this config defines no "validation_sample_logger" key, so this
        # interpolation cannot be resolved from this file alone - confirm how
        # test_fatezero.py consumes it before changing it.
        "num_inference_steps": "${..validation_sample_logger.num_inference_steps}",
    },
    "seed": 0,
}

OmegaConf.save(config, CONFIG_NAME)
print('save new configue to ', CONFIG_NAME)

save new configue to  config/corgi.yaml


In [4]:
# Destructive: recursively removes everything under result/ (relative to the current directory).
!rm -Rf result/*

In [5]:
# Run test_fatezero.py through `accelerate launch`; IPython substitutes the Python
# variable CONFIG_NAME into the command line before it is handed to the shell.
!accelerate launch test_fatezero.py --config=$CONFIG_NAME

2023-05-08 12:54:10.667566: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
[2;36m           [0m         `accelerate launch` and had defaults used      [2m             [0m
[2;36m           [0m         instead:                                       [2m             [0m
[2;36m           [0m                 `--num_processes` was set to a value   [2m             [0m
[2;36m           [0m         of `[1;36m1[0m`                                         [2m             [0m
[2;36m           [0m                 `--num_machines` was set to a value of [2m             [0m
[2;36m           [0m         `[1;36m1[0m`                                            [2m             [0m
[2;36m           [0m                 `--mixed_precision` was set

### Show the results

In [7]:
from IPython.display import HTML, display
from base64 import b64encode
import os, sys
import glob


def show_latest_video(pattern, width, label):
    """Embed the last (in sorted order) MP4 matching ``pattern`` as an inline HTML5 video.

    Args:
        pattern: Glob pattern of candidate video files.
        width: Width attribute of the rendered <video> element.
        label: Prefix of the message written to stderr along with the chosen path.

    Returns:
        The path of the video that was displayed.

    Raises:
        FileNotFoundError: If no file matches ``pattern``.
    """
    matches = sorted(glob.glob(pattern))
    if not matches:
        # Fail with an actionable message instead of a bare IndexError from [-1].
        raise FileNotFoundError(
            'No video matches {!r}; check that the editing run completed.'.format(pattern)
        )
    video_path = matches[-1]

    # Read through a context manager so the file handle is closed right away.
    with open(video_path, 'rb') as video_file:
        data_url = "data:video/mp4;base64," + b64encode(video_file.read()).decode()

    print('{}: {}'.format(label, video_path), file=sys.stderr)
    display(HTML("""
  <video width=%d controls>
        <source src="%s" type="video/mp4">
  </video>
  """ % (width, data_url)))
    return video_path


# Edited video, followed by the attention visualisation written next to it.
mp4_name = show_latest_video('./result/*/sample/step_0.mp4', 512, 'Display animation')
mp4_name_atten = show_latest_video('./result/*/sample/step_0atten.mp4', 1024, 'Display animation attention')

Display animation: ./result/corgi_230508-125424/sample/step_0.mp4


Display animation attention: ./result/corgi_230508-125424/sample/step_0atten.mp4
