# Inference

In [28]:
import diffusers
from transformers import CLIPVisionModelWithProjection, CLIPImageProcessor
import torch
import cv2
import numpy as np
import os
import random
from tqdm import tqdm

In [21]:
# Load the individual components saved under /mnt/data/sonia/svd/outputs.
feature_extractor = CLIPImageProcessor.from_pretrained('/mnt/data/sonia/svd/outputs/feature_extractor')
image_encoder = CLIPVisionModelWithProjection.from_pretrained('/mnt/data/sonia/svd/outputs/image_encoder')
scheduler = diffusers.DDPMScheduler.from_pretrained("/mnt/data/sonia/svd/outputs/scheduler")
unet = diffusers.UNetSpatioTemporalConditionModel.from_pretrained('/mnt/data/sonia/svd/outputs/unet')
vae = diffusers.AutoencoderKLTemporalDecoder.from_pretrained('/mnt/data/sonia/svd/outputs/vae')

# Build the img2vid pipeline from the hub checkpoint, overriding the image
# encoder, VAE and UNet with the locally loaded ones.
# NOTE(review): `feature_extractor` and `scheduler` are loaded above but are not
# passed in here, so the pipeline keeps the hub checkpoint's versions of those
# two components -- confirm this is intended.
pipeline = diffusers.StableVideoDiffusionPipeline.from_pretrained(
    'stabilityai/stable-video-diffusion-img2vid',
    image_encoder=image_encoder,
    vae=vae,
    unet=unet,
)

# The generation size is taken from the demo image. Load it here so this cell
# does not rely on `img` left over from a cell that appears later in the notebook.
img = diffusers.utils.load_image('demo.png')
width, height = img.size
num_frames = 8
n = 500 # desired eval set size

Loading pipeline components...: 100%|██████████| 5/5 [00:00<00:00, 239.48it/s]


In [None]:
# just to test..
# Smoke test: generate one clip conditioned on demo.png and write it to sample.mp4.
img = diffusers.utils.load_image('demo.png')
# Generate at the demo image's own resolution; the writer below must use the same size.
width, height = img.size
video_frames = pipeline(
    img,
    height=height,
    width=width,
    num_frames=num_frames,
    decode_chunk_size=8,
    motion_bucket_id=127,
    fps=7,
    noise_aug_strength=0.02,
).frames[0]

fourcc = cv2.VideoWriter_fourcc(*'mp4v')
video = cv2.VideoWriter('sample.mp4', fourcc, 1.5, (width, height))
try:
    for frame in video_frames:
        # The pipeline yields RGB PIL images, while OpenCV writes frames as BGR;
        # without the conversion the red and blue channels end up swapped.
        video.write(cv2.cvtColor(np.array(frame), cv2.COLOR_RGB2BGR))
finally:
    # Always finalize the container, even if a frame fails to encode.
    video.release()

100%|██████████| 25/25 [01:32<00:00,  3.71s/it]


In [None]:
# Generate synthetic clips for a random subset of examples and save them under <datadir>/evals.
datadir = '/mnt/data/sonia/occetc/vars3-25.04.05/'
outputdir = os.path.join(datadir, 'evals')
os.makedirs(outputdir, exist_ok=True)

# Candidates are the example subdirectories of datadir. The output folder lives
# inside datadir as well, so it (and any stray files) must be excluded.
allexamples = [
    name for name in sorted(os.listdir(datadir))
    if name != 'evals' and os.path.isdir(os.path.join(datadir, name))
]
# Sample from the full candidate list; cap at its size so a small dataset
# does not make random.sample raise ValueError.
examples = random.sample(allexamples, min(n, len(allexamples)))
# Record which examples were picked so the evaluation can be matched up later.
with open(os.path.join(outputdir, 'examples.txt'), 'w') as f:
    f.write('\n'.join(examples))
preds = []

for example in tqdm(examples):
    # Only frame 0 is needed: it is the conditioning image for the pipeline.
    first_frame = diffusers.utils.load_image(os.path.join(datadir, example, '0.png'))

    sample = pipeline(
        first_frame,
        height=height,
        width=width,
        num_frames=num_frames,
        decode_chunk_size=8,
        motion_bucket_id=127,
        fps=7,
        noise_aug_strength=0.02,
    ).frames[0]
    preds.append(sample)

    # Mirror the input layout: one folder per example, frames named 0.png, 1.png, ...
    exampledir = os.path.join(outputdir, example)
    os.makedirs(exampledir, exist_ok=True)
    for i, frame in enumerate(sample):
        frame.save(os.path.join(exampledir, f'{i}.png'))

  0%|          | 0/25 [00:03<?, ?it/s]]
  0%|          | 0/500 [00:06<?, ?it/s]


KeyboardInterrupt: 

# Eval

In [6]:
import os 
import numpy as np 
from PIL import Image as im
# Root folder of the training data.
trainpath = '/mnt/data/sonia/occetc/vars3-25.04.05'
# Synthetic samples sit in the 'evals' subfolder of the training root.
synthpath = os.path.join(trainpath, 'evals')

In [10]:
# Read every training storm into an array stacked along a new leading frame axis.
train = []
for name in sorted(os.listdir(trainpath)):
    stormpath = os.path.join(trainpath, name)
    # Skip the 'evals' folder and anything that is not a directory.
    if name == 'evals' or not os.path.isdir(stormpath):
        continue
    # Frames are stored as 0.png .. 7.png inside each storm folder.
    frames = [np.array(im.open(os.path.join(stormpath, f'{i}.png'))) for i in range(8)]
    train.append(np.stack(frames))

In [11]:
# Read every synthetic storm into an array stacked along a new leading frame axis.
synth = []
for name in sorted(os.listdir(synthpath)):
    stormpath = os.path.join(synthpath, name)
    # Only directories hold frames; other entries in the folder are ignored.
    if os.path.isdir(stormpath):
        # Frames are stored as 0.png .. 7.png inside each storm folder.
        frames = [np.array(im.open(os.path.join(stormpath, f'{i}.png'))) for i in range(8)]
        synth.append(np.stack(frames))

In [12]:
synth[0].shape

(8, 32, 32, 3)

Univariate distributions

Correlations