In [1]:
import os
os.environ['CUDA_VISIBLE_DEVICES'] = '1'
import sys

import json

import torch
from torchvision.transforms.functional import to_tensor, to_pil_image, resize
from torchvision.transforms.functional import InterpolationMode

import zipfile

import numpy as np
import pandas as pd

from PIL import Image
from matplotlib import pyplot
from torchvision.transforms.functional import resize, center_crop

from tqdm.auto import tqdm

In [2]:
# Number of images stacked into one tensor and encoded by the VAE per forward pass.
BATCH_SIZE = 16

In [3]:
from diffusers import AutoPipelineForText2Image

model_id = 'runwayml/stable-diffusion-v1-5'

# Build the full text-to-image pipeline in half precision. With
# local_files_only=True the weights are expected to be available locally.
stable_diffusion = AutoPipelineForText2Image.from_pretrained(
    model_id,
    local_files_only=True,
    torch_dtype=torch.float16,
    variant="fp16",
)

# Only the VAE is put in eval mode and moved to the GPU here; the trailing
# semicolon suppresses the module repr in the notebook output.
stable_diffusion.vae.eval().cuda();

Loading pipeline components...:   0%|          | 0/7 [00:00<?, ?it/s]

`text_config_dict` is provided which will be used to initialize `CLIPTextConfig`. The value `text_config["id2label"]` will be overriden.


# from jpg

In [14]:
def worker(dataset_name):
    """Encode every image of one dataset with the Stable Diffusion VAE.

    Images are read from ``ROOT/image/<dataset_name>/<subdir>/<file>`` and the
    result is written to ``./data/<dataset_name>/vae_latents.npz`` with two
    arrays: ``latents`` (mean of the VAE latent distribution, one row per
    image) and ``file_names`` (``<subdir>/<file>`` paths, in the same order).

    Sub-directories and files are visited in sorted order so that repeated
    runs produce identically ordered outputs. Entries directly under the
    dataset directory that are not directories are ignored.

    Args:
        dataset_name: Name of the dataset directory under ``ROOT/image``.

    Raises:
        ValueError: If no image files are found for the dataset.
    """
    image_root = os.path.join(ROOT, 'image', dataset_name)

    file_names = list()
    for dname in sorted(os.listdir(image_root)):
        subdir = os.path.join(image_root, dname)
        # Skip stray files that sit next to the per-group directories.
        if not os.path.isdir(subdir):
            continue
        for fname in sorted(os.listdir(subdir)):
            file_names.append(os.path.join(dname, fname))

    if not file_names:
        raise ValueError(f'no images found under {image_root}')

    # Create the output directory up front: failing on a missing directory
    # only after all images were encoded would waste the whole run.
    output_dir = os.path.join('./data', dataset_name)
    os.makedirs(output_dir, exist_ok=True)

    latents = list()

    for i in tqdm(range(0, len(file_names), BATCH_SIZE), leave=False):

        with torch.no_grad():

            images = list()
            for file_name in file_names[i:i+BATCH_SIZE]:
                # Close the file handle as soon as the pixels are decoded.
                with Image.open(os.path.join(image_root, file_name)) as handle:
                    image = handle.convert('RGB')
                images.append(to_tensor(image).half().cuda())
            # torch.stack requires every image in the batch to have the same size.
            images = torch.stack(images, dim=0)

            # Rescale the to_tensor output from [0, 1] to [-1, 1].
            images = (images - 0.5) * 2.0
            # Use the mean of the latent distribution instead of sampling from it.
            latent = stable_diffusion.vae.encode(images).latent_dist.mean

            latents.append(latent.detach().cpu().numpy())

    latents = np.concatenate(latents, axis=0)

    np.savez(
        os.path.join(output_dir, 'vae_latents.npz'),
        latents=latents,
        file_names=np.array(file_names)
    )

In [15]:
# Base directory of the image pipeline output; the dataset images are read
# from the 'image/<dataset name>/' sub-tree below it.
ROOT = '../kcg-ml-image-pipeline/output/dataset/'

# Dataset directories to encode, one NPZ file per entry.
DATASETs = ['environmental', 'waifu', 'propaganda-poster']

In [16]:
# Run the single-argument (dataset directory) worker once per entry in DATASETs.
for dataset_name in DATASETs:
    worker(dataset_name)

  0%|          | 0/1156 [00:00<?, ?it/s]

# from ZIP (latents written to NPZ)

In [17]:
def worker(input_path, output_path):
    """Encode every ``.jpg`` member of a ZIP archive with the Stable Diffusion VAE.

    The result is written with ``np.savez`` to ``output_path`` and holds two
    arrays: ``latents`` (mean of the VAE latent distribution, one row per
    image) and ``file_names`` (archive member names, in the same order).

    Args:
        input_path: Path of the ZIP archive to read images from.
        output_path: Path of the NPZ file to write.

    Raises:
        ValueError: If the archive contains no member ending in ``.jpg``.
    """
    # The context manager guarantees the archive is closed even when decoding
    # or encoding fails half-way through.
    with zipfile.ZipFile(input_path) as archive:

        file_names = [
            fname for fname in archive.namelist() if fname.endswith('.jpg')
        ]
        if not file_names:
            raise ValueError(f'no .jpg entries found in {input_path}')

        latents = list()

        for i in tqdm(range(0, len(file_names), BATCH_SIZE), leave=False):

            with torch.no_grad():

                images = list()
                for file_name in file_names[i:i+BATCH_SIZE]:
                    # convert() decodes the pixels, so the member handle can be
                    # closed right after it.
                    with archive.open(file_name) as handle:
                        image = Image.open(handle).convert('RGB')
                    images.append(to_tensor(image).half().cuda())
                # torch.stack requires every image in the batch to have the same size.
                images = torch.stack(images, dim=0)

                # Rescale the to_tensor output from [0, 1] to [-1, 1].
                images = (images - 0.5) * 2.0
                # Use the mean of the latent distribution instead of sampling from it.
                latent = stable_diffusion.vae.encode(images).latent_dist.mean

                latents.append(latent.detach().cpu().numpy())

    latents = np.concatenate(latents, axis=0)

    np.savez(
        output_path,
        latents=latents,
        file_names=np.array(file_names)
    )

In [18]:
# Archives of generated images to encode; they differ only in the numeric
# suffix (presumably a MMDD date code - confirm against the data source).
file_paths = [
    f'./generated/generated-{code}.zip'
    for code in ('1116', '1117', '1118', '1120', '1122', '1123', '1125', '1126')
]

In [None]:
# Encode each archive with the (input_path, output_path) worker; the NPZ is
# written next to the archive, with '.zip' replaced by '-vae_latents.npz'.
for file_path in file_paths:
    worker(file_path, file_path.replace('.zip', '-vae_latents.npz'))

  0%|          | 0/580 [00:00<?, ?it/s]

  0%|          | 0/1250 [00:00<?, ?it/s]

In [20]:
# Second set of archives; these carry an '-image' suffix before the extension.
file_paths = [
    f'./generated/generated-{code}-image.zip'
    for code in ('1208', '1210', '1214')
]

In [None]:
# Encode each '-image.zip' archive with the (input_path, output_path) worker;
# the output name replaces '-image.zip' with '-vae_latents.npz'.
for file_path in file_paths:
    worker(file_path, file_path.replace('-image.zip', '-vae_latents.npz'))

# from jpg

In [4]:
def worker(root, file_names, output_path):
    """Encode a list of image files with the Stable Diffusion VAE.

    Each image is converted to RGB, resized with ``resize(image, size=512)``
    and center-cropped to 512x512 before encoding. Images that cannot be
    opened or preprocessed are skipped (best effort) and counted in a summary
    line printed at the end.

    The result is written with ``np.savez`` to ``output_path`` and holds two
    arrays of equal length: ``latents`` (mean of the VAE latent distribution)
    and ``file_names`` (the name of the image behind each latent row, i.e.
    only the images that were actually encoded).

    Args:
        root: Directory that the entries of ``file_names`` are relative to.
        file_names: Sequence of image file names to encode.
        output_path: Path of the NPZ file to write.

    Raises:
        ValueError: If ``file_names`` is empty or none of the images could be
            loaded.
    """
    if len(file_names) == 0:
        raise ValueError('file_names is empty; nothing to encode')

    latents = list()
    fnames = list()
    skipped = list()

    for i in tqdm(range(0, len(file_names), BATCH_SIZE), leave=False):

        images = list()
        batch_names = list()
        for file_name in file_names[i:i+BATCH_SIZE]:
            try:
                with Image.open(os.path.join(root, file_name)) as handle:
                    image = handle.convert('RGB')
                image = center_crop(resize(image, size=512), output_size=(512, 512))
                tensor = to_tensor(image)
            except Exception:
                # Best effort: an unreadable or corrupt image must not abort
                # the run. Exception (not a bare except) keeps Ctrl-C working.
                skipped.append(file_name)
                continue
            images.append(tensor.half().cuda())
            # Record the name only for images that made it into the batch so
            # that names and latent rows stay aligned.
            batch_names.append(file_name)

        # Every image of this batch failed to load: nothing to stack or encode.
        if not images:
            continue

        with torch.no_grad():
            images = torch.stack(images, dim=0)

            # Rescale the to_tensor output from [0, 1] to [-1, 1].
            images = (images - 0.5) * 2.0
            # Use the mean of the latent distribution instead of sampling from it.
            latent = stable_diffusion.vae.encode(images).latent_dist.mean

        latents.append(latent.detach().cpu().numpy())
        fnames.extend(batch_names)

    if skipped:
        print(f'skipped {len(skipped)} unreadable image(s) out of {len(file_names)}')

    if not latents:
        raise ValueError('none of the given images could be loaded')

    latents = np.concatenate(latents, axis=0)

    np.savez(
        output_path,
        latents=latents,
        file_names=np.array(fnames)
    )

In [27]:
ROOT = '../dataset/scrap/steam/'

sizes = pd.read_csv(os.path.join(ROOT, 'sizes.csv'))

# Keep images that are at least 512px on both sides, have an aspect ratio
# between 0.5 and 2.0, and pass the aesthetic / nsfw score thresholds.
file_names = sizes.query('width >= 512 and height >= 512 and (width / height) > 0.5 and (width / height) < 2.0 and aesthetic > 5. and nsfw < -2.')['file_name']
file_names = list(file_names + '.jpg')

# Load the existing latents file (presumably written by an earlier run) so
# that files already listed there are not encoded again. Loading it here
# keeps the cell runnable on a fresh kernel instead of relying on a leftover
# `npz` variable.
npz = np.load(os.path.join(ROOT, 'vae_latents.npz'))
# sorted() makes the processing order reproducible; set order is arbitrary.
file_names = sorted(set(file_names).difference(npz['file_names']))

worker(os.path.join(ROOT, 'screenshot'), file_names, os.path.join(ROOT, 'vae_latents-2.npz'))

  0%|          | 0/10728 [00:00<?, ?it/s]

In [None]:
ROOT = '../dataset/AVA/'

sizes = pd.read_csv(os.path.join(ROOT, 'sizes.csv'))

# Keep images that are at least 512px on both sides and close to square
# (aspect ratio strictly between 0.9 and 1.1).
file_names = list(
    sizes.query(
        'width >= 512 and height >= 512 '
        'and (width / height) > 0.9 and (width / height) < 1.1'
    )['file_name']
)

worker(
    os.path.join(ROOT, 'images'),
    file_names,
    os.path.join(ROOT, 'vae_latents.npz'),
)

In [25]:
ROOT = '../dataset/laion-art/'

# Keep rows that are at least 512px on both sides and close to square
# (aspect ratio strictly between 0.9 and 1.1).
sizes = pd.read_csv('../dataset/laion-art/selected.csv')
sizes = sizes.query(
    'WIDTH >= 512 and HEIGHT >= 512 '
    'and (WIDTH / HEIGHT) > 0.9 and (WIDTH / HEIGHT) < 1.1'
)

# Only encode rows whose '<index>.jpeg' file is actually present on disk.
file_names = list()
for index in sizes['index']:
    file_name = f'{index}.jpeg'
    if not os.path.exists(os.path.join(ROOT, 'images', file_name)):
        continue
    file_names.append(file_name)

worker(
    os.path.join(ROOT, 'images'),
    file_names,
    os.path.join(ROOT, 'vae_latents.npz'),
)

  0%|          | 0/9785 [00:00<?, ?it/s]