### 1. Define ImageBind model

In [15]:
# Standard Libraries
import os
import json
import numpy as np
import matplotlib.pyplot as plt
from tqdm.notebook import tqdm
import json
import requests

# PyTorch and Related Libraries
import torch
import torchvision.models as models
import torchvision.transforms as transforms
from torchvision.utils import save_image
from PIL import Image
from transformers import ViTFeatureExtractor, ViTForImageClassification

# Custom Modules
import image_bind.data as data
from image_bind.models import imagebind_model
from image_bind.models.imagebind_model import ModalityType
from ldm.models.diffusion.ddpm import ImageEmbeddingConditionedLatentDiffusion
from ldm.models.diffusion.ddim import DDIMSampler

# Configuration Management
from omegaconf import OmegaConf
from easydict import EasyDict

# Ensure your models and data paths are correctly set up
# and any necessary initialization for custom modules is done here.

In [16]:
class Binder:
    """Thin convenience wrapper around a pretrained ImageBind-huge model.

    The model is built once, switched to eval mode and moved to ``device``.
    ``run`` embeds inputs that still have to be loaded/preprocessed, while
    ``run_tensor`` embeds an input that is already a tensor.
    """

    def __init__(self, pth_path, device='cuda'):
        """Create the model and the per-modality preprocessing table.

        pth_path: accepted but not read by this constructor; the weights come
            from ``imagebind_huge(pretrained=True)``.
        device: device the model is moved to and that ``run`` hands to the
            preprocessing functions.
        """
        self.device = device

        net = imagebind_model.imagebind_huge(pretrained=True)
        net.eval()
        net.to(device)
        self.model = net

        # Maps a modality key to the function that turns raw inputs into a batch.
        self.data_process_dict = {
            ModalityType.TEXT: data.load_and_transform_text,
            ModalityType.VISION: data.load_and_transform_vision_data,
            ModalityType.AUDIO: data.load_and_transform_audio_data,
        }

    def run(self, ctype, cpaths, post_process=False):
        """Preprocess ``cpaths`` for modality ``ctype`` and return its embedding.

        ctype: modality key; must be a key of ``self.data_process_dict``.
        cpaths: list[str] handed to the matching preprocessing function.
        post_process: currently unused.
        """
        loader = self.data_process_dict[ctype]
        batch = {ctype: loader(cpaths, self.device)}
        with torch.no_grad():
            return self.model(batch)[ctype]

    def run_tensor(self, ctype, image_tensor):
        """Embed an already-prepared tensor for modality ``ctype``.

        The tensor is passed to the model as-is (it is not moved to
        ``self.device`` here); gradients are disabled for the forward pass.
        """
        batch = {ctype: image_tensor}
        with torch.no_grad():
            return self.model(batch)[ctype]
        
# ImageBind runs on the CPU; this module-level `device` is also read by
# unnorm()/norm() further down.
device = 'cpu'
binder = Binder(
    pth_path="/home/eugene/models/converted/imagebind/imagebind_huge.pth",
    device=device,
)

### 2. Define Diffusion model

In [17]:
# Inference options for the diffusion model.
opt = EasyDict(
    config='./configs/stable-diffusion/v2-1-stable-unclip-h-bind-inference.yaml',
    device='cuda:0',
    ckpt='./checkpoints/stable-diffusion-2-1-unclip/sd21-unclip-h.ckpt',
    C=4,            # first entry of the sampling shape
    H=768,
    W=768,
    f=8,            # H and W are integer-divided by f to get the sampling shape
    steps=50,       # DDIM steps
    n_samples=1,    # used as the batch size
    scale=20,       # unconditional guidance scale
    ddim_eta=0,
    torch_dtype=torch.float16,  # NOTE(review): not read by any cell visible here
)

config = OmegaConf.load(opt.config)
shape = [opt.C, opt.H // opt.f, opt.W // opt.f]
batch_size = opt.n_samples

# Build the diffusion model from the config and load the checkpoint weights.
model = ImageEmbeddingConditionedLatentDiffusion(**config.model['params'])
# NOTE(review): torch.load unpickles the file; only use checkpoints from a trusted source.
pl_sd = torch.load(opt.ckpt, map_location="cpu")
sd = pl_sd["state_dict"]
# strict=False: keys that do not match between checkpoint and model are tolerated.
model.load_state_dict(sd, strict=False)

# Half precision, on the target device, in eval mode.
model = model.half()
model.to(opt.device)
model.eval()

sampler = DDIMSampler(model, device=opt.device)

ImageEmbeddingConditionedLatentDiffusion: Running in v-prediction mode
Setting up MemoryEfficientCrossAttention. Query dim is 320, context_dim is None and using 5 heads.
Setting up MemoryEfficientCrossAttention. Query dim is 320, context_dim is 1024 and using 5 heads.
Setting up MemoryEfficientCrossAttention. Query dim is 320, context_dim is None and using 5 heads.
Setting up MemoryEfficientCrossAttention. Query dim is 320, context_dim is 1024 and using 5 heads.
Setting up MemoryEfficientCrossAttention. Query dim is 640, context_dim is None and using 10 heads.
Setting up MemoryEfficientCrossAttention. Query dim is 640, context_dim is 1024 and using 10 heads.
Setting up MemoryEfficientCrossAttention. Query dim is 640, context_dim is None and using 10 heads.
Setting up MemoryEfficientCrossAttention. Query dim is 640, context_dim is 1024 and using 10 heads.
Setting up MemoryEfficientCrossAttention. Query dim is 1280, context_dim is None and using 20 heads.
Setting up MemoryEfficientCrossA

In [18]:
def load_img(path):
    """Read an image file and return it as a (1, 3, H, W) float tensor in [-1, 1].

    The image is converted to RGB and each side is rounded down to a multiple
    of 64 (LANCZOS resampling) before the conversion to a tensor.
    """
    image = Image.open(path).convert("RGB")
    w, h = image.size
    print(f"loaded input image of size ({w}, {h}) from {path}")

    # Round both sides down to an integer multiple of 64.
    target_w = w - w % 64
    target_h = h - h % 64
    resized = image.resize((target_w, target_h), resample=Image.LANCZOS)

    pixels = np.array(resized).astype(np.float32) / 255.0              # HWC, [0, 1]
    batch = torch.from_numpy(pixels[None].transpose(0, 3, 1, 2))       # NCHW
    return 2. * batch - 1.

### 3. Image-conditioned image generation

In [19]:
# Default per-channel (RGB) statistics used to normalize / un-normalize image tensors.
IMG_MEAN=(0.48145466, 0.4578275, 0.40821073)
IMG_STD=(0.26862954, 0.26130258, 0.27577711)

def unnorm(tensor, mean=IMG_MEAN, std=IMG_STD):
    """Undo per-channel normalization: returns ``tensor * std + mean``.

    tensor: image tensor with channels on dim 1 of a 4-D layout (a 3-D CHW
        tensor also works through broadcasting and comes back with a leading
        batch dim of 1).
    mean, std: per-channel sequences; default to IMG_MEAN / IMG_STD.

    The input is cloned and the result lives on the module-level ``device``.
    """
    # Use the arguments, not the module constants, so callers can override them.
    m = torch.tensor(mean)[None, :, None, None].to(device)
    s = torch.tensor(std)[None, :, None, None].to(device)
    return (tensor.clone().to(device) * s) + m

def norm(tensor, mean=IMG_MEAN, std=IMG_STD):
    """Apply per-channel normalization: returns ``(tensor - mean) / std``.

    Parameters and device handling mirror ``unnorm``.
    """
    m = torch.tensor(mean)[None, :, None, None].to(device)
    s = torch.tensor(std)[None, :, None, None].to(device)
    return (tensor.clone().to(device) - m) / s
    
# Tensor -> PIL image converter.
# NOTE(review): the name `transform` is rebound to the EfficientNet
# preprocessing pipeline in the evaluation section further down.
transform = transforms.ToPILImage()

In [20]:
def generate_image_iamge(embeddings):
    """Sample images from the diffusion model conditioned on an embedding.

    embeddings: embedding tensor used as the image-embedding condition. It is
        rescaled by its overall norm (all elements), so it is presumably a
        single (1, D) embedding — TODO confirm against callers.

    Returns the decoded samples, clamped to [0, 1].

    Reads the module-level ``model``, ``sampler``, ``opt``, ``shape`` and
    ``batch_size``.
    """
    # Positive / negative text prompts; extra descriptions can be appended to the positive one.
    prompts = ['colorful, DSLR quality, clear, vivid'] * batch_size
    n_prompt = 'watermark, longbody, lowres, bad anatomy, bad hands, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality'

    c_adm = embeddings / embeddings.norm() * 20   # a norm of 20 typically gives better result
    c_adm = torch.cat([c_adm] * batch_size, dim=0)
    # Put the condition on the same device as the diffusion model.
    c_adm = c_adm.half().to(opt.device)

    with torch.no_grad(), torch.autocast('cuda'):
        c_adm, noise_level_emb = model.noise_augmentor(c_adm, noise_level=torch.zeros(batch_size).long().to(c_adm.device))
        c_adm = torch.cat((c_adm, noise_level_emb), 1)

        uc = model.get_learned_conditioning(batch_size * [n_prompt])    # negative prompts
        uc = {"c_crossattn": [uc], "c_adm": torch.zeros_like(c_adm)}
        c = {"c_crossattn": [model.get_learned_conditioning(prompts)], "c_adm": c_adm}

        samples, _ = sampler.sample(S=opt.steps,
                                    conditioning=c,
                                    batch_size=batch_size,
                                    shape=shape,
                                    verbose=False,
                                    unconditional_guidance_scale=opt.scale,
                                    unconditional_conditioning=uc,
                                    eta=opt.ddim_eta,
                                    x_T=None)

    # Decode without autograd: this is inference only, and tracking gradients
    # through the decoder would keep its activations alive for no benefit.
    with torch.no_grad():
        x_samples = model.decode_first_stage(samples.half())
    x_samples = torch.clamp((x_samples + 1.0) / 2.0, min=0.0, max=1.0)
    return x_samples

In [21]:
# Stand-alone ImageBind-huge instance on the CPU, in eval mode; the
# "embedding" generation path calls its forward() directly.
image_model = imagebind_model.imagebind_huge(pretrained=True).to('cpu')
image_model.eval()
0

0

In [30]:
# Write the adversarial inputs to disk as JPEG files (image_0.jpg, image_1.jpg, ...).
input_dir="/home/eugene/BindDiffusion/selected_label_experiment/perturbed_image_jpg"
# save_image does not create missing directories, so make sure the target exists.
os.makedirs(input_dir, exist_ok=True)
x_inits = np.load('/home/rishi/code/adversarial_collisions/outputs/imagenet/16_tingwei/x_advs.npy')
x_inits = torch.tensor(x_inits)
for i, img in enumerate(x_inits):
    # The stored tensors are normalized; undo that before saving.
    save_image(torch.squeeze(unnorm(img)), os.path.join(input_dir, f'image_{i}.jpg'))

In [32]:
# Directory to save generated images
output_dir = 'selected_label_experiment/generated_image_perturbed_eps16_jpg'
os.makedirs(output_dir, exist_ok=True)  # Create directory if not exists

# "embedding": embed the adversarial tensors loaded from x_advs.npy directly.
# "image":     embed the JPEG files previously written to `input_dir`.
generate_from = "image"

if generate_from == "embedding":
    X_advs = np.load('/home/rishi/code/adversarial_collisions/outputs/imagenet/16_tingwei/x_advs.npy')
    X_advs = torch.tensor(X_advs)
    with torch.no_grad():
        for i, img in enumerate(X_advs):
            print(i)
            embeddings = image_model.forward({'vision': img.unsqueeze(0)}, normalize=False)
            x_samples = generate_image_iamge(embeddings['vision'])
            save_image(x_samples[0], os.path.join(output_dir, f'image_{i}.png'))
elif generate_from == "image":
    # Inference only: disable autograd here as well, matching the branch above.
    with torch.no_grad():
        # Iterate through the saved images
        for i in range(len(x_inits)):
            print(i)
            # Read back the file written for this index (same directory as `input_dir`).
            image_path = os.path.join(input_dir, f'image_{i}.jpg')
            inputs = {'vision': data.load_and_transform_vision_data([image_path], 'cpu')}
            embeddings = binder.model(inputs)
            x_samples = generate_image_iamge(embeddings['vision'])
            save_image(x_samples[0], os.path.join(output_dir, f'image_{i}.png'))
else:
    # Fail loudly instead of silently generating nothing.
    raise ValueError(f"unknown generate_from value: {generate_from!r} (expected 'embedding' or 'image')")


0
Data shape for DDIM sampling is (1, 4, 96, 96), eta 0
Running DDIM Sampling with 50 timesteps


DDIM Sampler: 100%|██████████████████████████████████████████████████████████████████████| 50/50 [00:08<00:00,  6.24it/s]


1
Data shape for DDIM sampling is (1, 4, 96, 96), eta 0
Running DDIM Sampling with 50 timesteps


DDIM Sampler: 100%|██████████████████████████████████████████████████████████████████████| 50/50 [00:08<00:00,  6.21it/s]


2
Data shape for DDIM sampling is (1, 4, 96, 96), eta 0
Running DDIM Sampling with 50 timesteps


DDIM Sampler: 100%|██████████████████████████████████████████████████████████████████████| 50/50 [00:08<00:00,  6.16it/s]


3
Data shape for DDIM sampling is (1, 4, 96, 96), eta 0
Running DDIM Sampling with 50 timesteps


DDIM Sampler: 100%|██████████████████████████████████████████████████████████████████████| 50/50 [00:08<00:00,  6.11it/s]


4
Data shape for DDIM sampling is (1, 4, 96, 96), eta 0
Running DDIM Sampling with 50 timesteps


DDIM Sampler: 100%|██████████████████████████████████████████████████████████████████████| 50/50 [00:08<00:00,  6.09it/s]


5
Data shape for DDIM sampling is (1, 4, 96, 96), eta 0
Running DDIM Sampling with 50 timesteps


DDIM Sampler: 100%|██████████████████████████████████████████████████████████████████████| 50/50 [00:08<00:00,  6.07it/s]


6
Data shape for DDIM sampling is (1, 4, 96, 96), eta 0
Running DDIM Sampling with 50 timesteps


DDIM Sampler: 100%|██████████████████████████████████████████████████████████████████████| 50/50 [00:08<00:00,  6.05it/s]


7
Data shape for DDIM sampling is (1, 4, 96, 96), eta 0
Running DDIM Sampling with 50 timesteps


DDIM Sampler: 100%|██████████████████████████████████████████████████████████████████████| 50/50 [00:08<00:00,  6.04it/s]


8
Data shape for DDIM sampling is (1, 4, 96, 96), eta 0
Running DDIM Sampling with 50 timesteps


DDIM Sampler: 100%|██████████████████████████████████████████████████████████████████████| 50/50 [00:08<00:00,  6.04it/s]


9
Data shape for DDIM sampling is (1, 4, 96, 96), eta 0
Running DDIM Sampling with 50 timesteps


DDIM Sampler: 100%|██████████████████████████████████████████████████████████████████████| 50/50 [00:08<00:00,  6.04it/s]


10
Data shape for DDIM sampling is (1, 4, 96, 96), eta 0
Running DDIM Sampling with 50 timesteps


DDIM Sampler: 100%|██████████████████████████████████████████████████████████████████████| 50/50 [00:08<00:00,  6.04it/s]


11
Data shape for DDIM sampling is (1, 4, 96, 96), eta 0
Running DDIM Sampling with 50 timesteps


DDIM Sampler: 100%|██████████████████████████████████████████████████████████████████████| 50/50 [00:08<00:00,  6.04it/s]


12
Data shape for DDIM sampling is (1, 4, 96, 96), eta 0
Running DDIM Sampling with 50 timesteps


DDIM Sampler: 100%|██████████████████████████████████████████████████████████████████████| 50/50 [00:08<00:00,  6.04it/s]


13
Data shape for DDIM sampling is (1, 4, 96, 96), eta 0
Running DDIM Sampling with 50 timesteps


DDIM Sampler: 100%|██████████████████████████████████████████████████████████████████████| 50/50 [00:08<00:00,  6.04it/s]


14
Data shape for DDIM sampling is (1, 4, 96, 96), eta 0
Running DDIM Sampling with 50 timesteps


DDIM Sampler: 100%|██████████████████████████████████████████████████████████████████████| 50/50 [00:08<00:00,  6.04it/s]


15
Data shape for DDIM sampling is (1, 4, 96, 96), eta 0
Running DDIM Sampling with 50 timesteps


DDIM Sampler: 100%|██████████████████████████████████████████████████████████████████████| 50/50 [00:08<00:00,  6.04it/s]


16
Data shape for DDIM sampling is (1, 4, 96, 96), eta 0
Running DDIM Sampling with 50 timesteps


DDIM Sampler: 100%|██████████████████████████████████████████████████████████████████████| 50/50 [00:08<00:00,  6.04it/s]


17
Data shape for DDIM sampling is (1, 4, 96, 96), eta 0
Running DDIM Sampling with 50 timesteps


DDIM Sampler: 100%|██████████████████████████████████████████████████████████████████████| 50/50 [00:08<00:00,  6.04it/s]


18
Data shape for DDIM sampling is (1, 4, 96, 96), eta 0
Running DDIM Sampling with 50 timesteps


DDIM Sampler: 100%|██████████████████████████████████████████████████████████████████████| 50/50 [00:08<00:00,  6.03it/s]


19
Data shape for DDIM sampling is (1, 4, 96, 96), eta 0
Running DDIM Sampling with 50 timesteps


DDIM Sampler: 100%|██████████████████████████████████████████████████████████████████████| 50/50 [00:08<00:00,  6.03it/s]


20
Data shape for DDIM sampling is (1, 4, 96, 96), eta 0
Running DDIM Sampling with 50 timesteps


DDIM Sampler: 100%|██████████████████████████████████████████████████████████████████████| 50/50 [00:08<00:00,  6.03it/s]


21
Data shape for DDIM sampling is (1, 4, 96, 96), eta 0
Running DDIM Sampling with 50 timesteps


DDIM Sampler: 100%|██████████████████████████████████████████████████████████████████████| 50/50 [00:08<00:00,  6.04it/s]


22
Data shape for DDIM sampling is (1, 4, 96, 96), eta 0
Running DDIM Sampling with 50 timesteps


DDIM Sampler: 100%|██████████████████████████████████████████████████████████████████████| 50/50 [00:08<00:00,  6.03it/s]


23
Data shape for DDIM sampling is (1, 4, 96, 96), eta 0
Running DDIM Sampling with 50 timesteps


DDIM Sampler: 100%|██████████████████████████████████████████████████████████████████████| 50/50 [00:08<00:00,  6.03it/s]


24
Data shape for DDIM sampling is (1, 4, 96, 96), eta 0
Running DDIM Sampling with 50 timesteps


DDIM Sampler: 100%|██████████████████████████████████████████████████████████████████████| 50/50 [00:08<00:00,  6.03it/s]


25
Data shape for DDIM sampling is (1, 4, 96, 96), eta 0
Running DDIM Sampling with 50 timesteps


DDIM Sampler: 100%|██████████████████████████████████████████████████████████████████████| 50/50 [00:08<00:00,  6.03it/s]


26
Data shape for DDIM sampling is (1, 4, 96, 96), eta 0
Running DDIM Sampling with 50 timesteps


DDIM Sampler: 100%|██████████████████████████████████████████████████████████████████████| 50/50 [00:08<00:00,  6.03it/s]


27
Data shape for DDIM sampling is (1, 4, 96, 96), eta 0
Running DDIM Sampling with 50 timesteps


DDIM Sampler: 100%|██████████████████████████████████████████████████████████████████████| 50/50 [00:08<00:00,  6.03it/s]


28
Data shape for DDIM sampling is (1, 4, 96, 96), eta 0
Running DDIM Sampling with 50 timesteps


DDIM Sampler: 100%|██████████████████████████████████████████████████████████████████████| 50/50 [00:08<00:00,  6.04it/s]


29
Data shape for DDIM sampling is (1, 4, 96, 96), eta 0
Running DDIM Sampling with 50 timesteps


DDIM Sampler: 100%|██████████████████████████████████████████████████████████████████████| 50/50 [00:08<00:00,  6.03it/s]


30
Data shape for DDIM sampling is (1, 4, 96, 96), eta 0
Running DDIM Sampling with 50 timesteps


DDIM Sampler: 100%|██████████████████████████████████████████████████████████████████████| 50/50 [00:08<00:00,  6.03it/s]


31
Data shape for DDIM sampling is (1, 4, 96, 96), eta 0
Running DDIM Sampling with 50 timesteps


DDIM Sampler: 100%|██████████████████████████████████████████████████████████████████████| 50/50 [00:08<00:00,  6.03it/s]


32
Data shape for DDIM sampling is (1, 4, 96, 96), eta 0
Running DDIM Sampling with 50 timesteps


DDIM Sampler: 100%|██████████████████████████████████████████████████████████████████████| 50/50 [00:08<00:00,  6.03it/s]


33
Data shape for DDIM sampling is (1, 4, 96, 96), eta 0
Running DDIM Sampling with 50 timesteps


DDIM Sampler: 100%|██████████████████████████████████████████████████████████████████████| 50/50 [00:08<00:00,  6.03it/s]


34
Data shape for DDIM sampling is (1, 4, 96, 96), eta 0
Running DDIM Sampling with 50 timesteps


DDIM Sampler: 100%|██████████████████████████████████████████████████████████████████████| 50/50 [00:08<00:00,  6.03it/s]


35
Data shape for DDIM sampling is (1, 4, 96, 96), eta 0
Running DDIM Sampling with 50 timesteps


DDIM Sampler: 100%|██████████████████████████████████████████████████████████████████████| 50/50 [00:08<00:00,  6.03it/s]


36
Data shape for DDIM sampling is (1, 4, 96, 96), eta 0
Running DDIM Sampling with 50 timesteps


DDIM Sampler: 100%|██████████████████████████████████████████████████████████████████████| 50/50 [00:08<00:00,  6.03it/s]


37
Data shape for DDIM sampling is (1, 4, 96, 96), eta 0
Running DDIM Sampling with 50 timesteps


DDIM Sampler: 100%|██████████████████████████████████████████████████████████████████████| 50/50 [00:08<00:00,  6.03it/s]


38
Data shape for DDIM sampling is (1, 4, 96, 96), eta 0
Running DDIM Sampling with 50 timesteps


DDIM Sampler: 100%|██████████████████████████████████████████████████████████████████████| 50/50 [00:08<00:00,  6.03it/s]


39
Data shape for DDIM sampling is (1, 4, 96, 96), eta 0
Running DDIM Sampling with 50 timesteps


DDIM Sampler: 100%|██████████████████████████████████████████████████████████████████████| 50/50 [00:08<00:00,  6.05it/s]


40
Data shape for DDIM sampling is (1, 4, 96, 96), eta 0
Running DDIM Sampling with 50 timesteps


DDIM Sampler: 100%|██████████████████████████████████████████████████████████████████████| 50/50 [00:08<00:00,  6.03it/s]


41
Data shape for DDIM sampling is (1, 4, 96, 96), eta 0
Running DDIM Sampling with 50 timesteps


DDIM Sampler: 100%|██████████████████████████████████████████████████████████████████████| 50/50 [00:08<00:00,  6.03it/s]


42
Data shape for DDIM sampling is (1, 4, 96, 96), eta 0
Running DDIM Sampling with 50 timesteps


DDIM Sampler: 100%|██████████████████████████████████████████████████████████████████████| 50/50 [00:08<00:00,  6.03it/s]


43
Data shape for DDIM sampling is (1, 4, 96, 96), eta 0
Running DDIM Sampling with 50 timesteps


DDIM Sampler: 100%|██████████████████████████████████████████████████████████████████████| 50/50 [00:08<00:00,  6.03it/s]


44
Data shape for DDIM sampling is (1, 4, 96, 96), eta 0
Running DDIM Sampling with 50 timesteps


DDIM Sampler: 100%|██████████████████████████████████████████████████████████████████████| 50/50 [00:08<00:00,  6.03it/s]


45
Data shape for DDIM sampling is (1, 4, 96, 96), eta 0
Running DDIM Sampling with 50 timesteps


DDIM Sampler: 100%|██████████████████████████████████████████████████████████████████████| 50/50 [00:08<00:00,  6.03it/s]


46
Data shape for DDIM sampling is (1, 4, 96, 96), eta 0
Running DDIM Sampling with 50 timesteps


DDIM Sampler: 100%|██████████████████████████████████████████████████████████████████████| 50/50 [00:08<00:00,  6.03it/s]


47
Data shape for DDIM sampling is (1, 4, 96, 96), eta 0
Running DDIM Sampling with 50 timesteps


DDIM Sampler: 100%|██████████████████████████████████████████████████████████████████████| 50/50 [00:08<00:00,  6.04it/s]


48
Data shape for DDIM sampling is (1, 4, 96, 96), eta 0
Running DDIM Sampling with 50 timesteps


DDIM Sampler: 100%|██████████████████████████████████████████████████████████████████████| 50/50 [00:08<00:00,  6.03it/s]


49
Data shape for DDIM sampling is (1, 4, 96, 96), eta 0
Running DDIM Sampling with 50 timesteps


DDIM Sampler: 100%|██████████████████████████████████████████████████████████████████████| 50/50 [00:08<00:00,  6.03it/s]


50
Data shape for DDIM sampling is (1, 4, 96, 96), eta 0
Running DDIM Sampling with 50 timesteps


DDIM Sampler: 100%|██████████████████████████████████████████████████████████████████████| 50/50 [00:08<00:00,  6.03it/s]


51
Data shape for DDIM sampling is (1, 4, 96, 96), eta 0
Running DDIM Sampling with 50 timesteps


DDIM Sampler: 100%|██████████████████████████████████████████████████████████████████████| 50/50 [00:08<00:00,  6.03it/s]


52
Data shape for DDIM sampling is (1, 4, 96, 96), eta 0
Running DDIM Sampling with 50 timesteps


DDIM Sampler: 100%|██████████████████████████████████████████████████████████████████████| 50/50 [00:08<00:00,  6.03it/s]


53
Data shape for DDIM sampling is (1, 4, 96, 96), eta 0
Running DDIM Sampling with 50 timesteps


DDIM Sampler: 100%|██████████████████████████████████████████████████████████████████████| 50/50 [00:08<00:00,  6.04it/s]


54
Data shape for DDIM sampling is (1, 4, 96, 96), eta 0
Running DDIM Sampling with 50 timesteps


DDIM Sampler: 100%|██████████████████████████████████████████████████████████████████████| 50/50 [00:08<00:00,  6.03it/s]


55
Data shape for DDIM sampling is (1, 4, 96, 96), eta 0
Running DDIM Sampling with 50 timesteps


DDIM Sampler: 100%|██████████████████████████████████████████████████████████████████████| 50/50 [00:08<00:00,  6.04it/s]


56
Data shape for DDIM sampling is (1, 4, 96, 96), eta 0
Running DDIM Sampling with 50 timesteps


DDIM Sampler: 100%|██████████████████████████████████████████████████████████████████████| 50/50 [00:08<00:00,  6.03it/s]


57
Data shape for DDIM sampling is (1, 4, 96, 96), eta 0
Running DDIM Sampling with 50 timesteps


DDIM Sampler: 100%|██████████████████████████████████████████████████████████████████████| 50/50 [00:08<00:00,  6.03it/s]


58
Data shape for DDIM sampling is (1, 4, 96, 96), eta 0
Running DDIM Sampling with 50 timesteps


DDIM Sampler: 100%|██████████████████████████████████████████████████████████████████████| 50/50 [00:08<00:00,  6.04it/s]


59
Data shape for DDIM sampling is (1, 4, 96, 96), eta 0
Running DDIM Sampling with 50 timesteps


DDIM Sampler: 100%|██████████████████████████████████████████████████████████████████████| 50/50 [00:08<00:00,  6.03it/s]


60
Data shape for DDIM sampling is (1, 4, 96, 96), eta 0
Running DDIM Sampling with 50 timesteps


DDIM Sampler: 100%|██████████████████████████████████████████████████████████████████████| 50/50 [00:08<00:00,  6.04it/s]


61
Data shape for DDIM sampling is (1, 4, 96, 96), eta 0
Running DDIM Sampling with 50 timesteps


DDIM Sampler: 100%|██████████████████████████████████████████████████████████████████████| 50/50 [00:08<00:00,  6.04it/s]


62
Data shape for DDIM sampling is (1, 4, 96, 96), eta 0
Running DDIM Sampling with 50 timesteps


DDIM Sampler: 100%|██████████████████████████████████████████████████████████████████████| 50/50 [00:08<00:00,  6.03it/s]


63
Data shape for DDIM sampling is (1, 4, 96, 96), eta 0
Running DDIM Sampling with 50 timesteps


DDIM Sampler: 100%|██████████████████████████████████████████████████████████████████████| 50/50 [00:08<00:00,  6.03it/s]


64
Data shape for DDIM sampling is (1, 4, 96, 96), eta 0
Running DDIM Sampling with 50 timesteps


DDIM Sampler: 100%|██████████████████████████████████████████████████████████████████████| 50/50 [00:08<00:00,  6.04it/s]


65
Data shape for DDIM sampling is (1, 4, 96, 96), eta 0
Running DDIM Sampling with 50 timesteps


DDIM Sampler: 100%|██████████████████████████████████████████████████████████████████████| 50/50 [00:08<00:00,  6.03it/s]


66
Data shape for DDIM sampling is (1, 4, 96, 96), eta 0
Running DDIM Sampling with 50 timesteps


DDIM Sampler: 100%|██████████████████████████████████████████████████████████████████████| 50/50 [00:08<00:00,  6.03it/s]


67
Data shape for DDIM sampling is (1, 4, 96, 96), eta 0
Running DDIM Sampling with 50 timesteps


DDIM Sampler: 100%|██████████████████████████████████████████████████████████████████████| 50/50 [00:08<00:00,  6.04it/s]


68
Data shape for DDIM sampling is (1, 4, 96, 96), eta 0
Running DDIM Sampling with 50 timesteps


DDIM Sampler: 100%|██████████████████████████████████████████████████████████████████████| 50/50 [00:08<00:00,  6.04it/s]


69
Data shape for DDIM sampling is (1, 4, 96, 96), eta 0
Running DDIM Sampling with 50 timesteps


DDIM Sampler: 100%|██████████████████████████████████████████████████████████████████████| 50/50 [00:08<00:00,  6.03it/s]


70
Data shape for DDIM sampling is (1, 4, 96, 96), eta 0
Running DDIM Sampling with 50 timesteps


DDIM Sampler: 100%|██████████████████████████████████████████████████████████████████████| 50/50 [00:08<00:00,  6.03it/s]


71
Data shape for DDIM sampling is (1, 4, 96, 96), eta 0
Running DDIM Sampling with 50 timesteps


DDIM Sampler: 100%|██████████████████████████████████████████████████████████████████████| 50/50 [00:08<00:00,  6.03it/s]


72
Data shape for DDIM sampling is (1, 4, 96, 96), eta 0
Running DDIM Sampling with 50 timesteps


DDIM Sampler: 100%|██████████████████████████████████████████████████████████████████████| 50/50 [00:08<00:00,  6.03it/s]


73
Data shape for DDIM sampling is (1, 4, 96, 96), eta 0
Running DDIM Sampling with 50 timesteps


DDIM Sampler: 100%|██████████████████████████████████████████████████████████████████████| 50/50 [00:08<00:00,  6.03it/s]


74
Data shape for DDIM sampling is (1, 4, 96, 96), eta 0
Running DDIM Sampling with 50 timesteps


DDIM Sampler: 100%|██████████████████████████████████████████████████████████████████████| 50/50 [00:08<00:00,  6.03it/s]


75
Data shape for DDIM sampling is (1, 4, 96, 96), eta 0
Running DDIM Sampling with 50 timesteps


DDIM Sampler: 100%|██████████████████████████████████████████████████████████████████████| 50/50 [00:08<00:00,  6.03it/s]


76
Data shape for DDIM sampling is (1, 4, 96, 96), eta 0
Running DDIM Sampling with 50 timesteps


DDIM Sampler: 100%|██████████████████████████████████████████████████████████████████████| 50/50 [00:08<00:00,  6.03it/s]


77
Data shape for DDIM sampling is (1, 4, 96, 96), eta 0
Running DDIM Sampling with 50 timesteps


DDIM Sampler: 100%|██████████████████████████████████████████████████████████████████████| 50/50 [00:08<00:00,  6.03it/s]


78
Data shape for DDIM sampling is (1, 4, 96, 96), eta 0
Running DDIM Sampling with 50 timesteps


DDIM Sampler: 100%|██████████████████████████████████████████████████████████████████████| 50/50 [00:08<00:00,  6.03it/s]


79
Data shape for DDIM sampling is (1, 4, 96, 96), eta 0
Running DDIM Sampling with 50 timesteps


DDIM Sampler: 100%|██████████████████████████████████████████████████████████████████████| 50/50 [00:08<00:00,  6.03it/s]


80
Data shape for DDIM sampling is (1, 4, 96, 96), eta 0
Running DDIM Sampling with 50 timesteps


DDIM Sampler: 100%|██████████████████████████████████████████████████████████████████████| 50/50 [00:08<00:00,  6.03it/s]


81
Data shape for DDIM sampling is (1, 4, 96, 96), eta 0
Running DDIM Sampling with 50 timesteps


DDIM Sampler: 100%|██████████████████████████████████████████████████████████████████████| 50/50 [00:08<00:00,  6.03it/s]


82
Data shape for DDIM sampling is (1, 4, 96, 96), eta 0
Running DDIM Sampling with 50 timesteps


DDIM Sampler: 100%|██████████████████████████████████████████████████████████████████████| 50/50 [00:08<00:00,  6.04it/s]


83
Data shape for DDIM sampling is (1, 4, 96, 96), eta 0
Running DDIM Sampling with 50 timesteps


DDIM Sampler: 100%|██████████████████████████████████████████████████████████████████████| 50/50 [00:08<00:00,  6.03it/s]


84
Data shape for DDIM sampling is (1, 4, 96, 96), eta 0
Running DDIM Sampling with 50 timesteps


DDIM Sampler: 100%|██████████████████████████████████████████████████████████████████████| 50/50 [00:08<00:00,  6.04it/s]


85
Data shape for DDIM sampling is (1, 4, 96, 96), eta 0
Running DDIM Sampling with 50 timesteps


DDIM Sampler: 100%|██████████████████████████████████████████████████████████████████████| 50/50 [00:08<00:00,  6.03it/s]


86
Data shape for DDIM sampling is (1, 4, 96, 96), eta 0
Running DDIM Sampling with 50 timesteps


DDIM Sampler: 100%|██████████████████████████████████████████████████████████████████████| 50/50 [00:08<00:00,  6.03it/s]


87
Data shape for DDIM sampling is (1, 4, 96, 96), eta 0
Running DDIM Sampling with 50 timesteps


DDIM Sampler: 100%|██████████████████████████████████████████████████████████████████████| 50/50 [00:08<00:00,  6.03it/s]


88
Data shape for DDIM sampling is (1, 4, 96, 96), eta 0
Running DDIM Sampling with 50 timesteps


DDIM Sampler: 100%|██████████████████████████████████████████████████████████████████████| 50/50 [00:08<00:00,  6.03it/s]


89
Data shape for DDIM sampling is (1, 4, 96, 96), eta 0
Running DDIM Sampling with 50 timesteps


DDIM Sampler: 100%|██████████████████████████████████████████████████████████████████████| 50/50 [00:08<00:00,  6.04it/s]


90
Data shape for DDIM sampling is (1, 4, 96, 96), eta 0
Running DDIM Sampling with 50 timesteps


DDIM Sampler: 100%|██████████████████████████████████████████████████████████████████████| 50/50 [00:08<00:00,  6.04it/s]


91
Data shape for DDIM sampling is (1, 4, 96, 96), eta 0
Running DDIM Sampling with 50 timesteps


DDIM Sampler: 100%|██████████████████████████████████████████████████████████████████████| 50/50 [00:08<00:00,  6.04it/s]


92
Data shape for DDIM sampling is (1, 4, 96, 96), eta 0
Running DDIM Sampling with 50 timesteps


DDIM Sampler: 100%|██████████████████████████████████████████████████████████████████████| 50/50 [00:08<00:00,  6.03it/s]


93
Data shape for DDIM sampling is (1, 4, 96, 96), eta 0
Running DDIM Sampling with 50 timesteps


DDIM Sampler: 100%|██████████████████████████████████████████████████████████████████████| 50/50 [00:08<00:00,  6.04it/s]


94
Data shape for DDIM sampling is (1, 4, 96, 96), eta 0
Running DDIM Sampling with 50 timesteps


DDIM Sampler: 100%|██████████████████████████████████████████████████████████████████████| 50/50 [00:08<00:00,  6.03it/s]


95
Data shape for DDIM sampling is (1, 4, 96, 96), eta 0
Running DDIM Sampling with 50 timesteps


DDIM Sampler: 100%|██████████████████████████████████████████████████████████████████████| 50/50 [00:08<00:00,  6.04it/s]


96
Data shape for DDIM sampling is (1, 4, 96, 96), eta 0
Running DDIM Sampling with 50 timesteps


DDIM Sampler: 100%|██████████████████████████████████████████████████████████████████████| 50/50 [00:08<00:00,  6.03it/s]


97
Data shape for DDIM sampling is (1, 4, 96, 96), eta 0
Running DDIM Sampling with 50 timesteps


DDIM Sampler: 100%|██████████████████████████████████████████████████████████████████████| 50/50 [00:08<00:00,  6.03it/s]


98
Data shape for DDIM sampling is (1, 4, 96, 96), eta 0
Running DDIM Sampling with 50 timesteps


DDIM Sampler: 100%|██████████████████████████████████████████████████████████████████████| 50/50 [00:08<00:00,  6.03it/s]


99
Data shape for DDIM sampling is (1, 4, 96, 96), eta 0
Running DDIM Sampling with 50 timesteps


DDIM Sampler: 100%|██████████████████████████████████████████████████████████████████████| 50/50 [00:08<00:00,  6.04it/s]


### 4. Evaluation of the generated images

In [34]:
# Locations of the two saved label arrays.
orig_label_path = 'outputs/imagenet/16_tingwei/y_origs.npy'
y_ids_path = 'outputs/imagenet/16_tingwei/y_ids.npy'

# Read both arrays, then keep plain-Python list copies alongside them.
orig_labels, y_ids = (np.load(p) for p in (orig_label_path, y_ids_path))
orig_labels_list, y_ids_list = orig_labels.tolist(), y_ids.tolist()

In [46]:
# ImageNet class labels: download the index -> [wnid, name] mapping.
IMAGENET_LABELS_URL = "https://storage.googleapis.com/download.tensorflow.org/data/imagenet_class_index.json"
# A timeout keeps the cell from hanging forever; raise_for_status turns an HTTP
# error into an exception instead of a confusing JSON parsing failure.
_labels_response = requests.get(IMAGENET_LABELS_URL, timeout=30)
_labels_response.raise_for_status()
class_idx = _labels_response.json()
# idx2label[k] is the human-readable name (second entry) of class k.
idx2label = [class_idx[str(k)][1] for k in range(len(class_idx))]

# Preprocessing applied to PIL images before they are fed to EfficientNet:
# resize, center-crop, convert to tensor, normalize per channel.
transform = transforms.Compose(
    [
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ]
)

# EfficientNet-B7 with pretrained weights, in eval mode.
efficientnet = models.efficientnet_b7(pretrained=True)
efficientnet.eval()

# Vision Transformer classifier and its matching input processor.
vit_model = ViTForImageClassification.from_pretrained("google/vit-base-patch16-224")
image_processor = ViTFeatureExtractor.from_pretrained("google/vit-base-patch16-224")

def classify_efficientnet(image_path):
    """Run EfficientNet-B7 on a single image file.

    Args:
        image_path: Path of the image to classify.

    Returns:
        A pair ``(labels, indices)``: the label names looked up in
        ``idx2label`` for the five highest-scoring classes, and the
        corresponding class indices as a plain Python list.
    """
    rgb_image = Image.open(image_path).convert('RGB')
    # The model expects a batch, so add a leading dimension of size one.
    batch = transform(rgb_image).unsqueeze(0)

    with torch.no_grad():
        scores = efficientnet(batch)

    # Only the indices of the five largest scores are needed, not the scores.
    _, top_indices = torch.topk(scores, 5)
    class_ids = top_indices[0].tolist()
    class_names = [idx2label[class_id] for class_id in class_ids]

    return class_names, class_ids

def classify_vit(image_path):
    """Classify an image with the Vision Transformer.

    Args:
        image_path: Path of the image to classify.

    Returns:
        A pair ``(labels, indices)``: the label names looked up in
        ``idx2label`` for the five highest-scoring classes, and the
        corresponding class indices as a plain Python list.
    """
    # Force three-channel RGB, as `classify_efficientnet` does, so grayscale,
    # palette or RGBA files are handled the same way as regular RGB images.
    with Image.open(image_path) as image_file:
        image = image_file.convert('RGB')

    inputs = image_processor(image, return_tensors="pt")
    with torch.no_grad():
        logits = vit_model(**inputs).logits

    # Get the top-5 predictions (indices only; the scores are not used)
    _, predicted_indices = torch.topk(logits, 5)
    top_indices = predicted_indices[0].tolist()
    predicted_labels = [idx2label[idx] for idx in top_indices]

    return predicted_labels, top_indices

def classify_directory(image_directory, num_images=100, classify=None):
    """Classify the files ``image_0.png`` .. ``image_{num_images - 1}.png`` of a directory.

    Args:
        image_directory: Directory that holds the ``image_{i}.png`` files.
        num_images: Number of consecutive indices to look for, starting at 0.
        classify: Callable taking an image path and returning
            ``(labels, indices)``. Defaults to ``classify_vit``.

    Returns:
        A pair ``(labels, results)``. ``labels`` maps each file name that was
        found to the labels returned by ``classify``; ``results`` holds the
        returned indices in increasing file-index order.

    Missing files are reported and skipped. After a skip, positions in
    ``results`` no longer correspond to file indices.
    """
    labels = {}
    results = []
    for i in range(num_images):
        image_filename = f"image_{i}.png"
        image_path = os.path.join(image_directory, image_filename)

        # Check if the file exists
        if not os.path.isfile(image_path):
            print(f"The file {image_path} does not exist.")
            continue

        # The default classifier is looked up only when there is a file to classify.
        predict = classify_vit if classify is None else classify
        predicted_labels, predicted_indices = predict(image_path)
        results.append(predicted_indices)
        labels[image_filename] = predicted_labels
    return labels, results


# Images generated from the unperturbed inputs
labels_unperturbed, results_unperturbed = classify_directory(
    "selected_label_experiment/generated_image_unperturbed_eps16"  # replace with your directory path
)

# Images generated from the perturbed inputs. These labels are stored in
# `labels_perturbed`; they were previously written into `labels_original`,
# leaving `labels_perturbed` empty.
# NOTE(review): this path starts with "elected_..." while the directory above
# starts with "selected_..." -- looks like a typo; confirm against the disk layout.
labels_perturbed, results_perturbed = classify_directory(
    "elected_label_experiment/generated_image_perturbed_eps16_jpg"  # replace with your directory path
)

# Original images
labels_original, results_original = classify_directory(
    "/home/eugene/BindDiffusion/original_image"  # replace with your directory path
)

In [144]:
def top_k_accuracy(true_labels, ranked_predictions, k, total):
    """Percentage of the first ``total`` images whose label is in the top ``k``.

    Args:
        true_labels: Sequence of reference class indices, one per image.
        ranked_predictions: Sequence of per-image prediction lists, best
            prediction first (as returned by the classifiers above).
        k: How many leading predictions count as a hit.
        total: Number of images to evaluate; also the denominator.

    Returns:
        The accuracy as a percentage in ``[0, 100]``; ``0.0`` when ``total``
        is zero, to avoid dividing by zero.

    Raises:
        IndexError: If either sequence has fewer than ``total`` entries.
    """
    if total == 0:
        return 0.0
    hits = sum(
        1
        for i in range(total)
        if true_labels[i] in ranked_predictions[i][:k]
    )
    return (hits / total) * 100


total_images = len(y_ids_list)

# Top-1: the reference label must equal the single best prediction.
top_1_accuracy1 = top_k_accuracy(orig_labels_list, results_original, 1, total_images)
top_1_accuracy2 = top_k_accuracy(orig_labels_list, results_unperturbed, 1, total_images)
top_1_accuracy3 = top_k_accuracy(y_ids_list, results_unperturbed, 1, total_images)
top_1_accuracy4 = top_k_accuracy(y_ids_list, results_perturbed, 1, total_images)
top_1_accuracy5 = top_k_accuracy(orig_labels_list, results_perturbed, 1, total_images)
print(f"The top-1 accuracy for unperturbed image and original label is: {top_1_accuracy1:.2f}%")
print(f"The top-1 accuracy for generated image on original image and original label is: {top_1_accuracy2:.2f}%")
print(f"The top-1 accuracy for generated image on original image and target label is: {top_1_accuracy3:.2f}%")
print(f"The top-1 accuracy for generated image on perturbed image and target label is: {top_1_accuracy4:.2f}%")
print(f"The top-1 accuracy for generated image on perturbed image and original labelis: {top_1_accuracy5:.2f}%")

# Top-5: the reference label may appear anywhere among the five predictions.
# (This pass previously compared against the best prediction only, which made
# it a duplicate of the top-1 computation.)
top_5_accuracy1 = top_k_accuracy(orig_labels_list, results_original, 5, total_images)
top_5_accuracy2 = top_k_accuracy(orig_labels_list, results_unperturbed, 5, total_images)
top_5_accuracy3 = top_k_accuracy(y_ids_list, results_unperturbed, 5, total_images)
top_5_accuracy4 = top_k_accuracy(y_ids_list, results_perturbed, 5, total_images)
top_5_accuracy5 = top_k_accuracy(orig_labels_list, results_perturbed, 5, total_images)
print(f"The top-5 accuracy for unperturbed image and original label is: {top_5_accuracy1:.2f}%")
print(f"The top-5 accuracy for generated image on original image and original label is: {top_5_accuracy2:.2f}%")
print(f"The top-5 accuracy for generated image on original image and target label is: {top_5_accuracy3:.2f}%")
print(f"The top-5 accuracy for generated image on perturbed image and target label is: {top_5_accuracy4:.2f}%")
print(f"The top-5 accuracy for generated image on perturbed image and original labelis: {top_5_accuracy5:.2f}%")

The top-1 accuracy for unperturbed image and original label is: 85.00%
The top-1 accuracy for generated image on original image and original label is: 32.00%
The top-1 accuracy for generated image on original image and target label is: 0.00%
The top-1 accuracy for generated image on perturbed image and target label is: 38.00%
The top-1 accuracy for generated image on perturbed image and original labelis: 0.00%
The top-5 accuracy for unperturbed image and original label is: 99.00%
The top-5 accuracy for generated image on original image and original label is: 60.00%
The top-5 accuracy for generated image on original image and target label is: 0.00%
The top-5 accuracy for generated image on perturbed image and target label is: 58.00%
The top-5 accuracy for generated image on perturbed image and original labelis: 1.00%
