# Diffusion Illusions: Twisting Squares

Hi! Welcome to the official Colab demo for our project "Diffusion Illusions: Hiding Images in Plain Sight"

By Ryan Burgert, Xiang Li, Abraham Leite, Kanchana Ranasinghe and Michael Ryoo from Stony Brook University

Visit our project website: [diffusionillusions.com](https://ryanndagreat.github.io/Diffusion-Illusions/)

This project was inspired by our paper "Peekaboo: Text to Image Diffusion Models are Zero-Shot Segmentors". The Peekaboo project website: [https://ryanndagreat.github.io/peekaboo/](https://ryanndagreat.github.io/peekaboo/)

Instructions:

0. Go to the Runtime menu, and make sure this notebook is using GPU!
1. Run the top 2 code cells (one cleans colab's junk and downloads the source code, while the other installs python packages)
2. Click 'Runtime', then 'Restart Runtime'. You need to do this the first time you open this notebook to avoid weird random errors from the pip installations.
3. Run code cells to load stable diffusion. The first time you run it it will take a few minutes to download; subsequent times won't take long at all though.
4. Run all the cells below that, and customize prompt_a and prompt_b!
5. Take the result top_image and bottom_image, print them out, and shine a backlight through them as shown on the Diffusion Illusions website (link above!)

I may also create a YouTube tutorial if there's interest. Let me know if this would be helpful!

This notebook was written by Ryan Burgert. Feel free to reach out to me at rburgert@cs.stonybrook.edu if you have any questions! 

In [1]:
%%bash
# Only bootstrap when the working directory is not already a git checkout.
if [ ! -d ".git" ]; then
    # Get rid of Colab's default junk files (hidden ones included) so that git
    # can clone into the current directory. find with -mindepth 1 never touches
    # "." or "..", which a `.*` glob would hand to rm and make it complain.
    find . -mindepth 1 -delete
    git clone -b master https://github.com/RyannDaGreat/Diffusion-Illusions .
fi

In [None]:
# Install/upgrade everything listed in requirements.txt, then upgrade the rp package
%pip install --upgrade -r requirements.txt
%pip install rp --upgrade
# You may need to restart the runtime after installing these
# I'm not sure why this helps, but all sorts of weird random errors pop up in Colab if you don't

In [None]:
# SET YOUR PROMPTS HERE

# Example text prompts...
prompt_a = "Hatsune Miku"
prompt_b = "A dog's face photograph 4k"

# ...which are immediately replaced by empty strings, so as written both text
# prompts are blank. Delete the next line to use the text prompts above.
prompt_a = prompt_b = ''

NUMBER_OF_SQUARES = 4  # A 4x4 twisting squares illusion

# Optional: Specify what you DON'T want to see
negative_prompt = 'blurry ugly'

print()
print(f'Negative prompt: {negative_prompt!r}')
print()
print('Chosen prompts:')
print(f'    prompt_a = {prompt_a!r}')  # Target for the image as generated
print(f'    prompt_b = {prompt_b!r}')  # Target for the image after its tiles are rotated

In [4]:
from rp import *
import numpy as np
import rp
import torch
import torch.nn as nn
import source.stable_diffusion as sd
from easydict import EasyDict
from source.learnable_textures import LearnableImageFourier
from source.stable_diffusion_labels import NegativeLabel
from itertools import chain
import time

In [None]:
import subprocess
import os

# Source /etc/network_turbo inside a bash subshell, list the resulting
# environment, and copy every line mentioning "proxy" into this process.
# NOTE(review): presumably a proxy helper provided by the hosting machine;
# when the file is missing the captured output is empty and nothing is set.
proxy_env_dump = subprocess.run(
    'bash -c "source /etc/network_turbo && env | grep proxy"',
    shell=True,
    capture_output=True,
    text=True,
).stdout
for env_line in proxy_env_dump.splitlines():
    env_key, env_sep, env_value = env_line.partition('=')
    if env_sep:  # only lines of the form KEY=VALUE
        os.environ[env_key] = env_value

# Build the model only when no `s` exists yet, so re-running this cell is cheap
if 's' not in dir():
    # You can select the original Stable Diffusion 1.5 or some dreambooth of it
    model_name = "CompVis/stable-diffusion-v1-4"
    model_name = "runwayml/stable-diffusion-v1-5"  # the last assignment wins
    # model_name="nitrosocke/Arcane-Diffusion"
    gpu = rp.select_torch_device()
    s = sd.StableDiffusion(gpu, model_name)
device = s.device

In [6]:
# One NegativeLabel per prompt, each sharing the same negative prompt
label_a, label_b = (
    NegativeLabel(prompt, negative_prompt) for prompt in (prompt_a, prompt_b)
)

In [None]:
from PIL import Image
from torchvision import transforms

# Preprocessing for image prompts: resize to 512x512, then convert to a tensor
preprocessing_steps = [transforms.Resize((512, 512)), transforms.ToTensor()]
transform = transforms.Compose(preprocessing_steps)

def load_image(path):
    """Open the image file at ``path`` and return it converted to RGB mode."""
    opened = Image.open(path)
    return opened.convert('RGB')

def transform_image(img):
    """Turn an image prompt into a (1, 3, 512, 512) tensor on ``device``.

    ``None`` is passed through unchanged so an omitted image prompt stays None.
    """
    if img is not None:
        batched = transform(img).reshape(1, 3, 512, 512)
        return batched.to(device)
    return None

# Paths of the image prompts; an empty/None path leaves that prompt out
img_a='./img/miko.png'
img_b='./img/shogun.png'
img_prompt_a = load_image(img_a) if img_a else None
img_prompt_b = load_image(img_b) if img_b else None

# Tensors in the order [a, b]; an entry is None when its image was left out
img_prompts=[transform_image(img_prompt_a),transform_image(img_prompt_b)]

# display the image prompts (skipping any that were left out, since there is
# nothing to show for a None prompt)
for prompt_image in (img_prompt_a, img_prompt_b):
    if prompt_image is not None:
        rp.display_image(prompt_image)

In [8]:
#Image Parametrization and Initialization (this section takes vram)

#Select Learnable Image Size (this has big VRAM implications!):
#Note: We use implicit neural representations for better image quality
#They're previously used in our paper "TRITON: Neural Neural Textures make Sim2Real Consistent" (see tritonpaper.github.io)
# ... and that representation is based on Fourier Feature Networks (see bmild.github.io/fourfeat)
SIZE = 256


def learnable_image_maker():
    """Create a new 256x256 LearnableImageFourier and move it to ``s.device``."""
    return LearnableImageFourier(
        height=256,
        width=256,
        num_features=256,
        hidden_dim=256,
        scale=10,
    ).to(s.device)


image = learnable_image_maker()

In [9]:
import torch
import torch.nn.functional as F

def rotate_tiles(image, num_divisions=NUMBER_OF_SQUARES):
    """Rotate every square tile of an image a quarter turn, alternating direction.

    The input is converted with ``as_torch_image`` and then treated as a
    (num_channels, height, width) tensor. Tiles are squares with side
    ``width // num_divisions``. A tile whose column index plus row index is
    even gets ``rot90(1)`` over the two spatial axes; every other tile gets
    ``rot90(-1)``. Pixels not covered by a whole tile stay zero.

    Returns a new tensor with the same shape as the converted image.
    """
    image = as_torch_image(image)
    _, height, width = image.shape

    side = width // num_divisions
    rotated = torch.zeros_like(image)

    for row in range(height // side):
        rows = slice(row * side, (row + 1) * side)
        for col in range(width // side):
            cols = slice(col * side, (col + 1) * side)
            # Checker pattern: even cells turn one way, odd cells the other
            quarter_turns = 1 if (row + col) % 2 == 0 else -1
            rotated[:, rows, cols] = image[:, rows, cols].rot90(quarter_turns, [1, 2])

    return rotated

def learnable_image_a():
    """Render the learnable image as-is."""
    return image()


def learnable_image_b():
    """Render the learnable image with its tiles rotated by ``rotate_tiles``."""
    return rotate_tiles(image())


# Plain SGD over the learnable image's parameters
optim = torch.optim.SGD(image.parameters(), lr=1e-4)

In [10]:
# Parallel lists: entry i of each list belongs to the same view of the image
labels = [label_a, label_b]
learnable_images = [learnable_image_a, learnable_image_b]

# The weight coefficients for each prompt. For example, if we have [0,1], then
# only the rotated-tiles view (learnable_image_b) will be optimized
weights = [1, 1]

# Rescale so the weights sum to the number of views (i.e. average to 1)
weights = rp.as_numpy_array(weights)
weights = weights / weights.sum() * len(weights)

In [11]:
#For saving a timelapse: the training loop appends preview frames to this list
#and save_run() later writes them to disk. Re-running this cell empties it.
ims=[]

In [12]:
def get_display_image():
    """Return one preview image showing both current views, tiled without borders.

    The views are ``learnable_image_a()`` followed by ``learnable_image_b()``,
    each converted with ``rp.as_numpy_image``.
    """
    renderings = [
        rp.as_numpy_image(render())
        for render in (learnable_image_a, learnable_image_b)
    ]
    return rp.tiled_images(
        renderings,
        length=len(learnable_images),
        border_thickness=0,
    )

In [None]:
NUM_ITER=5000

#Set the minimum and maximum noise timesteps for the dream loss (aka score distillation loss)
s.max_step=MAX_STEP=990
s.min_step=MIN_STEP=10 

#Live preview channel; it is refreshed in place through television.update(...) below
television = rp.JupyterDisplayChannel()
television.display()

display_eta=rp.eta(NUM_ITER, title='Status')

#A preview goes to the channel (and the timelapse) every DISPLAY_INTERVAL//4
#iterations; every DISPLAY_INTERVAL iterations it is also printed inline
DISPLAY_INTERVAL = 200

print('Every %i iterations we display an image in the form [image_a, image_b], where'%DISPLAY_INTERVAL)
print('    image_a = (the image as generated)')
print('    image_b = (image_a, but with each square rotated a quarter turn)')
print()
print('Interrupt the kernel at any time to return the currently displayed image')
print('You can run this cell again to resume training later on')
print()
print('Please expect this to take quite a while to get good images (especially on the slower Colab GPU\'s)! The longer you wait the better they\'ll be')

try:
    for iter_num in range(NUM_ITER):
        display_eta(iter_num) #Print the remaining time

        #Outputs of this iteration's train_step calls (collected but not read afterwards)
        preds=[]
        #batch_size=1: only one (label, view, weight, image prompt) tuple is trained per iteration
        #NOTE(review): presumably rp.random_batch picks it at random - confirm against rp
        for label,learnable_image,weight,img_prompt in rp.random_batch(list(zip(labels,learnable_images,weights,img_prompts)), batch_size=1):
            #NOTE(review): train_step presumably accumulates the gradients that
            #optim.step() applies below - confirm in source/stable_diffusion.py
            pred=s.train_step(
                label.embedding,
                learnable_image()[None], #[None] adds a leading batch axis

                #PRESETS (uncomment one):
                noise_coef=.1*weight,guidance_scale=100,#10
                # noise_coef=0,image_coef=-.01,guidance_scale=50,
                # noise_coef=0,image_coef=-.005,guidance_scale=50,
                # noise_coef=.1,image_coef=-.010,guidance_scale=50,
                # noise_coef=.1,image_coef=-.005,guidance_scale=50,
                # noise_coef=.1*weight, image_coef=-.005*weight, guidance_scale=50,
                image_prompt=img_prompt,
                img_prompts_coef=2.0*weight,
            )
            preds+=list(pred)

        #Rendering previews must not contribute to the gradients
        with torch.no_grad():
            if iter_num and not iter_num%(DISPLAY_INTERVAL*50):
                #Wipe the slate every 50 displays so they don't get cut off
                #(with DISPLAY_INTERVAL=200 that is every 10000 iterations, so it
                #only triggers when NUM_ITER is raised above that)
                from IPython.display import clear_output
                clear_output()

            if not iter_num%(DISPLAY_INTERVAL//4):
                im = get_display_image()
                ims.append(im)
                television.update(im)
                
                if not iter_num%DISPLAY_INTERVAL:
                    rp.display_image(im)

        optim.step()
        optim.zero_grad()
except KeyboardInterrupt:
    #Interrupting the kernel is the supported way to stop early: keep and show the latest state
    print()
    print('Interrupted early at iteration %i'%iter_num)
    im = get_display_image()
    ims.append(im)
    rp.display_image(im)

In [None]:
# Show the final result in both arrangements: as generated, then with the
# tiles rotated
for caption, render in (
    ('Unsolved Image:', learnable_image_a),
    ('Solved Image:', learnable_image_b),
):
    print(caption)
    rp.display_image(rp.as_numpy_image(render()))

In [None]:
folder = ''  # Overwritten by save_run with the directory the frames went to


def save_run(name):
    """Write every frame in ``ims`` to a new run directory named after ``name``.

    The directory is ``untracked/parker_puzzle_runs/<name>``; when that path
    already exists, the current Unix time is appended so the earlier run is
    kept. The chosen path is stored in the global ``folder``.
    """
    global folder
    folder = "untracked/parker_puzzle_runs/%s"%name
    if rp.path_exists(folder):
        folder = '%s_%i'%(folder, time.time())
    rp.make_directory(folder)

    frame_names = ['ims_%04i.png'%index for index, _ in enumerate(ims)]
    with rp.SetCurrentDirectoryTemporarily(folder):
        rp.save_images(ims, frame_names, show_progress=True)

    print()
    print('Saved timelapse to folder:', repr(folder))


save_run(prompt_a + '-' + prompt_b)  # You can give it a good custom name if you want!

In [None]:
# generate the twisting squares gif from the image
def generate_twisting_squares_gif(input_path:str, output_path:str, num_frames:int = 31, duration:float = 0.1):
    """Animate the twisting-squares reveal of a saved preview image as a gif.

    The left half of the image at ``input_path`` is cut into a
    NUMBER_OF_SQUARES x NUMBER_OF_SQUARES grid. Across ``num_frames`` frames
    every tile is turned from 0 to 90 degrees with ``Image.rotate``, with
    neighbouring tiles turning in opposite directions (checker pattern). The
    first and the last frame are repeated so the animation rests at both ends.

    Args:
        input_path: Image file to read; only its left half is used.
        output_path: Where the gif is written.
        num_frames: Number of distinct rotation steps (the first is 0 degrees,
            the last is 90 degrees).
        duration: Passed unchanged to ``imageio.mimsave`` as ``duration``.
    """
    from PIL import Image
    import imageio

    # The context manager closes the input file once the pixels are converted
    with Image.open(input_path) as source:
        input_image = source.convert('RGB')
    width, height = input_image.size
    input_image = input_image.crop((0, 0, width // 2, height))
    width, height = input_image.size

    # Divide the image into NUMBER_OF_SQUARES * NUMBER_OF_SQUARES parts,
    # remembering each part's grid position
    part_width = width // NUMBER_OF_SQUARES
    part_height = height // NUMBER_OF_SQUARES
    tiles = [
        (
            row,
            col,
            input_image.crop((col * part_width, row * part_height, (col + 1) * part_width, (row + 1) * part_height)),
        )
        for row in range(NUMBER_OF_SQUARES)
        for col in range(NUMBER_OF_SQUARES)
    ]

    # make gif
    last_step = num_frames - 1
    frames = []
    for i in range(num_frames):
        nowframe = Image.new('RGB', (width, height))
        # Sweep evenly from 0 to exactly 90 degrees whatever num_frames is
        # (a single frame stays at 0 degrees)
        angle = 90 * i / last_step if last_step else 0
        for row, col, tile in tiles:
            # Direction depends only on the grid position, so it is the same
            # in every frame, for odd and even grid sizes alike
            direction = 1 if (row + col) % 2 == 0 else -1
            nowframe.paste(tile.rotate(direction * angle), (col * part_width, row * part_height))
        # Hold the first and the last frame: they appear four times in total
        frames.extend([nowframe] * (4 if i in (0, last_step) else 1))

    imageio.mimsave(output_path, frames, duration=duration)
    print(f'Twisting Squares images from {input_path} saved to {output_path} as gif')

# If the path is wrong, please change the path by yourself
# The newest timelapse frame (the last entry of ims) is the gif's source image;
# both file names are resolved inside the run folder
final_index = len(ims) - 1
read_path = 'ims_%04i.png'%final_index
save_path = 'twisting_squares%04i.gif'%final_index
with rp.SetCurrentDirectoryTemporarily(folder):
    generate_twisting_squares_gif(read_path, save_path)