<a href="https://colab.research.google.com/github/centuryglass/glid-3-xl-expanded-inpainting/blob/master/colab/GLID_3_XL_testing.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# GLID-3-XL Inpainting/Image Generation

Run all steps in order without changes to test inpainting.  Update the variables in the first two steps to alter inpainting/image generation to fit your needs. Not all options are fully tested, but basic inpainting and image generation should work.


---

## Define image generation variables:
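After you've run every step in this notebook once, you can change these values, re-run this cell, and then re-run the image generation step to get new results with the updated settings. (`steps` is the exception: it is passed to the "load all models" step, so re-run that step as well after changing it.)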

In [None]:
# Source image that will be inpainted:
edit = 'http://images.wikia.com/dukenukem/images/c/ca/256x256.jpg'

# Mask image marking the regions of the source image that should be edited:
mask = 'https://i.imgur.com/nSLxIv5.png'

# Text the inpainted regions should match:
prompt = 'galaxy brain'

# Text the inpainted regions should NOT match:
negative = 'realistic anatomy'

# Each run produces batch_size * num_batches inpainting sample images:
batch_size = 3
num_batches = 3

# init_image does not work with inpainting. Keep it as None unless you are
# updating the notebook to use the standard image generation model:
init_image = None

# Output image dimensions.
# Anything other than 256 x 256 is unlikely to work well, especially for
# inpainting:
width, height = 256, 256

# Diffusion step count (None falls back to the default of 27):
steps = None

# Larger guidance_scale values follow the prompt more closely, but are less
# likely to produce interesting variations:
guidance_scale = 5.0


# Define model init settings:
If you change any of these values, you will need to re-run the 
"load all models" step (and possibly the "download required models" step)
to apply changes.

In [None]:


# Checkpoint file of the primary model:
model_path = 'inpaint.pt'
# 'finetune.pt' and 'diffusion.pt' are valid choices too. They perform regular
# image generation rather than inpainting, so 'edit' and 'mask' are cleared
# whenever one of them is selected.
if model_path != 'inpaint.pt':
    edit = mask = None

# True: copy the models from your Google Drive. False: download them.
use_google_drive = False

# CLIP model variant. Choices: RN50, RN101, RN50x4, RN50x16, RN50x64,
# ViT-B/32, ViT-B/16, ViT-L/14. Most of these are untested.
clip_model_name = 'ViT-L/14'

# Enabling clip_guidance trades speed for accuracy.
# This probably requires Colab Pro.
clip_guidance = False
clip_guidance_scale = 150

# These subtly alter the image generation process in ways that have not been
# characterised yet. Leaving them untouched should still give good results:
cutn = 16
ddim = False
ddpm = False  # Not working currently, leave this as False for now



---


# Initial loading steps:
Unless you change model settings or restart the session, you should only need to run these steps once.

In [None]:
# install dependencies:
!pip install ipywidgets omegaconf>=2.0.0 pytorch-lightning>=1.0.8 torch-fidelity einops ftfy regex tqdm transformers
!pip install git+https://github.com/openai/CLIP.git
!git clone https://github.com/CompVis/taming-transformers.git
!git clone https://github.com/CompVis/latent-diffusion.git
!pip install -e taming-transformers
!pip install -e latent-diffusion
import sys
sys.path.append('/content/taming-transformers')
sys.path.append('/content/latent-diffusion')
!git clone https://github.com/centuryglass/glid-3-xl-expanded-inpainting
%cd glid-3-xl-expanded-inpainting
!git fetch origin
!git checkout origin/colab-refactor
!pip install -e .


In [None]:
# download required models:
if use_google_drive:
    from google.colab import drive
    drive.mount('/content/gdrive')
    !cp /content/gdrive/MyDrive/bert.pt .
    !cp /content/gdrive/MyDrive/kl-f8.pt .
    !cp '/content/gdrive/MyDrive/{model_path}' .
else:
    !wget https://dall-3.com/models/glid-3-xl/bert.pt
    !wget https://dall-3.com/models/glid-3-xl/kl-f8.pt
    !wget 'https://dall-3.com/models/glid-3-xl/{model_path}'

In [None]:
# load all models:
import gc
import torch
from startup.load_models import loadModels

# Run a garbage-collection pass before the models are loaded.
gc.collect()
device = torch.device('cuda:0')

# Keyword options forwarded to loadModels, taken from the settings cells.
_load_options = dict(
    model_path=model_path,
    clip_model_name=clip_model_name,
    steps=steps,
    clip_guidance=clip_guidance,
    ddpm=ddpm,
    ddim=ddim,
)
(model_params, model, diffusion, ldm,
 bert, clip_model, clip_preprocess, normalize) = loadModels(device, **_load_options)

# Image Generation:
You can re-run this step as many times as you want to continue producing new images. Each run will overwrite the images from the previous run, so make sure to download any that you want to keep first.

In [None]:
# prepare sample generation function, generate images:
from startup.create_sample_function import createSampleFunction

# 'edit' and 'mask' only apply to the inpainting model. The model settings
# cell clears them for other models, but re-running the image settings cell
# restores the URLs, so the check is repeated here where the values are used.
_is_inpaint_model = model_path == 'inpaint.pt'
sample_fn = createSampleFunction(
        device,
        model,
        model_params,
        bert,
        clip_model,
        ldm,
        diffusion,
        edit=edit if _is_inpaint_model else None,
        mask=mask if _is_inpaint_model else None,
        prompt=prompt,
        negative=negative,
        guidance_scale=guidance_scale,
        batch_size=batch_size,
        width=width,
        height=height,
        cutn=cutn,
        edit_width=256,
        edit_height=256,
        edit_x=0,
        edit_y=0,
        clip_guidance=clip_guidance,
        clip_guidance_scale=clip_guidance_scale,
        skip_timesteps=0,
        ddpm=ddpm,
        ddim=ddim)

from PIL import Image
import os
from torchvision.transforms import functional as TF
import numpy as np
from startup.utils import *
from startup.generate_samples import generateSamples
!rm -rf output output_npy
!mkdir output output_npy
def save_sample(i, sample, clip_score=False):
    """Decode a batch of latents and write each one to disk.

    For every latent in the batch, the scaled latent is saved as
    'output_npy/<index>.npy' and the decoded image as 'output/<index>.png',
    where <index> is i * batch_size + position-in-batch, zero-padded to five
    digits.

    Args:
        i: Index of the current batch, used to number the output files.
        sample: Mapping whose 'pred_xstart' entry holds the latents for the
            batch; only the first batch_size entries are used.
        clip_score: When True, both files are renamed so that the CLIP cosine
            similarity between the decoded image and `prompt` (three
            decimals) is prefixed to the file name.
    """
    text_emb_norm = None
    if clip_score:
        # Local import: CLIP is only needed when scoring is requested.
        import clip
        # The prompt embedding is the same for every image, so it is computed
        # once per call. (The original read an unassigned local named `text`
        # here, which raised UnboundLocalError whenever clip_score was True.)
        tokens = clip.tokenize([prompt], truncate=True).to(device)
        text_emb = clip_model.encode_text(tokens)[0]
        text_emb_norm = text_emb / text_emb.norm(dim=-1, keepdim=True)

    for k, image in enumerate(sample['pred_xstart'][:batch_size]):
        index = i * batch_size + k

        # Divide out of place so the tensor stored in `sample` is left
        # untouched if the same sample is passed to this function again.
        # NOTE(review): 0.18215 is presumably the latent scale factor that
        # ldm.decode expects to have been removed -- confirm.
        image = image / 0.18215
        out = ldm.decode(image.unsqueeze(0))

        npy_filename = f'output_npy/{index:05}.npy'
        with open(npy_filename, 'wb') as outfile:
            np.save(outfile, image.detach().cpu().numpy())

        # Apply (x + 1) / 2 and clamp to [0, 1] before converting to PIL.
        out = TF.to_pil_image(out.squeeze(0).add(1).div(2).clamp(0, 1))

        filename = f'output/{index:05}.png'
        out.save(filename)

        if clip_score:
            image_emb = clip_model.encode_image(
                clip_preprocess(out).unsqueeze(0).to(device))
            image_emb_norm = image_emb / image_emb.norm(dim=-1, keepdim=True)
            similarity = torch.nn.functional.cosine_similarity(
                image_emb_norm, text_emb_norm, dim=-1)
            score = similarity.item()

            # Prefix both files with the score.
            os.rename(filename, f'output/{score:0.3f}_{index:05}.png')
            os.rename(npy_filename, f'output_npy/{score:0.3f}_{index:05}.npy')
# Run a garbage-collection pass before sampling starts.
gc.collect()
generateSamples(ldm, diffusion, sample_fn, save_sample, batch_size, num_batches)

# View results:
import cv2
from google.colab.patches import cv2_imshow
# os.listdir returns entries in arbitrary order; sorting shows the images in
# a stable, file-name order on every run.
for sample_name in sorted(os.listdir('output')):
    img = cv2.imread(f'output/{sample_name}', cv2.IMREAD_UNCHANGED)
    cv2_imshow(img)

In [None]:
# Optional: run this to download all results as a zip file.
!zip -r results.zip output
from google.colab import files
files.download("results.zip")