<a href="https://colab.research.google.com/github/olaviinha/NeuralTextToImage/blob/main/Stable_Diffusion.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# <font face="Trebuchet MS" size="6">Stable Diffusion <font color="#999" size="4">&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;</font><font color="#999" size="4">Neural text-to-image</font><font color="#999" size="4">&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;</font><a href="https://github.com/olaviinha/NeuralTextToImage" target="_blank"><font color="#999" size="4">GitHub</font></a></font>

Generate images from a text prompt using [Stable Diffusion](https://github.com/huggingface/diffusers). Stable Diffusion is a text-to-image latent diffusion model created by researchers and engineers from [CompVis](https://github.com/CompVis), [Stability AI](https://stability.ai) and [LAION](https://laion.ai).

**Requirements** to run this notebook:
- [Hugging Face](https://huggingface.co/) user account (register for free).
- Agreement to the terms of the model card you intend to use: [v1.4](https://huggingface.co/CompVis/stable-diffusion-v1-4), [v1.5](https://huggingface.co/runwayml/stable-diffusion-v1-5).
- Hugging Face access token, which can be found (or created) [here](https://huggingface.co/settings/tokens) when you have a user account.

**Tips:**
- You may queue multiple prompts in one run by separating them with `;`.
- Enter `output_dir` relative to your Google Drive root. E.g. `ai/images` if you have a directory in your Drive called _ai_, containing a subdirectory called _images_.


In [None]:
#@title #Setup
#@markdown This cell needs to be run only once. It will mount your Google Drive and set up prerequisites.<br>
#@markdown <small>Mounting Drive will enable this notebook to save outputs directly to your Drive. Otherwise you will need to copy/download them manually from this notebook.</small>

# Consulted by the inhagcutils bootstrap step below when it decides whether to
# install the prerequisites.
force_setup = False
# Space-separated pip requirements; interpolated into the `pip install` command below.
pip_packages = 'diffusers==0.2.4 transformers scipy ftfy'
# Optional repository URL; handed to `git clone` further down when it is set.
main_repository = ''

#@markdown Copy-paste your Hugging Face access token in the field below prior to executing this cell. You can find your access token [here](https://huggingface.co/settings/tokens).
# Forwarded as `use_auth_token` when the pipeline is loaded.
access_token = "" #@param {type:"string"}
# Model identifier passed to StableDiffusionPipeline.from_pretrained().
model_id = "runwayml/stable-diffusion-v1-5" #@param ["CompVis/stable-diffusion-v1-4", "runwayml/stable-diffusion-v1-5", "hakurei/waifu-diffusion"]
# True: mount Google Drive and use it as drive_root.
# False: use the local directory /content/faux_drive/ instead.
mount_drive = True #@param {type:"boolean"}

import os
from google.colab import output
import warnings
warnings.filterwarnings('ignore')
%cd /content/

# inhagcutils
if not os.path.isfile('/content/inhagcutils.ipynb') and force_setup == False:
  !pip -q install import-ipynb {pip_packages}
  !curl -s -O https://raw.githubusercontent.com/olaviinha/inhagcutils/master/inhagcutils.ipynb
import import_ipynb
from inhagcutils import *

# Mount Drive
# Outcome: drive_root always ends with '/' and points either at the Drive
# symlink or at a local stand-in directory.
if mount_drive:
  if not os.path.isdir('/content/drive'):
    from google.colab import drive
    drive.mount('/content/drive')
  # Space-free alias for "My Drive". lexists() also detects a dangling link
  # left over from an earlier session, which isdir() would miss and which
  # would make os.symlink() raise FileExistsError.
  if not os.path.lexists('/content/mydrive'):
    os.symlink('/content/drive/My Drive', '/content/mydrive')
  # Assigned unconditionally: when the mount and the symlink already exist
  # (e.g. after a runtime restart) neither branch above runs, and drive_root
  # would otherwise stay undefined.
  drive_root = '/content/mydrive/'
  drive_root_set = True
else:
  create_dirs(['/content/faux_drive'])
  drive_root = '/content/faux_drive/'

if main_repository is not '':
  !git clone {main_repository}

import time, sys
from datetime import timedelta

!pip install "ipywidgets>=7,<8"

from PIL import Image

def image_grid(imgs, rows, cols):
  """Paste images side by side into one rows x cols sheet.

  Cells are filled left to right, top to bottom. The cell size is taken from
  the first image, so all images are expected to share that size.

  Args:
    imgs: sequence of PIL images; must contain exactly rows*cols items.
    rows: number of grid rows.
    cols: number of grid columns.

  Returns:
    A new RGB image of size (cols*w, rows*h), where (w, h) is the size of
    the first image.

  Raises:
    ValueError: if imgs is empty or its length is not rows*cols.
  """
  # Explicit check instead of `assert`, which is stripped under `python -O`.
  if not imgs:
    raise ValueError('image_grid() needs at least one image')
  if len(imgs) != rows*cols:
    raise ValueError(
      'image_grid() got '+str(len(imgs))+' images for a '
      +str(rows)+'x'+str(cols)+' grid'
    )

  w, h = imgs[0].size
  grid = Image.new('RGB', size=(cols*w, rows*h))
  for i, img in enumerate(imgs):
    # Column index is i % cols, row index is i // cols.
    grid.paste(img, box=(i%cols*w, i//cols*h))
  return grid

def whatGPU():
  """Return a short name for the GPU reported by `nvidia-smi`.

  Runs `nvidia-smi` and searches its combined stdout/stderr text for a known
  model name.

  Returns:
    'A100', 'V100', 'P100' or 'T4' - the first of these (checked in that
    order) found in the output - or '?' when none matches or `nvidia-smi`
    cannot be started.
  """
  # Local import keeps the helper self-contained; subprocess replaces the
  # IPython-only `x = !nvidia-smi` form so this is plain Python.
  import subprocess

  try:
    completed = subprocess.run(
      ['nvidia-smi'],
      stdout=subprocess.PIPE,
      stderr=subprocess.STDOUT,
      universal_newlines=True,
      errors='replace',
    )
    report = completed.stdout or ''
  except OSError:
    # Binary missing or not executable: treat it as "unknown GPU".
    report = ''

  for model in ('A100', 'V100', 'P100', 'T4'):
    if model in report:
      return model
  return '?'

def aspectRatioToPixels(aspect_ratio='1:1', max_px=704*704):
  """Turn a 'W:H' aspect ratio into a pixel width and height.

  The ideal size is the one whose area equals max_px at the requested ratio.
  Each side is then rounded to the nearest multiple of 64, so the final area
  can end up slightly above or below max_px.

  Args:
    aspect_ratio: string of two integers separated by ':', e.g. '16:9'.
    max_px: target total pixel count (width * height) before rounding.

  Returns:
    Tuple (width, height), both multiples of 64.
  """
  # Imported here so the helper does not depend on `math` being supplied by
  # a star-import elsewhere in the notebook.
  import math

  w, h = [int(x) for x in aspect_ratio.split(':')]
  nw = math.floor(math.sqrt(max_px/h*w))
  nh = math.floor(math.sqrt(max_px/w*h))
  # Add half a step (0x20 = 32), then clear the low six bits (0x3f = 63):
  # this rounds to the nearest multiple of 64.
  nw = (nw+0x20)&(~0x3f)
  nh = (nh+0x20)&(~0x3f)
  return nw, nh

import torch
from torch import autocast
from diffusers import StableDiffusionPipeline

# Fetch the selected model from its "fp16" revision with float16 weights,
# authenticating with the Hugging Face token entered above.
pipe = StableDiffusionPipeline.from_pretrained(
  model_id,
  revision="fp16",
  torch_dtype=torch.float16,
  use_auth_token=access_token,
)

# Move the pipeline onto the GPU.
pipe = pipe.to("cuda")

# Uncomment this to disable NSFW filter
# pipe.safety_checker = lambda images, **kwargs: (images, False)

# Record the detected GPU model, then replace the setup log with a short
# confirmation message.
gpu = whatGPU()
output.clear()
op(c.ok, 'Setup finished.')

In [None]:

#@markdown <br>

#@markdown #S̛̞̩͎͓ ̦̤͉͚̏ ̧̠͋͘ͅl͕̞͕̝͗̐͘.̠̰̳̫̈́̚ ̡͉̼̩̬̈́̇͒͘ȩ̨͎͛̔͆͊̏͜ͅ.͕̩̹̠̕͜ ̛̦̦̮e̢͐͊͂̀̊ͅ ̜̙̝̊͋ ̬̝̱̱͗p̮̎̽̌

#@markdown <br>

# Prompt text. Several prompts may be queued by separating them with ';'.
generate_image_of = "" #@param {type:"string"}
# Only the part before the first space (e.g. "16:9") is used to compute the size.
aspect_ratio = "1:1 - square" #@param ["1:1 - square", "4:3 - landscape", "16:9 - wide", "3:4 - portrait", "9:16 - tall"]
# 0 means "use the current Unix timestamp as the seed" for each image.
seed = 0 #@param {type:"integer"}
# Number of images generated per prompt; values below 1 are raised to 1.
batch_size = 3 #@param {type:"integer"}
# The whole prompt list is repeated this many times when greater than 1.
repeats = 1 #@param {type:"integer"}
# Path relative to drive_root; when empty, images go to dir_tmp instead.
output_dir = "" #@param {type:"string"}

# Defaults
# width/height stay None unless the advanced fields below are uncommented;
# while either is None the size is derived from aspect_ratio.
width = None 
height = None 
# Passed to the pipeline as num_inference_steps and guidance_scale.
steps = 100
guidance_scale = 7.5

# Uncommentable advanced settings. Uncomment for fields.
# Width and height are automatically used instead of aspect_ratio, if uncommented.
#
# width = 704 #@param {type:"slider", min:256, max:1152}
# height = 704 #@param {type:"slider", min:256, max:1152}
#
# steps = 100 #@param {type:"integer"}
# guidance_scale = 7.5 #@param {type:"number"}

# Max total pixels on P100 is 704x704, but will produce slightly less coherent images.
# You may increase this for whatever your GPU can handle. Use multiples of 64.
# Used as the pixel budget (max_px) when the size is derived from aspect_ratio.
max_resolution = 512*512

# Identifier for this run; it prefixes every output filename.
uniq_id = gen_id()

# Resolve the output directory: the temporary directory when output_dir is
# empty, otherwise a directory below drive_root.
if output_dir == '':
  dir_out = dir_tmp
else:
  # makedirs creates missing parents too, so nested values such as
  # 'ai/images' work; exist_ok makes re-runs harmless.
  os.makedirs(drive_root+output_dir, exist_ok=True)
  # NOTE(review): presumably fix_path() only normalises the trailing slash,
  # so this names the directory created above - confirm.
  dir_out = drive_root+fix_path(output_dir)

timer_start = time.time()

# Explicit width/height win; otherwise derive the size from the aspect ratio
# label (only its first token, e.g. "16:9") and the pixel budget.
if width is not None and height is not None:
  w = width
  h = height
else:
  w, h = aspectRatioToPixels(aspect_ratio.split(' ')[0], max_resolution)

# String comparisons use ==; `is` checks identity and is unreliable for
# strings.
if generate_image_of == 'pre':
  prompts = predefined_prompts
elif ';' in generate_image_of:
  # Split the queue and drop empty entries (e.g. from a trailing ';') so no
  # images are generated for a blank prompt.
  prompts = [x.strip() for x in generate_image_of.split(';') if x.strip()]
else:
  prompts = [generate_image_of]

if repeats > 1:
  prompts = prompts * repeats

# A cleared seed field is treated like 0.
if seed == '': seed = 0

if batch_size < 1: batch_size = 1
total = len(prompts)

# Generate batch_size images for every queued prompt, save each one as a PNG
# in dir_out and display one grid row per prompt.
op(c.okb, 'Run id: '+uniq_id)
for n, prompt in enumerate(prompts, start=1):
  img_row = []
  print()
  op(c.title, f'Generating {n}/{total} ({batch_size}x):', prompt, time=True)
  for nn in range(1, batch_size+1):
    # seed == 0 means "choose for me": use the current Unix time in seconds.
    # NOTE(review): a non-zero seed is reused unchanged for every image of
    # the batch, and two images finished within the same second share a
    # timestamp seed - confirm this is intended.
    new_seed = seed if seed != 0 else int(time.time())
    op(c.okb, f'Image {nn}/{batch_size}, seed: {new_seed}', time=True)
    with autocast("cuda"):
      images = pipe(
        prompt,
        height=h,
        width=w,
        guidance_scale=guidance_scale,
        num_inference_steps=steps,
        generator=torch.Generator("cuda").manual_seed(new_seed),
      )["sample"]
    img_row.extend(images)
    for k, image in enumerate(images, start=1):
      # Filenames are unchanged for the usual single-image result; an index
      # is added only if the pipeline returns several images, which would
      # otherwise overwrite each other.
      part = '' if len(images) == 1 else f'.{k}'
      file_out = f'{uniq_id}_{n}-{nn}{part}_{slug(prompt)[:50]}_{new_seed}.png'
      image.save(dir_out+file_out)
  # Size the row by what was actually produced, so a result with more than
  # one image per call cannot break the grid.
  grid = image_grid(img_row, rows=1, cols=len(img_row))
  display(grid)

op(c.ok, 'Images saved in', dir_out.replace(drive_root, ''), time=True)

timer_end = time.time()

elapsed = timedelta(seconds=timer_end-timer_start)
op(c.okb, 'Elapsed '+str(elapsed))
op(c.ok, 'FIN.', time=True)
