# **Tofu Diffusion SD**

**Tofu Diffusion is a fork of the Deforum notebook, made for single-image generation.**\
\
[Stable Diffusion](https://github.com/CompVis/stable-diffusion) by Robin Rombach, Andreas Blattmann, Dominik Lorenz, Patrick Esser, Björn Ommer and the [Stability.ai](https://stability.ai/) Team. [K Diffusion](https://github.com/crowsonkb/k-diffusion) by [Katherine Crowson](https://twitter.com/RiversHaveWings). Notebook by [deforum](https://discord.gg/upmXXsrwZc)

In [None]:
#@markdown **NVIDIA GPU**
import subprocess, os, sys

# Ask nvidia-smi for the name, total memory and free memory of the attached GPU(s).
try:
    sub_p_res = subprocess.run(
        ['nvidia-smi', '--query-gpu=name,memory.total,memory.free', '--format=csv,noheader'],
        stdout=subprocess.PIPE,
    ).stdout.decode('utf-8')
except FileNotFoundError:
    # The nvidia-smi executable is missing (e.g. a CPU-only runtime); report that instead of
    # aborting the cell with a traceback.
    sub_p_res = "No NVIDIA GPU detected (nvidia-smi not found)\n"

# Strip the trailing newline; fall back to a hint when nvidia-smi printed nothing on stdout.
print(sub_p_res.rstrip() or "nvidia-smi returned no GPU information")

Tesla T4, 15360 MiB, 15101 MiB


# Setup

In [None]:
#@markdown **Environment Setup**
import subprocess, time, gc, os, sys

# Create the folder that the CivitAI download cell writes to and lists models from.
!mkdir -p /content/civitai_models

def _run_commands(commands, echo_output):
    """Run each command in order and report any that exit with a non-zero status.

    Args:
        commands: list of argument lists, each handed to ``subprocess.run``.
        echo_output: when True, print the combined stdout/stderr of every command.
    """
    for command in commands:
        completed = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
        output = completed.stdout.decode('utf-8', errors='replace')
        if echo_output:
            print(output)
        if completed.returncode != 0:
            # A failed install would otherwise only surface later as an ImportError.
            print(f"..warning: {' '.join(command)} exited with code {completed.returncode}")
            if not echo_output:
                print(output[-2000:])


def setup_environment():
    """Prepare the Python environment for the notebook.

    On Google Colab this installs the pip dependencies, clones the ``dev`` branch of
    deforum-stable-diffusion (skipped when the folder already exists), blanks out
    ``src/k_diffusion/__init__.py`` inside the clone, adds the clone to ``sys.path`` and
    optionally installs triton and xformers. Anywhere else it only adds ``src`` to
    ``sys.path``.

    Returns:
        None. Progress and the elapsed time are printed.
    """
    start_time = time.time()
    print_subprocess = False
    use_xformers_for_colab = True
    repo_dir = 'deforum-stable-diffusion'
    try:
        ipy = get_ipython()
    except NameError:
        # get_ipython only exists inside an IPython/Colab kernel.
        ipy = 'could not get_ipython'
    if 'google.colab' in str(ipy):
        print("..setting up environment")

        all_process = [
            ['pip', 'install', 'omegaconf', 'einops==0.4.1', 'pytorch-lightning==1.7.7', 'torchmetrics', 'transformers', 'safetensors', 'kornia'],
        ]
        # Cloning into an existing folder fails, so skip it when the cell is re-run.
        if not os.path.isdir(repo_dir):
            all_process.append(['git', 'clone', '-b', 'dev', 'https://github.com/deforum-art/deforum-stable-diffusion'])
        all_process.append(
            ['pip', 'install', 'accelerate', 'ftfy', 'jsonmerge', 'matplotlib', 'resize-right', 'timm', 'torchdiffeq','scikit-learn','torchsde','open-clip-torch','numpngw'],
        )
        _run_commands(all_process, print_subprocess)

        # Overwrite the package's __init__ with an empty file.
        with open('deforum-stable-diffusion/src/k_diffusion/__init__.py', 'w') as f:
            f.write('')
        sys.path.extend([
            'deforum-stable-diffusion/',
            'deforum-stable-diffusion/src',
        ])
        if use_xformers_for_colab:
            print("..installing triton and xformers")
            _run_commands([['pip', 'install', 'triton', 'xformers']], print_subprocess)
    else:
        sys.path.extend([
            'src'
        ])
    end_time = time.time()
    print(f"..environment set up in {end_time-start_time:.0f} seconds")

# Must run before the imports below: on Colab it installs the dependencies and adds the
# cloned repository to sys.path (presumably where the `helpers` package lives).
setup_environment()

import torch
import random
import clip
from IPython import display
from types import SimpleNamespace
from helpers.save_images import get_output_folder
from helpers.settings import load_args
from helpers.render import render_animation, render_input_video, render_image_batch, render_interpolation
from helpers.model_load import make_linear_decode, load_model, get_model_output_paths
from helpers.aesthetics import load_aesthetics_model

..setting up environment
..installing triton and xformers
..environment set up in 41 seconds


In [None]:
import os
import subprocess
from IPython.display import display, HTML, clear_output

#@markdown **CivitAI Model Setup**

civitai_models_list = []
civitai_model_path = "" #@param {type:"string"}
civitai_model_name = "" #@param {type:"string"}
civitai_model_type = ".safetensors" #@param [".safetensors",".ckpt"]

#@markdown
#@markdown *Check your model type on CivitAI*\
#@markdown --- Pickletensor -> .ckpt \
#@markdown --- Safetensor -> .safetensors

directory = "/content/civitai_models"
# Create the folder here as well so this cell does not depend on an earlier cell having run.
os.makedirs(directory, exist_ok=True)

output_list = []
download_attempted = False

if not civitai_model_path:
    display(HTML('<font color="red">Link missing</font>'))
elif not civitai_model_name:
    display(HTML('<font color="red">Name missing</font>'))
else:
    download_attempted = True
    target_file = os.path.join(directory, f"{civitai_model_name}{civitai_model_type}")
    try:
        # Pass the URL as a single argument (no shell) so characters such as '&' or '?'
        # in the download link cannot be interpreted by the shell.
        wget_result = subprocess.run(
            ['wget', '-O', target_file, civitai_model_path],
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
        )
        output_list = wget_result.stdout.decode('utf-8', errors='replace').splitlines()
        download_ok = wget_result.returncode == 0
        if not download_ok and os.path.exists(target_file):
            # wget -O leaves an empty/partial file behind on failure; drop it so it is
            # not listed as an available model below.
            os.remove(target_file)
    except OSError as err:
        # wget itself could not be started.
        output_list = [str(err)]
        download_ok = False
    output_list.append("Download successful" if download_ok else "Download failed")

# Every checkpoint currently present in the download folder.
files_in_directory = os.listdir(directory)
civitai_models_list = [file for file in files_in_directory if file.endswith((".ckpt", ".safetensors"))]

# Only clear the cell output after a download attempt, so the "Link missing" /
# "Name missing" hints stay visible.
if download_attempted:
    clear_output()
    if output_list[-1] == "Download successful":
        display(HTML('<b><font color="green">CivitAI model downloaded successfully!</font></b>'))
        print(f"{civitai_model_name}{civitai_model_type}")
    else:
        print("Download failed")



In [None]:
#@markdown **Path Setup**

def Root() -> dict:
    """Collect the path and model settings defined in this Colab form.

    Returns:
        dict: every local variable assigned below, keyed by its variable name
        (the function returns ``locals()``).
    """
    models_path = "models" 
    configs_path = "configs" 
    output_path = "outputs" 
    mount_google_drive = True #@param {type:"boolean"}
    models_path_gdrive = "/content/drive/MyDrive/AI/models" #@param {type:"string"}
    output_path_gdrive = "/content/drive/MyDrive/AI/StableDiffusion" #@param {type:"string"}

    #@markdown **Model Setup**
    map_location = "cuda" 
    model_config = "v1-inference.yaml" 
    model_checkpoint =  "custom" #@param ["custom","v2-1_768-ema-pruned.ckpt","v2-1_512-ema-pruned.ckpt","768-v-ema.ckpt","512-base-ema.ckpt","Protogen_V2.2.ckpt","v1-5-pruned.ckpt","v1-5-pruned-emaonly.ckpt","sd-v1-4-full-ema.ckpt","sd-v1-4.ckpt","sd-v1-3-full-ema.ckpt","sd-v1-3.ckpt","sd-v1-2-full-ema.ckpt","sd-v1-2.ckpt","sd-v1-1-full-ema.ckpt","sd-v1-1.ckpt", "robo-diffusion-v1.ckpt","wd-v1-3-float16.ckpt"]
    custom_config_path = "" 
    # NOTE(review): presumably required when model_checkpoint is "custom" — confirm against load_model.
    custom_checkpoint_path = "" #@param {type:"string"}
    # locals() turns each variable above into a settings key, so renaming a local renames the key.
    return locals()

# Wrap the settings dictionary so its entries can be read as attributes (root.models_path, ...).
root = SimpleNamespace(**Root())

# Replace the configured paths with the ones returned by get_model_output_paths.
root.models_path, root.output_path = get_model_output_paths(root)

# Load the checkpoint; the helper returns the model together with the device to use.
root.model, root.device = load_model(
    root,
    load_on_run_all=True,
    check_sha256=True,
    map_location=root.map_location,
)

# Settings

In [None]:
def DeforumAnimArgs() -> dict:
    """Return the animation settings as a dictionary.

    ``animation_mode`` is fixed to ``'None'`` here; the run cell of this notebook then
    forces ``max_frames`` to 1 and dispatches to ``render_image_batch``, so the remaining
    values act as defaults expected by the render helpers.

    Returns:
        dict: every local variable assigned below, keyed by its variable name
        (the function returns ``locals()``).
    """

    #@markdown ####**Load additional settings**
    #@markdown *Don't forget to run this cell!*
    animation_mode = 'None' 
    max_frames = 1000 
    border = 'replicate' 

    ##@markdown ####**Motion Parameters:**
    # Schedules are strings of the form "frame:(expression)".
    angle = "0:(0)"
    zoom = "0:(1.04)"
    translation_x = "0:(10*sin(2*3.14*t/10))"
    translation_y = "0:(0)"
    translation_z = "0:(10)"
    rotation_3d_x = "0:(0)"
    rotation_3d_y = "0:(0)"
    rotation_3d_z = "0:(0)"
    flip_2d_perspective = False 
    perspective_flip_theta = "0:(0)"
    perspective_flip_phi = "0:(t%15)"
    perspective_flip_gamma = "0:(0)"
    perspective_flip_fv = "0:(53)"
    noise_schedule = "0: (0.02)"
    strength_schedule = "0: (0.65)"
    contrast_schedule = "0: (1.0)"
    hybrid_comp_alpha_schedule = "0:(1)" 
    hybrid_comp_mask_blend_alpha_schedule = "0:(0.5)"
    hybrid_comp_mask_contrast_schedule = "0:(1)"
    hybrid_comp_mask_auto_contrast_cutoff_high_schedule =  "0:(100)"
    hybrid_comp_mask_auto_contrast_cutoff_low_schedule =  "0:(0)"

    ##@markdown ####**Sampler Scheduling:**
    enable_schedule_samplers = False 
    sampler_schedule = "0:('euler'),10:('dpm2'),20:('dpm2_ancestral'),30:('heun'),40:('euler'),50:('euler_ancestral'),60:('dpm_fast'),70:('dpm_adaptive'),80:('dpmpp_2s_a'),90:('dpmpp_2m')"

    ##@markdown ####**Unsharp mask (anti-blur) Parameters:**
    kernel_schedule = "0: (5)"
    sigma_schedule = "0: (1.0)"
    amount_schedule = "0: (0.2)"
    threshold_schedule = "0: (0.0)"

    ##@markdown ####**Coherence:**
    color_coherence = 'Match Frame 0 LAB'
    color_coherence_video_every_N_frames = 1 
    color_force_grayscale = False 
    diffusion_cadence = '1' 

    ##@markdown ####**3D Depth Warping:**
    use_depth_warping = True 
    midas_weight = 0.3
    near_plane = 200
    far_plane = 10000
    fov = 40
    padding_mode = 'border'
    sampling_mode = 'bicubic'
    save_depth_maps = False 

    ##@markdown ####**Video Input:**
    video_init_path ='/content/video_in.mp4'
    extract_nth_frame = 1
    overwrite_extracted_frames = True 
    use_mask_video = False 
    video_mask_path ='/content/video_in.mp4'

    ##@markdown ####**Hybrid Video for 2D/3D Animation Mode:**
    hybrid_generate_inputframes = False 
    hybrid_use_first_frame_as_init_image = True 
    hybrid_motion = "None" 
    hybrid_motion_use_prev_img = False 
    hybrid_flow_method = "DIS Medium" 
    hybrid_composite = False 
    hybrid_comp_mask_type = "None" 
    hybrid_comp_mask_inverse = False 
    hybrid_comp_mask_equalize = "None"
    hybrid_comp_mask_auto_contrast = False 
    hybrid_comp_save_extra_frames = False 
    hybrid_use_video_as_mse_image = False

    ##@markdown ####**Interpolation:**
    interpolate_key_frames = False 
    interpolate_x_frames = 4 
    
    ##@markdown ####**Resume Animation:**
    resume_from_timestring = False
    resume_timestring = "20220829210106" 

    # locals() turns each variable above into a settings key, so renaming a local renames the key.
    return locals()

In [None]:
# positive prompts
# Passed to the render_* helpers as the conditioning prompts.
cond_prompts = {
    1: "(best resolution, 4k, best detail), perfect face, latina, cute round face, thick lips, 30 years, punk makeup, green eyes, black bikini, perfect body, Cinematic lighting",
    2: "photograph of a model, delicate features, beautiful face, dreadlocked hair, long bangs, long ponytail, brown eyes",
}

# negative prompts
# Passed to the render_* helpers as the unconditional prompts; keys mirror cond_prompts.
uncond_prompts = {
    1: "(lowres, bad quality), deformed body, deformed face, ugly, extra fingers, extra limbs, bad hands",
    2: "(lowres, bad quality), deformed body, deformed face, ugly, extra fingers, extra limbs, bad hands",
}

In [None]:
##@markdown **Load Settings**
# When True, load_args() is called further down with both settings dictionaries and the two
# file settings below (presumably to overwrite the form values — confirm against helpers.settings).
override_settings_with_file = False 
settings_file = "custom" 
custom_settings_file = "/content/drive/MyDrive/Settings.txt"

def DeforumArgs() -> dict:
    """Return the image generation settings defined in this Colab form.

    Reads the module-level ``root`` (for ``root.output_path``), so the Path Setup cell
    must have been run first.

    Returns:
        dict: every local variable assigned below, keyed by its variable name
        (the function returns ``locals()``).
    """
    #@markdown **Image Settings**
    W = 512 #@param [512, 640, 768, 896, 1024]
    H = 768 #@param [512, 640, 768, 896, 1024]
    W, H = map(lambda x: x - x % 64, (W, H))  # round each dimension down to a multiple of 64
    bit_depth_output = 8 #@param [8, 16, 32] {type:"raw"}

    #@markdown **Sampling Settings**
    # A seed of -1 is replaced by a random seed in the run code after this function.
    seed = -1 #@param
    sampler = 'ddim' #@param ["klms","dpm2","dpm2_ancestral","heun","euler","euler_ancestral","plms", "ddim", "dpm_fast", "dpm_adaptive", "dpmpp_2s_a", "dpmpp_2m"]
    steps = 30 #@param
    scale = 15 #@param
    ddim_eta = 0.0
    dynamic_threshold = None
    static_threshold = None   

    ##@markdown **Save & Display Settings**
    save_samples = True 
    save_settings = True 
    display_samples = True 
    save_sample_per_step = False 
    show_sample_per_step = False 

    #@markdown **Batch Settings**
    n_batch = 1 #@param
    n_samples = 1 #@param
    batch_name = "StableArt" #@param {type:"string"}
    filename_format = "{timestring}_{index}_{prompt}.png" 
    seed_behavior = "iter" #@param ["iter","fixed","random","ladder","alternate"]
    seed_iter_N = 1 
    make_grid = False #@param {type:"boolean"}
    grid_rows = 2 #@param 
    # Depends on the global `root` created by the Path Setup cell.
    outdir = get_output_folder(root.output_path, batch_name)

    #@markdown **Initial Image Settings**
    use_init = False #@param {type:"boolean"}
    strength = 0.65 #@param {type:"number"}
    strength_0_no_init = True # Set the strength to 0 automatically when no init image is used
    init_image = "https://cdn.pixabay.com/photo/2022/07/30/13/10/green-longhorn-beetle-7353749_1280.jpg" #@param {type:"string"}
    add_init_noise = False #@param {type:"boolean"}
    init_noise = 0.01 #@param
    # Whiter areas of the mask are areas that change more
    use_mask = False
    use_alpha_as_mask = False # use the alpha channel of the init image as the mask
    mask_file = "https://www.filterforge.com/wiki/images/archive/b/b7/20080927223728%21Polygonal_gradient_thumb.jpg" 
    invert_mask = False 
    # Adjust mask image, 1.0 is no adjustment. Should be positive numbers.
    mask_brightness_adjust = 1.0  
    mask_contrast_adjust = 1.0  
    # Overlay the masked image at the end of the generation so it does not get degraded by encoding and decoding
    overlay_mask = True  
    # Blur edges of final overlay mask, if used. Minimum = 0 (no blur)
    mask_overlay_blur = 5 

    ##@markdown **Exposure/Contrast Conditional Settings**
    mean_scale = 0 
    var_scale = 0 
    exposure_scale = 0 
    exposure_target = 0.5 

    ##@markdown **Color Match Conditional Settings**
    colormatch_scale = 0 
    colormatch_image = "https://www.saasdesign.io/wp-content/uploads/2021/02/palette-3-min-980x588.png"
    colormatch_n_colors = 4 
    ignore_sat_weight = 0 

    ##@markdown **CLIP\Aesthetics Conditional Settings**
    # The CLIP model is only loaded by the run code when clip_scale or aesthetics_scale is above 0.
    clip_name = 'ViT-L/14' 
    clip_scale = 0 
    aesthetics_scale = 0 
    cutn = 1 
    cut_pow = 0.0001

    ##@markdown **Other Conditional Settings**
    init_mse_scale = 0 
    init_mse_image = "https://cdn.pixabay.com/photo/2022/07/30/13/10/green-longhorn-beetle-7353749_1280.jpg" 
    blue_scale = 0 
    
    ##@markdown **Conditional Gradient Settings**
    gradient_wrt = 'x0_pred' 
    gradient_add_to = 'both' 
    decode_method = 'linear' 
    grad_threshold_type = 'dynamic' 
    clamp_grad_threshold = 0.2 
    clamp_start = 0.2 
    clamp_stop = 0.01 
    grad_inject_timing = list(range(1,10)) 

    ##@markdown **Speed vs VRAM Settings**
    cond_uncond_sync = True 
    precision = 'autocast' 
    C = 4
    f = 8

    # Placeholder entries: timestring is overwritten by the run code below this function;
    # the others are presumably filled in by the render helpers — TODO confirm.
    # These locals shadow the module-level cond_prompts/uncond_prompts only inside this function.
    cond_prompt = ""
    cond_prompts = ""
    uncond_prompt = ""
    uncond_prompts = ""
    timestring = ""
    init_latent = None
    init_sample = None
    init_sample_raw = None
    mask_sample = None
    init_c = None
    seed_internal = 0

    # locals() turns each variable above into a settings key, so renaming a local renames the key.
    return locals()

# Gather the raw settings produced by the two form functions.
args_dict = DeforumArgs()
anim_args_dict = DeforumAnimArgs()

# Optionally hand both dictionaries to load_args together with the settings file choice.
if override_settings_with_file:
    load_args(
        args_dict,
        anim_args_dict,
        settings_file,
        custom_settings_file,
        verbose=False,
    )

# Attribute-style access for everything downstream.
args = SimpleNamespace(**args_dict)
anim_args = SimpleNamespace(**anim_args_dict)

# Stamp this run and keep the init strength inside [0, 1].
args.timestring = time.strftime('%Y%m%d%H%M%S')
args.strength = max(0.0, min(1.0, args.strength))

# CLIP is only needed when CLIP or aesthetics guidance is switched on.
if args.clip_scale > 0 or args.aesthetics_scale > 0:
    root.clip_model = (
        clip.load(args.clip_name, jit=False)[0]
        .eval()
        .requires_grad_(False)
        .to(root.device)
    )
    if args.aesthetics_scale > 0:
        root.aesthetics_model = load_aesthetics_model(args, root)

# A seed of -1 means "pick one at random".
if args.seed == -1:
    args.seed = random.randint(0, 2**32 - 1)

# Without an init image the configured URL is dropped.
if not args.use_init:
    args.init_image = None

# PLMS cannot be combined with init images or animation, so fall back to KLMS.
if args.sampler == 'plms' and (args.use_init or anim_args.animation_mode != 'None'):
    print("Init images aren't supported with PLMS yet, switching to KLMS")
    args.sampler = 'klms'

# ddim_eta is reset for every sampler other than DDIM.
if args.sampler != 'ddim':
    args.ddim_eta = 0

# Single-image mode renders exactly one frame; video input always uses an init image.
if anim_args.animation_mode == 'None':
    anim_args.max_frames = 1
elif anim_args.animation_mode == 'Video Input':
    args.use_init = True

# Release unused host and GPU memory before rendering.
gc.collect()
torch.cuda.empty_cache()

# Hand over to the renderer that matches the animation mode.
if anim_args.animation_mode in ('2D', '3D'):
    render_animation(root, anim_args, args, cond_prompts, uncond_prompts)
elif anim_args.animation_mode == 'Video Input':
    render_input_video(root, anim_args, args, cond_prompts, uncond_prompts)
elif anim_args.animation_mode == 'Interpolation':
    render_interpolation(root, anim_args, args, cond_prompts, uncond_prompts)
else:
    render_image_batch(root, args, cond_prompts, uncond_prompts)