```
   ____                 _   _           _       _     _                 
  / ___|   __ _   ___  | | (_)   __ _  | |__   | |_  (_)  _ __     __ _ 
 | |  _   / _` | / __| | | | |  / _` | | '_ \  | __| | | | '_ \   / _` |
 | |_| | | (_| | \__ \ | | | | | (_| | | | | | | |_  | | | | | | | (_| |
  \____|  \__,_| |___/ |_| |_|  \__, | |_| |_|  \__| |_| |_| |_|  \__, |
                                |___/                             |___/ 

```

## Google Drive

In [None]:
from google.colab import drive
# Mount Google Drive at /content/gdrive.
drive.mount('/content/gdrive')
# Modify this line if you want to change storage location, $gdrive is the root variable
# (later cells reference it as $gdrive on their %cd lines).
gdrive = '/content/gdrive/MyDrive'

## Tool Installs and Updates

Install some tools from a few git projects.

In [None]:
import os
%pip install gallery-dl

# checking if the directory exists
%cd $gdrive
if not os.path.exists("sd"):
  os.makedirs("$gdrive/sd")

%cd $gdrive/sd
if not os.path.exists("tools"):
  os.makedirs("$gdrive/sd/tools")

# Gaslighting caption tools
%cd $gdrive/sd/tools
if not os.path.exists("gaslighting"):
  print('folder doesn\'t exist! making running git clone next...')
  !git clone https://github.com/mediocreatmybest/gaslightingeveryone gaslighting
else: 
  print('Gaslighting exists.')
# Clip Interrogator
if not os.path.exists("clip-interrogator"):
  print('folder doesn\'t exist! making running git clone next...')
  !git clone https://github.com/pharmapsychotic/clip-interrogator clip-interrogator
else: 
  print('Clip Interrogator exists.')
# Every Dream
if not os.path.exists("EveryDream"):
  print('folder doesn\'t exist! making running git clone next...')
  !git clone https://github.com/victorchall/EveryDream EveryDream
else: 
  print('EveryDream exists.')


In [None]:
import os
import subprocess

#@markdown # Update all git repositories for tools folder
#@markdown Update git repositories / reset and discard changes / etc.

update_all = True #@param {type:"boolean"}
#@markdown  - Update every git repository within the tools folder

if update_all is True:
    %cd $gdrive/sd/tools
    !find $pwd -mindepth 1 -maxdepth 1 -type d -exec git --git-dir={}/.git --work-tree=$PWD/{} pull \;

## Gallery-DL / Download images

### Setup simple image folder

In [None]:
# checking if the directory exists
%cd $gdrive/sd/tools
if not os.path.exists("datasets"):
  os.makedirs("datasets")
%cd $gdrive/sd/tools/datasets
# By default Galler-DL saves into a sub directory called gallery-dl then by URL/website

In [None]:
#@title Gallery-DL
#@markdown Set your preferences, the option download-archive should keep a record of previously downloaded images  

#@markdown If you are using a text file saved in google drive, use the side file menu to copy path
# Either a URL or the path of a text file; the download cell passes it after -i
# when use_textfile is 'yes' and as a plain argument otherwise.
URL_OR_TXT = '/content/gdrive/MyDrive/sd/tools/datasets/example.txt'  #@param {type: "string"}
# Extra options inserted as-is into the gallery-dl command line.
arguments = '--write-metadata --sleep 1-2 --download-archive gallerydb.sql3'  #@param {type: "string"}
# 'yes' makes the download cell add --range range_start-range_stop.
use_range = "no"  #@param ['yes', 'no']
# 'yes' makes the download cell pass URL_OR_TXT with -i.
use_textfile = "yes"  #@param ['yes', 'no']
# Lower and upper bound used for --range when use_range is 'yes'.
range_start = 1  #@param {type: "slider", min: 1, max: 200}
range_stop = 10  #@param {type: "slider", min: 2, max: 200}

#@markdown ---

In [None]:
#@title Press Play on Tape (download)
%cd $gdrive/sd/tools/datasets
if use_range == 'yes':
  if use_textfile =='yes':
    !gallery-dl $arguments --range $range_start-$range_stop -i $URL_OR_TXT
  else:
    !gallery-dl $arguments --range $range_start-$range_stop $URL_OR_TXT

if use_range == 'no':
  if use_textfile =='yes':
    !gallery-dl $arguments -i $URL_OR_TXT
  else:
    !gallery-dl $arguments $URL_OR_TXT


In [None]:
%cd /content/gdrive/MyDrive/sd/tools/datasets/gallery-dl
!find $PWD -name *.gif -delete

## Create or modify captions

In [None]:
from os.path import exists

#@title Gaslighting - Create or Modify Captions.  

#@markdown Select the root folder you wish to caption
image_dir = '/content/gdrive/MyDrive/sd/tools/datasets/gallery-dl'  #@param {type: "string"}
disable_title = False #@param {type:"boolean"}
#@markdown  - Disables title from json
disable_desc = True #@param {type:"boolean"}
#@markdown  - Disables description from json
disable_tags = False #@param {type:"boolean"}
#@markdown  - Disables tags from json
remove_hash = True #@param {type:"boolean"}
#@markdown  - Removes hash symbol from tags in json
disable_exif = False #@param {type:"boolean"}
#@markdown  - Disables EXIF Camera data from json
append = False #@param {type:"boolean"}
#@markdown  - Appends written file only, does not overwrite
debug = False #@param {type:"boolean"}
#@markdown  - Disables Saving, prints some information out instead
del_appended_csv = True #@param {type:"boolean"}
#@markdown  - Deletes the appeneded CSV, otherwise re-running the command will keep appending to this file

#@markdown ---

cmd_arg_str = ''
cmd_arg_str = f'json2txt.py --imagedir {image_dir} '

if disable_title is True:
  cmd_arg_str = cmd_arg_str + '--disable-title '
if disable_desc is True:
  cmd_arg_str = cmd_arg_str + '--disable-desc '
if disable_tags is True:
  cmd_arg_str = cmd_arg_str + '--disable-tags '
if remove_hash is True:
  cmd_arg_str = cmd_arg_str + '--remove-hash '
if disable_exif is True:
  cmd_arg_str = cmd_arg_str + '--disable-exif '
if append is True:
  cmd_arg_str = cmd_arg_str + '--append '
if debug is True:
  cmd_arg_str = cmd_arg_str + '--debug '
if del_appended_csv is True:
  if exists('{image_dir}/appended_captions.txt') is True:
    print('appended_captions.txt deleted')
    !rm {image_dir}/appended_captions.txt

# Run Script

%cd $gdrive/sd/tools/gaslighting/tools
!python {cmd_arg_str}



In [None]:
#@markdown ### Clip Interrogator
#@markdown Modified from the original scripts and examples at: https://github.com/pharmapsychotic/clip-interrogator  

#@markdown **Check GPU**
# Shell out to nvidia-smi to show which GPU, if any, this runtime has.
!nvidia-smi -L

In [None]:
#@title Setup
#@markdown Select ViT-H-14/laion2b_s32b_b79k as the model to use with Stable Diffusion 2.x  

#@markdown Select ViT-L-14/openai as the model to use with Stable Diffusion 1.x  

import os, subprocess

def setup():
    """Install the Python packages the Clip Interrogator cells import.

    Every package is installed with ``<running interpreter> -m pip`` so it ends
    up in the environment of the current kernel rather than in whichever ``pip``
    is first on PATH. pip's combined output is printed after each install, and a
    warning is printed when an install exits with a non-zero status.
    """
    import sys  # local import: only needed to locate the kernel's interpreter

    packages = [
        'gradio',
        'open_clip_torch',
        'clip-interrogator',
        'git+https://github.com/pharmapsychotic/BLIP.git',
    ]
    for package in packages:
        result = subprocess.run(
            [sys.executable, '-m', 'pip', 'install', package],
            stdout=subprocess.PIPE,
            # Merge stderr into the captured output so pip errors are shown too.
            stderr=subprocess.STDOUT,
        )
        print(result.stdout.decode('utf-8', errors='replace'))
        if result.returncode != 0:
            print(f'WARNING: installing {package} failed (pip exit code {result.returncode})')

# Install the dependencies first; the imports further down need them.
setup()


clip_model_name = 'ViT-H-14/laion2b_s32b_b79k' #@param ["ViT-H-14/laion2b_s32b_b79k", "ViT-L-14/openai"]


print("Download preprocessed cache files...")
# Pick the cache files matching the selected model; every value other than
# ViT-L-14/openai gets the ViT-H-14 set.
if clip_model_name == 'ViT-L-14/openai':
    CACHE_URLS = [
        'https://huggingface.co/pharma/ci-preprocess/resolve/main/ViT-L-14_openai_artists.pkl',
        'https://huggingface.co/pharma/ci-preprocess/resolve/main/ViT-L-14_openai_flavors.pkl',
        'https://huggingface.co/pharma/ci-preprocess/resolve/main/ViT-L-14_openai_mediums.pkl',
        'https://huggingface.co/pharma/ci-preprocess/resolve/main/ViT-L-14_openai_movements.pkl',
        'https://huggingface.co/pharma/ci-preprocess/resolve/main/ViT-L-14_openai_trendings.pkl',
    ]
else:
    CACHE_URLS = [
        'https://huggingface.co/pharma/ci-preprocess/resolve/main/ViT-H-14_laion2b_s32b_b79k_artists.pkl',
        'https://huggingface.co/pharma/ci-preprocess/resolve/main/ViT-H-14_laion2b_s32b_b79k_flavors.pkl',
        'https://huggingface.co/pharma/ci-preprocess/resolve/main/ViT-H-14_laion2b_s32b_b79k_mediums.pkl',
        'https://huggingface.co/pharma/ci-preprocess/resolve/main/ViT-H-14_laion2b_s32b_b79k_movements.pkl',
        'https://huggingface.co/pharma/ci-preprocess/resolve/main/ViT-H-14_laion2b_s32b_b79k_trendings.pkl',
    ]
# Download each file into ./cache with wget and echo what wget wrote to stdout.
os.makedirs('cache', exist_ok=True)
for url in CACHE_URLS:
    wget_result = subprocess.run(['wget', url, '-P', 'cache'], stdout=subprocess.PIPE)
    print(wget_result.stdout.decode('utf-8'))


import gradio as gr
from clip_interrogator import Config, Interrogator

# Configure the interrogator for the selected CLIP model and create the shared
# `ci` instance used by inference() and the batch cell.
config = Config()
config.blip_num_beams = 64
config.blip_offload = False
config.clip_model_name = clip_model_name
ci = Interrogator(config)

def inference(image, mode, best_max_flavors=32):
    """Generate a prompt for ``image`` with the shared interrogator ``ci``.

    Args:
        image: image object providing ``convert``; it is converted to RGB first.
        mode: 'best' or 'classic' select the matching interrogation method;
            any other value uses the fast method.
        best_max_flavors: passed as ``max_flavors`` (cast to int) in 'best' mode.

    Returns:
        Whatever the selected ``ci.interrogate*`` call returns.
    """
    # Both settings get 2048 for ViT-L-14/openai and 1024 for any other model.
    ci.config.chunk_size = 2048 if ci.config.clip_model_name == "ViT-L-14/openai" else 1024
    ci.config.flavor_intermediate_count = 2048 if ci.config.clip_model_name == "ViT-L-14/openai" else 1024

    rgb_image = image.convert('RGB')
    if mode == 'best':
        return ci.interrogate(rgb_image, max_flavors=int(best_max_flavors))
    if mode == 'classic':
        return ci.interrogate_classic(rgb_image)
    return ci.interrogate_fast(rgb_image)


In [None]:
#@title Image to prompt! 🖼️ -> 📝

# Components come from the top-level gr namespace (as gr.Radio / gr.Number
# already did). The gr.inputs / gr.outputs namespaces are the old API and are
# not available in current gradio releases, which is what the unpinned
# `pip install gradio` in the setup cell installs.
inputs = [
    gr.Image(type='pil'),
    # 'classic' is offered as well because inference() has a branch for it.
    gr.Radio(['best', 'fast', 'classic'], label='', value='best'),
    gr.Number(value=16, label='best mode max flavors'),
]
outputs = [
    gr.Textbox(label="Output"),
]

io = gr.Interface(
    inference,
    inputs,
    outputs,
    # gradio expects one of the strings 'never' / 'auto' / 'manual' here.
    allow_flagging='never',
)
io.launch(debug=False)


In [None]:
#@title Batch process a folder of images 📁 -> 📝

#@markdown Select the folder path to the root of your images, this script is not recursive and will not search subfolders

#@markdown Output options are: _'captions', 'csv', 'rename'_  

#@markdown _Captions_ will create a txt file for every image, _CSV_ will create a summary in a CSV file, _Rename_ will rename each file.
import csv
import os
from IPython.display import clear_output, display
from PIL import Image
from pathlib import Path
from tqdm import tqdm

# Folder whose image files are processed (only the folder itself is listed, not sub-folders).
folder_path = "/content/gdrive/MyDrive/sd/tools/datasets/example" #@param {type:"string"}
# Mode passed to inference() for every image.
prompt_mode = 'best' #@param ["best","fast", "classic"]
# "captions" writes one .txt per image, "csv" writes desc.csv, "rename" renames the images.
output_mode = 'captions' #@param ["captions","csv","rename"]
# Length limit handed to sanitize_for_filename() in rename mode.
max_filename_len = 128 #@param {type:"integer"}
# Passed to inference() as best_max_flavors.
best_max_flavors = 2 #@param {type:"integer"}


def sanitize_for_filename(prompt: str, max_len: int) -> str:
    """Turn a prompt into text that can be used as a file name stem.

    Only alphanumeric characters and the characters ``,._-! `` are kept.
    Surrounding whitespace is removed and the result is truncated so that the
    stem plus a four character extension (such as ``.jpg``) fits in ``max_len``.

    Args:
        prompt: text to convert.
        max_len: maximum length of the final file name, extension included.

    Returns:
        The sanitized stem; an empty string when nothing usable is left or when
        ``max_len`` leaves no room for a stem.
    """
    allowed_punctuation = ",._-! "
    kept = "".join(c for c in prompt if c.isalnum() or c in allowed_punctuation)
    # Clamp at zero: a negative slice bound would count from the end of the
    # string and keep almost all of it instead of producing an empty stem.
    stem_len = max(max_len - 4, 0)  # extra space for extension
    # Strip once more after cutting so the stem never ends in a space that would
    # sit directly in front of the extension.
    return kept.strip()[:stem_len].rstrip()

ci.config.quiet = True

# Extensions are compared in lower case so files such as IMG_0001.JPG or
# photo.jpeg are picked up as well.
image_extensions = ('.jpg', '.jpeg', '.png', '.webp')
if os.path.isdir(folder_path):
    # Sorted so the processing order (and the row order of desc.csv) is stable.
    files = sorted(f for f in os.listdir(folder_path) if f.lower().endswith(image_extensions))
else:
    files = []

prompts = []
for idx, file in enumerate(tqdm(files, desc='Generating prompts')):
    # Clear the accumulated thumbnails/prompts every 100 images.
    if idx > 0 and idx % 100 == 0:
        clear_output(wait=True)

    # Open inside a context manager so the file handle is released right away
    # (it would otherwise still be open when the file is renamed below).
    # convert() returns a separate image that stays usable after the file closes.
    with Image.open(os.path.join(folder_path, file)) as source_image:
        image = source_image.convert('RGB')
    prompt = inference(image, prompt_mode, best_max_flavors=best_max_flavors)
    prompts.append(prompt)

    print(prompt)
    thumb = image.copy()
    thumb.thumbnail([256, 256])
    display(thumb)

    if output_mode == 'rename':
        name = sanitize_for_filename(prompt, max_filename_len)
        ext = os.path.splitext(file)[1]
        filename = name + ext
        # Own counter for the collision suffix, so the enumerate index of the
        # outer loop is not overwritten.
        suffix = 1
        while os.path.exists(os.path.join(folder_path, filename)):
            print(f'File {filename} already exists, trying {suffix+1}...')
            filename = f"{name}_{suffix}{ext}"
            suffix += 1
        os.rename(os.path.join(folder_path, file), os.path.join(folder_path, filename))

if len(prompts):
    if output_mode == 'csv':
        # One row per image: original file name and its prompt.
        csv_path = os.path.join(folder_path, 'desc.csv')
        with open(csv_path, 'w', encoding='utf-8', newline='') as f:
            w = csv.writer(f, quoting=csv.QUOTE_MINIMAL)
            w.writerow(['image', 'prompt'])
            for file, prompt in zip(files, prompts):
                w.writerow([file, prompt])

        print(f"\n\n\n\nGenerated {len(prompts)} prompts and saved to {csv_path}, enjoy!")
    elif output_mode == 'captions':
        # One <image name>.txt next to every image, containing its prompt.
        for file, prompt in zip(files, prompts):
            file_name = os.path.splitext(file)[0] + '.txt'
            file_path = os.path.join(folder_path, file_name)
            with open(file_path, 'w', encoding='utf-8') as f:
                f.write(prompt)

        # Printed once after all caption files are written, not once per file.
        print(f"\n\n\n\nGenerated {len(prompts)} prompts and saved to {folder_path}, enjoy!")

    else:
        print(f"\n\n\n\nGenerated {len(prompts)} prompts and renamed your files, enjoy!")
else:
    print(f"Sorry, I couldn't find any images in {folder_path}")


## Image resizing

In [None]:
#@title Gaslighting - Image Resizing
#@markdown Set your preferences, the size will apply to the shortest side, maintaining aspect ratio   

#@markdown Select the input directory and an output directory, files will only be copied
input_dir = '/content/gdrive/MyDrive/sd/tools/datasets/gallery-dl'  #@param {type: "string"}
output_dir = '/content/gdrive/MyDrive/sd/tools/datasets/gallery-dl_resize'  #@param {type: "string"}
size = 576  #@param {type: "number"}
copy_format = True #@param {type:"boolean"}
#@markdown  - Keep current format of images e.g. png
format = "png"  #@param ['png', 'jpg', 'jpeg', 'webp', 'bmp']
#@markdown  - Or select your own preference of image format


#@markdown ---


cmd_arg_str = ''

cmd_arg_str = f'images2resize.py --input-dir {input_dir} --output-dir {output_dir} --size {size} '

if copy_format is True:
  cmd_arg_str = cmd_arg_str + '--copy-format'
else: 
  cmd_arg_str = cmd_arg_str + '--format {format}'

%cd $gdrive/sd/tools/gaslighting/tools
!python {cmd_arg_str}
