<a href="https://colab.research.google.com/github/lowfuel/DiscoDiffusion-Warp-gobig/blob/lowfuel/CLIP_Evaluator.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# CLIP Artist Evaluator

This notebook lets you provide a sample piece of art, then has CLIP evaluate the image and tell you which artist it thinks created it.

Please consider supporting me on [Patreon](https://www.patreon.com/user?u=255893&fan_landing=true) to keep this notebook updated and improving. Thanks!

### Credits & Changelog ⬇️

#### Credits

by Jason Hough (lowfuel)

#### License

Licensed under the MIT License

Copyright (c) 2022 Jason Hough 

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.

# 1. Set Up

In [None]:
#@title 1.1 Check GPU Status
#@markdown Note: Works fine without GPU (just a bit slow)
import subprocess
simple_nvidia_smi_display = True#@param {type:"boolean"}

def _run_nvidia_smi(*args):
  """Run ``nvidia-smi`` with ``args`` and return its decoded stdout.

  When the ``nvidia-smi`` executable cannot be found, a short explanatory
  message is returned instead of raising, so this cell does not abort the
  notebook on hosts that have no NVIDIA tooling installed.
  """
  try:
    completed = subprocess.run(['nvidia-smi', *args], stdout=subprocess.PIPE)
  except FileNotFoundError:
    return 'nvidia-smi was not found; continuing without GPU information.'
  return completed.stdout.decode('utf-8')

if simple_nvidia_smi_display:
  # Short form: one line per GPU (same as `nvidia-smi -L`).
  nvidiasmi_output = _run_nvidia_smi('-L')
  print(nvidiasmi_output)
else:
  # Full status table, followed by the output of `nvidia-smi -i 0 -e 0`.
  nvidiasmi_output = _run_nvidia_smi()
  print(nvidiasmi_output)
  nvidiasmi_ecc_note = _run_nvidia_smi('-i', '0', '-e', '0')
  print(nvidiasmi_ecc_note)

In [None]:
#@title 1.2 Prepare Folders
import subprocess, os, sys, ipykernel

def gitclone(url):
  """Run ``git clone url`` in the current directory and print git's stdout."""
  completed = subprocess.run(['git', 'clone', url], stdout=subprocess.PIPE)
  print(completed.stdout.decode('utf-8'))

def pipi(modulestr):
  """Run ``pip install modulestr`` and print pip's stdout."""
  completed = subprocess.run(['pip', 'install', modulestr], stdout=subprocess.PIPE)
  print(completed.stdout.decode('utf-8'))

def pipie(modulestr):
  """Install ``modulestr`` in editable mode and print pip's stdout.

  Runs ``pip install -e modulestr``. The command previously invoked ``git``,
  which has no ``install`` subcommand, so the editable install never happened.
  """
  res = subprocess.run(['pip', 'install', '-e', modulestr], stdout=subprocess.PIPE).stdout.decode('utf-8')
  print(res)

def wget(url, outputdir):
  """Download ``url`` into ``outputdir`` via the ``wget`` command and print its stdout."""
  command = ['wget', url, '-P', str(outputdir)]
  completed = subprocess.run(command, stdout=subprocess.PIPE)
  print(completed.stdout.decode('utf-8'))

import os

# Detect Google Colab by trying to import its Drive helper. Only a failed
# import means "not on Colab"; any other error is allowed to surface instead
# of being silently treated as a local run.
try:
    from google.colab import drive
except ImportError:
    is_colab = False
    google_drive = False
    save_models_to_google_drive = False
    print("Google Colab not detected.")
else:
    print("Google Colab detected. Using Google Drive.")
    is_colab = True
    #@markdown If you connect your Google Drive, you can save the final image of each run on your drive.
    google_drive = True #@param {type:"boolean"}
    #@markdown Click here if you'd like to save the diffusion model checkpoint file to (and/or load from) your Google Drive:
    save_models_to_google_drive = True #@param {type:"boolean"}

# Pick the folder everything else is stored under: the user's Drive when it is
# requested on Colab, Colab's local /content otherwise, and the current working
# directory when running outside Colab.
if is_colab:
    if google_drive:
        drive.mount('/content/drive')
        root_path = '/content/drive/MyDrive/AI/Disco_Diffusion'
    else:
        root_path = '/content'
else:
    root_path = os.getcwd()

import os
def createPath(filepath):
    """Create the directory ``filepath`` and any missing parents.

    An already existing directory is left untouched and is not an error.
    """
    os.makedirs(name=filepath, exist_ok=True)

# Input and output image folders both live directly under the root path.
initDirPath = f'{root_path}/init_images'
outDirPath = f'{root_path}/images_out'
createPath(initDirPath)
createPath(outDirPath)

# On Colab, model files stay on the local disk unless Drive is connected AND
# the user chose to keep models there; everywhere else they sit under root_path.
if is_colab and not (google_drive and save_models_to_google_drive):
    model_path = '/content/models'
else:
    model_path = f'{root_path}/models'
createPath(model_path)

# Start from a fresh prompt list: delete any copy left by an earlier run
# before downloading the current one into root_path.
if os.path.exists(f"{root_path}/prompts.txt"):
    os.remove(f"{root_path}/prompts.txt")

wget("https://raw.githubusercontent.com/lowfuel/DiscoDiffusion-Warp-gobig/lowfuel/prompts.txt", root_path)

def createPath(filepath):
    """Create the directory ``filepath`` (with missing parents) if it does not exist yet.

    This repeats the definition given earlier in the same cell, with the same behaviour.
    """
    os.makedirs(name=filepath, exist_ok=True)


In [None]:
%%capture
#@title ### 1.3 Install and import dependencies

import pathlib, shutil, sys

# Absolute path of the directory the notebook is running in; the CLIP checkout
# is later located relative to it.
PROJECT_DIR = os.path.abspath(os.getcwd())

# Install all extra Python packages with a single pip invocation and show
# pip's stdout (hidden by %%capture unless the cell output is inspected).
multipip_res = subprocess.run(
    ['pip', 'install', 'lpips', 'datetime', 'timm', 'ftfy', 'einops', 'pytorch-lightning', 'omegaconf'],
    stdout=subprocess.PIPE,
).stdout.decode('utf-8')
print(multipip_res)

import os
from os import path
import sys
import io

# `files` is only available on Google Colab. The notebook also supports running
# elsewhere, so a missing module must not abort this cell; `files` is None then.
try:
  from google.colab import files
except ImportError:
  files = None

from attr import has
from numpy import average
import torch
from torch import nn
import ipywidgets as widgets
from IPython.display import display, HTML
import requests
# Import OpenAI's CLIP; when it is not importable yet, clone the repository
# into the working directory (unless a checkout is already there), make the
# checkout importable and retry. Only import failures trigger this fallback.
try:
  from CLIP import clip
except ImportError:
  if not os.path.exists("CLIP"):
    gitclone("https://github.com/openai/CLIP")
  # Re-running the cell must not keep appending the same entry to sys.path.
  if f'{PROJECT_DIR}/CLIP' not in sys.path:
    sys.path.append(f'{PROJECT_DIR}/CLIP')
  from CLIP import clip
import gc
from statistics import mean
from PIL import Image, ImageOps
import urllib.request, urllib.error, urllib.parse
from itertools import chain, islice
import unicodedata

# Use the first CUDA GPU when PyTorch can see one, otherwise the CPU.
DEVICE = torch.device('cuda:0') if torch.cuda.is_available() else torch.device('cpu')
print('Using device:', DEVICE)
device = DEVICE # At least one of the modules expects this name..

# A100 fix thanks to Emad: cuDNN is switched off for compute capability (8, 0).
if device.type == 'cuda' and torch.cuda.get_device_capability(DEVICE) == (8, 0):
  print('Disabling CUDNN for A100 gpu', file=sys.stderr)
  torch.backends.cudnn.enabled = False


# 2. Select your CLIP models

In [None]:
#@title ### 2.1 Enable Models
#@markdown Selecting them all is fine, but will take longer to process.

# One checkbox per CLIP model. Every flag set to True adds the matching model
# name to `modellist` in the results cell, and each listed model is loaded and
# evaluated in turn.
ViTB32 = True #@param{type:"boolean"}
ViTB16 = False #@param{type:"boolean"}
ViTL14 = False #@param{type:"boolean"}
ViTL14_336 = True #@param{type:"boolean"}
RN101 = False #@param{type:"boolean"}
RN50 = True #@param{type:"boolean"}
RN50x4 = False #@param{type:"boolean"}
RN50x16 = False #@param{type:"boolean"}
RN50x64 = True #@param{type:"boolean"}

#@markdown Run stylistic similarity? (adds a few minutes to the evaluation)
# When True, the results cell additionally ranks the prompts by cosine
# similarity to the sample image.
run_similarity = False #@param{type:"boolean"}

#@markdown CLIP model to use for stylistic similarity:
# Name handed to load_clip_model() for the similarity pass; only read when
# run_similarity is True.
similarity_model = 'ViT-L/14@336px' #@param ["ViT-B/32", "ViT-B/16", "ViT-L/14", "ViT-L/14@336px", "RN101", "RN50", "RN50x4", "RN50x16", "RN50x64"]

# 3. Configure Artist

In [None]:
#@title ### 3.1 Enter artist name
#@markdown Which artist would you like to test?
#@markdown Note: Leave blank if you don't know, or make up a name.
# Artist name under test. The results cell prefixes it with "by" and appends it
# to the prompt list when it is not already one of the names in prompts.txt.
evalprompt = "Peter Mohrbacher" #@param{type:"string"}

#@title ### 3.2 Link to a sample piece of art
#@markdown URL to a sample piece of art by this artist (PNG or JPEG is fine, ideally no bigger than 640x640)
#uploaded = files.upload()
# Passed to fetch(): http(s) URLs are downloaded, anything else is opened as a
# local file path. The same value is also used as the <img> source in the report.
sample_image = "https://cdna.artstation.com/p/assets/images/images/016/964/598/smaller_square/peter-mohrbacher-israfel-visitf.jpg" #@param{type:"string"}

# 4. RESULTS

In [None]:
#@title ### 4.1 The Results will display here after processing. Be patient!

# Collect the CLIP model names whose checkbox is ticked, in a fixed order.
modellist = [
    clip_name
    for enabled, clip_name in (
        (ViTB32, 'ViT-B/32'),
        (ViTB16, 'ViT-B/16'),
        (ViTL14, 'ViT-L/14'),
        (ViTL14_336, 'ViT-L/14@336px'),
        (RN101, 'RN101'),
        (RN50, 'RN50'),
        (RN50x4, 'RN50x4'),
        (RN50x16, 'RN50x16'),
        (RN50x64, 'RN50x64'),
    )
    if enabled
]

def fetch(url_or_path):
    """Return a readable binary file object for a URL or a local path.

    ``http://`` and ``https://`` locations are downloaded completely with
    ``requests`` (an HTTP error status raises) and returned as an in-memory
    buffer positioned at the start. Anything else is opened from disk in
    binary mode; closing that file is up to the caller.
    """
    location = str(url_or_path)
    if not location.startswith(('http://', 'https://')):
        return open(url_or_path, 'rb')
    response = requests.get(url_or_path)
    response.raise_for_status()
    return io.BytesIO(response.content)

def batch_iterable(iterable, n):
    """Yield consecutive lists of up to ``n`` items taken from ``iterable``.

    Every batch except possibly the last holds exactly ``n`` items; an empty
    iterable yields nothing.
    """
    source = iter(iterable)
    for head in source:
        # `head` is already consumed, so top the batch up with at most n - 1 more.
        batch = [head]
        batch.extend(islice(source, n - 1))
        yield batch

def load_clip_model(model_name):
    """Load the CLIP model ``model_name`` onto the notebook's device.

    Weights are downloaded to / read from ``model_path`` and the non-JIT
    variant is requested. Returns ``(model, preprocess)`` as produced by
    ``clip.load``.
    """
    loaded = clip.load(model_name, device=device, jit=False, download_root=model_path)
    clip_model, transform = loaded
    return clip_model, transform

def clipit_text(prompt, model):
    """Tokenize ``prompt`` and return ``model``'s text encoding of it, cast to float."""
    tokens = clip.tokenize(prompt).to(device)
    return model.encode_text(tokens).float()

def clipit_image(im_prompt, model):
    """Move the image tensor ``im_prompt`` to the device and return ``model``'s image encoding, cast to float."""
    on_device = im_prompt.to(device)
    return model.encode_image(on_device).float()

def evalprompt_cos(t_prompt, t_comp):
    """Return the cosine similarity between two tensors as a Python number.

    The similarity is computed along dimension 1; the result is read with
    ``.item()``, so it has to contain exactly one value.
    """
    similarity_fn = nn.CosineSimilarity(dim=1, eps=1e-6)
    return similarity_fn(t_prompt, t_comp).item()

def match_prompts_to_image(image, prompts, model):
    """Return softmax probabilities of ``prompts`` for ``image`` as a NumPy array.

    The prompts are tokenized, the model is called on the image and the tokens
    without gradient tracking, and the per-image logits are soft-maxed over
    the last dimension.
    """
    tokens = clip.tokenize(prompts).to(device)
    with torch.no_grad():
        image_logits, _ = model(image, tokens)
        return image_logits.softmax(dim=-1).cpu().numpy()

def loadprompts(textfile):
    """Read the UTF-8 file ``textfile`` and return its lines, each stripped of surrounding whitespace.

    Blank lines are kept and appear as empty strings.
    """
    with open(textfile, encoding="utf-8") as handle:
        return [line.strip() for line in handle]
    
def scoreprompts(evalprompt, images, prompts, modelname):
    """Score ``prompts`` against the first image with the CLIP model ``modelname``.

    Loads the model, computes each prompt's probability for ``images[0]`` and
    rounds it to four decimals. Returns ``(prompt, probability)`` tuples, in
    prompt order, for every prompt above 0.03 and always for the prompt equal
    to ``evalprompt``.
    """
    model, preprocess = load_clip_model(modelname)
    sample = preprocess(images[0]).unsqueeze(0).to(device)
    per_prompt = match_prompts_to_image(sample, prompts, model).tolist()[0]
    kept = []
    for candidate, raw_prob in zip(prompts, per_prompt):
        rounded = round(raw_prob, 4)
        if candidate == evalprompt or rounded > 0.03:
            kept.append((candidate, rounded))
    return kept

def find_similar(evalprompt, images, prompts, model, preprocess):
    """Rate every prompt by cosine similarity to the first image.

    Args:
        evalprompt: Not used by the computation; kept so existing calls keep working.
        images: Sequence of PIL images; only ``images[0]`` is evaluated.
        prompts: Iterable of prompt strings to compare against the image.
        model: Loaded CLIP model used for both the image and the text encodings.
        preprocess: The image transform that belongs to ``model``.

    Returns:
        A list of ``(prompt, similarity)`` tuples in the order of ``prompts``.
    """
    sample_image = preprocess(images[0]).unsqueeze(0).to(device)
    stylistic_scores = []
    # Pure inference: no gradients are needed for any of these encodings, so
    # gradient tracking is switched off for the whole pass.
    with torch.no_grad():
        t_image = clipit_image(sample_image, model)
        for prompt in prompts:
            t_prompt = clipit_text(prompt, model)
            stylistic_scores.append((prompt, evalprompt_cos(t_prompt, t_image)))
    return stylistic_scores

import html
from urllib.parse import quote_plus

# The sample image every enabled CLIP model is asked about.
images = []
images.append(Image.open(fetch(sample_image)))

prescript = "by"
# Exact prompt string that stands for the artist under test.
artist_prompt = prescript + " " + evalprompt

# Turn every artist name from prompts.txt into a "by <name>" prompt.
raw_prompts = loadprompts((f"{root_path}/prompts.txt"))
prompts = [prescript + " " + prompt for prompt in raw_prompts]
post_prompts = []

if (evalprompt) not in raw_prompts:
    print('A new artist. Please let Lowfuel know so they can be added to the master comparison list!')
    prompts.append(artist_prompt)

print("Please stand by. Processing...")
result = (f'<h2>CLIP associations for this sample image:</h2>')

# Layout: an outer one-row table with the per-model results (a nested table)
# in the left cell and the sample image in the right cell.
result += '<table><tr style="vertical-align:top">'
average = 0.0
this_artist_score = []
result += '<td><table>' # nested table for results
for modelname in modellist:
    print(f'Evaluating with {modelname}...')
    result += (f'<tr><td><p><i>{modelname} thinks this sample is:</i></p>')
    scores = scoreprompts(artist_prompt, images, prompts, modelname)
    scores.sort(key=lambda a: a[1], reverse=True)
    for name, percent in scores:
        if name == artist_prompt:
            this_artist_score.append(percent)
        # Bing doesn't like unicode text, so convert to ascii first
        query = unicodedata.normalize('NFKD', name).encode('ascii', 'ignore').decode()
        # Collapse whitespace, then URL-encode: spaces become '+', and characters
        # such as '&' or '#' can no longer break the query string.
        query = quote_plus(' '.join(query.split()))
        url = 'https://www.bing.com/images/search?q=' + query + '&qs=n&form=QBILPG&sp=-1&ghc=1'
        # Names come from a downloaded text file and the form field, so they are
        # escaped before being placed into the HTML.
        html_with_link = (f'<p><a href="{html.escape(url)}" target="_blank" rel="noopener noreferrer">{html.escape(name)}</a> (with {percent:.2%} certainty)</p>')
        result += html_with_link
    result += '</td></tr>'
result += '</table></td>' # close the nested table and the cell that holds it
result += (f'<td><img src="{html.escape(sample_image)}" width="320"></td>')
result +='</tr></table>'
display(HTML(result))
if this_artist_score:
    average = mean(this_artist_score)
    print('')
    display(HTML(f'<p>Average confidence that this image was by {html.escape(evalprompt)}: <font size="+2"><b>{average:.2%}</b></font></p>'))
else:
    # Without any enabled model there are no scores, and mean() would raise.
    print('No CLIP model is enabled in step 2.1, so no average confidence can be computed.')

# check for cosine similarity if user wishes
if run_similarity:
    import html
    from urllib.parse import quote_plus

    print("")
    print("Now finding stylistic matches, please wait...")
    # Load the similarity model once and reuse it for every batch of prompts.
    model, preprocess = load_clip_model(similarity_model)
    style_matches = []
    for prompt_batch in batch_iterable(prompts, 128):
        # Gather the scores of ALL batches; keeping only the first batch would
        # ignore every prompt after the first 128.
        style_matches.extend(find_similar((prescript + " " + evalprompt), images, prompt_batch, model, preprocess))
    style_matches.sort(key=lambda y: y[1], reverse=True)
    # Slicing copes with fewer than four prompts instead of raising IndexError.
    most_stylistic = [name for name, score in style_matches[:4]]

    result = '<h3>The four best stylistic matches to this sample, in order:</h3>'
    for name in most_stylistic:
        # Bing doesn't like unicode text, so convert to ascii first
        query = unicodedata.normalize('NFKD', name).encode('ascii', 'ignore').decode()
        # Collapse whitespace, then URL-encode (spaces become '+').
        query = quote_plus(' '.join(query.split()))
        url = 'https://www.bing.com/images/search?q=' + query + '&qs=n&form=QBILPG&sp=-1&ghc=1'
        html_with_link = (f'<p><a href="{html.escape(url)}" target="_blank" rel="noopener noreferrer">{html.escape(name)}</a></p>')
        result += html_with_link
    display(HTML(result))
