In [1]:
import numpy as np
import os
import torch
from torchvision import transforms
from tqdm import tqdm

import pandas as pd
import PIL

import matplotlib.pyplot as plt


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
sd_version = '2_1'
pretrained_model = 'laion2b_s34b_b79k'
path_image_tensors = '../sd_' + sd_version + '_' + pretrained_model + '_ViT-B-32.pt'

In [3]:
import open_clip
from open_clip import tokenizer
model, _, preprocess = open_clip.create_model_and_transforms('ViT-B-32', pretrained=pretrained_model)

In [4]:
data1 = []
# change the image folders here
path = "../" + sd_version + "/"
number_of_images = len(os.listdir(path))
print(number_of_images)
for i in range(number_of_images):
    img = PIL.Image.open(path + str(i) + ".png")
    data1.append(img)
    

3735


In [5]:
data2 = []
# change the image folders here
path = "../historic_" + sd_version + "_v2/"
number_of_images = len(os.listdir(path))
print(number_of_images)
for i in range(number_of_images):
    img = PIL.Image.open(path + str(i) + ".png")
    data2.append(img)
    

3735


In [6]:
#concat array data1 and data2
data = data1 + data2

In [7]:
len(data)

7470

In [8]:
device = torch.device('cuda')
model.to(device)

CLIP(
  (visual): VisionTransformer(
    (patchnorm_pre_ln): Identity()
    (conv1): Conv2d(3, 768, kernel_size=(32, 32), stride=(32, 32), bias=False)
    (patch_dropout): Identity()
    (ln_pre): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
    (transformer): Transformer(
      (resblocks): ModuleList(
        (0): ResidualAttentionBlock(
          (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
          (attn): MultiheadAttention(
            (out_proj): NonDynamicallyQuantizableLinear(in_features=768, out_features=768, bias=True)
          )
          (ls_1): Identity()
          (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
          (mlp): Sequential(
            (c_fc): Linear(in_features=768, out_features=3072, bias=True)
            (gelu): GELU(approximate='none')
            (c_proj): Linear(in_features=3072, out_features=768, bias=True)
          )
          (ls_2): Identity()
        )
        (1): ResidualAttentionBlock(
          (l

In [9]:
image_features = []
batch_size = 512

with torch.no_grad():
    for i in tqdm(range(0, len(data), batch_size)):
        batch = []
        for j in range(i, min(i+batch_size, len(data))):
            img = data[j]
            img = preprocess(img)
            batch.append(img)
    
        batch = torch.tensor(np.stack(batch))
        batch = batch.to(device)
        img_feats = model.encode_image(batch).float()
        image_features.append(img_feats)
image_features_torch = torch.concatenate(image_features).cpu()

100%|██████████| 15/15 [02:49<00:00, 11.28s/it]


In [10]:
del data
del data1
del data2

In [11]:
# read csv file
import pandas as pd
df = pd.read_csv('../gen_images_'+sd_version+'.csv')
df.head()

print(df.shape)

(3735, 5)


In [12]:
df2 = pd.read_csv('../gen_images_historic_'+sd_version+'_v2.csv')
#concat 
df = pd.concat([df, df2])
print(df.shape)

(7470, 5)


In [13]:
artists = df['artist'].unique()
text_features = []

with torch.no_grad():
    for artist in artists:
        prompt = "The following work is done in the style of " + artist
        text_tokens = tokenizer.tokenize(prompt)
        text_tokens = text_tokens.to(device)
        txt_feat = model.encode_text(text_tokens).float()
        text_features.append(txt_feat)
text_features_torch = torch.concatenate(text_features).cpu()

In [14]:
idx2artist = {}
artist2idx = {}
for i, artist in enumerate(artists):
    idx2artist[i] = artist
    artist2idx[artist] = i

In [15]:
image_features_torch /= image_features_torch.norm(dim=-1, keepdim=True)
text_features_torch /= text_features_torch.norm(dim=-1, keepdim=True)

text_probs = (100.0 * image_features_torch @ text_features_torch.T).softmax(dim=-1)
top_probs, top_k_labels = text_probs.cpu().topk(5, dim=-1)

In [16]:
gt_labels = torch.tensor([artist2idx[x] for x in df['artist'].to_list()])

top_one_labels = top_k_labels[:, 0]

print(gt_labels.shape, top_one_labels.shape)

correct = (gt_labels == top_one_labels).sum()
print(f"Top 1 score is {round((correct / gt_labels.shape[0]).item() * 100, 2)}")


topk_correct = 0
for i in range(5):
    top_one_labels = top_k_labels[:, i]
    correct = (gt_labels == top_one_labels).sum()
    topk_correct += correct
print(f"Top 5 score is {round((topk_correct / gt_labels.shape[0]).item() * 100, 2)}")
# correct = (gt_labels.repeat() == top_k_labels).sum()
# print(correct)

torch.Size([7470]) torch.Size([7470])
Top 1 score is 5.86
Top 5 score is 17.05


In [17]:
#compute the artist accuracy
artist_accuracy = {}
for i in range(len(artists)):
    artist_accuracy[artists[i]] = 0
for i in range(len(gt_labels)):
    if gt_labels[i] == top_one_labels[i]:
        artist_accuracy[artists[gt_labels[i]]] += 1
for i in range(len(artists)):
    artist_accuracy[artists[i]] /= len(df[df['artist'] == artists[i]])
    #multiply by 100 to get percentage
    artist_accuracy[artists[i]] *= 100
print(artist_accuracy)


{'Vincent van Gogh': 3.3333333333333335, 'Claude Monet': 0.0, 'Rembrandt': 0.0, 'Raphael': 0.0, 'Paul Cézanne': 3.3333333333333335, 'Albrecht Dürer': 0.0, 'Paul Gauguin': 0.0, 'Gustav Klimt': 10.0, 'Francisco Goya': 0.0, 'Pierre-Auguste Renoir': 0.0, 'Peter Paul Rubens': 0.0, 'Auguste Rodin': 0.0, 'Frida Kahlo': 3.3333333333333335, 'Titian': 0.0, 'Edgar Degas': 0.0, 'Édouard Manet': 6.666666666666667, 'Edvard Munch': 3.3333333333333335, 'Diego Rivera': 6.666666666666667, 'Eugène Delacroix': 3.3333333333333335, 'Henri de Toulouse-Lautrec': 0.0, 'William Blake': 0.0, 'Paul Klee': 3.3333333333333335, 'El Greco': 0.0, 'Sandro Botticelli': 0.0, 'J. M. W. Turner': 0.0, 'Osamu Tezuka': 3.3333333333333335, 'Gustave Courbet': 0.0, 'Hokusai': 6.666666666666667, 'Camille Pissarro': 0.0, 'Giorgio Vasari': 0.0, 'Michelangelo': 0.0, 'Leonardo da Vinci': 0.0, 'Egon Schiele': 3.3333333333333335, 'JR': 3.3333333333333335, "Georgia O'Keeffe": 3.3333333333333335, 'Hiroshige': 0.0, 'Tintoretto': 0.0, 'Ant

In [18]:
#sort
sorted_accuracy = sorted(artist_accuracy.items(), key=lambda x: x[1], reverse=True)
print(sorted_accuracy)

[('Eugène Atget', 13.333333333333334), ('Albert Bierstadt', 13.333333333333334), ('Peder Severin Krøyer', 13.333333333333334), ('Gustav Klimt', 10.0), ('Thomas Gainsborough', 10.0), ('Yoshitoshi', 10.0), ('Lucio Fontana', 10.0), ('Yokoyama Taikan', 10.0), ('John Singleton Copley', 10.0), ('Jan Brueghel the Elder', 10.0), ('Édouard Manet', 6.666666666666667), ('Diego Rivera', 6.666666666666667), ('Hokusai', 6.666666666666667), ('Fernando Botero', 6.666666666666667), ('Lucas Cranach the Elder', 6.666666666666667), ('Nicholas Roerich', 6.666666666666667), ('Odilon Redon', 6.666666666666667), ('Winslow Homer', 6.666666666666667), ('Juan Gris', 6.666666666666667), ('Édouard Vuillard', 6.666666666666667), ('Henri Rousseau', 6.666666666666667), ('Alfred Eisenstaedt', 6.666666666666667), ('Maruyama Ōkyo', 6.666666666666667), ('Hieronymus Bosch', 6.666666666666667), ('Frederic Leighton', 6.666666666666667), ('Utagawa Toyokuni', 6.666666666666667), ('Fra Angelico', 6.666666666666667), ('Salvador

In [19]:
#higher 10
print("Top 10")
for name, value in sorted_accuracy[:10]:
    print(name, round(value, 2))

Top 10
Eugène Atget 13.33
Albert Bierstadt 13.33
Peder Severin Krøyer 13.33
Gustav Klimt 10.0
Thomas Gainsborough 10.0
Yoshitoshi 10.0
Lucio Fontana 10.0
Yokoyama Taikan 10.0
John Singleton Copley 10.0
Jan Brueghel the Elder 10.0


In [20]:
#higher 10
print("Top 10")
for name, value in sorted_accuracy[:10]:
    print('|'+name+'|' + str(round(value, 2))+'%|')

Top 10
|Eugène Atget|13.33%|
|Albert Bierstadt|13.33%|
|Peder Severin Krøyer|13.33%|
|Gustav Klimt|10.0%|
|Thomas Gainsborough|10.0%|
|Yoshitoshi|10.0%|
|Lucio Fontana|10.0%|
|Yokoyama Taikan|10.0%|
|John Singleton Copley|10.0%|
|Jan Brueghel the Elder|10.0%|


In [21]:
#compute the prompts accuracy
prompts = df['prompt'].unique()
prompt_accuracy = {}
for i in range(len(prompts)):
    prompt_accuracy[prompts[i]] = 0
for i in range(len(gt_labels)):
    if gt_labels[i] == top_one_labels[i]:
        prompt_accuracy[df['prompt'].to_list()[i]] += 1
for i in range(len(prompts)):
    prompt_accuracy[prompts[i]] /= len(df[df['prompt'] == prompts[i]])
    #multiply by 100 to get percentage
    prompt_accuracy[prompts[i]] *= 100

In [22]:
#sort
sorted_accuracy = sorted(prompt_accuracy.items(), key=lambda x: x[1], reverse=True)
print(sorted_accuracy)

[('A romantic and dreamy scene of a couple in a hot air balloon, floating over a picturesque countryside.', 4.417670682730924), ('A romantic scene with a couple sitting on a park bench, surrounded by autumn leaves and a beautiful sunset.', 4.417670682730924), ('A mysterious scene with an ancient temple hidden deep in a jungle, with vines and moss covering the walls.', 4.016064257028113), ('A spooky and eerie abandoned carnival scene with empty rides, broken lights, and a creepy clown lurking in the shadows.', 3.614457831325301), ('A tranquil space scene with a spaceship orbiting a planet, shooting stars, and a constellation of stars in the distance.', 3.614457831325301), ('A joyful and heartwarming scene of a group of children playing in a park, with swings, slides, and a merry-go-round.', 3.614457831325301), ('A dramatic and mystical scene of a witch performing a ritual in a dark forest, surrounded by candles and other supernatural objects.', 2.8112449799196786), ('A cozy and festive 

In [23]:
#get prompts with their mean accuracy
print("Top 5")
for name, value in sorted_accuracy[:5]:
    print(name, round(value, 2))
print("Bottom 5")
for name, value in sorted_accuracy[-5:]:
    print(name, round(value, 2))

Top 5
A romantic and dreamy scene of a couple in a hot air balloon, floating over a picturesque countryside. 4.42
A romantic scene with a couple sitting on a park bench, surrounded by autumn leaves and a beautiful sunset. 4.42
A mysterious scene with an ancient temple hidden deep in a jungle, with vines and moss covering the walls. 4.02
A spooky and eerie abandoned carnival scene with empty rides, broken lights, and a creepy clown lurking in the shadows. 3.61
A tranquil space scene with a spaceship orbiting a planet, shooting stars, and a constellation of stars in the distance. 3.61
Bottom 5
A dark and mysterious scene of an abandoned subway station, with flickering lights and eerie echoes. 0.8
A futuristic and eco-friendly scene of a city with massive vertical gardens covering skyscrapers, parks, and rooftop terraces. 0.8
A dreamy and surreal cloudscape scene with fluffy white clouds, a rainbow, and a hot air balloon floating in the distance. 0.4
A dynamic and exciting skateboard park

In [24]:
#get prompts with their mean accuracy
for name, value in sorted_accuracy[:5]:
    print('|'+name+'|' + str(round(value, 2))+'%|')
print("|...|...|")
for name, value in sorted_accuracy[-5:]:
    print('|'+name+'|' + str(round(value, 2))+'%|')

|A romantic and dreamy scene of a couple in a hot air balloon, floating over a picturesque countryside.|4.42%|
|A romantic scene with a couple sitting on a park bench, surrounded by autumn leaves and a beautiful sunset.|4.42%|
|A mysterious scene with an ancient temple hidden deep in a jungle, with vines and moss covering the walls.|4.02%|
|A spooky and eerie abandoned carnival scene with empty rides, broken lights, and a creepy clown lurking in the shadows.|3.61%|
|A tranquil space scene with a spaceship orbiting a planet, shooting stars, and a constellation of stars in the distance.|3.61%|
|...|...|
|A dark and mysterious scene of an abandoned subway station, with flickering lights and eerie echoes.|0.8%|
|A futuristic and eco-friendly scene of a city with massive vertical gardens covering skyscrapers, parks, and rooftop terraces.|0.8%|
|A dreamy and surreal cloudscape scene with fluffy white clouds, a rainbow, and a hot air balloon floating in the distance.|0.4%|
|A dynamic and exci