In [4]:
import numpy as np
import os
import torch
from torchvision import transforms
from tqdm import tqdm

import pandas as pd
import PIL

import matplotlib.pyplot as plt


In [5]:
pretrained_model = 'laion2b_s34b_b79k'
path_image_tensors = '../real_images_artstation_filtered' + '_' + pretrained_model + '_ViT-B-32.pt'

In [6]:
import open_clip
from open_clip import tokenizer
model, _, preprocess = open_clip.create_model_and_transforms('ViT-B-32', pretrained=pretrained_model)

In [7]:
device = torch.device('cuda')
model.to(device)

CLIP(
  (visual): VisionTransformer(
    (patchnorm_pre_ln): Identity()
    (conv1): Conv2d(3, 768, kernel_size=(32, 32), stride=(32, 32), bias=False)
    (patch_dropout): Identity()
    (ln_pre): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
    (transformer): Transformer(
      (resblocks): ModuleList(
        (0): ResidualAttentionBlock(
          (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
          (attn): MultiheadAttention(
            (out_proj): NonDynamicallyQuantizableLinear(in_features=768, out_features=768, bias=True)
          )
          (ls_1): Identity()
          (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
          (mlp): Sequential(
            (c_fc): Linear(in_features=768, out_features=3072, bias=True)
            (gelu): GELU(approximate='none')
            (c_proj): Linear(in_features=3072, out_features=768, bias=True)
          )
          (ls_2): Identity()
        )
        (1): ResidualAttentionBlock(
          (l

In [8]:
image_features_torch = torch.load(path_image_tensors)

In [9]:
# read csv file
import pandas as pd
df = pd.read_csv('../real_images_artstation_filtered.csv')
df.head()

print(df.shape)

(7031, 3)


In [10]:
artists = df['artist'].unique()
text_features = []

with torch.no_grad():
    for artist in artists:
        prompt = "The following work is done in the style of " + artist
        text_tokens = tokenizer.tokenize(prompt)
        text_tokens = text_tokens.to(device)
        txt_feat = model.encode_text(text_tokens).float()
        text_features.append(txt_feat)
text_features_torch = torch.concatenate(text_features).cpu()

In [11]:
idx2artist = {}
artist2idx = {}
for i, artist in enumerate(artists):
    idx2artist[i] = artist
    artist2idx[artist] = i

In [12]:
image_features_torch /= image_features_torch.norm(dim=-1, keepdim=True)
text_features_torch /= text_features_torch.norm(dim=-1, keepdim=True)

text_probs = (100.0 * image_features_torch @ text_features_torch.T).softmax(dim=-1)
top_probs, top_k_labels = text_probs.cpu().topk(5, dim=-1)

In [13]:
gt_labels = torch.tensor([artist2idx[x] for x in df['artist'].to_list()])

top_one_labels = top_k_labels[:, 0]

print(gt_labels.shape, top_one_labels.shape)

correct = (gt_labels == top_one_labels).sum()
print(f"Top 1 score is {round((correct / gt_labels.shape[0]).item() * 100, 2)}")


topk_correct = 0
for i in range(5):
    top_one_labels = top_k_labels[:, i]
    correct = (gt_labels == top_one_labels).sum()
    topk_correct += correct
print(f"Top 5 score is {round((topk_correct / gt_labels.shape[0]).item() * 100, 2)}")
# correct = (gt_labels.repeat() == top_k_labels).sum()
# print(correct)

torch.Size([7031]) torch.Size([7031])
Top 1 score is 3.68
Top 5 score is 8.01


In [14]:
#compute the artist accuracy
artist_accuracy = {}
for i in range(len(artists)):
    artist_accuracy[artists[i]] = 0
for i in range(len(gt_labels)):
    if gt_labels[i] == top_one_labels[i]:
        artist_accuracy[artists[gt_labels[i]]] += 1
for i in range(len(artists)):
    artist_accuracy[artists[i]] /= len(df[df['artist'] == artists[i]])
    #multiply by 100 to get percentage
    artist_accuracy[artists[i]] *= 100
print(artist_accuracy)


{'WLOP': 3.4482758620689653, 'Dao Trong Le': 0.0, 'Zeronis': 0.0, 'Chengwei Pan': 0.0, 'Wenjun Lin': 0.0, 'Grafit Studio': 0.0, 'Sylvain Sarrailh': 6.666666666666667, 'Greg Rutkowski': 0.0, 'Bayard Wu': 0.0, 'Bo Chen': 0.0, 'Tooth Wu': 0.0, 'Anato Finnstark': 0.0, 'Qi Sheng Luo': 0.0, 'Raf Grassetti': 0.0, 'sparth': 3.3333333333333335, 'Hicham Habchi': 0.0, 'Zhelong Xu': 0.0, 'Anthony Chong Jones': 0.0, 'Rudy Siswanto': 0.0, 'Nurzhan Bekkaliyev': 0.0, 'Jama Jurabaev': 0.0, 'Evan Lee': 0.0, 'Anatomy For Sculptors': 0.0, 'Christophe Young': 0.0, 'Eytan Zana': 0.0, 'Darek Zabrocki': 0.0, 'Jakub Rozalski': 0.0, 'Maria Panfilova': 0.0, 'Hou China': 0.0, 'Ismail Inceoglu': 3.3333333333333335, 'Andreas Rocha': 10.0, 'Johnson Ting': 0.0, 'Ching Yeh': 0.0, 'Paul Chadeisson': 3.3333333333333335, 'Igor Sid': 0.0, 'Jonas Ronnegard': 0.0, 'Nivanh Chanthara': 0.0, 'Thomas Chamberlain-Keen': 0.0, 'Steve Zheng': 0.0, 'Hue Teo': 0.0, 'Mauro Belfiore': 0.0, 'Raphael Lacoste': 3.3333333333333335, 'Dave G

In [15]:
#sort
sorted_accuracy = sorted(artist_accuracy.items(), key=lambda x: x[1], reverse=True)
print(sorted_accuracy)

[('Jessica Oyhenart', 13.333333333333334), ('hyeonsick choi (aruana sick)', 13.333333333333334), ('Andreas Rocha', 10.0), ('Taejune Kim', 10.0), ('Rinotuna', 10.0), ('Maxim Verehin', 7.142857142857142), ('Sylvain Sarrailh', 6.666666666666667), ('Alena Aenami', 6.666666666666667), ('Anton Fadeev', 6.666666666666667), ('Krenz Cushart', 6.666666666666667), ('Citemer Liu', 6.666666666666667), ('Jason Chan', 6.666666666666667), ('Irakli Nadar', 6.666666666666667), ('Aoi Ogata', 6.666666666666667), ('sakimi chan', 3.571428571428571), ('Ilya Kuvshinov', 3.571428571428571), ('WLOP', 3.4482758620689653), ('N I X E U', 3.4482758620689653), ('sparth', 3.3333333333333335), ('Ismail Inceoglu', 3.3333333333333335), ('Paul Chadeisson', 3.3333333333333335), ('Raphael Lacoste', 3.3333333333333335), ('Vitaly Bulgarov', 3.3333333333333335), ('Romain Jouandeau', 3.3333333333333335), ('richard anderson. flaptraps art studio', 3.3333333333333335), ('seunghee lee', 3.3333333333333335), ('BASTIEN LECOUFFE DEH

In [16]:
#higher 10
print("Top 10")
for name, value in sorted_accuracy[:10]:
    print(name, round(value, 2))

Top 10
Jessica Oyhenart 13.33
hyeonsick choi (aruana sick) 13.33
Andreas Rocha 10.0
Taejune Kim 10.0
Rinotuna 10.0
Maxim Verehin 7.14
Sylvain Sarrailh 6.67
Alena Aenami 6.67
Anton Fadeev 6.67
Krenz Cushart 6.67


In [17]:
#higher 10
print("Top 10")
for name, value in sorted_accuracy[:10]:
    print('|'+name+'|' + str(round(value, 2))+'%|')

Top 10
|Jessica Oyhenart|13.33%|
|hyeonsick choi (aruana sick)|13.33%|
|Andreas Rocha|10.0%|
|Taejune Kim|10.0%|
|Rinotuna|10.0%|
|Maxim Verehin|7.14%|
|Sylvain Sarrailh|6.67%|
|Alena Aenami|6.67%|
|Anton Fadeev|6.67%|
|Krenz Cushart|6.67%|


In [18]:
df = pd.DataFrame.from_dict(artist_accuracy, orient='index', columns=['accuracy'])
df.to_csv('../error_analysis/artists_real_img_artstation_filtered_error_analysis_'+pretrained_model+'_ViT-B-32.csv')