In [1]:
# Install img2vec_pytorch, the package the import cell below pulls Img2Vec from.
# --quiet is passed to pip; the print that follows marks that the cell ran to its last line.
# NOTE(review): no version is pinned, so a later re-run may resolve to a different release.
!pip install --quiet img2vec_pytorch
print('pip install img2vec complete')

pip install img2vec complete


In [2]:
import base64
import pandas as pd
from arrow import now
from glob import glob
from img2vec_pytorch import Img2Vec
from io import BytesIO
from os.path import basename
from PIL import Image
from plotly import express

# we're going to use the updated dataset
# root folder of the dataset; every entry directly beneath it becomes one tag
GLOB = '/kaggle/input/ai-artwork/data/'
# upper bound on the number of files encoded per tag
STOP = 100
# length each embedding is reshaped to in embed()
SIZE = 512
# (width, height) the hover images are resized to in png()
THUMBNAIL_SIZE = (128, 128)


def embed(model, filename: str):
    """Return the embedding of the image stored at ``filename`` as a flat vector.

    :param model: object exposing ``get_vec(image, tensor=True)``; in this notebook
        an ``Img2Vec`` instance.
    :param filename: path of the image file to open.
    :return: the result of ``get_vec`` converted with ``.numpy()`` and reshaped to
        ``(SIZE,)``. The reshape raises if the model does not yield exactly SIZE values.
    """
    with Image.open(fp=filename, mode='r') as image:
        # Hand the model a three-channel RGB image regardless of how the file is stored:
        # grayscale, palette, RGBA or CMYK files would otherwise arrive with a different
        # channel count. For files that are already RGB the pixel data is unchanged.
        rgb_image = image.convert('RGB')
        return model.get_vec(rgb_image, tensor=True).numpy().reshape(SIZE,)


# https://stackoverflow.com/a/952952
def flatten(arg):
    """Concatenate the items of every iterable in ``arg`` into one new list (one level deep)."""
    merged = []
    for inner in arg:
        merged.extend(inner)
    return merged

def png(filename: str) -> str:
    """Build a base64 ``data:image/png`` URI holding a resized copy of the image at ``filename``.

    :param filename: path of the image file to open.
    :return: the string ``'data:image/png;base64,'`` followed by the base64 text of the PNG bytes.
    """
    sink = BytesIO()
    with Image.open(fp=filename, mode='r') as picture:
        # the source images are large, so the hover image is resized to THUMBNAIL_SIZE
        shrunk = picture.resize(size=THUMBNAIL_SIZE)
        shrunk.save(sink, format='png')
    payload = base64.b64encode(sink.getvalue()).decode()
    return 'data:image/png;base64,' + payload

def get_picture_from_glob(arg: str, tag: str, stop: int) -> list:
    """Encode up to ``stop`` files matching the glob pattern ``arg`` as one row each.

    Reads the module-level ``model``, which must be assigned before the first call.

    :param arg: glob pattern selecting the input files.
    :param tag: label stored in the ``tag`` field of every row produced.
    :param stop: maximum number of files to encode; zero or a negative value yields no rows.
    :return: list of ``pd.Series`` indexed by ``tag``, ``name`` (file basename),
        ``value`` (result of ``embed``) and ``image`` (result of ``png``).
    """
    time_get = now()
    # glob() returns matches in arbitrary order, so sort before truncating to make the
    # selected subset identical on every run. max() keeps a negative stop meaning "nothing"
    # instead of letting the slice drop files from the end.
    selected = sorted(glob(pathname=arg))[:max(stop, 0)]
    result = [
        pd.Series(
            data=[tag, basename(input_file), embed(model=model, filename=input_file), png(filename=input_file), ],
            index=['tag', 'name', 'value', 'image'],
        )
        for input_file in selected
    ]
    print('encoded {} data {} rows in {}'.format(tag, len(result), now() - time_get))
    return result

time_start = now()
# ResNet-18 feature extractor with CUDA disabled; get_picture_from_glob reads this module-level name
model = Img2Vec(model='resnet-18', cuda=False)
# maps the basename of each entry directly under GLOB to a pattern for the files inside it
data_dict = {
    basename(directory): directory + '/*.*'
    for directory in glob(GLOB + '/*')
}
# encode up to STOP files per tag, then merge the per-tag row lists into a single frame
df = pd.DataFrame(
    data=flatten(
        arg=[
            get_picture_from_glob(arg=pattern, tag=label, stop=STOP)
            for label, pattern in data_dict.items()
        ]
    )
)
print('done in {}'.format(now() - time_start))


Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth
100%|██████████| 44.7M/44.7M [00:00<00:00, 232MB/s]


encoded Human_Color_Field_Painting data 100 rows in 0:00:10.605307
encoded Human_Northern_Renaissance data 100 rows in 0:00:12.189821
encoded AI_SD_impressionism data 100 rows in 0:00:07.585511
encoded AI_LD_post_impressionism data 100 rows in 0:00:06.277954
encoded AI_DiffusionDB_small_2 data 100 rows in 0:00:06.654549
encoded AI_LD_art_nouveau data 100 rows in 0:00:06.356632
encoded Human_Art_Nouveau_Modern data 100 rows in 0:00:12.017091
encoded Human_Synthetic_Cubism data 100 rows in 0:00:12.224073
encoded AI_LD_surrealism data 100 rows in 0:00:06.201887
encoded AI_LD_ukiyo-e data 100 rows in 0:00:06.126617
encoded AI_LD_romanticism data 100 rows in 0:00:06.620633
encoded Human_High_Renaissance data 100 rows in 0:00:11.776457
encoded Human_Post_Impressionism data 100 rows in 0:00:11.568374
encoded AI_LD_baroque data 100 rows in 0:00:06.602787
encoded AI_LD_expressionism data 100 rows in 0:00:06.352551
encoded AI_SD_post_impressionism data 100 rows in 0:00:07.202981
encoded Human_Sy