In [1]:
!pip install --quiet img2vec_pytorch
print('pip installed img2vec')

pip installed img2vec


In [2]:
from warnings import filterwarnings
filterwarnings(action='ignore', category=FutureWarning) # quiet a plotly issue
filterwarnings(action='ignore', category=UserWarning) # quiet an img2vec issue

In [3]:
from img2vec_pytorch import Img2Vec
from PIL import Image
from arrow import now
from glob import glob
import pandas as pd
from os.path import basename

# Shared image encoder used by the functions below: ResNet-18 with CUDA
# disabled, default layer, 512 requested outputs.
img2vec = Img2Vec(
    cuda=False,
    model='resnet-18',
    layer='default',
    layer_output_size=512,
)

# https://stackoverflow.com/a/952952
def flatten(arg):
    """Concatenate the elements of each inner iterable of ``arg`` into one list.

    Only one level of nesting is removed; element order is preserved.
    """
    flat = []
    for inner in arg:
        flat.extend(inner)
    return flat

def get_from_glob(arg: str, tag: str) -> list:
    """Encode every image matching a glob pattern into a tagged embedding row.

    Each file matched by ``arg`` is opened with PIL and passed to the
    module-level ``img2vec`` encoder; the encoder output is reshaped to a
    flat 512-element array and stored together with the label and file name.

    Args:
        arg: glob pattern selecting the image files to encode.
        tag: label stored in the ``tag`` field of every returned row.

    Returns:
        A list of ``pd.Series`` indexed by ``['tag', 'name', 'value']``, one
        per image that encoded successfully (empty if nothing matched).

    Files whose encoding raises ``RuntimeError`` are reported on stdout and
    skipped; any other exception propagates. The elapsed time for the whole
    pattern is printed when done.
    """
    time_get = now()
    result = []
    for input_file in glob(pathname=arg):
        name = basename(input_file)
        try:
            with Image.open(fp=input_file, mode='r') as image:
                vector = img2vec.get_vec(image, tensor=True).numpy().reshape(512,)
                result.append(pd.Series(data=[tag, name, vector], index=['tag', 'name', 'value']))
        except RuntimeError:
            # we only have a few failures so we're just going to discard them,
            # but report both the label and the file name so they can be traced
            # (the original format string had a single placeholder and dropped the name)
            print('runtime failure: {} {}'.format(tag, name))
    print('encoded {} data in {}'.format(tag, now() - time_get))
    return result

time_start = now()

# Map each label to the glob pattern for its .jpg files. The label is the
# folder's basename with its first underscore-separated token dropped and the
# remaining tokens joined by spaces.
train = {}
for folder in glob('/kaggle/input/aruzz22-5k-an-image-dataset-of-rice-varieties/1_TRAIN/*'):
    label = ' '.join(basename(folder).split('_')[1:])
    train[label] = folder + '/*.jpg'

# Encode one folder at a time, then merge the per-folder row lists into one frame.
train_data = []
for label, pattern in train.items():
    train_data.append(get_from_glob(arg=pattern, tag=label))
df = pd.DataFrame(data=flatten(arg=train_data))

print('done in {}'.format(now() - time_start))

Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth
100%|██████████| 44.7M/44.7M [00:00<00:00, 257MB/s]


encoded Bashmoti data in 0:01:01.310169
encoded Lal Binni data in 0:01:00.320399
encoded BR29 data in 0:00:59.478596
encoded Katarivog data in 0:00:58.290380
encoded Jirashail data in 0:00:58.906762
encoded Lal Aush data in 0:00:58.022467
encoded Shampakatari data in 0:00:59.211559
encoded Amon data in 0:00:57.300121
encoded Shorna5 data in 0:00:58.114162
encoded Subol Lota data in 0:00:59.090097
encoded Katari Polao data in 0:00:58.011465
encoded Najirshail data in 0:00:57.491018
encoded BR28 data in 0:00:57.778824
encoded Lal Biroi data in 0:00:58.165159
encoded Red Cargo data in 0:00:57.427940
encoded Paijam data in 0:00:56.258129
encoded Chinigura Polao data in 0:00:57.846144
encoded Gutisharna data in 0:00:56.849251
encoded Bashful data in 0:00:57.446285
encoded Ganjiya data in 0:00:58.108266
done in 0:19:26.836691
