In [1]:
from glob import glob

# How many images do we have?

GLOB = '/kaggle/input/car-camera-photos/images/images/*'

print(len(list(glob(pathname=GLOB))))

22241


In [2]:
import torch
import torch.nn as nn
import torchvision.models as models
import torchvision.transforms as transforms
import numpy as np


DEVICE = torch.device('cpu')
OUTPUT_SIZE = 512

model = models.resnet18(weights=models.ResNet18_Weights.IMAGENET1K_V1)
extraction_layer = model._modules.get('avgpool')
model.to(DEVICE)
model.eval()

scaler = transforms.Resize((224, 224))
normalize = transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.2, 0.2, 0.2])
to_tensor = transforms.ToTensor()

def get_vec(arg, model, extraction_layer):
    image = normalize(to_tensor(scaler(arg))).unsqueeze(0).to(DEVICE)
    result = torch.zeros(1, OUTPUT_SIZE, 1, 1)
    def copy_data(m, i, o):
        result.copy_(o.data)
    hooked = extraction_layer.register_forward_hook(copy_data)
    with torch.no_grad():
        model(image)
    hooked.remove()
    return result

Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth
100%|██████████| 44.7M/44.7M [00:00<00:00, 122MB/s]


In [3]:
import base64
import pandas as pd
from arrow import now
from glob import glob
from io import BytesIO
from os.path import basename
from PIL import Image

DATA = '/kaggle/input/car-camera-photos/images/images'
STOP = 3000
THUMBNAIL_SIZE = (128, 128)


def embed(model, filename: str):
    with Image.open(fp=filename, mode='r') as image:
        return get_vec(arg=image.convert('RGB'), model=model, extraction_layer=extraction_layer).numpy().reshape(OUTPUT_SIZE,)


def png(filename: str) -> str:
    with Image.open(fp=filename, mode='r') as image:
        buffer = BytesIO()
        # our images are pretty big; let's shrink the hover images to thumbnail size
        image.resize(size=THUMBNAIL_SIZE).convert('RGB').save(buffer, format='png')
        return 'data:image/png;base64,' + base64.b64encode(buffer.getvalue()).decode()

def get_picture_from_glob(arg: str, stop: int) -> list:
    time_get = now()
    result = [pd.Series(data=[basename(input_file), embed(model=model, filename=input_file), png(filename=input_file), ],
                        index=['name', 'value', 'image'])
        for index, input_file in enumerate(glob(pathname=arg)) if index < stop and input_file.endswith('.jpg')]
#     print('encoded {} rows of {}  in {}'.format(len(result), now() - time_get))
    return result

time_start = now()
data_list = list(glob(DATA + '/*'))
df = pd.DataFrame(data=[get_picture_from_glob(arg=value, stop=STOP) for value in data_list])
print('done in {}'.format(now() - time_start))

done in 0:28:25.164173


In [4]:
df.shape

(22241, 1)