# Get image embeddings from EfficientNet via img2vec

In [None]:
!pip install img2vec_pytorch --user

In [2]:
# imports
from img2vec_pytorch import Img2Vec
import os
import pandas as pd
from PIL import Image

In [None]:
# Images to process
image_dir = './val2017/'

# Get img2vec
img2vec = Img2Vec(model='efficientnet_b3')

df = pd.DataFrame()
num_files = len(os.listdir(image_dir))
for i, filename in enumerate(os.listdir(image_dir)):
    # Print progress
    print(f"{i} out of {num_files} - {filename}")

    # Load image
    img = Image.open(image_dir+filename)

    # Some images are BW, convert to RGB
    if img.mode != 'RGB':
        print(f"Converting {filename} to RGB")
        img = img.convert('RGB')
    
    # Get embedding and store in dataframe
    embedding = img2vec.get_vec(img, tensor=False)
    row = pd.DataFrame({"filename": filename, "embedding": [embedding]})
    df = pd.concat([df, row], axis=0)

In [11]:
df.sample(10)

Unnamed: 0,filename,embedding
0,000000183965.jpg,"[-0.010022697, -0.0027541262, -0.07790962, -4...."
0,000000139077.jpg,"[8.464758, -1.0534918e-08, -0.06922387, -0.000..."
0,000000351096.jpg,"[0.4889195, -6.8477846e-09, -1.2544345e-05, 19..."
0,000000105264.jpg,"[-0.0016095863, -0.00022712024, -0.22042257, -..."
0,000000389933.jpg,"[-2.8899992e-06, -0.0030170023, -3.51088e-08, ..."
0,000000235064.jpg,"[0.16119336, -0.023483042, 5.347272, -0.002037..."
0,000000378873.jpg,"[-4.711545e-06, -0.061488062, -0.09265913, 8.4..."
0,000000558073.jpg,"[-0.27766138, -1.2798888e-10, -0.00011446543, ..."
0,000000400367.jpg,"[-8.816778e-07, 12.952164, -0.2537899, -1.5509..."
0,000000282298.jpg,"[-3.5908335e-05, -0.16581029, 2.0920746, -3.12..."


In [58]:
# Save results
df.to_parquet("efficientnet_b3_image_embeddings.parquet")