In [2]:
import torch
import torchvision.transforms as transforms
from torchvision import models
from PIL import Image
import os
from efficientnet_pytorch import EfficientNet
import numpy as np
from sklearn.preprocessing import normalize
import json
from sklearn.metrics.pairwise import cosine_similarity
import pickle

In [3]:
#!wget https://raw.githubusercontent.com/lukemelas/EfficientNet-PyTorch/master/examples/simple/labels_map.txt

# Functions

In [4]:
def preprocess_image(image_path):
    transform = transforms.Compose([
        transforms.Resize(224),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])
    image = Image.open(image_path).convert("RGB")
    return transform(image).unsqueeze(0)

def generate_embedding(image_path, model):
    input_tensor = preprocess_image(image_path)
    with torch.no_grad():
        output = model(input_tensor)
    return output.squeeze().numpy()

def classify_image(image_path, model):
    input_tensor = preprocess_image(image_path)
    with torch.no_grad():
        output = model(input_tensor)
    probabilities = torch.nn.functional.softmax(output, dim=1)[0]
    return probabilities

In [5]:
# Working folders
image_directory = '/root/tinier-world/notebooks/img_samples/'

In [6]:
# model setting
model = EfficientNet.from_pretrained('efficientnet-b7')
model = model.eval()

Loaded pretrained weights for efficientnet-b7


In [7]:
# Load class names
labels_map = json.load(open('labels_map.txt'))
labels_map = [labels_map[str(i)] for i in range(1000)]

In [8]:
img_list = os.listdir(image_directory)
img_list.sort()
img_list = [ i for i in img_list if '.jpg' in i]
#print(img_list)
image_path_list = [image_directory + item for item in img_list]
print(f'We have {len(img_list)} images.')

We have 100 images.


In [10]:
for i in range(len(image_path_list)):
    probabilities = classify_image(image_path_list[i], model)
    _, predicted_class_idx = torch.max(probabilities, 0)

    predicted_class_name = labels_map[predicted_class_idx]
    #print("Predicted class: {} (Probability: {:.2f}%)".format(predicted_class_name, probabilities[predicted_class_idx] * 100))

# See embeddings

In [11]:
embeddings = []
for filename in os.listdir(image_directory):
    if filename.endswith(".jpg"):
        image_path = os.path.join(image_directory, filename)
        embedding = generate_embedding(image_path, model)
        embeddings.append(embedding)

print(f'There are {len(embeddings)} embedding vectors, each size is {len(embeddings[0])}')


There are 100 embedding vectors, each size is 1000


In [13]:
# Normalized all the embedding vectors
embeddings_array = np.array(embeddings)
#normalized_embeddings = normalize(embeddings_array, axis=1)
#normalized_embeddings

array([[-0.02636609, -0.02222642, -0.0324166 , ..., -0.02546457,
         0.01534609,  0.04546103],
       [-0.01725505, -0.00919999, -0.00522672, ...,  0.21477702,
         0.03390644, -0.02383171],
       [-0.00467386, -0.02897217, -0.00233671, ...,  0.00863706,
        -0.03159492,  0.00917519],
       ...,
       [-0.02814319,  0.01097103, -0.03721805, ...,  0.00481532,
        -0.02785209, -0.0035826 ],
       [-0.02112429, -0.00581052,  0.00192864, ...,  0.00309215,
        -0.01712837, -0.01582016],
       [ 0.02694382,  0.00210261,  0.02136889, ...,  0.00150555,
        -0.01284696,  0.05090488]], dtype=float32)

In [14]:
#i = 3
#j = 4
#simility = np.dot(normalized_embeddings[i], normalized_embeddings[j])
#simility

-0.011119988

#  Nearest Neighbours

In [15]:
idx = 1
k = 3
cosine_similarities = cosine_similarity(embeddings_array)

nearest_neighbors_indices = np.argsort(cosine_similarities[idx])[-(k+1):-1][::-1]

print(f'{k} nearest neighbors indices of item[{idx}] are items: {nearest_neighbors_indices}')


3 nearest neighbors indices of item[1] are items: [45  3 60]


In [19]:
cosine_similarities.min(), cosine_similarities.max()

(-0.15432183, 1.0000004)

# Generate 

In [17]:
embedding_dict = {}

for filename in os.listdir(image_directory):
    if filename.endswith(".jpg"):
        image_path = os.path.join(image_directory, filename)
        embedding = generate_embedding(image_path, model)
        image_name = os.path.splitext(filename)[0]
        embedding_dict[image_name] = embedding
        
embedding_dict

{'d803958b-74b6-4904-afa8-0d6f4c08d695': array([-1.09133875e+00, -9.19990480e-01, -1.34177983e+00, -1.11676931e+00,
         4.49127942e-01,  6.63881183e-01, -1.61339188e+00, -2.84669220e-01,
        -3.41866612e-01, -8.29921588e-02, -2.20851764e-01, -9.75975037e-01,
        -7.36936569e-01, -5.08651435e-01, -1.24322200e+00, -4.65223998e-01,
        -1.86536282e-01, -5.79539657e-01, -2.13787749e-01, -9.86613512e-01,
         7.19167292e-02,  8.27844203e-01, -2.75720179e-01, -2.39192441e-01,
        -4.70175296e-02, -7.28628039e-02,  7.44919777e-01, -5.53914428e-01,
        -3.12673748e-02, -5.47319770e-01, -8.32762599e-01, -3.94322485e-01,
         1.39695629e-01,  4.12872910e-01, -2.28948087e-01, -1.34377384e+00,
        -8.34414423e-01, -1.53184772e+00,  1.06398952e+00, -9.67874944e-01,
        -1.26376760e+00, -6.30842268e-01, -1.02028215e+00, -7.02924728e-01,
        -1.23430753e+00,  4.18983370e-01, -2.43968081e+00, -1.39886349e-01,
        -6.77861929e-01, -6.52154326e-01, -1.705

In [21]:
embedding_dict.keys()

dict_keys(['d803958b-74b6-4904-afa8-0d6f4c08d695', '50a9d637-c18b-4175-aeb8-a02310e3fcb1', '50cfd33e-e636-45bf-a931-11c54d1041b2', '3b0f3e4d-1f3f-4ad8-a387-f05af7ec3f54', 'a1b7b26e-06a8-4414-adad-3f67902eec34', '9d359e94-c725-4d61-bb5a-2b4d0cec7519', '6e7451a4-8461-421d-92f6-85d0356d21a3', 'ab1686a0-ae01-4405-88f6-cd4ff2cc930a', 'c3cbeb3f-57fc-4a0f-840d-1ca4069dd0e3', '5a0f0024-f30f-4879-84fc-abf40bada642', '67720158-5951-4084-bb10-1f0d1a04fe82', '45c7f58d-d564-463b-87b8-87ff48e8744e', '4fbff847-9ede-4d15-9f44-60807b7853fa', '29ab7432-b1f2-4d3a-a686-c4a9a844f119', '9d59df2a-51b8-481b-a1fe-c59077666129', 'e2b49c1f-44c4-436a-b85d-45a050dc4c06', 'c88f9636-b415-469e-9f58-ee5514326f00', '8e665133-2e8e-45c7-87c5-4f316b22d2eb', 'bede65bd-1c27-45a5-b2c4-c50b53a6865c', '226916c8-c44a-4d61-b651-b37de7ddb5b1', '06b9b722-02d5-46ef-af1f-59efd1a85803', 'f933b4b3-4e91-49eb-9bdb-350ec971e50e', '65ac9ca2-7892-4b46-8fbf-9524b56055ff', '3b9011a8-737a-432b-9117-7bbb1f696865', '2e53fc4c-fcfd-4694-b122-08f1

In [23]:
# Save embedding_dict
with open('embedding_dict.pkl', 'wb') as f:
    pickle.dump(embedding_dict, f)

with open('embedding_dict.pkl', 'rb') as f:
    loaded_data = pickle.load(f)

loaded_data.keys()

dict_keys(['d803958b-74b6-4904-afa8-0d6f4c08d695', '50a9d637-c18b-4175-aeb8-a02310e3fcb1', '50cfd33e-e636-45bf-a931-11c54d1041b2', '3b0f3e4d-1f3f-4ad8-a387-f05af7ec3f54', 'a1b7b26e-06a8-4414-adad-3f67902eec34', '9d359e94-c725-4d61-bb5a-2b4d0cec7519', '6e7451a4-8461-421d-92f6-85d0356d21a3', 'ab1686a0-ae01-4405-88f6-cd4ff2cc930a', 'c3cbeb3f-57fc-4a0f-840d-1ca4069dd0e3', '5a0f0024-f30f-4879-84fc-abf40bada642', '67720158-5951-4084-bb10-1f0d1a04fe82', '45c7f58d-d564-463b-87b8-87ff48e8744e', '4fbff847-9ede-4d15-9f44-60807b7853fa', '29ab7432-b1f2-4d3a-a686-c4a9a844f119', '9d59df2a-51b8-481b-a1fe-c59077666129', 'e2b49c1f-44c4-436a-b85d-45a050dc4c06', 'c88f9636-b415-469e-9f58-ee5514326f00', '8e665133-2e8e-45c7-87c5-4f316b22d2eb', 'bede65bd-1c27-45a5-b2c4-c50b53a6865c', '226916c8-c44a-4d61-b651-b37de7ddb5b1', '06b9b722-02d5-46ef-af1f-59efd1a85803', 'f933b4b3-4e91-49eb-9bdb-350ec971e50e', '65ac9ca2-7892-4b46-8fbf-9524b56055ff', '3b9011a8-737a-432b-9117-7bbb1f696865', '2e53fc4c-fcfd-4694-b122-08f1

In [None]:
# Save embedding nparray
with open('embedding_nparray.pkl', 'wb') as f:
    pickle.dump(embedding_dict, f)
