# Imports

In [1]:
import os

import numpy as np
from PIL import Image
import torch
import torchvision.transforms as T

from transformers import AutoProcessor, CLIPModel

# Load models

In [2]:
# Run on the GPU when CUDA is usable; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

In [24]:
# Hugging Face checkpoint identifiers.
model_name_1 = "openai/clip-vit-large-patch14"
# Only referenced by the disabled BLIP experiment at the end of this cell.
model_name_2 = "Salesforce/blip-image-captioning-large"

# CLIP checkpoint: the processor prepares model inputs, the model itself is
# moved to the selected device.
processor_1 = AutoProcessor.from_pretrained(model_name_1)
model_1 = CLIPModel.from_pretrained(model_name_1).to(device)

# Disabled second model (BLIP):
# model_2 = BlipModel.from_pretrained(model_name_2).to(device)
# processor_2 = AutoProcessor.from_pretrained(model_name_2)

# Transformations

In [21]:
# Image preprocessing pipeline: resize to 256x256 first, then keep the
# central 192x192 crop.
transformation_chain = T.Compose([T.Resize((256, 256)), T.CenterCrop((192, 192))])

# Create Dataset

In [3]:
from image_similarity.data_preprocesing import create_dataset, get_idx

In [5]:
# train, test = create_dataset("../data/my_dataset", max_num=5000)

In [6]:
# train, test = create_dataset("../data/train", max_num=10000, train_test_percentage=0.2)

In [4]:
# Build the two datasets from the embeddings CSV. create_dataset is a project
# helper (image_similarity.data_preprocesing); how it splits the rows is not
# visible in this notebook — TODO confirm the split parameters it defaults to.
train, test = create_dataset("train_embeddings.csv")

In [86]:
import pandas as pd

# Reload the embeddings table and strip the stray index columns
# ("Unnamed: 0", "Unnamed: 0.1") that pandas creates when a CSV was saved
# together with its index.
#   * errors="ignore" keeps the cell re-runnable once those columns are gone
#     (a plain drop raises KeyError on a missing label).
#   * Chaining instead of inplace=True builds the cleaned frame in one step.
# Path rewrite kept for reference (disabled):
# df['img_path'] = df['img_path'].apply(lambda x: x.replace("..", "E:\MachineLearningProjects\ml-practices\src\haccaton"))
df = pd.read_csv("train_embeddings.csv").drop(
    columns=["Unnamed: 0.1", "Unnamed: 0"], errors="ignore"
)
# Last expression of the cell, so the preview is actually displayed.
df.head()

In [87]:
# index=False: do not write the row index as an extra unnamed column.
# Saving with the index is what produces the "Unnamed: 0" / "Unnamed: 0.1"
# columns on the next read_csv.
# NOTE(review): confirm that create_dataset does not rely on a leading index
# column in this file before re-saving.
df.to_csv("train_embeddings.csv", index=False)

In [88]:
len(df["object_id"])

20345

In [89]:
len(df["object_id"].unique())

16430

In [5]:
train

Dataset({
    features: ['image_file_path', 'image', 'labels'],
    num_rows: 16276
})

In [91]:
b = pd.Series([1])

1

In [11]:
# Settings for building the candidate pool.
seed = 42        # fixed seed so the shuffle below is repeatable
batch_size = 64  # batch size handed to Dataset.map later on
num_samples = len(train)

# Shuffled version of the training dataset.
candidate_subset = train.shuffle(seed=seed)

In [12]:
def extract_embeddings(batch):
    """Expose a batch's ``image`` column under the key ``embeddings``.

    Nothing is computed here: whatever is stored under ``batch["image"]`` is
    returned unchanged inside a new single-key dict.
    """
    image_column = batch["image"]
    return dict(embeddings=image_column)


In [14]:
# Apply extract_embeddings batch-wise to the shuffled candidates.
candidate_subset_emb = candidate_subset.map(
    extract_embeddings,
    batched=True,
    batch_size=batch_size,
)

Map:   0%|          | 0/16276 [00:00<?, ? examples/s]

In [15]:
# Collect the "embeddings" column into one NumPy array and wrap it as a torch
# tensor; this is the search matrix handed to fetch_similar in predict_top_k.
all_candidate_embeddings = torch.from_numpy(np.array(candidate_subset_emb["embeddings"]))

In [16]:

# get_idx is a project helper (image_similarity.data_preprocesing).
# Presumably it returns one identifier per candidate, aligned with the rows of
# all_candidate_embeddings — TODO confirm against its implementation.
candidate_ids = get_idx(candidate_subset_emb)

  0%|          | 0/16276 [00:00<?, ?it/s]

# Run

In [26]:
from ultralytics import YOLO

# Load a pretrained YOLOv8 model from a local weights file (the path points at
# a classification training run: runs\classify\train11\weights\best.pt).
# NOTE(review): absolute, machine-specific path — the notebook cannot run on
# another machine until this is made configurable.

yolo_model = YOLO(r"E:\MachineLearningProjects\ml-practices\src\haccaton\classification\runs\classify\train11\weights\best.pt")

In [18]:
# "image" column of the test dataset.
# NOTE(review): in the visible notebook this name is only mentioned in a
# commented-out fragment inside predict_top_k.
test_img = test['image']

In [76]:
from typing import Union
import cv2

def predict_top_k(img: Union[int, str], k=10):
    """Find the ``k`` candidates most similar to a query image and save them.

    Steps:
      1. Classify the query with ``yolo_model`` and take the index of the most
         probable class.
      2. Run ``transformation_chain`` on the query and embed it with
         ``get_embeddings`` (``processor_1`` / ``model_1``).
      3. Prepend the class index to the embedding and look up the nearest
         candidates with ``fetch_similar``.
      4. Print the ``(label, score)`` pairs, write the query image to
         ``OrigImg.jpg`` and the retrieved images (each resized to 255x255 and
         stacked vertically) to ``TopK.jpg`` in the working directory.
         ``TopK.jpg`` is only written when at least one image was retrieved.

    Args:
        img: Path of the query image, or an integer row index into ``test``
            (resolved through ``test['image_file_path']``).
        k: Number of similar items to retrieve.

    Raises:
        FileNotFoundError: If OpenCV cannot read the query image or one of the
            retrieved image paths.

    Note:
        ``get_embeddings`` and ``fetch_similar`` are not defined in this
        notebook section; they must already exist in the kernel.
    """
    # Resolve an integer index to its file path once, so PIL, YOLO and OpenCV
    # all read the same file. (Passing a bare int on to Image.open fails.)
    img_path = test['image_file_path'][img] if isinstance(img, int) else img

    pred = yolo_model.predict(img_path)
    # Index of the highest class probability for the single query image.
    group = int(pred[0].probs.data.argmax())

    # Context manager closes the underlying file; the transforms return a new
    # image, so `sample` stays usable after the handle is released.
    with Image.open(img_path) as query_image:
        sample = [transformation_chain(query_image)]

    with torch.no_grad():
        embeddings_model = get_embeddings(processor=processor_1,
                                          model=model_1,
                                          image=sample)
    # Drop the batch axis and put the class index in front as an extra feature.
    # NOTE(review): assumes the stored candidate embeddings carry the same
    # leading class feature — confirm against how train_embeddings.csv was built.
    image_vector = np.array(embeddings_model.data.cpu()).squeeze(0)
    embeddings = torch.from_numpy(np.insert(image_vector, 0, group))

    _sim_idx, sim_labels, sim_paths, sim_scores = fetch_similar(
        query_embeddings=embeddings,
        all_embeddings=all_candidate_embeddings,
        idx=candidate_ids,
        top_k=k,
    )
    print(*zip(sim_labels, sim_scores), sep="\n")

    # cv2.imread signals failure by returning None instead of raising; check
    # explicitly so the error names the offending path.
    query_bgr = cv2.imread(img_path)
    if query_bgr is None:
        raise FileNotFoundError(f"OpenCV could not read the query image: {img_path!r}")

    tiles = []
    for path in sim_paths:
        neighbour = cv2.imread(path)
        if neighbour is None:
            raise FileNotFoundError(f"OpenCV could not read the retrieved image: {path!r}")
        tiles.append(cv2.resize(neighbour, (255, 255)))

    cv2.imwrite("OrigImg.jpg", query_bgr)
    if tiles:
        # Stack the resized neighbours vertically into a single strip.
        cv2.imwrite('TopK.jpg', np.concatenate(tiles, axis=0))
        

In [79]:
# Raw string: the Windows path contains backslashes (\M, \m, \s, \h, \d) that
# are invalid escape sequences in a normal string literal.
predict_top_k(os.path.join(r"E:\MachineLearningProjects\ml-practices\src\haccaton\data", "plastinka.jpg"))

image 1/1 E:\MachineLearningProjects\ml-practices\src\haccaton\data\plastinka.jpg: 256x256  1.00,  0.00,  0.00,  0.00,  0.00, 78.7ms
Speed: 9.0ms preprocess, 78.7ms inference, 0.0ms postprocess per image at shape (1, 3, 256, 256)
('22796825', 0.7540040016174316)
('25585108', 0.7539374828338623)
('10320706', 0.7452168464660645)
('11285091', 0.7439494729042053)
('8720483', 0.7431620955467224)
('22924337', 0.7420465350151062)
('9776990', 0.7379002571105957)
('44778134', 0.736646831035614)
('17101370', 0.7337498068809509)
('43800883', 0.7335513830184937)


In [80]:
# Raw string: the Windows path contains backslashes (\M, \m, \s, \h, \d) that
# are invalid escape sequences in a normal string literal.
predict_top_k(os.path.join(r"E:\MachineLearningProjects\ml-practices\src\haccaton\data", "4701371.jpg"))

image 1/1 E:\MachineLearningProjects\ml-practices\src\haccaton\data\4701371.jpg: 256x256  1.00,  0.00,  0.00,  0.00,  0.00, 85.7ms
Speed: 4.0ms preprocess, 85.7ms inference, 0.0ms postprocess per image at shape (1, 3, 256, 256)
('4701371', 0.9402170181274414)
('6339689', 0.8821889758110046)
('5029921', 0.8777577877044678)
('4708600', 0.8774980306625366)
('4702385', 0.8773635029792786)
('4707704', 0.8772224187850952)
('9872355', 0.875396728515625)
('10713733', 0.8740614652633667)
('9871462', 0.8736203908920288)
('4704535', 0.8733720183372498)


### Ideas how to upgrade

Implement predict_top_k with a k-nearest-neighbors search

Structural Similarity Index (SSIM) - note: compute it on grayscale images
from skimage.metrics import structural_similarity as ssim

Calculate the mean squared error

def MeanSquareError(image1, image2):
    """Return the squared difference of two images, averaged per pixel.

    Both inputs are cast to float32 before subtraction, so unsigned integer
    images do not wrap around. The summed squared difference is divided by
    ``height * width`` (the first two axes). For inputs with additional axes
    (e.g. colour channels) the result is therefore the sum over those axes of
    the per-pixel mean, not a mean over every element.

    Args:
        image1: NumPy array with at least two dimensions.
        image2: NumPy array with the same shape as ``image1``.

    Returns:
        The per-pixel mean squared error (0 for identical images).

    Raises:
        ValueError: If the shapes differ or the inputs have fewer than two
            dimensions.
    """
    # Reject mismatched shapes up front: NumPy would otherwise broadcast them
    # silently and the normaliser below would no longer match the data.
    if image1.shape != image2.shape:
        raise ValueError(
            f"images must have the same shape, got {image1.shape} and {image2.shape}"
        )
    if image1.ndim < 2:
        raise ValueError(f"images must be at least 2-D, got shape {image1.shape}")

    squared_error = (image1.astype("float32") - image2.astype("float32")) ** 2
    height, width = image1.shape[:2]
    return np.sum(squared_error) / float(height * width)

def ImageComparision(image1, image2):
    """Print the mean square error and the SSIM score of two images.

    The values come from ``MeanSquareError`` and ``ssim`` respectively; nothing
    is returned.
    """
    mse_value = MeanSquareError(image1, image2)
    ssim_value = ssim(image1, image2)
    report = f"Mean Square Error is {mse_value}\n Structural Similarity Index{ssim_value}"
    print(report)