#### Prepare data for 4 images

In [None]:
from PIL import Image, ImageOps

In [None]:
from google.colab import drive
# Mount Google Drive into the Colab filesystem; the image paths used below
# all live under /content/drive/.
drive.mount('/content/drive/')

Drive already mounted at /content/drive/; to attempt to forcibly remount, call drive.mount("/content/drive/", force_remount=True).


In [None]:
import os
import cv2
import numpy as np

In [None]:
def get_edited_folders():
  """Return the relative sub-folder names of every edited image variant.

  The list is rebuilt on each call (callers get a fresh list) and is ordered
  by variant family: bordered, cropped, filtered, mirrored, rotated.
  """
  border_colors = ('#000', '#fff')
  crop_regions = (
      'center', 'left_half', 'lower_half', 'lowerleft_quarter',
      'lowerright_quarter', 'right_half', 'upper_half',
      'upperleft_quarter', 'upperright_quarter',
  )
  filter_names = ('aden', 'inkwell', 'lofi')
  rotation_angles = (45, 90, 180)

  folders = [f'bordered/{color}' for color in border_colors]
  folders.extend(f'cropped/{region}' for region in crop_regions)
  folders.extend(f'filtered/{name}' for name in filter_names)
  # Mirroring has a single variant, so it has no sub-folder of its own.
  folders.append('mirrored')
  folders.extend(f'rotated/{angle}' for angle in rotation_angles)
  return folders

In [None]:
# Process Data
#
# Builds two parallel lists with one entry per (variant, image) pair:
#   labels      - the class label of the image
#   image_paths - the path of the image file on the mounted drive
# Both are ordered variant-major: the originals first, then every edited
# folder in the order returned by get_edited_folders(), and within each
# variant the images in image_list order.

image_list = ["BwjvPEAgrRr.jpg", "BxNDSVPAXRr.jpg", "BxPSFbeHxrR.jpg", "BtiVFeUgrrR.jpg"]

# One label per entry of image_list, in the same order.
# NOTE(review): "Skeeing" looks like a misspelling of "Skiing"; it is kept
# unchanged here so plot legends keep matching previously recorded outputs.
l1 = "Dog"
l2 = "Kid"
l3 = "Party"
l4 = "Skeeing"

edited_folders = get_edited_folders()

# Every image appears once as an original plus once per edited folder, so the
# label pattern repeats (1 + number of edited folders) times. Deriving the
# count keeps labels aligned with image_paths if the folder list changes.
variant_count = 1 + len(edited_folders)
labels = [l1, l2, l3, l4] * variant_count

# Images

original_basepath = '/content/drive/My Drive/DL project/data_plain/r/r'
# Edited copies mirror the original directory layout under 'data_prepared'.
edited_basepath = original_basepath.replace('data_plain', 'data_prepared')

# Original images
image_paths = [original_basepath + "/" + img for img in image_list]

# Edited images
image_paths.extend(
    f'{edited_basepath}/{folder}/{img}'
    for folder in edited_folders
    for img in image_list
)

# The plots below pair labels[i] with image_paths[i]; fail fast on a mismatch.
assert len(labels) == len(image_paths), (len(labels), len(image_paths))

In [None]:
# Dump both parallel lists, labels first and then the image paths.
for sequence in (labels, image_paths):
    print(sequence)

['Dog', 'Kid', 'Party', 'Skeeing', 'Dog', 'Kid', 'Party', 'Skeeing', 'Dog', 'Kid', 'Party', 'Skeeing', 'Dog', 'Kid', 'Party', 'Skeeing', 'Dog', 'Kid', 'Party', 'Skeeing', 'Dog', 'Kid', 'Party', 'Skeeing', 'Dog', 'Kid', 'Party', 'Skeeing', 'Dog', 'Kid', 'Party', 'Skeeing', 'Dog', 'Kid', 'Party', 'Skeeing', 'Dog', 'Kid', 'Party', 'Skeeing', 'Dog', 'Kid', 'Party', 'Skeeing', 'Dog', 'Kid', 'Party', 'Skeeing', 'Dog', 'Kid', 'Party', 'Skeeing', 'Dog', 'Kid', 'Party', 'Skeeing', 'Dog', 'Kid', 'Party', 'Skeeing', 'Dog', 'Kid', 'Party', 'Skeeing', 'Dog', 'Kid', 'Party', 'Skeeing', 'Dog', 'Kid', 'Party', 'Skeeing', 'Dog', 'Kid', 'Party', 'Skeeing']
['/content/drive/My Drive/DL project/data_plain/r/r/BwjvPEAgrRr.jpg', '/content/drive/My Drive/DL project/data_plain/r/r/BxNDSVPAXRr.jpg', '/content/drive/My Drive/DL project/data_plain/r/r/BxPSFbeHxrR.jpg', '/content/drive/My Drive/DL project/data_plain/r/r/BtiVFeUgrrR.jpg', '/content/drive/My Drive/DL project/data_prepared/r/r/bordered/#000/BwjvPEAg

### Extracting embeddings from ISC21-Descriptor-Track-1st

In [None]:
!pip install git+https://github.com/lyakaap/ISC21-Descriptor-Track-1st --quiet

  Preparing metadata (setup.py) ... [?25l[?25hdone


In [None]:
import torch
import numpy as np
import pandas as pd
from isc_feature_extractor import create_model
from pathlib import Path
from PIL import Image

In [None]:
# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Name of the pretrained weights handed to create_model.
recommended_weight_name = 'isc_ft_v107'

# create_model returns the network together with its matching preprocessor.
model, preprocessor = create_model(
    weight_name=recommended_weight_name,
    device=device,
)

In [None]:
def get_embedding(path):
  """Compute the model embedding of the image stored at `path`.

  The path is printed as a progress trace, the image is preprocessed with the
  module-level `preprocessor`, and the module-level `model` is evaluated on a
  batch of one with gradient tracking disabled.

  Args:
    path: Location of the image file, in any form accepted by `Image.open`.

  Returns:
    The tensor returned by `model` for the single-image batch. It is left on
    whatever device the model produced it on.
  """
  print(path)
  # The context manager closes the file handle once the pixels are decoded.
  # convert('RGB') forces the decode while the file is still open and turns
  # grayscale / palette / alpha images into three channels.
  # NOTE(review): assumes the preprocessor expects 3-channel input - confirm.
  with Image.open(path) as image:
    rgb_image = image.convert('RGB')
  # unsqueeze(0) adds the batch dimension before moving the input to `device`.
  x = preprocessor(rgb_image).unsqueeze(0).to(device)
  with torch.no_grad():
    y = model(x)
  return y

In [None]:
# Ask PyTorch's CUDA caching allocator to release memory it is holding but
# not currently using, before the embedding pass below.
torch.cuda.empty_cache()

In [None]:
# One embedding tensor per entry of image_paths, in the same order.
model_embeddings = [get_embedding(image_path) for image_path in image_paths]

/content/drive/My Drive/DL project/data_plain/r/r/BwjvPEAgrRr.jpg
/content/drive/My Drive/DL project/data_plain/r/r/BxNDSVPAXRr.jpg
/content/drive/My Drive/DL project/data_plain/r/r/BxPSFbeHxrR.jpg
/content/drive/My Drive/DL project/data_plain/r/r/BtiVFeUgrrR.jpg
/content/drive/My Drive/DL project/data_prepared/r/r/bordered/#000/BwjvPEAgrRr.jpg
/content/drive/My Drive/DL project/data_prepared/r/r/bordered/#000/BxNDSVPAXRr.jpg
/content/drive/My Drive/DL project/data_prepared/r/r/bordered/#000/BxPSFbeHxrR.jpg
/content/drive/My Drive/DL project/data_prepared/r/r/bordered/#000/BtiVFeUgrrR.jpg
/content/drive/My Drive/DL project/data_prepared/r/r/bordered/#fff/BwjvPEAgrRr.jpg
/content/drive/My Drive/DL project/data_prepared/r/r/bordered/#fff/BxNDSVPAXRr.jpg
/content/drive/My Drive/DL project/data_prepared/r/r/bordered/#fff/BxPSFbeHxrR.jpg
/content/drive/My Drive/DL project/data_prepared/r/r/bordered/#fff/BtiVFeUgrrR.jpg
/content/drive/My Drive/DL project/data_prepared/r/r/cropped/center/Bwjv

In [None]:
# Sanity check: there should be one embedding per entry of image_paths.
len(model_embeddings)

76

### Using T-SNE to visualize

In [None]:
# Concatenate tensors into a single array.
# .detach().cpu() is needed before .numpy(): Tensor.numpy() raises for tensors
# that live on a CUDA device, which is where the embeddings end up whenever
# `device` is "cuda". On CPU tensors the extra calls are no-ops.
concatenated_array = np.concatenate(
    [tensor.detach().cpu().numpy().flatten() for tensor in model_embeddings],
    axis=0,
)

# One row per embedding; the row width is inferred from the total size.
reshaped_array = concatenated_array.reshape(len(model_embeddings), -1)

In [None]:
from sklearn.manifold import TSNE
import time

# Project the embeddings to three dimensions with t-SNE and time the fit.
# random_state is fixed so repeated runs give the same projection.
start = time.time()
tsne = TSNE(n_components=3, random_state=0)
projections = tsne.fit_transform(reshaped_array)
end = time.time()

elapsed = end - start
print(f"generating projections with T-SNE took: {elapsed:.2f} sec")

generating projections with T-SNE took: 1.33 sec


In [None]:
# Sanity check: t-SNE should return one projected point per embedding row.
len(projections)

76

In [None]:
import plotly.express as px
# 3-D scatter of the t-SNE projection: one marker per image, coloured by its
# class label. The dict form of hover_data gives the hover field a readable
# name ("path") instead of an auto-generated one, and the title lets the
# figure stand on its own.
fig = px.scatter_3d(
    projections, x=0, y=1, z=2,
    color=labels, hover_data={"path": image_paths},
    title="t-SNE projection of image embeddings",
)
fig.update_traces(marker_size=8)
fig.show()

### Using UMAP to visualize

In [None]:
! pip install --upgrade umap-learn --quiet

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m90.9/90.9 kB[0m [31m1.1 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m55.8/55.8 kB[0m [31m4.7 MB/s[0m eta [36m0:00:00[0m
[?25h  Building wheel for umap-learn (setup.py) ... [?25l[?25hdone


In [None]:
import umap.umap_ as umap
import time

In [None]:
# Project the embeddings to three dimensions with UMAP and time the fit.
# random_state is fixed (matching the t-SNE cell) because UMAP is stochastic:
# without a seed every run produces a different layout.
start = time.time()
projections_umap = umap.UMAP(n_components=3, random_state=0).fit_transform(reshaped_array)
end = time.time()
print(f"generating projections with UMAP took: {(end-start):.2f} sec")

generating projections with UMAP took: 5.51 sec


In [None]:
# 3-D scatter of the UMAP projection: one marker per image, coloured by its
# class label. The dict form of hover_data gives the hover field a readable
# name ("path") instead of an auto-generated one, and the title lets the
# figure stand on its own.
fig = px.scatter_3d(
    projections_umap, x=0, y=1, z=2,
    color=labels, hover_data={"path": image_paths},
    title="UMAP projection of image embeddings",
)
fig.update_traces(marker_size=8)
fig.show()

### Using cosine similarity to detect duplicates

In [None]:
import numpy as np

def normalize(vec: np.ndarray) -> np.ndarray:
    """Scale every row of a 2-D array to unit Euclidean length.

    Args:
        vec: Array whose rows are the vectors to normalise (norms are taken
            along axis 1).

    Returns:
        A new array of the same shape in which every non-zero row has L2
        norm 1. All-zero rows are returned as zeros instead of NaN.
    """
    norms = np.linalg.norm(vec, axis=1, keepdims=True)
    # A zero row would otherwise be divided by zero and become NaN, which
    # then poisons every similarity computed from it. Dividing by 1 leaves
    # the row as zeros. `norms` is a fresh array, so editing it in place
    # does not touch the caller's data.
    norms[norms == 0] = 1
    return vec / norms

In [None]:
# With unit-length rows, the matrix of pairwise dot products is the matrix of
# pairwise cosine similarities.
normalized_embeddings = normalize(reshaped_array)
cosine_sim_matrix = normalized_embeddings @ normalized_embeddings.T

In [None]:
import numpy as np
import plotly.graph_objects as go

# Heatmap of the pairwise cosine similarities; both axes index the images in
# the order of image_paths.
similarity_heatmap = go.Heatmap(z=cosine_sim_matrix, colorscale='Viridis')
fig = go.Figure(data=similarity_heatmap)

# Square canvas with equal margins on every side.
layout_options = {
    "title": "Cosine Similarity Matrix",
    "xaxis_title": "Image Index",
    "yaxis_title": "Image Index",
    "font": {"size": 14},
    "width": 1000,
    "height": 1000,
    "margin": {"t": 100, "r": 100, "b": 100, "l": 100},
}
fig.update_layout(**layout_options)

fig.show()