In [None]:
# @title
from PIL import Image, ImageOps

In [None]:
# @title
!pip install transformers

In [None]:
# @title
import torch
from transformers import CLIPProcessor, CLIPModel

# Checkpoint shared by the model weights and the matching pre-processor.
CLIP_CHECKPOINT = "openai/clip-vit-base-patch16"

# Run on the GPU when torch can see one, otherwise stay on the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

model = CLIPModel.from_pretrained(CLIP_CHECKPOINT).to(device)
processor = CLIPProcessor.from_pretrained(CLIP_CHECKPOINT)

In [None]:
# @title
! pip install fastdtw

from scipy.spatial.distance import cosine
from fastdtw import fastdtw

In [None]:
# @title
from google.colab import drive
# Mount Google Drive under /content/drive/; the image paths used below live there.
drive.mount('/content/drive/')

Mounted at /content/drive/


### Visualisation for Simple Image embeddings (image.flatten)

In [None]:
# @title
import os
import cv2
import numpy as np

In [None]:
# @title
def get_edited_folders():
  """Return the relative sub-folder name of every edit variant, in a fixed order."""
  bordered = ['bordered/#000', 'bordered/#fff']
  cropped = [
      'cropped/center',
      'cropped/left_half',
      'cropped/lower_half',
      'cropped/lowerleft_quarter',
      'cropped/lowerright_quarter',
      'cropped/right_half',
      'cropped/upper_half',
      'cropped/upperleft_quarter',
      'cropped/upperright_quarter',
  ]
  filtered = ['filtered/aden', 'filtered/inkwell', 'filtered/lofi']
  mirrored = ['mirrored']
  rotated = ['rotated/45', 'rotated/90', 'rotated/180']
  return bordered + cropped + filtered + mirrored + rotated

In [None]:
# @title
# Process Data
#
# Builds three parallel lists (images, images_flattened, image_paths) plus the
# matching `labels` list. Order: the original images first, then, for every
# edit folder, the edited copy of each image in `image_list` order.

image_list = ["BwjvPEAgrRr.jpg", "BxNDSVPAXRr.jpg", "BxPSFbeHxrR.jpg", "BtiVFeUgrrR.jpg"]

# One class label per entry of image_list, in the same order.
l1 = "Dog"
l2 = "Kid"
l3 = "Party"
l4 = "Skeeing"

original_basepath = '/content/drive/My Drive/DL project/data_plain/r/r'
edited_basepath = original_basepath.replace('data_plain', 'data_prepared')
edited_folders = get_edited_folders()


def _read_image(path):
  """Read one image with OpenCV, failing loudly if it cannot be loaded."""
  image = cv2.imread(str(path))
  if image is None:
    # cv2.imread reports a missing or undecodable file by returning None
    # instead of raising, which would otherwise surface later as an opaque
    # AttributeError on `.flatten()`.
    raise FileNotFoundError(f"cv2.imread could not read image: {path}")
  return image


# Images

images = []
images_flattened = []
image_paths = []

# Original images
for img in image_list:
  image_paths.append(original_basepath+"/"+img)

# Edited images
for folder in edited_folders:
  for img in image_list:
    image_paths.append(f'{edited_basepath}/{folder}/{img}')

for image_path in image_paths:
  image = _read_image(image_path)
  images.append(image)
  images_flattened.append(image.flatten())

# The label pattern repeats once for the originals and once per edit folder,
# so it always stays aligned with image_paths even if the folder list changes.
labels = [l1, l2, l3, l4] * (len(edited_folders) + 1)
assert len(labels) == len(image_paths), "labels and image_paths are out of sync"

In [None]:
# @title
# Show the label list and the path list, one per output line, to eyeball their alignment.
print(labels, image_paths, sep="\n")

In [None]:
# @title
import base64
import mimetypes

def image_to_data_uri(image_path):
    """Encode an image file as a base64 ``data:`` URI.

    Args:
        image_path: Path of the image file to embed.

    Returns:
        A string of the form ``data:<mime>;base64,<payload>``. The MIME type is
        guessed from the file extension; when the extension is unknown or does
        not map to an image type, ``image/jpeg`` is used.
    """
    mime_type, _ = mimetypes.guess_type(str(image_path))
    if mime_type is None or not mime_type.startswith("image/"):
        # Fall back to JPEG for unrecognised or non-image extensions.
        mime_type = "image/jpeg"
    with open(image_path, "rb") as image_file:
        encoded_image = base64.b64encode(image_file.read()).decode("utf-8")
    return f"data:{mime_type};base64,{encoded_image}"

In [None]:
# @title
# Map every image path to the data URI of the file it points to.
image_data_uris = dict(zip(image_paths, map(image_to_data_uri, image_paths)))

In [None]:
# @title
# class associated with image
labels_arr = np.array(labels)

# features extracted from image
# The flattened images can differ in length. NumPy >= 1.24 raises ValueError
# when asked to build an array from such ragged rows implicitly, so in that
# case fill a 1-D object array element by element; each entry then stays an
# independent pixel vector. Equal-length inputs keep the plain 2-D array.
flat_lengths = {len(flat_image) for flat_image in images_flattened}
if len(flat_lengths) <= 1:
    image_arr = np.array(images_flattened)
else:
    image_arr = np.empty(len(images_flattened), dtype=object)
    for idx, flat_image in enumerate(images_flattened):
        image_arr[idx] = flat_image

In [None]:
# @title
# Display the shape of the array holding the flattened images.
image_arr.shape

In [None]:
# @title
# Length of the longest flattened image; every row is brought to this length.
max_length = max(map(len, image_arr))

# Right-pad the shorter rows with np.pad's default fill; rows that already
# have max_length elements pass through the slice unchanged.
padded_rows = []
for flat_image in image_arr:
    missing = max_length - len(flat_image)
    if missing > 0:
        padded_rows.append(np.pad(flat_image, (0, missing)))
    else:
        padded_rows.append(flat_image[:max_length])

padded_images = np.array(padded_rows)

*Note that the image shapes vary, which is why the flattened vectors are padded to a common length above.*

#### Generate T-SNE projections

In [None]:
# @title
from sklearn.manifold import TSNE
import time

# Wall-clock the 3-component t-SNE fit on the padded pixel vectors (seeded with random_state=0).
start = time.time()
tsne = TSNE(n_components=3, random_state=0)
projections = tsne.fit_transform(padded_images)
elapsed = time.time() - start
print(f"generating projections with T-SNE took: {elapsed:.2f} sec")

In [None]:
# @title
# Display how many rows the t-SNE projection has (one per input row).
len(projections)

In [None]:
# @title
import plotly.express as px
# 3-D scatter of the t-SNE projection, coloured by class label; hovering a
# point shows its image path. The title keeps this figure distinguishable from
# the other projection plots in the notebook.
fig = px.scatter_3d(
    projections, x=0, y=1, z=2,
    color=labels, hover_data=[image_paths],
    title="t-SNE projection of raw pixel vectors",
)
fig.update_traces(marker_size=8)
fig.show()

*Cannot see any distinct clusters formed. The data points are scattered.*

#### Generate UMAP projection

In [None]:
# @title
! pip install --upgrade umap-learn

In [None]:
# @title
import umap.umap_ as umap
import time

start = time.time()
# Seed UMAP (as the t-SNE run is seeded) so the layout is repeatable across
# kernel restarts; without random_state every run gives a different embedding.
projections_umap = umap.UMAP(n_components=3, random_state=0).fit_transform(padded_images)
end = time.time()
print(f"generating projections with UMAP took: {(end-start):.2f} sec")

generating projections with UMAP took: 41.64 sec


In [None]:
# @title
# 3-D scatter of the UMAP projection, coloured by class label; hovering a
# point shows its image path. The title keeps this figure distinguishable from
# the other projection plots in the notebook.
fig = px.scatter_3d(
    projections_umap, x=0, y=1, z=2,
    color=labels, hover_data=[image_paths],
    title="UMAP projection of raw pixel vectors",
)
fig.update_traces(marker_size=8)
fig.show()

*Some clusters are formed: the cropped versions of the same image lie very close to each other, and the copies of an image produced by the other operations also lie very close to each other. Different images that underwent similar operations are fairly close to each other as well.*

### CLIP embeddings and Visualization

In [None]:
# @title
def compute_similarity_for_images(path1, path2):
  """Score how similar two image files are using their CLIP image embeddings.

  Args:
    path1: Path of the first image file.
    path2: Path of the second image file.

  Returns:
    ``1 / (1 + distance)``, where ``distance`` is the fastdtw distance between
    the two embeddings computed with the cosine point metric. A higher score
    indicates higher similarity.
  """
  # embedding extraction process
  # Open the files in a with-block so both handles are closed as soon as the
  # processor has turned the pixels into tensors.
  with Image.open(path1) as image1, Image.open(path2) as image2:
    inputs1 = processor(images=image1, return_tensors="pt").to(device)
    inputs2 = processor(images=image2, return_tensors="pt").to(device)

  with torch.no_grad():
    embedding1 = model.get_image_features(**inputs1)
    embedding2 = model.get_image_features(**inputs2)

  # Move the tensors to the CPU and convert them to NumPy arrays for fastdtw.
  embedding1 = embedding1.cpu().numpy()
  embedding2 = embedding2.cpu().numpy()

  distance, _ = fastdtw(embedding1, embedding2, dist=cosine)
  similarity_score = 1 / (1 + distance)  # Higher score indicates higher similarity
  return similarity_score

In [None]:
# @title
# Folder that holds the original (unedited) images.
original_basepath = '/content/drive/My Drive/DL project/data_plain/r/r'
import torch
import os
import numpy as np
from sys import stdout

def get_edited_folders():
  """Return the relative sub-folder name of every edit variant, in a fixed order."""
  bordered = ['bordered/#000', 'bordered/#fff']
  cropped = [
      'cropped/center',
      'cropped/left_half',
      'cropped/lower_half',
      'cropped/lowerleft_quarter',
      'cropped/lowerright_quarter',
      'cropped/right_half',
      'cropped/upper_half',
      'cropped/upperleft_quarter',
      'cropped/upperright_quarter',
  ]
  filtered = ['filtered/aden', 'filtered/inkwell', 'filtered/lofi']
  mirrored = ['mirrored']
  rotated = ['rotated/45', 'rotated/90', 'rotated/180']
  return bordered + cropped + filtered + mirrored + rotated

def get_edited_filepaths(original_filepath):
  """Return the path of every edited variant of one original image.

  The directory of `original_filepath` (made absolute) has 'data_plain'
  replaced by 'data_prepared'; one path per folder from get_edited_folders()
  is then built as '<prepared dir>/<folder>/<file name>'.
  """
  source_dir, file_name = os.path.split(os.path.abspath(original_filepath))
  prepared_dir = source_dir.replace('data_plain', 'data_prepared')
  return [f'{prepared_dir}/{folder}/{file_name}' for folder in get_edited_folders()]

In [None]:
# @title
def compute_embeddings_for_images(path):
  """Compute the CLIP image embedding of one image file.

  Args:
    path: Path of the image file to embed.

  Returns:
    The output of ``model.get_image_features`` for that image, moved to the
    CPU and converted to a NumPy array.
  """
  # embedding extraction process
  # The with-block closes the file once the processor has read the pixels.
  with Image.open(path) as image:
    inputs = processor(images=image, return_tensors="pt").to(device)

  with torch.no_grad():
    embedding = model.get_image_features(**inputs)

  # Move the tensor to the CPU, then convert it to a NumPy array.
  return embedding.cpu().numpy()

In [None]:
# @title
# One CLIP embedding per entry of image_paths, in the same order.
embeddings = [compute_embeddings_for_images(image_path) for image_path in image_paths]

#### Visualisation using T-SNE

In [None]:
# @title
# features extracted from image
# Collect the per-image CLIP embeddings into a single NumPy array
# (presumably one leading entry per image path — it is reshaped to 2-D below).
embeddings_arr = np.array(embeddings)

In [None]:
# @title
from sklearn.decomposition import PCA
# Flatten each image's embedding into one row. The row count is taken from the
# data instead of being hard-coded, so the cell keeps working when the number
# of images changes.
clip_embeddings_reshaped = embeddings_arr.reshape(len(embeddings_arr), -1)
# Apply PCA to reduce dimensionality
# n_components may not exceed min(n_samples, n_features), so clamp the target of 50.
n_pca_components = min(50, *clip_embeddings_reshaped.shape)
# Seed PCA as well: scikit-learn may choose a randomized SVD solver, in which
# case the result would otherwise differ between runs.
pca = PCA(n_components=n_pca_components, random_state=0)
clip_embeddings_pca = pca.fit_transform(clip_embeddings_reshaped)


In [None]:
# @title
# Wall-clock the 3-component t-SNE fit on the PCA-reduced CLIP embeddings (seeded with random_state=0).
start = time.time()
tsne = TSNE(n_components=3, random_state=0)
projections = tsne.fit_transform(clip_embeddings_pca)
elapsed = time.time() - start
print(f"generating projections with T-SNE took: {elapsed:.2f} sec")

generating projections with T-SNE took: 1.83 sec


In [None]:
# @title
import plotly.express as px
# 3-D scatter of the t-SNE projection of the CLIP embeddings, coloured by
# class label; hovering a point shows its image path. The title keeps this
# figure distinguishable from the other projection plots in the notebook.
fig = px.scatter_3d(
    projections, x=0, y=1, z=2,
    color=labels, hover_data=[image_paths],
    title="t-SNE projection of CLIP embeddings (after PCA)",
)
fig.update_traces(marker_size=8)
fig.show()

*We can see that the clusters are far more distinguishable than before. There is, however, some overlap between different images, and some modified images lie a little further from their source image.*

#### Visualization Using UMAP

In [None]:
# @title
start = time.time()
# Seed UMAP (as the t-SNE runs are seeded) so the layout is repeatable across
# kernel restarts; without random_state every run gives a different embedding.
projections_umap_clip = umap.UMAP(n_components=3, random_state=0).fit_transform(clip_embeddings_pca)
end = time.time()
print(f"generating projections with UMAP took: {(end-start):.2f} sec")

In [None]:
# @title
# 3-D scatter of the UMAP projection of the CLIP embeddings, coloured by
# class label; hovering a point shows its image path. The title keeps this
# figure distinguishable from the other projection plots in the notebook.
fig = px.scatter_3d(
    projections_umap_clip, x=0, y=1, z=2,
    color=labels, hover_data=[image_paths],
    title="UMAP projection of CLIP embeddings (after PCA)",
)
fig.update_traces(marker_size=8)
fig.show()

*This is a striking result: the clusters are very well formed and easily distinguishable.*

### Using cosine similarity to detect duplicates

In [None]:
import numpy as np

def normalize(vec: np.ndarray) -> np.ndarray:
    """Scale vectors to unit L2 length along the last axis.

    Args:
        vec: A single vector (1-D) or a batch of row vectors (2-D or higher);
            the norm is taken over the last axis.

    Returns:
        An array of the same shape in which every vector has unit norm.
        All-zero vectors are returned unchanged rather than turned into NaNs.
    """
    norms = np.linalg.norm(vec, axis=-1, keepdims=True)
    # Dividing by 1 where the norm is 0 keeps zero vectors at zero and avoids
    # the 0/0 -> NaN (plus RuntimeWarning) of a plain division.
    safe_norms = np.where(norms == 0, 1, norms)
    return vec / safe_norms

In [None]:
# One row per image; the row count comes from the data rather than a
# hard-coded 76, so the cell keeps working when the number of images changes.
clip_embeddings_reshaped = np.asarray(embeddings).reshape(len(embeddings), -1)
normalized_embeddings = normalize(clip_embeddings_reshaped)
# Rows are unit length, so their pairwise dot products are cosine similarities.
cosine_sim_matrix = np.dot(normalized_embeddings, normalized_embeddings.T)

In [None]:
import numpy as np
import plotly.graph_objects as go

# Heatmap of the pairwise cosine similarities; both axes are image indices.
similarity_heatmap = go.Heatmap(z=cosine_sim_matrix, colorscale='Viridis')
fig = go.Figure(data=similarity_heatmap)

# Figure-level settings, collected in one place before being applied.
layout_options = dict(
    title="Cosine Similarity Matrix",
    xaxis_title="Image Index",
    yaxis_title="Image Index",
    font=dict(size=14),
    width=1000,
    height=1000,
    margin=dict(t=100, r=100, b=100, l=100),
)
fig.update_layout(**layout_options)

fig.show()

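We can see bright (yellow) diagonals at offsets that are multiples of 4, the number of source images. Because the rows are ordered with the four images repeated once per edit type, indices `i` and `i + 4k` refer to the same source image under different edits, so those entries have high similarity. Using this cosine similarity matrix we can also query for the most similar images.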