# Journey

In [3]:
import torch
from torch.utils.data import Dataset
import torchvision
from PIL import Image
import requests
from io import BytesIO
import torch.nn as nn
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, Dataset
import torchvision.models as models
import numpy as np
import os
from tqdm import tqdm
from sklearn.cluster import KMeans
import subprocess
import concurrent.futures
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
import gradio as gr

First we download a subset of the images provided.

In [None]:
def download_image(name, link, out_dir):
	"""Fetch `link` with curl and store it as `<out_dir>/<name>.jpg`.

	curl's stdout and stderr are discarded and its exit status is not
	checked, so a failed transfer is not reported to the caller.
	"""
	target = os.path.join(out_dir, f"{name}.jpg")
	curl_cmd = ["curl", link, "--output", target]
	subprocess.run(
		curl_cmd,
		stdout=subprocess.DEVNULL,
		stderr=subprocess.DEVNULL,
	)

def download_images(df, n, out_dir="data/images"):
	"""Download the three image versions of the first `n` rows of `df`.

	Every link is handed to `download_image` on a thread pool and stored as
	`<row>_<version>.jpg` inside `out_dir`. The executor is used as a context
	manager, so the call returns once all submitted downloads have finished.

	Args:
		df: DataFrame with the columns IMAGE_VERSION_1, IMAGE_VERSION_2 and
			IMAGE_VERSION_3 holding the image URLs.
		n: Number of leading rows to download; clamped to `len(df)`.
		out_dir: Destination directory, created when missing.
	"""
	version_columns = ("IMAGE_VERSION_1", "IMAGE_VERSION_2", "IMAGE_VERSION_3")
	# exist_ok replaces the racy "check, then create" sequence.
	os.makedirs(out_dir, exist_ok=True)
	# Asking for more rows than the frame holds used to raise IndexError mid-run.
	n_rows = min(n, len(df))
	with concurrent.futures.ThreadPoolExecutor() as executor:
		for i in range(n_rows):
			row = df.iloc[i]
			for j, column in enumerate(version_columns):
				link = row[column]
				# Empty CSV cells are read as NaN (a float); curl needs a string URL.
				if not isinstance(link, str):
					continue
				executor.submit(download_image, f"{i}_{j}", link, out_dir)
            
# Load the challenge CSV and fetch the images of its first 3000 rows.
csv_path = os.path.join("..", "data", "inditextech_hackupc_challenge_images.csv")
images_out_dir = os.path.join("..", "data", "images")
df = pd.read_csv(csv_path)
download_images(df, 3000, images_out_dir)

Note that the download may fail in some cases, resulting in corrupted files. The following script solves this by removing those files from the directory, along with any images that don't have the right number of channels.

In [None]:
root_dir = "../data/images"
candidates = [os.path.join(root_dir, f) for f in os.listdir(root_dir) if f.endswith(".jpg")]

# Keep only the files that decode into an H x W x 3 array and delete the rest.
# `data` ends up listing just the surviving files, so later cells never
# receive a path that was removed here.
data = []
for path in candidates:
	try:
		# The context manager closes the file handle before a possible os.remove.
		with Image.open(path) as img:
			pixels = np.array(img)
		is_three_channel = pixels.ndim == 3 and pixels.shape[2] == 3
	except Exception:
		# Best-effort cleanup: any decoding failure marks the file as corrupted.
		is_three_channel = False

	if is_three_channel:
		data.append(path)
	else:
		os.remove(path)
		print("Removed corrupted image: ", path)

We also make a quick dataloader to resize the images into a smaller, more manageable size.

In [None]:
class ImageDataset(Dataset):
	"""Dataset over a list of image paths that yields fixed-size RGB tensors.

	The default DataLoader collate function cannot batch PIL images, so each
	item is converted to a tensor here. Resizing every image to the same
	`size` is what allows the samples of a batch to be stacked.

	Args:
		data: Sequence of image file paths.
		size: (height, width) every image is resized to.
	"""

	def __init__(self, data, size=(256, 256)):
		self.data = data
		self.to_tensor = transforms.Compose([
			transforms.Resize(size),
			transforms.ToTensor(),
		])

	def __len__(self):
		return len(self.data)

	def __getitem__(self, idx):
		# Decode inside the context manager so the file handle is released.
		with Image.open(self.data[idx]) as img:
			return self.to_tensor(img.convert("RGB"))

# Batch the validated image paths without shuffling.
dataset = ImageDataset(data)
dataloader = DataLoader(dataset, shuffle=False, batch_size=32)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Resize every batch to 256x256 and write each image to disk.
# NOTE(review): files are written into ../data/images itself, while the
# similarity cell reads from ../data/images/resized - confirm the intended target.
shrink = transforms.Resize((256, 256))
for i, img in tqdm(enumerate(dataloader)):
	img = img.to(device)
	resized_img = shrink(img)
	for j, single in enumerate(resized_img):
		torchvision.utils.save_image(single, f"../data/images/{i*32+j}.jpg")

Next, we define a Lazy Dataset, which doesn't initially contain all the images, but loads them one by one as needed.

This Dataset will be used to fine-tune a ResNet neural network that has been pretrained on ImageNet. As we have no labels, our fine-tuning is done in an original way: we randomly rotate the images and train the model to predict which rotation was applied. This gives the model an understanding of our new dataset.

In [None]:
class LazyRotationImageDataset(Dataset):
	"""Dataset of the `.jpg` files in a directory, opened only when indexed.

	Every item is the image turned by a randomly drawn multiple of 90 degrees,
	returned as a tensor together with the index (0-3) of the rotation applied.
	"""

	def __init__(self, root_dir, transform=None):
		super().__init__()
		jpg_names = (name for name in os.listdir(root_dir) if name.endswith(".jpg"))
		self.data = [os.path.join(root_dir, name) for name in jpg_names]
		self.rotations = [0, 90, 180, 270]
		self.transform = transform

	def __len__(self):
		return len(self.data)

	def __getitem__(self, idx):
		image = Image.open(self.data[idx])
		if self.transform:
			image = self.transform(image)

		# Draw the label first, then look up the angle it stands for.
		rotation_idx = torch.randint(0, 4, (1,)).item()
		angle = self.rotations[rotation_idx]

		# A degenerate [angle, angle] range makes RandomRotation deterministic.
		rotate_to_tensor = transforms.Compose([
			transforms.RandomRotation([angle, angle], expand=True),
			transforms.ToTensor(),
		])
		return rotate_to_tensor(image), rotation_idx

# Shuffled batches of 32 randomly rotated images.
images_root = "../data/images"
dataset = LazyRotationImageDataset(images_root)
dataloader = DataLoader(dataset, shuffle=True, batch_size=32)

In [None]:
# ResNet-18 with its classification head swapped for a 4-way rotation classifier
class RotationPredictor(nn.Module):
	"""ResNet-18 loaded with default pretrained weights and a 4-logit output layer."""

	def __init__(self):
		super().__init__()
		backbone = models.resnet18(weights=models.ResNet18_Weights.DEFAULT)
		# Keep the input width of the head; only its output size changes (4 rotations).
		backbone.fc = nn.Linear(backbone.fc.in_features, 4)
		self.resnet = backbone

	def forward(self, x):
		logits = self.resnet(x)
		return logits

# Use the GPU when one is visible, then build the model and its training objects.
use_cuda = torch.cuda.is_available()
device = torch.device('cuda' if use_cuda else 'cpu')
model = RotationPredictor()
model = model.to(device)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

You can optionally train the model here on your own, but we provide you with the model we've already trained:

In [None]:
# Training loop (disabled: remove the surrounding triple quotes to run it)
"""
import copy

model.train()
epochs = 10
for epoch in range(epochs):
  for images, labels in tqdm(dataloader):
    images = images.to(device)
    labels = labels.to(device)
    outputs = model(images)
    loss = criterion(outputs, labels)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
  print(f"Epoch {epoch + 1}/{epochs}, Loss: {loss.item()}")

model_finetunned = copy.deepcopy(model.resnet)
model_finetunned = nn.Sequential(*list(model_finetunned.children())[:-4], nn.AdaptiveAvgPool2d((1, 1)))
model_finetunned = torch.quantization.quantize_dynamic(
	model_finetunned, {nn.Linear, nn.Conv2d}, dtype=torch.qint8
)

torch.save(model.state_dict(), "../models/model_finetuned.pt")
"""

# Rebuild the truncated feature extractor (every ResNet-18 child except the
# last four, followed by global average pooling) and load the saved weights.
resnet_original = models.resnet18(weights=None)
backbone_names = [name for name, _ in resnet_original.named_children()][:-4]
model = nn.Sequential(*list(resnet_original.children())[:-4], nn.AdaptiveAvgPool2d((1, 1)))
model_weights_path = '../models/model_finetuned.pt'
# map_location lets a checkpoint saved on a GPU be opened on a CPU-only machine.
state_dict = torch.load(model_weights_path, map_location="cpu")

# The snippet above saves RotationPredictor.state_dict(), whose keys look like
# "resnet.layer1.0.conv1.weight", while nn.Sequential expects positional names
# such as "4.0.conv1.weight". Without renaming, strict=False would silently
# load nothing and leave the network randomly initialised.
position_of = {name: str(pos) for pos, name in enumerate(backbone_names)}
renamed_state = {}
for key, tensor in state_dict.items():
	parts = key.split(".")
	if parts[0] == "resnet":
		parts = parts[1:]
	if parts and parts[0] in position_of:
		parts[0] = position_of[parts[0]]
	renamed_state[".".join(parts)] = tensor

# strict=False: a full-model checkpoint also carries weights for the dropped layers.
load_result = model.load_state_dict(renamed_state, strict=False)
if load_result.missing_keys:
	print("Warning: weights missing from checkpoint: ", load_result.missing_keys)
model.eval()

# Now the model is fine-tuned to predict rotations, which also improves its feature extraction capability

Now we can compute features (embeddings) for the images.

In [None]:
"""
features_finetuned = torch.empty((len(dataset), 128)).to(device)
batch_size = 32

for i, (images, labels) in tqdm(enumerate(dataloader)):
	images = images.to(device)
	labels = labels.to(device)
	outputs = model_finetunned(images).squeeze().detach()
	features_finetuned[i * batch_size: i * batch_size + len(images)] = outputs

torch.save(features_finetuned, "../data/features_finetuned.pt")
"""
# Load the precomputed embeddings. The disabled snippet above builds the tensor
# on `device`, so it may have been saved from a GPU; map_location="cpu" keeps
# the load working on CPU-only machines and for the later NumPy conversion.
# NOTE(review): rows are later paired with sorted file names, while the snippet
# above iterates a shuffle=True loader - confirm the stored row order matches.
features_finetuned = torch.load("../data/features_finetuned.pt", map_location="cpu")

Finally, we can use these extracted features to compare images by similarity:

In [None]:
def get_image_names(directory):
    """Return the sorted names of the `.jpg` files found in `directory`.

    The extension check is case-insensitive; names are returned with their
    original casing and without the directory prefix.
    """
    # A tuple, not a bare string: iterating over '.jpg' tests each single
    # character and accepts any name ending in '.', 'j', 'p' or 'g' (e.g. '.png').
    image_extensions = ('.jpg',)
    return [
        filename
        for filename in sorted(os.listdir(directory))
        if filename.lower().endswith(image_extensions)
    ]

# Pair every image name with its embedding: column 0 holds the file name,
# column 1 the feature vector. zip stops at the shorter of the two inputs.
images_dir = "../data/images/resized"
image_names = get_image_names(images_dir)

finetunning_matrix = np.array(
    [[image, vector] for image, vector in zip(image_names, features_finetuned)],
    dtype=object,
)

def top_k_similar_images(embeddings_with_names, specific_image_name, k=10):
    """Return the names of the `k` entries most similar to a given image.

    Args:
        embeddings_with_names: 2-D object array whose column 0 holds the names
            and column 1 the embedding of each entry.
        specific_image_name: Name to look up in column 0.
        k: Maximum number of names to return.

    Returns:
        List of names ordered by decreasing cosine similarity to the query.
        The query entry itself takes part in the ranking.

    Raises:
        ValueError: If `specific_image_name` is not present in column 0.
    """
    query_row = next(
        (pos for pos, entry in enumerate(embeddings_with_names) if entry[0] == specific_image_name),
        None,
    )
    if query_row is None:
        raise ValueError("The specific image name was not found in the data matrix.")

    query_vec = embeddings_with_names[query_row][1].reshape(1, -1)
    stacked = np.array([entry[1] for entry in embeddings_with_names])
    scores = cosine_similarity(query_vec, stacked)
    # Negating the scores turns the ascending argsort into a best-first ranking.
    best_first = np.argsort(-scores)[0]
    top_rows = best_first[:k]
    return embeddings_with_names[top_rows, 0].tolist()

And here is a simple demo to try it. An image has to be selected from the directory of images from which the features were extracted.

In [None]:
def images_to_show(img_path):
	"""Return the paths of the library images most similar to `img_path`.

	The query is matched by file name (a `.jpeg` suffix is treated as `.jpg`)
	against `finetunning_matrix`. The result always has 6 entries, one per
	output slot of the demo; slots without a match are None.

	Args:
		img_path: Path of the query image, or None when nothing is selected.

	Raises:
		ValueError: Propagated from `top_k_similar_images` when the file name
			is not part of the feature matrix.
	"""
	n_slots = 6
	# Nothing selected yet: leave every slot empty instead of failing in
	# os.path.basename(None).
	if img_path is None:
		return [None] * n_slots
	img_name = os.path.basename(img_path).replace(".jpeg", ".jpg")
	images_name = top_k_similar_images(finetunning_matrix, img_name, k=n_slots)
	found = [os.path.join(images_dir, name) for name in images_name]
	# Pad when fewer than 6 results come back, so each output still gets a value.
	return found + [None] * (n_slots - len(found))

def set_as_input(img_path):
    """Promote a result image to query image and blank the six result slots.

    Args:
        img_path: Path of the selected result image, or None when the slot
            is empty.

    Returns:
        A 7-tuple: `img_path` followed by six references to one all-white
        array shaped like the selected image. When `img_path` is None, seven
        None values are returned.
    """
    # An empty result slot has no file to open; report "nothing" for every output.
    if img_path is None:
        return (None,) * 7
    # Read the pixels inside the context manager so the file handle is released.
    with Image.open(img_path) as img:
        blank = np.ones_like(img) * 255
    return img_path, blank, blank, blank, blank, blank, blank

# Demo layout: one query image with a Search button on top, then a 2 x 3 grid
# of result images, each with its own "Set as input" button.
with gr.Blocks() as gui:
	img_outs = []
	set_btns = []
	with gr.Column():
		with gr.Row():
			with gr.Column():
				img_in = gr.Image(type="filepath")
				btn = gr.Button("Search")
		for _row in range(2):
			with gr.Row():
				for _col in range(3):
					with gr.Column():
						img_outs.append(gr.Image(show_download_button=False, interactive=False, type="filepath"))
						set_btns.append(gr.Button("Set as input"))

	# Search fills the six result slots.
	btn.click(images_to_show, inputs=img_in, outputs=img_outs)
	# Each "Set as input" button feeds its own image back as the query and
	# rewrites all six result slots.
	for set_btn, img_out in zip(set_btns, img_outs):
		set_btn.click(set_as_input, inputs=img_out, outputs=[img_in, *img_outs])

gui.launch()

---
Possible Improvements on efficiency: Clustering

If our dataset of clothes becomes too big, a lot of computational power might be used unnecessarily, as data points that are very dissimilar are treated and compared in the same way as similar points.  
We thought of grouping the data into clusters, and only checking for similarities among the members of the cluster a given point belongs to.  
We currently didn't include it in the final result (although we implemented it), as the performance dropped slightly and, with a small dataset, the increase in efficiency was small. But we believe that for a much bigger dataset (millions of images), this approach will be a necessity.

This approach is developed in more depth in the `clustering.ipynb` notebook.