In [1]:
import os
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.image as mpimg

from PIL import Image
from sklearn.metrics.pairwise import cosine_similarity
from tqdm import tqdm

from torchvision import transforms
from torch.utils.data import DataLoader, Dataset, TensorDataset, ConcatDataset
from torchvision import datasets, models


In [2]:
chemin_dossier_images = "./data"
chemin_dossier_images_affiche = "./data/DAM"
chemin_test = "./data/test_image_headmind"

In [3]:
# On crée une class custom pour labelliser nos images
class CustomDataset(Dataset):
    def __init__(self, folder_path, transform=None):
        self.folder_path = folder_path
        self.transform = transform
        self.image_paths = [os.path.join(folder_path, img_name) for img_name in os.listdir(folder_path)]

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, idx):
        img_path = self.image_paths[idx]
        img = Image.open(img_path).convert("RGB")

        # On utilise le titre de l'image comme label
        label = os.path.splitext(os.path.basename(img_path))[0]

        if self.transform:
            img = self.transform(img)

        return img, label   


In [4]:
# Image loading and feature extraction
def extract_features(image_folder_path, model, transform=None):
    dataset = CustomDataset(image_folder_path, transform=transform)
    dataloader = DataLoader(dataset, batch_size=1, shuffle=False)

    features_list = []

    model.eval()
    with torch.no_grad():
        for images, label in dataloader:
            features = model(images)
            features = features.cpu().numpy().flatten()
            features_list.append((features,label))

    return features_list

In [5]:
# Create a new dataset of features extracted from training images
def create_train_dataset_features(image_folder_path, model, nb_transformation_per_image, transform=None):
    features_list = []
    for _ in range(nb_transformation_per_image):
        features_list.extend(extract_features(image_folder_path, model, transform))

    all_features = np.array([features for features, _ in features_list])
    all_labels = np.array([label for _, label in features_list])

    # all_features_tensor = torch.tensor(all_features)
    # all_labels_tensor = torch.tensor(all_labels)

    # return DataLoader(TensorDataset(all_features_tensor, all_labels), batch_size=1, shuffle=True)
    return all_features, all_labels

In [6]:
# Load pre-trained ResNet18 model
model = models.resnet18(pretrained=True)
model = torch.nn.Sequential(*(list(model.children())[:-1]))

# Image folder path
image_folder_path = 'data/DAM_extraction/'

# Transformation applied to each image
train_transform = transforms.Compose([
    transforms.RandomResizedCrop(256),
    transforms.RandomHorizontalFlip(),
    transforms.RandomVerticalFlip(),
    transforms.RandomRotation(45),
    #transforms.RandomErasing(),
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# Number of transformation used by image
nb_transformation_per_image = 8

In [7]:
extracted_features_data_set = create_train_dataset_features(image_folder_path, model, nb_transformation_per_image ,transform=train_transform)
# torch.save(train_dataloader,'train_dataloader.pth')

In [25]:
# Function to calculate cosine similarity between two feature vectors
def calculate_cosine_similarity(feature1, feature2):
    # Ensure the features are 1D tensors
    feature1 = feature1.flatten()
    feature2 = feature2.flatten()

    # # Convert tensors to numpy arrays
    # feature1_np = feature1.cpu().numpy()
    # feature2_np = feature2.cpu().numpy()

    # Calculate cosine similarity
    similarity = cosine_similarity([feature1], [feature2])[0, 0]

    return similarity

In [29]:
def get_top_similar_images(input_image_path, features_dataloader, model, preprocess, top_k=10):
    # Load the input image
    input_image = Image.open(input_image_path).convert("RGB")

    # Preprocess the input image
    input_tensor = preprocess(input_image)
    input_batch = input_tensor.unsqueeze(0)  # Add batch dimension

    # Extract features for the input image
    with torch.no_grad():
        input_features = np.array(model(input_batch).cpu().numpy().flatten())

    # Calculate cosine similarity with the features in the features_dataloader
    similarities = []
    all_features, all_labels = features_dataloader
    for features, _ in zip(all_features, all_labels):
        similarity = calculate_cosine_similarity(input_features, features)
        similarities.append(similarity)

    # Get indices of top-k similar images
    top_k_indices = sorted(range(len(similarities)), key=lambda i: similarities[i], reverse=True)[:top_k]

    # Return the top-k similar images
    top_k_similar_images = [(features[i], label) for i, (features, label) in enumerate(zip(all_features, all_labels)) if i in top_k_indices]

    return top_k_similar_images

In [30]:
def imshow(img,label):
    img = img / 2 + 0.5 
    npimg = img.numpy()
    plt.imshow(np.transpose(npimg, (1, 2, 0)))
    plt.title(label)
    plt.show()

In [32]:
input_image_path = "data/DAM/02JHE090I610C905.jpeg"

preprocess = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

top_10 = get_top_similar_images(input_image_path, extracted_features_data_set, model, preprocess, top_k=10)

print("The label to find is : 02JHE090I610C905")
# imshow(Image.open(input_image_path).convert("RGB"),"02JHE090I610C905")

print("The top 10 similar images are :")
for _ , label in top_10:
    # image_path = "data/DAM/"+label+".jpeg"
    # image = Image.open(image_path).convert("RGB")
    # imshow(image,label)
    print(label)


The label to find is : 02JHE090I610C905
The top 10 similar images are :
['02MDA070I600C417']
['02JHE090I610C905']
['02JHE090I610C905']
['02MDA070I600C417']
['01DJW924I132C976']
['02MDA070I600C417']
['02JHE090I610C905']
['02MDA070I600C417']
['01DJW924I132C976']
['02JHE090I610C905']


In [None]:
"""
model = models.resnet18(pretrained=True)
model = torch.nn.Sequential(*(list(model.children())[:-1]))
model.eval()

# image_path = 'data/DAM/010M03A1116X9000.jpeg'
# input_image = Image.open(image_path)

image_paths = [
	'data/DAM/012B03A3985X5902.jpeg',
	'data/DAM/012A09A3232X5597.jpeg',
	'data/DAM/14DDN978A133C568.jpeg'
]

preprocess = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# Extract features for each image
features_list = []
for image_path in image_paths:
    input_image = Image.open(image_path)
    input_tensor = preprocess(input_image)
    input_batch = input_tensor.unsqueeze(0)  # Add batch dimension

    with torch.no_grad():
        features = model(input_batch)

    # Convert features to a numpy array and flatten it
    features_list.append(features.cpu().numpy().flatten())

# Compute cosine similarity between features
cosine_similarities = cosine_similarity(features_list)

# Print the cosine similarity matrix
print("Cosine Similarity Matrix:")
print(cosine_similarities)
"""

In [None]:
"""# Path to the DAM image for comparison
query_image_path = 'data/DAM/012B03A3985X5902.jpeg'

# Path to the data folder
data_folder = 'data'

# Number of top similar images to retrieve
top_k = 10

# Load and preprocess the query image
preprocess = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

query_image = Image.open(query_image_path)
query_tensor = preprocess(query_image)
query_batch = query_tensor.unsqueeze(0)  # Add batch dimension

with torch.no_grad():
    query_features = model(query_batch)


# Calculate cosine similarity with all other images in the DAM folder
similarities = []
image_paths = []

dam_folder = os.path.join(data_folder, 'DAM')

for filename in os.listdir(dam_folder):
    if filename.endswith('.jpeg'):
        image_path = os.path.join(dam_folder, filename)
        image_paths.append(image_path)

        # Load and preprocess the image
        input_image = Image.open(image_path)
        input_tensor = preprocess(input_image)
        input_batch = input_tensor.unsqueeze(0)  # Add batch dimension

        with torch.no_grad():
            features = model(input_batch)

        # Calculate cosine similarity with the query image
        similarity = calculate_cosine_similarity(query_features, features)
        similarities.append(similarity)

# Get indices of top-k similar images
top_indices = np.argsort(similarities)[-top_k:][::-1]

# Display the query image
plt.subplot(1, top_k + 1, 1)
plt.imshow(query_image)
plt.title('Query Image')
plt.axis('off')

# Display the top-k similar images
for i, index in enumerate(top_indices):
    image_path = image_paths[index]
    similar_image = Image.open(image_path)

    plt.subplot(1, top_k + 1, i + 2)
    plt.imshow(similar_image)
    plt.title(f'Top-{i + 1}')
    plt.axis('off')

plt.show()
"""

<Figure size 640x480 with 11 Axes>