In [None]:
# Install the libraries this notebook needs (pip skips packages that are already installed).
# NOTE: the leading "!" is IPython/Colab shell syntax, so this line only runs inside a notebook.
!pip install torch torchvision matplotlib scikit-learn

import torch
import torch.nn as nn
import torchvision.transforms as transforms
from torchvision import models
from torch.autograd import Variable
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np
from PIL import Image
import os
import matplotlib.pyplot as plt
from google.colab import files
import zipfile

# Load the ImageNet-pretrained ResNet18 model.
# `pretrained=True` is deprecated since torchvision 0.13; the explicit weights
# enum below selects the same checkpoint without the deprecation warning.
model = models.resnet18(weights=models.ResNet18_Weights.IMAGENET1K_V1)
model.eval()  # Evaluation mode: batch-norm layers use their stored running statistics

# Preprocessing applied to every image before it is fed to the model:
# resize the shorter side to 256, take the central 224x224 crop, convert to a
# float tensor, then normalize each RGB channel with the mean/std listed below.
transform = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

def extract_features(image_path):
    """Return the model output for a single image as a flat NumPy vector.

    The image is loaded as RGB, preprocessed with the module-level
    ``transform`` and passed through the module-level ``model`` with gradient
    tracking disabled.

    NOTE(review): ``model`` is called as-is, so the vector is whatever its
    final layer produces; if a pooled embedding is wanted instead of the
    classifier output, the last layer has to be removed where ``model`` is
    built -- confirm which one is intended.

    Args:
        image_path: Path to an image file that PIL can open.

    Returns:
        1-D ``numpy.ndarray`` containing the flattened model output.
    """
    # The context manager releases the file handle even if decoding fails.
    with Image.open(image_path) as img:
        batch = transform(img.convert('RGB')).unsqueeze(0)  # Add batch dimension

    # No ``Variable`` wrapper: it is a deprecated no-op since PyTorch 0.4,
    # plain tensors are accepted by the model directly.
    with torch.no_grad():
        features = model(batch)
    return features.flatten().numpy()  # Flatten to 1D array for similarity

def compute_similarity(feature1, feature2):
    """Return the cosine similarity between two 1-D feature vectors.

    Each vector is wrapped in a list so that ``cosine_similarity`` sees two
    single-row matrices; the only entry of the resulting 1x1 matrix is the
    score that is returned.
    """
    pairwise_scores = cosine_similarity([feature1], [feature2])
    first_row = pairwise_scores[0]
    return first_row[0]

def build_feature_database(dataset_path, extensions=(".jpg", ".png")):
    """Extract a feature vector for every image directly inside a directory.

    Only the top level of ``dataset_path`` is scanned (sub-directories are not
    descended into). File names are matched against ``extensions``
    case-insensitively, so ``photo.JPG`` is picked up as well as ``photo.jpg``.
    Entries are processed in sorted order so the resulting dict has a
    deterministic iteration order.

    Args:
        dataset_path: Directory containing the image files.
        extensions: File-name suffix (or tuple of suffixes) to accept.
            Defaults to ``(".jpg", ".png")``.

    Returns:
        Dict mapping each matching file name to the vector returned by
        ``extract_features`` for that file.
    """
    if isinstance(extensions, str):
        # A bare string would otherwise be iterated character by character.
        extensions = (extensions,)
    suffixes = tuple(ext.lower() for ext in extensions)

    feature_db = {}
    for filename in sorted(os.listdir(dataset_path)):
        if filename.lower().endswith(suffixes):
            image_path = os.path.join(dataset_path, filename)
            feature_db[filename] = extract_features(image_path)
    return feature_db

def compare_datasets(dataset1_path, dataset2_path):
    """Report and display, for each image in dataset 1, its closest image in dataset 2.

    Feature databases are built for both directories with
    ``build_feature_database``. Every dataset-1 vector is then scored against
    every dataset-2 vector with ``compute_similarity``; the highest-scoring
    dataset-2 image is printed and shown side by side with the query image.

    Args:
        dataset1_path: Directory holding the query images.
        dataset2_path: Directory holding the candidate images.

    Returns:
        None. Results are printed and plotted.
    """
    # Build the feature databases for both datasets
    print("Building feature database for Dataset 1...")
    dataset1_db = build_feature_database(dataset1_path)

    print("Building feature database for Dataset 2...")
    dataset2_db = build_feature_database(dataset2_path)

    # Without candidates there is no best match; previously this fell through
    # to os.path.join(dataset2_path, None) and raised TypeError.
    if not dataset1_db:
        print("No images found in Dataset 1; nothing to compare.")
        return
    if not dataset2_db:
        print("No images found in Dataset 2; nothing to compare against.")
        return

    # Compare each image in Dataset 1 with all images in Dataset 2
    for image1_name, feature1 in dataset1_db.items():
        best_match = None
        # Start below every possible score so that even a similarity of -1
        # (or marginally less, due to rounding) still selects a match.
        best_similarity = float("-inf")

        for image2_name, feature2 in dataset2_db.items():
            similarity = compute_similarity(feature1, feature2)
            if similarity > best_similarity:
                best_similarity = similarity
                best_match = image2_name

        if best_match is None:
            # Only reachable when every comparison was False (e.g. NaN scores).
            print(f"No valid match found for {image1_name}")
            continue

        print(f"Best match for {image1_name} is {best_match} with similarity {best_similarity:.4f}")

        # Display the query image next to its best match; the context manager
        # closes both image files once the figure has been shown.
        image1_path = os.path.join(dataset1_path, image1_name)
        image2_path = os.path.join(dataset2_path, best_match)
        with Image.open(image1_path) as image1, Image.open(image2_path) as image2:
            plt.subplot(1, 2, 1)
            plt.imshow(image1)
            plt.title(f"Image 1: {image1_name}")

            plt.subplot(1, 2, 2)
            plt.imshow(image2)
            plt.title(f"Best Match: {best_match}")

            plt.show()

def _extract_first_upload(uploaded, destination):
    """Extract the first uploaded ZIP archive into ``destination``.

    Args:
        uploaded: Mapping returned by ``files.upload()``; its keys are used
            as the paths of the uploaded files.
        destination: Directory the archive is extracted into.

    Returns:
        True when an archive was extracted, False when nothing was uploaded
        or the first uploaded file is not a ZIP archive.
    """
    zip_path = next(iter(uploaded), None)
    if zip_path is None:
        # Upload dialog was cancelled: there is no file to extract.
        return False
    if not zipfile.is_zipfile(zip_path):
        print(f"{zip_path} is not a valid ZIP file.")
        return False
    with zipfile.ZipFile(zip_path, 'r') as zip_ref:
        zip_ref.extractall(destination)
    return True


# Main function to run the comparison
def main():
    """Upload two dataset ZIP files, extract them and compare their images.

    Both archives are requested first, then extracted to
    ``/content/dataset1`` and ``/content/dataset2`` respectively, and finally
    handed to ``compare_datasets``. If either upload is missing or is not a
    ZIP archive, a message is printed and the comparison is skipped.
    """
    # Set the dataset paths
    dataset1_path = "/content/dataset1"
    dataset2_path = "/content/dataset2"

    # Step 1: Upload the first ZIP file (Dataset 1)
    print("Upload your first dataset ZIP file (Dataset 1):")
    uploaded_zip1 = files.upload()

    # Step 2: Upload the second ZIP file (Dataset 2)
    print("Upload your second dataset ZIP file (Dataset 2):")
    uploaded_zip2 = files.upload()

    # Extract both archives; stop early instead of failing with IndexError or
    # BadZipFile when an upload is missing or invalid.
    extraction_jobs = (
        (1, uploaded_zip1, dataset1_path),
        (2, uploaded_zip2, dataset2_path),
    )
    for number, uploaded, destination in extraction_jobs:
        if not _extract_first_upload(uploaded, destination):
            print(f"Dataset {number}: no valid ZIP file was uploaded; aborting.")
            return

    # Step 3: Compare images from the two datasets
    compare_datasets(dataset1_path, dataset2_path)

# Run the main function only when executed directly (a notebook cell or a
# script), not when this file is imported as a module.
if __name__ == "__main__":
    main()


Upload your first dataset ZIP file (Dataset 1):
