# Labelling using Shannon-Entropy

In [4]:
from skimage import io
from skimage.measure import shannon_entropy

def preference_function(image_1_path, image_2_path):
    """Label an image pair by comparing Shannon entropies.

    Both files are read as grayscale images and the Shannon entropy of each
    is computed.

    Args:
        image_1_path: Path to the first image file.
        image_2_path: Path to the second image file.

    Returns:
        1 if the first image's entropy is strictly greater than the second's,
        otherwise 0 (a tie therefore yields 0).
    """
    # Read both files before scoring either one.
    grayscale_pair = [
        io.imread(path, as_gray=True) for path in (image_1_path, image_2_path)
    ]
    first_score, second_score = (shannon_entropy(img) for img in grayscale_pair)

    if first_score > second_score:
        return 1
    return 0


In [5]:
# Sanity check on two dataset images: the result is 1 when the first image has
# strictly higher Shannon entropy than the second, and 0 otherwise.
preference_function("../../dataset/img0.png", "../../dataset/img1.png")

0

# Generating Image Pairs from the Dataset

## Reduce the 5550-image dataset to a 1% sample (~55 images)

In [6]:
import os
import random
import shutil

# Paths
original_folder = "../../dataset"
new_folder = "../one_percent_dataset"

# Sampling configuration. A fixed seed makes every run select the same subset,
# so re-running this cell copies the same files again instead of adding a
# different random batch to new_folder each time.
SAMPLE_FRACTION = 0.01
SAMPLE_SEED = 42

# Create the new folder if it doesn't exist
os.makedirs(new_folder, exist_ok=True)

# Get a list of all image files in the original folder.
# os.listdir returns entries in arbitrary order, so sort them: a seeded sample
# is only reproducible when the population order is stable.
all_images = sorted(
    f for f in os.listdir(original_folder)
    if os.path.isfile(os.path.join(original_folder, f))
)

if not all_images:
    # Without this guard random.sample would fail with a less helpful message.
    raise ValueError(f"No files found in {original_folder}; nothing to sample")

# Calculate 1% of the total number of images
num_images_to_sample = max(1, int(len(all_images) * SAMPLE_FRACTION))  # Ensure at least one image is selected

# Use a dedicated generator so the global `random` state is left untouched.
random_sample = random.Random(SAMPLE_SEED).sample(all_images, num_images_to_sample)

# Copy the selected images to the new folder.
# NOTE: files left in new_folder by earlier (unseeded) runs are not removed;
# clear the folder manually if it was populated before.
for image in random_sample:
    shutil.copy(os.path.join(original_folder, image), os.path.join(new_folder, image))

print(f"Copied {len(random_sample)} images to {new_folder}")


Copied 55 images to ../one_percent_dataset


## Generate all unordered pairs of the 55 images: C(55, 2) = 1485 pairs

In [7]:
import os
import itertools
import pandas as pd

# Path to the new folder with the reduced dataset
dataset_folder = "../one_percent_dataset"

# Get a list of all images in the folder.
# os.listdir returns entries in arbitrary order; sorting makes both the row
# order of the CSV and the Image_1 / Image_2 slot of each file deterministic.
images = sorted(
    f for f in os.listdir(dataset_folder)
    if os.path.isfile(os.path.join(dataset_folder, f))
)

# Generate all possible pairs (order doesn't matter, so use combinations).
# With sorted input, Image_1 always precedes Image_2 in filename order.
pairs = list(itertools.combinations(images, 2))

# Convert pairs to a DataFrame for saving
pairs_df = pd.DataFrame(pairs, columns=["Image_1", "Image_2"])

# Save pairs to a CSV file (written relative to the current working directory)
output_csv = "onepercent_all_image_pairs.csv"
pairs_df.to_csv(output_csv, index=False)

print(f"Generated {len(pairs)} pairs and saved to {output_csv}")


Generated 1485 pairs and saved to onepercent_all_image_pairs.csv
