In [None]:
#LOADING, PRE-PROCESSING AND EXTRACTING FEATURES OF THE IMAGES OF CIFAR-10 DATASET
import os
from PIL import Image

data_folder = 'Dataset'  # Path to the folder containing images

# Loading images from the folder. os.listdir() returns entries in arbitrary
# order, so the paths are sorted to keep each image's index stable between runs.
image_files = sorted(
    os.path.join(data_folder, f)
    for f in os.listdir(data_folder)
    if os.path.isfile(os.path.join(data_folder, f))
)

images = []    # per image: list of (r, g, b) tuples scaled by 1/255
features = []  # per image: R, G and B histograms concatenated (3 * num_bins counts)
num_bins = 256


def _channel_histogram(values, bins):
    """Count how often each integer in range(bins) occurs in *values*.

    Makes a single pass over *values*. Entries outside range(bins) are
    ignored, which gives the same result as calling values.count(i) for
    every i in range(bins).
    """
    counts = [0] * bins
    for value in values:
        if 0 <= value < bins:
            counts[value] += 1
    return counts


for file in image_files:
    with Image.open(file) as img:
        # Force three channels before reading pixels: for single-band images
        # (grayscale, palette) getdata() yields plain ints, and indexing
        # pixel[0] below would raise TypeError.
        img_resized = img.convert('RGB').resize((32, 32))
        # Converting the image to a list of (r, g, b) tuples
        img_data = list(img_resized.getdata())

    # Normalizing pixel values for the images
    normalized_img_data = [(r / 255.0, g / 255.0, b_ / 255.0) for r, g, b_ in img_data]
    images.append(normalized_img_data)

    # Separating the raw (un-normalized) pixel values for each channel (R, G, B)
    r_values = [pixel[0] for pixel in img_data]
    g_values = [pixel[1] for pixel in img_data]
    b_values = [pixel[2] for pixel in img_data]

    # Calculating the histogram for each channel: how many pixels take each
    # intensity value in range(num_bins)
    hist_r = _channel_histogram(r_values, num_bins)
    hist_g = _channel_histogram(g_values, num_bins)
    hist_b = _channel_histogram(b_values, num_bins)

    # Concatenating the histograms to form a feature vector
    feature = hist_r + hist_g + hist_b
    features.append(feature)

print("Number of images:", len(images))




In [None]:
#Part 1
from datasketch import MinHash, MinHashLSH
# Initializing LSH
num_perm = 128  # Number of permutations
bands = 2  # Number of bands
b = num_perm // bands  # Rows per band (num_perm split evenly across the bands)
# NOTE(review): the tuple below is passed as (b, bands) = (64, 2). datasketch
# documents `params` as (number of bands, rows per band), so this presumably
# builds 64 bands of 2 rows rather than 2 bands of 64 rows -- confirm which
# split was intended before changing it.
lsh = MinHashLSH(num_perm=num_perm, params=(b, bands))

# Build one MinHash signature per feature vector and register it in the index
# under the stringified position of the image. Each histogram count is hashed
# as its decimal string; the bin position is not part of the hashed token.
minhashes = []
for index, histogram in enumerate(features):
    signature = MinHash(num_perm=num_perm)
    for count in histogram:
        signature.update(str(count).encode('utf-8'))
    minhashes.append(signature)
    lsh.insert(str(index), signature)

def retrieve_similar_images(query_image_path, threshold=0.5):
    """Return the paths of indexed images that are similar to a query image.

    The query image is turned into the same kind of feature used for the
    index (32x32 resize, per-channel histograms, MinHash over the histogram
    counts), the LSH index is queried for candidates, and each candidate is
    kept only if its estimated Jaccard similarity reaches *threshold*.

    Relies on module-level state: ``num_bins``, ``num_perm``, ``lsh``,
    ``minhashes`` and ``image_files``.

    Args:
        query_image_path: Path of the image file to look up.
        threshold: Minimum estimated Jaccard similarity (inclusive) for a
            candidate to be returned.

    Returns:
        A list of file paths taken from ``image_files``.
    """
    with Image.open(query_image_path) as query_img:
        # Force three channels before reading pixels: for single-band images
        # (grayscale, palette) getdata() yields plain ints instead of tuples,
        # which would make the per-channel split fail.
        query_img_data = list(query_img.convert('RGB').resize((32, 32)).getdata())

    # One histogram per channel (R, G, B), filled in a single pass over the
    # pixels instead of one list.count() scan per bin.
    channel_hists = [[0] * num_bins for _ in range(3)]
    for pixel in query_img_data:
        for hist, value in zip(channel_hists, pixel):
            if value < num_bins:
                hist[value] += 1

    # Concatenating the histograms to form a feature vector
    query_feature = channel_hists[0] + channel_hists[1] + channel_hists[2]

    # Creating a MinHash object for the query image, hashing the counts the
    # same way the indexed signatures were built
    query_minhash = MinHash(num_perm=num_perm)
    for value in query_feature:
        query_minhash.update(str(value).encode('utf-8'))

    # LSH returns candidate keys (stringified indices); keep only those whose
    # estimated Jaccard similarity meets the threshold
    similar_images = []
    for key in lsh.query(query_minhash):
        index = int(key)
        if query_minhash.jaccard(minhashes[index]) >= threshold:
            similar_images.append(image_files[index])

    return similar_images

# Example usage: look up one image and list every match, one path per line
query_image_path = '001.png'
similar_images = retrieve_similar_images(query_image_path)

print("Similar images:")
if similar_images:
    print("\n".join(similar_images))

In [None]:
#Part 2 
from sklearn.neighbors import LSHForest
from PIL import Image

# Defining parameters
n_estimators = 10  # Number of base estimators in the ensemble
n_candidates = 50  # Number of neighbors to consider for each query point
num_bins = 256  # Number of bins for the histogram

# Initializing LSHForest
# NOTE(review): LSHForest was deprecated and later removed from scikit-learn;
# this cell presumably needs an older pinned release -- confirm the
# environment before running.
lsh_forest = LSHForest(n_estimators=n_estimators, n_candidates=n_candidates, n_neighbors=1, random_state=42)

# Fit LSHForest to the feature vectors of normal images
lsh_forest.fit(features)

# get_params() returns the estimator's constructor settings (the values passed
# above), not the fitted hash tables; kept for inspection only.
representations = lsh_forest.get_params(deep=True)

# Load the new image and compute its histogram features
new_image_path = 'image.jpg'

with Image.open(new_image_path) as img:
    # Force three channels before reading pixels: for single-band images
    # (grayscale, palette) getdata() yields plain ints instead of tuples.
    img_resized = img.convert('RGB').resize((32, 32))
    img_data = list(img_resized.getdata())

# One histogram per channel (R, G, B), filled in a single pass over the pixels
channel_hists = [[0] * num_bins for _ in range(3)]
for pixel in img_data:
    for hist, value in zip(channel_hists, pixel):
        if value < num_bins:
            hist[value] += 1
hist_r, hist_g, hist_b = channel_hists

new_image_features = hist_r + hist_g + hist_b

# Query the LSH hash tables to find the nearest neighbors
nearest_neighbors_indices = lsh_forest.kneighbors([new_image_features], return_distance=False)

# Measuring the dissimilarity between the new image and each returned
# neighbor as the sum of absolute differences between their histograms
nearest_neighbors_distances = [
    sum(abs(x - y) for x, y in zip(new_image_features, features[idx]))
    for idx in nearest_neighbors_indices[0]
]

# Identifying the new image as an anomaly if its distance from the nearest neighbors exceeds a predefined threshold
threshold = 100
if max(nearest_neighbors_distances) > threshold:
    print("Anomaly detected: The new image is an anomaly.")
else:
    print("No anomaly detected: The new image is similar to the existing images.")





