In [None]:
from skimage import io
from skimage.color import rgb2gray
from skimage.metrics import structural_similarity as ssim
from skimage.transform import resize
import cv2
import os

# Base directory where your images are stored
base_dir = "/workspaces/CSDNA/media/"

# Function to preprocess images (resizing and grayscaling)
def preprocess_image(image_path):
    # Load the image
    image = io.imread(image_path)
    # If the image has an alpha channel, remove it
    if image.shape[-1] == 4:
        image = image[..., :3]
    # Convert the image to grayscale
    image_gray = rgb2gray(image)
    # Resize the image to a fixed size
    image_resized = resize(image_gray, (256, 256), anti_aliasing=True)
    return image_resized

# Reference image (assuming the first image is the reference)
reference_image_path = "/workspaces/CSDNA/Testing code/Test 2/density.png"
reference_image = preprocess_image(reference_image_path)

# Dictionary to store SSIM scores
ssim_scores = {}

# Loop through image files and compute SSIM
for i in range(1, 100):
    image_path = os.path.join(base_dir, f"group_{i}.png")
    if os.path.exists(image_path):  # Check if the image file exists
        # Preprocess the image
        current_image = preprocess_image(image_path)
        # Compute SSIM with the reference image
        score = ssim(reference_image, current_image, data_range=current_image.max() - current_image.min())
        ssim_scores[f"group_{i}"] = score

# Find the image with the highest SSIM score
best_image = max(ssim_scores, key=ssim_scores.get)
print(f"The best matching image is {best_image} with an SSIM score of {ssim_scores[best_image]}")

#finding the image with the least SSIM score
worst_image = min(ssim_scores, key=ssim_scores.get)
print(f"The worst matching image is {worst_image} with an SSIM score of {ssim_scores[worst_image]}")

#generating a plot of the SSIM scores of number of images within a range and also generating the statistics for mean median and standard deviation
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

# Create a pandas DataFrame from the dictionary of SSIM scores
df = pd.DataFrame(list(ssim_scores.items()), columns=['Image', 'SSIM'])

# Plot the SSIM scores
plt.figure(figsize=(10, 6))
sns.histplot(df['SSIM'], kde=True)
plt.title('SSIM Scores')
plt.xlabel('SSIM')
plt.ylabel('Count')
plt.show()

#generating the list of groups with more than 0.550 SSIM scores and their specific group number along with SSIM score

# Filter the DataFrame to include only the rows with SSIM > 0.550
df_filtered = df[df['SSIM'] > 0.590]
print(df_filtered)

#finding if there is 14 in df_filtered
if 'group_14' in df_filtered.values:
    print("Yes, group_14 is in the filtered dataframe")


In [1]:
from keras.applications.vgg16 import VGG16, preprocess_input
from keras.preprocessing import image
from keras.models import Model
import numpy as np
from scipy.spatial.distance import cosine

base_dir = "/workspaces/CSDNA/media/"
# Load VGG16 model pre-trained on ImageNet data
model_vgg16 = VGG16(weights='imagenet')
# Remove the last layer to get features instead of classifications
model_vgg16 = Model(inputs=model_vgg16.inputs, outputs=model_vgg16.layers[-2].output)

# Function to extract features from an image
def extract_features(img_path, model):
    # Load the image file, resizing it to 224x224 pixels (required by this model)
    img = image.load_img(img_path, target_size=(224, 224))
    # Convert the image to a numpy array
    img_array = image.img_to_array(img)
    # Add a forth dimension since Keras expects a list of images
    img_array = np.expand_dims(img_array, axis=0)
    # Preprocess the image data
    img_array = preprocess_input(img_array)
    # Get the features of the image
    features = model.predict(img_array)
    return features

# Paths to the uploaded images
ideal_image_path = "/workspaces/CSDNA/Testing code/Test 2/density.png"
image_paths = [os.path.join(base_dir, f"group_{i}.png") for i in range(1, 100)]
# Extract features for each image
features = [extract_features(img_path, model_vgg16) for img_path in image_paths]

# Calculate cosine similarity (this is 1 minus the cosine distance)
similarities = [[1 - cosine(f1, f2) for f2 in features] for f1 in features]

similarities


SyntaxError: invalid syntax (4232792644.py, line 29)