This notebook compares images reconstructed with SVD at various values of K against the original images. It contains:

- Functions to extract various image quality metrics 
- Extracting Features like color, texture , shape and Save into a CSV File
- Perform Analysis in Frequency domain
- Calculate Image Quality Metrics at Various Level of Image Pixel Resolution

In [None]:
# importing the libraries
import numpy as np
import pandas as pd
import os
from glob import glob
import seaborn as sns
from PIL import Image
from skimage.io import imread
from skimage.transform import resize
from skimage.feature import hog
from skimage import exposure
import matplotlib.pyplot as plt
import cv2
import mahotas
from skimage import feature
import numpy as np
import mahotas as mt
from sewar.full_ref import mse, rmse, psnr, uqi, ssim, ergas, scc, rase, sam, msssim, vifp



In [None]:
# Read the HAM10000 metadata table into `dataset`.
# The path is written as a raw string so the Windows backslashes stay literal;
# the previous non-raw literal relied on unrecognised escapes (e.g. "\F"),
# which newer Python versions report as invalid escape sequences.
dataset = pd.read_csv(r"E:\Final_Year_Project\Implementation\Image-Text-NN-SC-Detection\Analysis\HAM10000_metadata.csv")


In [None]:
# Map the short diagnosis codes stored in the 'dx' column to readable lesion names
lesion_type_dict = {
    'nv': 'Melanocytic nevi',
    'mel': 'Melanoma',
    'bkl': 'Benign keratosis-like lesions',
    'bcc': 'Basal cell carcinoma',
    'akiec': 'Actinic keratoses',
    'vasc': 'Vascular lesions',
    'df': 'Dermatofibroma',
    'nc': 'No_Skin_Cancer'
}

# The metadata DataFrame `dataset` read in the previous cell is reused here.
# The template line that re-read a placeholder file ('your_dataset.csv') was
# removed: it discarded the metadata that had just been loaded and raised
# FileNotFoundError unless a file with that placeholder name existed.

# Add a 'cell_type' column holding the readable lesion name for each 'dx' code
# (codes missing from the dictionary map to None via dict.get)
dataset['cell_type'] = dataset['dx'].map(lesion_type_dict.get)

# Encode 'cell_type' as integer category codes
dataset['disease_label'] = pd.Categorical(dataset['cell_type']).codes

In [None]:
# Root folders of the original images and of their reconstructions
base_skin_dir = os.path.join("..", "Data")  # Replace with your original image directory
rbase_skin_dir = os.path.join("350_Components_PCA_Data")  # Replace with your reconstructed image directory

# Index every JPEG by its file name without extension (used as the image ID).
# Originals are searched one directory level below base_skin_dir.
imageid_path_dict = {}
for original_path in glob(os.path.join(base_skin_dir, '*', '*.jpg')):
    original_stem = os.path.splitext(os.path.basename(original_path))[0]
    imageid_path_dict[original_stem] = original_path

# Reconstructions are searched directly inside rbase_skin_dir.
rimageid_path_dict = {}
for reconstructed_path in glob(os.path.join(rbase_skin_dir, '*.jpg')):
    reconstructed_stem = os.path.splitext(os.path.basename(reconstructed_path))[0]
    rimageid_path_dict[reconstructed_stem] = reconstructed_path

# Attach both file paths to the existing `dataset` DataFrame, looked up by 'image_id'
original_lookup = imageid_path_dict.get
reconstructed_lookup = rimageid_path_dict.get
dataset['path_o'] = dataset['image_id'].map(original_lookup)  # Path to original images
dataset['path_r'] = dataset['image_id'].map(reconstructed_lookup)  # Path to reconstructed images

# Function to load images, resize, and convert to numpy array
def load_image(image_path, resize=(450, 300)):
    """
    Open an image file, resize it and return it as a NumPy array.

    Args:
        image_path: Location of the image, passed to PIL's ``Image.open``.
        resize (tuple, optional): Target size handed unchanged to PIL's
            ``Image.resize`` (PIL reads it as (width, height)). Default is (450, 300).

    Returns:
        np.ndarray: The resized image converted with ``np.asarray``.
    """
    opened = Image.open(image_path)
    resized = opened.resize(resize)
    return np.asarray(resized)

# Target size used for every loaded image.
# It is defined BEFORE loading and passed to load_image so that changing it
# really changes the arrays stored in the DataFrame (previously it was set
# after loading and never used, so adjusting it had no effect).
resize_dimensions = (450, 300)

# Load original and reconstructed images into the DataFrame as NumPy arrays
dataset['image_o'] = dataset['path_o'].map(lambda path: load_image(path, resize_dimensions))
dataset['image_r'] = dataset['path_r'].map(lambda path: load_image(path, resize_dimensions))

# Explanation of attributes:
# - 'path_o': Path to original images
# - 'path_r': Path to reconstructed images
# - 'image_o': Numpy array of resized original images
# - 'image_r': Numpy array of resized reconstructed images

# Display dataset attributes for comparison
print("Dataset attributes:")
print(dataset[['image_id', 'path_o', 'path_r']].head())

# Provide instructions for comparing image quality metrics
print("\nInstructions for image quality comparison:")
print("- Use 'image_o' and 'image_r' columns for loading original and reconstructed images.")
print(f"- Adjust 'resize_dimensions' parameter as needed for different image sizes (currently set to {resize_dimensions}).")

In [None]:
def percentage_pixel_difference(image1, image2):
    """
    Return the percentage of array elements that differ noticeably between two images.

    The element-wise absolute difference is binarised with a fixed threshold of 5
    (``cv2.THRESH_BINARY``), and the non-zero entries of the result are counted.
    The count is divided by the total number of elements of ``image1``
    (``np.prod(image1.shape)``, so a channel axis, if present, is included).

    Args:
        image1 (np.ndarray): The first image.
        image2 (np.ndarray): The second image.

    Returns:
        float: Percentage (0-100) of elements whose difference exceeds the threshold.
    """
    change_threshold = 5

    absolute_delta = cv2.absdiff(image1, image2)
    binary_mask = cv2.threshold(absolute_delta, change_threshold, 255, cv2.THRESH_BINARY)[1]

    changed_count = np.count_nonzero(binary_mask)
    element_count = np.prod(image1.shape)
    return (changed_count / element_count) * 100

# Example usage:
# percentage_difference = percentage_pixel_difference(image1, image2)
# print(f"Percentage of different pixels: {percentage_difference:.2f}%")

In [None]:

def lbp_feature(image, numPoints=16, radius=4, eps=1e-7):
    """
    Compute a normalised histogram of uniform Local Binary Pattern (LBP) codes.

    The first three channels of the image are combined into a single luminance
    plane with the weights 0.299 / 0.587 / 0.114 before the LBP codes are computed.

    Args:
        image (np.ndarray): Input image whose last axis holds at least three channels.
        numPoints (int, optional): Number of sampling points for the LBP. Default is 16.
        radius (int, optional): Radius of the sampling circle. Default is 4.
        eps (float, optional): Added to the histogram sum so normalisation never
            divides by zero. Default is 1e-7.

    Returns:
        np.ndarray: Histogram with ``numPoints + 2`` bins, scaled to sum to (almost) one.
    """
    luminance = np.dot(image[..., :3], [0.299, 0.587, 0.114])
    codes = feature.local_binary_pattern(luminance, numPoints, radius, method="uniform")

    # Integer bin edges 0, 1, ..., numPoints + 2
    bin_edges = np.arange(0, numPoints + 3)
    counts, _ = np.histogram(codes.ravel(), bins=bin_edges, range=(0, numPoints + 2))

    counts = counts.astype("float")
    return counts / (counts.sum() + eps)

def haralick_features(image):
    """
    Compute Haralick texture features and average them over directions.

    Args:
        image (np.ndarray): Input image, passed unchanged to
            ``mahotas.features.haralick``.

    Returns:
        np.ndarray: Mean over axis 0 of the array returned by mahotas
        (presumably one row per adjacency direction — see the mahotas docs).
    """
    per_direction = mt.features.haralick(image)
    return per_direction.mean(axis=0)

def get_hu_moments(image):
    """
    Compute the Hu moments of an image.

    The first three channels are combined into a luminance plane with the
    weights 0.299 / 0.587 / 0.114, and the moments of that plane are computed
    with OpenCV.

    Args:
        image (np.ndarray): Input image whose last axis holds at least three channels.

    Returns:
        np.ndarray: Flattened array of Hu moments from ``cv2.HuMoments``.
    """
    luminance = np.dot(image[..., :3], [0.299, 0.587, 0.114])
    spatial_moments = cv2.moments(luminance)
    return cv2.HuMoments(spatial_moments).flatten()

def build_histogram(image, bins=256):
    """
    Quantise every pixel of an image into colour bins, channel by channel.

    The channel axis is reversed first (so a BGR input is read as RGB), the image
    is flattened to a list of pixels, and each value is divided by ``256 / bins``
    and truncated to an integer bin index.

    NOTE(review): despite the name, the returned arrays hold one bin index PER
    PIXEL, not per-bin counts; callers that need real histograms must count the
    indices themselves (e.g. with ``np.bincount``).

    Args:
        image (np.ndarray): Input image with three channels on axis 2, in BGR order.
        bins (int, optional): Number of bins the 0-255 range is split into. Default is 256.

    Returns:
        tuple: Three 1-D integer arrays (red, green, blue) of per-pixel bin indices.
    """
    bin_width = 256 / bins

    # Reverse the channel axis, then lay the pixels out as one long row
    channel_reversed = np.flip(image, 2)
    pixel_row = channel_reversed.reshape(1, -1, 3)

    quantised = (pixel_row / bin_width).astype(int)
    return quantised[0, :, 0], quantised[0, :, 1], quantised[0, :, 2]

def mse(image1, image2):
    """
    Calculate Mean Squared Error (MSE) between two images.

    Both inputs are converted to float64 before subtracting. Images loaded
    from disk are typically uint8, and unsigned arithmetic wraps around
    (e.g. 0 - 20 becomes 236 and its square wraps again), which silently
    produced wrong errors.

    Note: this definition shadows the ``mse`` imported from ``sewar.full_ref``
    at the top of the file; ``custom_rmse`` and ``custom_psnr`` call this one.

    Args:
        image1 (np.ndarray): First input image.
        image2 (np.ndarray): Second input image (same shape, or broadcastable).

    Returns:
        float: MSE between the two images.
    """
    first = np.asarray(image1, dtype=np.float64)
    second = np.asarray(image2, dtype=np.float64)
    return np.mean((first - second) ** 2)

def custom_rmse(image1, image2):
    """
    Root Mean Squared Error (RMSE) between two images.

    Computed as the square root of ``mse(image1, image2)``.

    Args:
        image1 (np.ndarray): First input image.
        image2 (np.ndarray): Second input image.

    Returns:
        float: RMSE between the two images.
    """
    mean_squared = mse(image1, image2)
    return np.sqrt(mean_squared)

def custom_psnr(image1, image2, max_value=255):
    """
    Peak Signal-to-Noise Ratio (PSNR) between two images, in decibels.

    Uses ``20 * log10(max_value / sqrt(MSE))`` with the MSE taken from ``mse``.

    Args:
        image1 (np.ndarray): First input image.
        image2 (np.ndarray): Second input image.
        max_value (int, optional): Maximum possible pixel value. Default is 255.

    Returns:
        float: PSNR between the two images; ``inf`` when the MSE is exactly zero.
    """
    error = mse(image1, image2)
    if error != 0:
        return 20 * np.log10(max_value / np.sqrt(error))
    # Zero error: the ratio is unbounded
    return float('inf')

def custom_ssim(image1, image2):
    """
    Calculate a global Structural Similarity Index (SSIM) between two images.

    The statistics are taken over the whole image at once (one global window,
    no sliding window). Variances and the covariance all use the same
    population normalisation (divide by N). The previous mix of ``np.var``
    (divide by N) and ``np.cov`` (divide by N - 1) made identical images score
    slightly above 1.

    Inputs are converted to float64 so integer images cannot overflow.

    Args:
        image1 (np.ndarray): First input image.
        image2 (np.ndarray): Second input image with the same number of elements.

    Returns:
        float: SSIM between the two images (1.0 for identical images).
    """
    K1 = 0.01
    K2 = 0.03
    L = 255  # Maximum value of pixels
    C1 = (K1 * L) ** 2
    C2 = (K2 * L) ** 2

    first = np.asarray(image1, dtype=np.float64).ravel()
    second = np.asarray(image2, dtype=np.float64).ravel()

    mu1 = first.mean()
    mu2 = second.mean()
    centered1 = first - mu1
    centered2 = second - mu2

    # Population (divide-by-N) second-order statistics, consistent with each other
    sigma1_sq = np.mean(centered1 * centered1)
    sigma2_sq = np.mean(centered2 * centered2)
    sigma12 = np.mean(centered1 * centered2)

    numerator = (2 * mu1 * mu2 + C1) * (2 * sigma12 + C2)
    denominator = (mu1 ** 2 + mu2 ** 2 + C1) * (sigma1_sq + sigma2_sq + C2)
    return numerator / denominator

def histogram_intersection(hist1, hist2):
    """
    Histogram intersection of two histograms, normalised by the first one.

    Sums the bin-wise minimum of both histograms and divides by the total of
    ``hist1``.

    Args:
        hist1 (np.ndarray): First histogram (also the normaliser).
        hist2 (np.ndarray): Second histogram.

    Returns:
        float: Histogram intersection value.
    """
    shared_mass = np.sum(np.minimum(hist1, hist2))
    reference_mass = np.sum(hist1)
    return np.true_divide(shared_mass, reference_mass)

def histogram_correlation(hist1, hist2):
    """
    Pearson-style correlation coefficient between two histograms.

    The mean product of the centred histograms is divided by the product of
    their (population) standard deviations.

    Args:
        hist1 (np.ndarray): First histogram.
        hist2 (np.ndarray): Second histogram.

    Returns:
        float: Correlation coefficient.
    """
    centred1 = hist1 - np.mean(hist1)
    centred2 = hist2 - np.mean(hist2)
    spread = np.std(hist1) * np.std(hist2)
    return np.mean(centred1 * centred2) / spread

def histogram_chi_square(hist1, hist2):
    """
    Chi-Square distance between two histograms.

    Each squared bin difference is divided by the sum of the two bins; a small
    constant (1e-10) in the divisor guards against empty bins.

    Args:
        hist1 (np.ndarray): First histogram.
        hist2 (np.ndarray): Second histogram.

    Returns:
        float: Chi-Square distance.
    """
    gap = hist1 - hist2
    combined = hist1 + hist2 + 1e-10
    per_bin = np.square(gap) / combined
    return np.sum(per_bin)

def histogram_bhattacharyya(hist1, hist2):
    """
    Bhattacharyya distance between two histograms.

    Both histograms are first scaled to sum to one; the distance is the
    negative logarithm of the summed square roots of their bin-wise products.

    Args:
        hist1 (np.ndarray): First histogram.
        hist2 (np.ndarray): Second histogram.

    Returns:
        float: Bhattacharyya distance.
    """
    p = hist1 / np.sum(hist1)
    q = hist2 / np.sum(hist2)
    coefficient = np.sum(np.sqrt(p * q))
    return -np.log(coefficient)

def euclidean_distance(vector1, vector2):
    """
    Euclidean distance between two vectors.

    Args:
        vector1 (np.ndarray): First vector.
        vector2 (np.ndarray): Second vector.

    Returns:
        float: Norm of the difference ``vector1 - vector2``.
    """
    offset = vector1 - vector2
    return np.linalg.norm(offset)

def manhattan_distance(vector1, vector2):
    """
    Manhattan (L1) distance between two vectors.

    Args:
        vector1 (np.ndarray): First vector.
        vector2 (np.ndarray): Second vector.

    Returns:
        float: Sum of the absolute element-wise differences.
    """
    absolute_offsets = np.abs(vector1 - vector2)
    return np.sum(absolute_offsets)

def normalized_euclidean_distance(vector1, vector2):
    """
    Euclidean distance between two vectors after scaling each to unit length.

    Parameters:
    vector1 (array_like): First input vector.
    vector2 (array_like): Second input vector.

    Returns:
    float: Distance between the two unit-length vectors.
    """
    first = np.array(vector1)
    second = np.array(vector2)

    # Divide each vector by its own norm so only direction matters
    unit_first = first / np.linalg.norm(first)
    unit_second = second / np.linalg.norm(second)

    return np.linalg.norm(unit_first - unit_second)


Calculating Percentage Pixel Difference at Various Values of K

In [None]:
# Percentage pixel difference between each original image and its reconstruction.
# Run this cell once for every value of K you want to analyse, provided the
# reconstructed images for that K have been loaded into dataset['image_r'].
import warnings

k = 100  # Number of singular values used in construction

# One column name is used both for the table and for every row, so the CSV has
# a single, K-specific value column (the rows used to be written under a
# different key than the declared column).
pixel_diff_column = f'%_pixel_diff_{k}'

# Suppress all warnings
warnings.filterwarnings("ignore")

# Rows are collected in a list and turned into a DataFrame once at the end:
# DataFrame.append was removed in pandas 2.0 and was quadratic anyway.
pixel_diff_rows = []

# Iterate over all rows in dataset; i tracks the image number we are on
for i, (index, row) in enumerate(dataset.iterrows()):
    print(f"For image {i} ... ")
    image1 = row['image_o']
    image2 = row['image_r']

    d = percentage_pixel_difference(image1, image2)

    pixel_diff_rows.append({
        'Image_ID': row['image_id'],
        pixel_diff_column: d
    })

distances_df = pd.DataFrame(pixel_diff_rows, columns=['Image_ID', pixel_diff_column])

# Save the distances DataFrame as a CSV file
distances_df.to_csv(f'percentage_pixel_all_images_difference_{k}_Components.csv', index=False)

Extracting Texture Information for the Original and Reconstructed Datasets - Euclidean distance between Haralick texture features, and histogram intersection and Bhattacharyya distance between Local Binary Pattern (LBP) histograms

In [None]:
# Texture comparison between each original image and its reconstruction:
# LBP histogram intersection, LBP Bhattacharyya distance and the Euclidean
# distance between the averaged Haralick feature vectors.
import warnings

# Suppress all warnings
warnings.filterwarnings("ignore")

# Rows are collected in a list and converted once at the end;
# DataFrame.append was removed in pandas 2.0.
texture_rows = []

# i tracks the image number we are on
for i, (index, row) in enumerate(dataset.iterrows()):
    print(f"For image {i} ... ")
    image1 = row['image_o']
    image2 = row['image_r']

    # Extract texture descriptors (LBP histogram and Haralick features)
    tlbp1, tharalick1 = lbp_feature(image1), haralick_features(image1)
    tlbp2, tharalick2 = lbp_feature(image2), haralick_features(image2)

    # Compare the descriptors of the two images
    lh = histogram_intersection(tlbp1, tlbp2)
    lb = histogram_bhattacharyya(tlbp1, tlbp2)
    he = np.linalg.norm(tharalick1 - tharalick2)

    texture_rows.append({
        'Image_ID': row['image_id'],
        'lbp_intersection': lh,
        'lbp_bhattacharya': lb,
        'haralick_euclidean': he
    })

distances_df = pd.DataFrame(texture_rows, columns=['Image_ID', 'lbp_intersection',
                                                   'lbp_bhattacharya',
                                                   'haralick_euclidean'])

# Save the distances DataFrame as a CSV file (k is the K value set in an earlier cell)
distances_df.to_csv(f'image_reconstruction_{k}_texture_Components_analysis.csv', index=False)

Extracting Color Information - Bhattacharya Distance between reconstructed and original color histograms

In [None]:
# Colour comparison: Bhattacharyya distance between the per-channel colour
# histograms of each original image and its reconstruction.

def _color_histograms(image):
    """Return (red, green, blue) 256-bin count histograms of an RGB image array."""
    # build_histogram expects BGR input and reverses the channel axis itself,
    # whereas the arrays loaded through PIL are RGB; reversing the channels
    # here keeps the red/blue labels correct.
    red_bins, green_bins, blue_bins = build_histogram(image[..., ::-1])
    # build_histogram returns one bin index per pixel, so the indices are
    # counted here to obtain the actual histograms the distance is defined on.
    return (np.bincount(red_bins, minlength=256),
            np.bincount(green_bins, minlength=256),
            np.bincount(blue_bins, minlength=256))


# Start from a fresh list of rows. The rows used to be appended to whatever
# `distances_df` the previous cell left behind, mixing texture and colour
# results in one file; DataFrame.append was also removed in pandas 2.0.
color_rows = []

# i tracks the image number we are on
for i, (index, row) in enumerate(dataset.iterrows()):
    print(f"For image {i} ... ")
    image1 = row['image_o']
    image2 = row['image_r']

    # Extract color histograms
    hist_red1, hist_green1, hist_blue1 = _color_histograms(image1)
    hist_red2, hist_green2, hist_blue2 = _color_histograms(image2)

    # Calculate Bhattacharyya distance for each color channel
    blue_bhattacharyya_dist = histogram_bhattacharyya(hist_blue1, hist_blue2)
    green_bhattacharyya_dist = histogram_bhattacharyya(hist_green1, hist_green2)
    red_bhattacharyya_dist = histogram_bhattacharyya(hist_red1, hist_red2)

    color_rows.append({
        'Image_ID': row['image_id'],
        'blue_bhattacharyya_distance': blue_bhattacharyya_dist,
        'green_bhattacharyya_distance': green_bhattacharyya_dist,
        'red_bhattacharyya_distance': red_bhattacharyya_dist
    })

distances_df = pd.DataFrame(color_rows, columns=['Image_ID',
                                                 'blue_bhattacharyya_distance',
                                                 'green_bhattacharyya_distance',
                                                 'red_bhattacharyya_distance'])

# Save the distances DataFrame as a CSV file (k is the K value set in an earlier cell)
distances_df.to_csv(f'image_reconstruction_{k}_color_Components_analysis.csv', index=False)

Extracting Hu Moments and calculating the Euclidean distance and correlation between the Hu Moments of reconstructed and original images for Shape Information


In [None]:
# Shape comparison: correlation and Euclidean distance between the Hu moments
# of each original image and its reconstruction.
import warnings

# Suppress all warnings
warnings.filterwarnings("ignore")

# Rows are collected in a list and converted once at the end;
# DataFrame.append was removed in pandas 2.0.
shape_rows = []

# i tracks the image number we are on
for i, (index, row) in enumerate(dataset.iterrows()):
    print(f"For image {i} ... ")
    image1 = row['image_o']
    image2 = row['image_r']

    # Extract Hu moments. get_hu_moments takes the image only; passing the
    # DataFrame as an extra first argument raised a TypeError.
    hm1 = get_hu_moments(image1)
    hm2 = get_hu_moments(image2)

    # Compare the two Hu-moment vectors
    cc = histogram_correlation(hm1, hm2)
    ed = euclidean_distance(hm1, hm2)

    shape_rows.append({
        'Image_ID': row['image_id'],
        'correlation_coefficient': cc,
        'euclidean_distance': ed
    })

distances_df = pd.DataFrame(shape_rows,
                            columns=['Image_ID', 'correlation_coefficient', 'euclidean_distance'])

# Save the distances DataFrame as a CSV file (k is the K value set in an earlier cell)
distances_df.to_csv(f'image_reconstruction_{k}_shape_Components_analysis.csv', index=False)

Image Quality Metrics at K values - PSNR, RMSE, SSIM 

In [None]:
# RMSE, PSNR and SSIM between each original image and its reconstruction.
# rgb2ycbcr was used below without ever being imported; scikit-image (already
# a dependency of this notebook) provides it in skimage.color.
from skimage.color import rgb2ycbcr

# Initialize lists to store the image quality metrics
image_ids = []
rmse_values = []
psnr_values = []
ssim_values = []

# Iterate over the rows in the dataset; number is the zero-based image counter
for number, row in enumerate(dataset.itertuples()):
    image1 = row.image_o
    image2 = row.image_r
    print(f"for image {number}")

    # Convert images to YCbCr color space
    image1_yuv = rgb2ycbcr(image1)
    image2_yuv = rgb2ycbcr(image2)

    # Extract the luminance channel (Y); SSIM is computed on luminance only
    image1_y = image1_yuv[:, :, 0]
    image2_y = image2_yuv[:, :, 0]

    # Calculate image quality metrics (RMSE/PSNR on the full colour images)
    rmse_value = custom_rmse(image1, image2)
    psnr_value = custom_psnr(image1, image2)
    ssim_value = custom_ssim(image1_y, image2_y)

    # Append values to lists
    image_ids.append(row.image_id)
    rmse_values.append(rmse_value)
    psnr_values.append(psnr_value)
    ssim_values.append(ssim_value)

# Create a DataFrame with the collected metrics
distances_df = pd.DataFrame({
    'Image_ID': image_ids,
    'RMSE': rmse_values,
    'PSNR': psnr_values,
    'SSIM': ssim_values,
})

# Save the DataFrame as a CSV file (k is the K value set in an earlier cell)
distances_df.to_csv(f'image_reconstruction_metrics_{k}_Components_analysis.csv', index=False)

Calculating Explained Ratio for K Singular Values used in reconstruction

In [None]:
# PCA is the estimator actually used below; only MiniBatchSparsePCA used to be
# imported, so calling pca_ratio raised a NameError.
from sklearn.decomposition import MiniBatchSparsePCA, PCA
def pca_ratio(image, n_components):
    """
    Applies PCA to each color channel of an image and returns the total explained variance ratio per channel.

    Each channel is treated as a 2-D data matrix (rows as samples, columns as
    features) and fitted with a full-SVD PCA keeping ``n_components`` components.

    Parameters:
        image (numpy.ndarray): The input image with three channels. The channels
            are read in BGR order (channel 0 = blue, channel 2 = red).
            NOTE(review): arrays loaded through PIL are RGB, in which case the
            "red" and "blue" results are swapped — confirm against the caller.
        n_components (int): Number of principal components to keep.

    Returns:
        list: ``[red_ratio, green_ratio, blue_ratio]``, each the sum of
        ``explained_variance_ratio_`` over the kept components.
    """
    def _explained_ratio(channel):
        # Fit a fresh PCA on one channel and total its explained variance ratios
        model = PCA(n_components, svd_solver='full')
        model.fit(channel)
        return sum(model.explained_variance_ratio_)

    # Split the image into its blue, green, and red components
    blue, green, red = cv2.split(image)

    red_ratio = _explained_ratio(red)
    green_ratio = _explained_ratio(green)
    blue_ratio = _explained_ratio(blue)

    return [red_ratio, green_ratio, blue_ratio]

# Explained variance ratio per colour channel when keeping k components.
import warnings

# Number of components to keep; change k to analyse a different number of
# singular values. It is set BEFORE the loop and passed to pca_ratio so the
# computed ratios and the output file name always refer to the same k
# (previously 200 was hard-coded in the call while the file was labelled with k).
k = 100

# Suppress all warnings
warnings.filterwarnings("ignore")

# Rows are collected in a list and converted once at the end;
# DataFrame.append was removed in pandas 2.0.
ratio_rows = []

# Iterate over every row in the dataset; i tracks the image number we are on
for i, (index, row) in enumerate(dataset.iterrows()):
    print(f"For image {i} ... ")
    image1 = row['image_o']

    # Apply PCA and get the explained variance ratios for the image
    p = pca_ratio(image1, k)

    ratio_rows.append({
        'Image_ID': row['image_id'],
        'ratio_red': p[0],
        'ratio_green': p[1],
        'ratio_blue': p[2]
    })

distances_df = pd.DataFrame(ratio_rows,
                            columns=['Image_ID', 'ratio_red', 'ratio_green', 'ratio_blue'])

# Save the distances DataFrame as a CSV file.
# The f prefix was missing, so the file was literally named 'SVD_{k}_...'.
distances_df.to_csv(f'SVD_{k}_shape_Components_analysis.csv', index=False)

Additional Metrics - Variation of Information and Normalized Mutual Information at various values of K

In [None]:
from skimage.metrics import variation_of_information, normalized_mutual_information
import pandas as pd

# Accumulators for the per-image results
image_ids = []
normalized_mutual_info_scores = []
voi_scores = []

# Zero-based counter used only for the progress message
number = 0

# Walk through the dataset one row at a time
for i, row in enumerate(dataset.itertuples(), start=1):
    image1, image2 = row.image_o, row.image_r  # original, reconstructed
    print(f"Processing image {number} ...")

    # Variation of Information (VOI) between original and reconstruction
    voi = variation_of_information(image1, image2)

    # Normalized Mutual Information (NMI) between original and reconstruction
    nmi = normalized_mutual_information(image1, image2)

    # Record the results for this image
    image_ids.append(row.image_id)
    normalized_mutual_info_scores.append(nmi)
    voi_scores.append(voi)

    number += 1

# Assemble the collected metrics into one table
metric_columns = {
    'Image_ID': image_ids,
    'normalized_mutual_information': normalized_mutual_info_scores,
    'VOI': voi_scores
}
distances_df = pd.DataFrame(metric_columns)

# Write the table to disk (k is the K value set in an earlier cell)
distances_df.to_csv(f'image_reconstruction_additional_metrics_{k}_analysis.csv', index=False)

Extracting Features in Frequency Analysis


In [None]:
import cv2
import numpy as np

def spectral_loss(image1, image2):
    """
    Mean absolute difference between the log-magnitude spectra of two images.

    Each image is converted to grayscale with ``cv2.COLOR_BGR2GRAY`` (i.e. it is
    read as BGR), transformed with a 2-D FFT, centred with ``fftshift`` and
    mapped to ``log(|F| + 1)`` before the spectra are compared.

    Args:
        image1 (np.ndarray): First three-channel image.
        image2 (np.ndarray): Second three-channel image of the same size.

    Returns:
        float: Mean absolute difference of the two log-magnitude spectra.
    """
    def _log_magnitude(image):
        # Grayscale -> FFT -> centre the zero frequency -> log magnitude
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        centred = np.fft.fftshift(np.fft.fft2(gray))
        return np.log(np.abs(centred) + 1)

    spectrum_a = _log_magnitude(image1)
    spectrum_b = _log_magnitude(image2)
    return np.mean(np.abs(spectrum_a - spectrum_b))



In [None]:

def spectral_loss_coeff(image1, image2):
    """
    Mean absolute difference between the raw FFT magnitudes of two images.

    Each image is converted to grayscale with ``cv2.COLOR_BGR2GRAY`` (i.e. it is
    read as BGR) and transformed with a 2-D FFT; the magnitudes are compared
    directly, without shifting or log scaling.

    Args:
        image1 (np.ndarray): First three-channel image.
        image2 (np.ndarray): Second three-channel image of the same size.

    Returns:
        float: Mean absolute difference of the two magnitude spectra.
    """
    def _magnitude(image):
        # Grayscale -> FFT -> magnitude of each Fourier coefficient
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        return np.abs(np.fft.fft2(gray))

    magnitude_a = _magnitude(image1)
    magnitude_b = _magnitude(image2)
    return np.mean(np.abs(magnitude_a - magnitude_b))

In [None]:
import cv2
import numpy as np
import matplotlib.pyplot as plt

def compare_frequency_components(image1, image2):
    """
    Compare the centred FFT magnitude spectra of two images, overall and per band.

    Both images are converted to grayscale with ``cv2.COLOR_BGR2GRAY``. The
    low-frequency band is the 60x60 square around the centre of the shifted
    spectrum; the high-frequency band is everything outside that square.

    Args:
        image1 (np.ndarray): First three-channel image.
        image2 (np.ndarray): Second three-channel image of the same size.

    Returns:
        tuple: ``(high_corr, low_corr, overall_corr, high_diff, low_diff)`` where
        the ``*_corr`` values are Pearson correlations of the (masked) magnitude
        spectra and the ``*_diff`` values are the MEAN ABSOLUTE differences of
        the masked spectra (callers label them "MSE", but no squaring is done).
    """
    gray1 = cv2.cvtColor(image1, cv2.COLOR_BGR2GRAY)
    gray2 = cv2.cvtColor(image2, cv2.COLOR_BGR2GRAY)

    # Magnitude of the zero-frequency-centred DFT of each image
    magnitude1 = np.abs(np.fft.fftshift(np.fft.fft2(gray1)))
    magnitude2 = np.abs(np.fft.fftshift(np.fft.fft2(gray2)))

    # Correlation over the full spectrum
    overall_corr = np.corrcoef(magnitude1.flatten(), magnitude2.flatten())[0, 1]

    # Central square that separates low from high frequencies
    rows, cols = gray1.shape
    mid_row, mid_col = rows // 2, cols // 2
    central_rows = slice(mid_row - 30, mid_row + 30)
    central_cols = slice(mid_col - 30, mid_col + 30)

    keep_high = np.ones((rows, cols), np.uint8)
    keep_high[central_rows, central_cols] = 0
    keep_low = np.zeros((rows, cols), np.uint8)
    keep_low[central_rows, central_cols] = 1

    # Masked spectra for each band
    high1, low1 = magnitude1 * keep_high, magnitude1 * keep_low
    high2, low2 = magnitude2 * keep_high, magnitude2 * keep_low

    high_corr = np.corrcoef(high1.flatten(), high2.flatten())[0, 1]
    low_corr = np.corrcoef(low1.flatten(), low2.flatten())[0, 1]

    # Mean absolute difference per band
    high_diff = np.mean(np.abs(high1 - high2))
    low_diff = np.mean(np.abs(low1 - low2))

    return high_corr, low_corr, overall_corr, high_diff, low_diff

def plot_reconstructed_images(original_img):
    """
    Show an image rebuilt from only its high and only its low frequencies.

    The image is converted to grayscale with ``cv2.COLOR_BGR2GRAY`` and
    transformed with a centred 2-D FFT. The 60x60 square around the centre is
    zeroed for the high-frequency version and kept alone for the low-frequency
    version. Both are inverse-transformed (real part only) and displayed side
    by side with matplotlib.

    Args:
        original_img (np.ndarray): Three-channel input image.

    Returns:
        None. The figure is displayed via ``plt.show()``.
    """
    gray = cv2.cvtColor(original_img, cv2.COLOR_BGR2GRAY)
    centred_spectrum = np.fft.fftshift(np.fft.fft2(gray))

    rows, cols = gray.shape

    # Mask that removes the central (low-frequency) square ...
    keep_high = np.ones((rows, cols), np.uint8)
    keep_high[rows//2 - 30:rows//2 + 30, cols//2 - 30:cols//2 + 30] = 0
    # ... and its complement, which keeps only that square
    keep_low = 1 - keep_high

    def _rebuild(mask):
        # Apply the mask in the frequency domain and return to image space
        masked = centred_spectrum * mask
        return np.fft.ifft2(np.fft.ifftshift(masked)).real

    panels = [
        (_rebuild(keep_high), 'Reconstructed Image from High-Frequency Components'),
        (_rebuild(keep_low), 'Reconstructed Image from Low-Frequency Components'),
    ]

    plt.figure(figsize=(10, 5))
    for position, (panel, title) in enumerate(panels, start=1):
        plt.subplot(1, 2, position)
        plt.imshow(panel, cmap='gray')
        plt.title(title)

    plt.tight_layout()
    plt.show()
    

In [None]:
import pandas as pd
import warnings

# Suppress all warnings
warnings.filterwarnings("ignore")

# Frequency-domain comparison of every original image with its reconstruction
results = []
components = 200 # number of singular values
for index, row in dataset.iterrows():
    print(f"For image {index} ... ")
    image1 = row['image_o']
    image2 = row['image_r']

    #plot_reconstructed_images(image1)
    #plot_reconstructed_images(image2)

    ccfh, ccfl, ccf, mse_high, mse_low = compare_frequency_components(image1, image2)
    # Stored under its own name: assigning the value to `spectral_loss` would
    # overwrite the module-level function spectral_loss defined earlier.
    spectral_loss_value = spectral_loss_coeff(image1, image2)

    # Collect the results
    result = {
        "Image Index": index,
        "Spectral Loss Coeff": spectral_loss_value,
        "Correlation Coeff": ccf,
        "High Freq Corr Coeff": ccfh,
        "Low Freq Corr Coeff": ccfl,
        "MSE High-Frequency Component": mse_high,
        "MSE Low-Frequency Component": mse_low
    }

    results.append(result)

# Create a DataFrame from the results
results_df = pd.DataFrame(results)

# Save the DataFrame to a CSV file
results_path = f"image_comparison_frequency_{components}_results.csv"
results_df.to_csv(results_path, index=False)

# Report the file that was actually written (the message used to name a different file)
print(f"Results saved to {results_path}")


Image resizing analysis at different Pixel Resolutions



In [None]:
# Recompute the quality and frequency metrics at several image resolutions.
import os
# rgb2ycbcr was used below without being imported; scikit-image (already a
# dependency of this notebook) provides it in skimage.color.
from skimage.color import rgb2ycbcr

# List of sizes to resize the images (passed to PIL's Image.resize)
sizes = [(450, 400), (450, 300), (300, 300), (450, 150), (200, 350), (300, 600), (256, 256), (200, 100), (400, 200)]

# Make sure the output folder exists before the first CSV is written
output_directory = "Img_resizing_analysis"
os.makedirs(output_directory, exist_ok=True)

for size in sizes:
    # Resize images to the current size in the loop
    print("Loading Image for size ", size)
    dataset['image_o'] = dataset['path_o'].map(lambda x: np.asarray(Image.open(x).resize(size)))
    dataset['image_r'] = dataset['path_r'].map(lambda x: np.asarray(Image.open(x).resize(size)))

    # Create lists to store the metrics
    image_ids = []
    rmse_values = []
    psnr_values = []
    ssim_values = []
    nmi_values = []
    vois = []
    ccf_values = []
    ccfh_values = []
    ccfl_values = []
    mse_high_values = []
    mse_low_values = []
    spectral_loss_values = []

    # Iterate over rows in the dataset
    for number, row in enumerate(dataset.itertuples(), 1):
        image1 = row.image_o
        image2 = row.image_r
        print(f"Processing image {number}")

        # Convert images to YCbCr color space
        image1_yuv = rgb2ycbcr(image1)
        image2_yuv = rgb2ycbcr(image2)

        # Extract the luminance channel (Y)
        image1_y = image1_yuv[:, :, 0]
        image2_y = image2_yuv[:, :, 0]

        # Calculate image quality metrics
        rmse_value = custom_rmse(image1, image2)  # Root Mean Square Error
        psnr_value = custom_psnr(image1, image2)  # Peak Signal-to-Noise Ratio
        ssim_value = custom_ssim(image1_y, image2_y)  # Structural Similarity Index
        voi = variation_of_information(image1, image2)  # Variation of Information
        nmi = normalized_mutual_information(image1, image2)  # Normalized Mutual Information

        # Calculate frequency comparison metrics
        ccfh, ccfl, ccf, mse_high, mse_low = compare_frequency_components(image1, image2)
        # Kept under its own name so the module-level function spectral_loss
        # is not overwritten by a number.
        spectral_loss_value = spectral_loss_coeff(image1, image2)

        # Append values to lists
        image_ids.append(row.image_id)
        rmse_values.append(rmse_value)
        psnr_values.append(psnr_value)
        ssim_values.append(ssim_value)
        nmi_values.append(nmi)
        vois.append(voi)
        ccf_values.append(ccf)
        ccfh_values.append(ccfh)
        ccfl_values.append(ccfl)
        mse_high_values.append(mse_high)
        mse_low_values.append(mse_low)
        spectral_loss_values.append(spectral_loss_value)

    # Create a DataFrame to store the collected metrics
    distances_df = pd.DataFrame({
        'Image_ID': image_ids,
        'RMSE': rmse_values,
        'PSNR': psnr_values,
        'SSIM': ssim_values,
        'normalized_mutual_information': nmi_values,
        'VOI': vois,
        'Correlation Coeff': ccf_values,
        'High Freq Corr Coeff': ccfh_values,
        'Low Freq Corr Coeff': ccfl_values,
        'MSE High-Frequency Component': mse_high_values,
        'MSE Low-Frequency Component': mse_low_values,
        'Spectral Loss Coeff': spectral_loss_values
    })

    # Save the DataFrame as a CSV file
    distances_df.to_csv(f"Img_resizing_analysis/Image_resizing_information_additional_preservation_{size[0]}x{size[1]}_analysis.csv", index=False)
    print(f"Results saved to Image_resizing_information_additional_preservation_{size[0]}x{size[1]}_analysis.csv")

In [None]:
import pandas as pd

def analyze_csv_and_save_with_stats(input_csv):
    """Write the summary statistics of a CSV file to a sibling ``*_stats.csv``.

    The input is loaded with ``pd.read_csv`` and summarised with
    ``DataFrame.describe()``. The resulting table is saved under the input
    path with its trailing ``.csv`` replaced by ``_stats.csv``; if the path
    does not end in ``.csv``, ``_stats.csv`` is appended to the whole path.

    Args:
        input_csv: Path (string) of the CSV file to summarise.
    """
    # Read the input CSV file
    df = pd.read_csv(input_csv)

    # Describe the dataset to get statistics
    stats_df = df.describe()

    # Strip only a trailing ".csv". Splitting on the first ".csv" found
    # anywhere in the path would truncate it when a directory name happens to
    # contain ".csv", sending the output to the wrong location.
    suffix = '.csv'
    stem = input_csv[:-len(suffix)] if input_csv.endswith(suffix) else input_csv
    output_csv = stem + '_stats.csv'

    # Save the statistics DataFrame to a new CSV file
    stats_df.to_csv(output_csv)

    print(f"Statistics saved to {output_csv}")
import os
# Example usage: write a "*_stats.csv" summary for every resizing-analysis CSV
current_directory = "Img_resizing_analysis/"
for filename in os.listdir(current_directory):
    # Skip summaries written by an earlier run of this cell; they also match
    # the filter below and would otherwise be summarised again, producing
    # "*_stats_stats.csv" files.
    if filename.endswith('_stats.csv'):
        continue
    if "New_Image_resizing_information_additional_preservation" in filename and filename.endswith('.csv'):
        input_csv = os.path.join(current_directory,filename)
        analyze_csv_and_save_with_stats(input_csv)


In [None]:
import os
import pandas as pd
import matplotlib.pyplot as plt
import math

def extract_and_plot_means(directory):
    """Plot the mean of every metric against image size.

    Scans ``directory`` for files whose name contains
    "New_Image_resizing_information_additional_preservation" and "stats" and
    ends with ".csv", reads the ``mean`` row of each file, and draws one line
    plot per column, four plots (2x2 grid) per figure. Nothing is plotted when
    no file matches.

    Args:
        directory: Folder containing the statistics CSV files.
    """

    def size_sort_key(label):
        # Order "WxH" labels numerically (so 64x64 comes before 128x128);
        # labels that do not parse are placed after the numeric ones,
        # in alphabetical order.
        try:
            return (0, tuple(int(part) for part in label.lower().split('x')), label)
        except ValueError:
            return (1, (), label)

    # Collect (size label, filename) pairs for the matching statistics files
    stats_files = []
    for filename in os.listdir(directory):
        if "New_Image_resizing_information_additional_preservation" in filename and "stats" in filename and filename.endswith('.csv'):
            # The size label (e.g. "224x224") is the 7th "_"-separated field
            label = filename.split('_')[6]
            stats_files.append((label, filename))

    # os.listdir returns entries in arbitrary order; sort by image size so the
    # x-axis of every line plot runs from the smallest to the largest size.
    stats_files.sort(key=lambda item: (size_sort_key(item[0]), item[1]))

    # Dictionary mapping each column to its list of (label, mean) pairs
    column_data = {}
    for label, filename in stats_files:
        # Read the CSV file and extract mean values
        df = pd.read_csv(os.path.join(directory, filename), index_col=0)
        mean_values = df.loc['mean']  # Get row with index 'mean'

        # Store the mean values for each column
        for column, value in mean_values.items():
            column_data.setdefault(column, []).append((label, value))

    # Determine the number of plots needed
    columns = list(column_data.items())
    num_plots = len(columns)
    num_figures = math.ceil(num_plots / 4)  # Each figure contains 4 subplots (2x2 grid)

    # Plot mean values in separate figures
    for fig_idx in range(num_figures):
        fig, axs = plt.subplots(2, 2, figsize=(12, 10))
        subplot_labels = 'ABCD'

        for subplot_idx in range(4):
            column_idx = fig_idx * 4 + subplot_idx
            if column_idx >= num_plots:
                break

            column, data = columns[column_idx]
            row = subplot_idx // 2
            col = subplot_idx % 2

            labels, means = zip(*data)
            axs[row, col].plot(labels, means, color='red')
            axs[row, col].set_xlabel('Image Size')
            axs[row, col].set_ylabel('Mean Value')
            axs[row, col].set_title(f' ({subplot_labels[subplot_idx]}): Image Resizing Comparison: {column}',pad=20)
            axs[row, col].tick_params(axis='x', rotation=45)
            axs[row, col].tick_params(axis='both', labelsize=10)
            axs[row, col].grid(True)

        plt.tight_layout()
        plt.show()

# Example usage: plot the per-metric mean values found in the statistics CSVs
current_directory = "Img_resizing_analysis/"
extract_and_plot_means(current_directory)


In [None]:
import os
import pandas as pd
import matplotlib.pyplot as plt
import math

def extract_and_plot_means(directory):
    """Plot the standard deviation of every metric against image size.

    Despite its name, this version reads the ``std`` row of each statistics
    file; the name is kept so the call below keeps working. It scans
    ``directory`` for files whose name contains
    "New_Image_resizing_information_additional_preservation" and "stats" and
    ends with ".csv", and draws one line plot per column, four plots (2x2
    grid) per figure. Nothing is plotted when no file matches.

    Args:
        directory: Folder containing the statistics CSV files.
    """

    def size_sort_key(label):
        # Order "WxH" labels numerically (so 64x64 comes before 128x128);
        # labels that do not parse are placed after the numeric ones,
        # in alphabetical order.
        try:
            return (0, tuple(int(part) for part in label.lower().split('x')), label)
        except ValueError:
            return (1, (), label)

    # Collect (size label, filename) pairs for the matching statistics files
    stats_files = []
    for filename in os.listdir(directory):
        if "New_Image_resizing_information_additional_preservation" in filename and "stats" in filename and filename.endswith('.csv'):
            # The size label (e.g. "224x224") is the 7th "_"-separated field
            label = filename.split('_')[6]
            stats_files.append((label, filename))

    # os.listdir returns entries in arbitrary order; sort by image size so the
    # x-axis of every line plot runs from the smallest to the largest size.
    stats_files.sort(key=lambda item: (size_sort_key(item[0]), item[1]))

    # Dictionary mapping each column to its list of (label, std) pairs
    column_data = {}
    for label, filename in stats_files:
        # Read the CSV file and extract standard deviation values
        df = pd.read_csv(os.path.join(directory, filename), index_col=0)
        std_values = df.loc['std']  # Get row with index 'std'

        # Store the standard deviation values for each column
        for column, value in std_values.items():
            column_data.setdefault(column, []).append((label, value))

    # Determine the number of plots needed
    columns = list(column_data.items())
    num_plots = len(columns)
    num_figures = math.ceil(num_plots / 4)  # Each figure contains 4 subplots (2x2 grid)

    # Plot standard deviation values in separate figures
    for fig_idx in range(num_figures):
        fig, axs = plt.subplots(2, 2, figsize=(12, 10))
        subplot_labels = 'ABCD'

        for subplot_idx in range(4):
            column_idx = fig_idx * 4 + subplot_idx
            if column_idx >= num_plots:
                break

            column, data = columns[column_idx]
            row = subplot_idx // 2
            col = subplot_idx % 2

            labels, stds = zip(*data)
            axs[row, col].plot(labels, stds, color='blue')
            axs[row, col].set_xlabel('Image Size')
            axs[row, col].set_ylabel('Std Deviation Value')
            axs[row, col].set_title(f' ({subplot_labels[subplot_idx]}): Image Resizing Comparison: {column}',pad=20)
            axs[row, col].tick_params(axis='x', rotation=45)
            axs[row, col].tick_params(axis='both', labelsize=10)
            axs[row, col].grid(True)

        plt.tight_layout()
        plt.show()

# Example usage: plot the per-metric standard deviations found in the statistics CSVs
current_directory = "Img_resizing_analysis/"
extract_and_plot_means(current_directory)
