In [1]:
#it's a good idea to make a plot to see how they are distributed
import cv2
import numpy as np
import matplotlib.pyplot as plt
import os
from skimage.feature import graycomatrix, graycoprops

def calculate_glcm_features(gray_image, mask):
    """Calculate GLCM features for the masked area of a grayscale image."""
    mask_bool = mask > 0
    if np.sum(mask_bool) == 0:
        return 0, 0, 0, 0

    masked_image = gray_image[mask_bool]
    if masked_image.ndim != 1:
        raise ValueError("Masked image dimension error. Expecting a 1D array of pixel values.")
    masked_image = np.clip(masked_image, 0, 255).astype('uint8')
    glcm = graycomatrix(masked_image.reshape(-1, 1), [1], [0, np.pi/4, np.pi/2, 3*np.pi/4], 256, symmetric=True, normed=True)
    
    contrast = graycoprops(glcm, 'contrast')
    homogeneity = graycoprops(glcm, 'homogeneity')
    energy = graycoprops(glcm, 'energy')
    correlation = graycoprops(glcm, 'correlation')
    
    return np.mean(contrast), np.mean(homogeneity), np.mean(energy), np.mean(correlation)

def visualize_results(original_image, mask, gray_image, title, features):
    """Visualize the original image, mask applied, and grayscale image for comparison."""
    plt.figure(figsize=(18, 6))
    plt.subplot(1, 3, 1)
    masked_original = cv2.bitwise_and(original_image, original_image, mask=mask)
    plt.imshow(cv2.cvtColor(masked_original, cv2.COLOR_BGR2RGB))
    plt.title('Original with Mask')
    plt.axis('off')

    plt.subplot(1, 3, 2)
    masked_grayscale = cv2.bitwise_and(gray_image, gray_image, mask=mask)
    plt.imshow(masked_grayscale, cmap='gray')
    plt.title('Grayscale with Mask')
    plt.axis('off')

    plt.subplot(1, 3, 3)
    plt.text(0.5, 0.8, f"Contrast: {features[0]:.2f}", fontsize=12, ha='center')
    plt.text(0.5, 0.6, f"Homogeneity: {features[1]:.2f}", fontsize=12, ha='center')
    plt.text(0.5, 0.4, f"Energy: {features[2]:.2f}", fontsize=12, ha='center')
    plt.text(0.5, 0.2, f"Correlation: {features[3]:.2f}", fontsize=12, ha='center')
    plt.axis('off')

    plt.suptitle(title)
    plt.show()

def find_mask(mask_dirs, base_name):
    """Recursively find the first available mask for a given image base name from directory and subdirectories."""
    for root, dirs, files in os.walk(mask_dirs):
        for file in files:
            if file == base_name + '_mask.png':
                return cv2.imread(os.path.join(root, file), cv2.IMREAD_GRAYSCALE)
    return None

def process_images_with_glcm(data_dirs, mask_dir):
    for data_dir in data_dirs:
        category = os.path.basename(data_dir)
        for filename in os.listdir(data_dir):
            if filename.endswith(".png"):
                base_name = filename[:-4]
                image_path = os.path.join(data_dir, filename)
                original_image = cv2.imread(image_path)
                if original_image is None:
                    print(f"Error loading image {filename}. Skipping...")
                    continue

                gray_image = cv2.cvtColor(original_image, cv2.COLOR_BGR2GRAY)
                mask = find_mask(mask_dir, base_name)
                if mask is None:
                    print(f"No mask found for {filename}. Skipping...")
                    continue

                features = calculate_glcm_features(gray_image, mask)
                title = f"{category} - Mask found - {base_name}"
                visualize_results(original_image, mask, gray_image, title, features)

data_dirs = [
    r'C:\Users\Mykyta\Documents\GitHub\projects-in-data-science\data\raw_pictures',
    
]

mask_dir = r'C:\Users\Mykyta\Documents\GitHub\projects-in-data-science\data\masks'
print('''Brief description of sub-features
1. Contrast
What It Measures: Difference in intensity between a pixel and its neighbors across the image.
Interpretation: Higher values indicate a textured or highly variable image area.
2. Homogeneity
What It Measures: How close the elements of the GLCM are to its diagonal.
Interpretation: Higher values suggest smooth and less varied texture.
3. Energy
What It Measures: Sum of squared values in the GLCM.
Interpretation: Higher values denote uniform texture and regular patterns.
4. Correlation
What It Measures: Degree to which a pixel is correlated to its neighbors.
Interpretation: High values imply predictable patterns and structured textures.''')
process_images_with_glcm(data_dirs, mask_dir)


Brief description of sub-features
1. Contrast
What It Measures: Difference in intensity between a pixel and its neighbors across the image.
Interpretation: Higher values indicate a textured or highly variable image area.
2. Homogeneity
What It Measures: How close the elements of the GLCM are to its diagonal.
Interpretation: Higher values suggest smooth and less varied texture.
3. Energy
What It Measures: Sum of squared values in the GLCM.
Interpretation: Higher values denote uniform texture and regular patterns.
4. Correlation
What It Measures: Degree to which a pixel is correlated to its neighbors.
Interpretation: High values imply predictable patterns and structured textures.


FileNotFoundError: [WinError 3] The system cannot find the path specified: 'C:\\Users\\Mykyta\\Documents\\GitHub\\projects-in-data-science\\data\\raw_pictures'