In [None]:
# Extract GLCM and NGTDM texture features from the full dataset.
import os
import numpy as np
import cv2
from skimage.feature import graycomatrix, graycoprops
import pandas as pd

def extract_glcm_features(image, distances=(1,), angles=(0, np.pi/4, np.pi/2, 3*np.pi/4)):
    """
    Extract four GLCM texture features: contrast, correlation, energy, and homogeneity.

    Args:
        image: Image array. A 2-D array is used directly as the grayscale
            image; any other array is converted with ``cv2.COLOR_BGR2GRAY``.
        distances: Pixel-pair distances forwarded to ``graycomatrix``.
        angles: Pixel-pair angles (radians) forwarded to ``graycomatrix``.

    Returns:
        ``[contrast, correlation, energy, homogeneity]``, each one the mean of
        the property over every distance/angle combination.
    """
    # The defaults are tuples (not lists) so no mutable object is shared
    # between calls.
    # cvtColor(BGR2GRAY) rejects single-channel input, so pass 2-D images through.
    gray = image if image.ndim == 2 else cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    glcm = graycomatrix(gray, distances=distances, angles=angles, symmetric=True, normed=True)
    return [
        graycoprops(glcm, prop).mean()
        for prop in ('contrast', 'correlation', 'energy', 'homogeneity')
    ]

def extract_ngtdm_features(image, window_size=3):
    """
    Extract the Busyness and Complexity features of the Neighborhood
    Grey-Tone Difference Matrix (NGTDM).

    Only pixels whose whole neighborhood window lies inside the image are
    used. For every gray level ``i`` that occurs among those pixels:

        p_i = fraction of those pixels having level i
        s_i = sum over those pixels of |i - mean of the window's other pixels|

    and, with N the number of such pixels and all sums restricted to levels
    that actually occur (p_i > 0, p_j > 0):

        Busyness   = sum_i(p_i * s_i) / sum_i sum_j |i * p_i - j * p_j|
        Complexity = (1 / N) * sum_i sum_j |i - j| * (p_i*s_i + p_j*s_j) / (p_i + p_j)

    Args:
        image: Image array. A 2-D array is used directly as the grayscale
            image; any other array is converted with ``cv2.COLOR_BGR2GRAY``.
        window_size: Side length of the square neighborhood window.

    Returns:
        ``[busyness, complexity]``. Busyness is 0 when its denominator is
        zero (for example when only one gray level is present).

    Raises:
        ValueError: If ``window_size`` is smaller than 2 (no neighbors), or
            the image is too small to contain a pixel with a full neighborhood.
    """
    if window_size < 2:
        raise ValueError(f"window_size must be at least 2, got {window_size}")

    # cvtColor(BGR2GRAY) rejects single-channel input, so pass 2-D images through.
    gray = image if image.ndim == 2 else cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    pad = window_size // 2
    h, w = gray.shape
    if h <= 2 * pad or w <= 2 * pad:
        # Without this guard the valid region is empty and every statistic
        # below degenerates to 0/0 (NaN).
        raise ValueError(
            f"image of size {h}x{w} has no pixel with a full "
            f"{window_size}x{window_size} neighborhood"
        )

    gray_f = gray.astype(np.float32)

    # Use only the region with a full neighborhood
    valid_gray = gray_f[pad:h-pad, pad:w-pad]

    # Kernel that sums the neighborhood excluding the center pixel
    kernel = np.ones((window_size, window_size), dtype=np.float32)
    kernel[pad, pad] = 0

    # The zero-padded border rows/columns of the filter output are cropped
    # away, so the padding never contributes to a kept value.
    neighbor_sum = cv2.filter2D(gray_f, -1, kernel, borderType=cv2.BORDER_CONSTANT)[pad:h-pad, pad:w-pad]
    neighbor_avg = neighbor_sum / (window_size * window_size - 1)

    # Absolute difference between each valid pixel and its neighbor average
    diff = np.abs(valid_gray - neighbor_avg)

    flat_pixels = valid_gray.ravel().astype(np.int32)
    flat_diff = diff.ravel()
    total_pixels = flat_pixels.size

    # Per-gray-level difference sums (s) and pixel counts (n)
    s = np.bincount(flat_pixels, weights=flat_diff, minlength=256)
    n = np.bincount(flat_pixels, minlength=256)

    # Both features are defined over the gray levels that occur in the image,
    # so restrict every sum to those levels (this also keeps p_i + p_j > 0).
    present = np.flatnonzero(n)
    levels = present.astype(np.float64)
    p = n[present] / total_pixels
    weighted_s = p * s[present]  # p_i * s_i

    # Busyness
    scaled_levels = levels * p  # i * p_i
    denominator_busyness = np.abs(scaled_levels[:, None] - scaled_levels[None, :]).sum()
    busyness = weighted_s.sum() / denominator_busyness if denominator_busyness != 0 else 0.0

    # Complexity
    level_gap = np.abs(levels[:, None] - levels[None, :])  # |i - j|
    pair_term = (weighted_s[:, None] + weighted_s[None, :]) / (p[:, None] + p[None, :])
    complexity = (level_gap * pair_term).sum() / total_pixels

    return [busyness, complexity]

def process_dataset(dataset_path):
    """
    Walk the dataset and extract GLCM and NGTDM features for every image.

    Every sub-directory of ``dataset_path`` is treated as one class; every
    ``.png``/``.jpg``/``.jpeg`` file (case-insensitive) inside it is read
    with ``cv2.imread`` and passed to ``extract_glcm_features`` and
    ``extract_ngtdm_features``.

    Args:
        dataset_path: Directory containing one sub-directory per class.

    Returns:
        A tuple ``(features, labels, image_ids)`` of three parallel lists:
        the combined feature list of each image (GLCM values followed by
        NGTDM values), the name of its class folder, and its file name.
        Images that cannot be read or whose extraction raises are reported
        on stdout and left out.
    """
    image_extensions = ('.png', '.jpg', '.jpeg')
    features = []
    labels = []
    image_ids = []

    for class_name in sorted(os.listdir(dataset_path)):
        class_path = os.path.join(dataset_path, class_name)
        if not os.path.isdir(class_path):
            continue
        print(f"Extracting features from {class_name}")

        # os.listdir returns entries in arbitrary order; sort so the rows
        # come out in the same order on every run and every machine.
        for image_name in sorted(os.listdir(class_path)):
            if not image_name.lower().endswith(image_extensions):
                continue
            image_path = os.path.join(class_path, image_name)
            try:
                image = cv2.imread(image_path)
                if image is None:
                    # A None result means the file could not be decoded;
                    # say so instead of dropping it silently.
                    print(f"Skipping unreadable image {image_name}")
                    continue
                combined_features = extract_glcm_features(image) + extract_ngtdm_features(image)
            except Exception as e:
                # Best-effort batch run: one bad image must not abort the rest.
                print(f"Error processing {image_name}: {e}")
                continue

            features.append(combined_features)
            labels.append(class_name)
            image_ids.append(image_name)

    return features, labels, image_ids

def main(dataset_path=r"F:\Optimized_output_final", output_file=None):
    """
    Extract features for every image under ``dataset_path`` and save them as CSV.

    Args:
        dataset_path: Dataset directory handed to ``process_dataset``.
        output_file: Path of the CSV to write. When omitted, the file
            ``glcm_ngtdm_features_full.csv`` is written to the parent
            directory of ``dataset_path``.
    """
    features, labels, image_ids = process_dataset(dataset_path)

    # DataFrame with combined features:
    # GLCM: Contrast, Correlation, Energy, Homogeneity
    # NGTDM: Busyness, Complexity
    df = pd.DataFrame(features, columns=['Contrast', 'Correlation', 'Energy', 'Homogeneity', 'Busyness', 'Complexity'])
    df['Class'] = labels
    df['Image_ID'] = image_ids

    if output_file is None:
        output_file = os.path.join(os.path.dirname(dataset_path), 'glcm_ngtdm_features_full.csv')
    df.to_csv(output_file, index=False)
    print(f"Features saved in {output_file}")

# Run the extraction only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
