In [None]:
"""
Image Characteristic Extraction for Image Deblurring Performance Analysis
-------------------------------------------------------------------------

Project: Statistical Analysis of Image Deblurring Methods Performance
Dataset: 1000 high-resolution and diverse images from HQ-50K
Goal: Extract image-level features (contrast and edge density) to enable
statistical correlations between image characteristics, blur types,
deblurring methods (classical and AI), and output quality metrics.

Scope of this script:
This module is part of the "Input Feature Generation" phase and focuses on:
- image_contrast: global contrast of the original image
- edge_density: edge detail intensity of the original image

These are **independent variables** in our dataset and are critical for:
- understanding how image content affects perceived blur
- analyzing performance variation across deblurring methods
- grouping images by visual complexity and texture

Chosen methods:

1. RMS Contrast:
   - What it measures: global variation in pixel intensity (grayscale)
   - Why it's appropriate: blur reduces intensity variance; RMS contrast
     is continuous, robust, and invariant to uniform brightness offsets
   - Chosen over Michelson/local contrast due to:
     • better stability across natural scenes
     • no assumption of foreground/background separation

2. Sobel Gradient Magnitude (Edge Density):
   - What it measures: average strength of image gradients (edges)
   - Why it's appropriate: blur directly weakens gradient transitions;
     this metric quantifies edge loss
   - Chosen over Canny or binary edge maps because:
     • no threshold tuning
     • produces continuous values for statistical modeling
     • better suited for noisy or textured images
   - Gaussian smoothing is applied before Sobel to reduce spurious gradients

These features are extracted from **original high-resolution images**
(before blur or deblurring), aligned with the project roadmap.

Expected ranges:
- RMS contrast: 0.1–0.3 for typical photos, >0.4 for highly contrasted scenes
- Edge density: 10–50 average gradient magnitude for natural images

Output: a CSV file with image_id, image_contrast, edge_density
To be used later in correlation analysis and performance prediction models.

Author: Giorgio — Data Science & A.I., SUPSI
"""

import os
import cv2
import numpy as np
import pandas as pd
from tqdm import tqdm
import logging

# Pre-smoothing settings applied ahead of the Sobel operator to damp pixel noise
GAUSSIAN_PARAMS = dict(ksize=(3, 3), sigma=1.0)

# Aperture size of the Sobel derivative kernels
SOBEL_PARAMS = dict(ksize=3)

# Route ERROR-level (and above) records to a log file; the relative filename
# is resolved against the current working directory
logging.basicConfig(
    level=logging.ERROR,
    format="%(asctime)s - %(levelname)s - %(message)s",
    filename="feature_extraction_errors.log",
)


def compute_rms_contrast(image: np.ndarray) -> float:
    """
    Calculates Root Mean Square (RMS) contrast for a grayscale image.

    RMS contrast is the population standard deviation of the pixel
    intensities after dividing them by 255.

    Args:
        image: Grayscale image as NumPy array; intensities are scaled by
            1/255, so 8-bit data ends up in the [0, 1] range

    Returns:
        float: RMS contrast value as a built-in Python float

    Raises:
        ValueError: If the image contains no pixels
    """
    pixels = np.asarray(image)
    if pixels.size == 0:
        # An empty array would otherwise yield NaN plus a RuntimeWarning
        raise ValueError("Cannot compute RMS contrast of an empty image")

    # std(x / 255) == std(x) / 255, so scale once at the end instead of
    # materialising a normalised copy. Accumulating in float64 keeps the
    # result accurate on multi-megapixel images.
    return float(np.std(pixels, dtype=np.float64) / 255.0)


def compute_sobel_edge_density(
    image: np.ndarray,
    gaussian_params: dict,
    sobel_params: dict
) -> float:
    """
    Measures edge strength as the mean Sobel gradient magnitude.

    The image is smoothed with a Gaussian filter, then horizontal and
    vertical Sobel derivatives are computed as 64-bit floats and combined
    into a per-pixel magnitude that is averaged over the whole image.

    Args:
        image: Grayscale image as NumPy array
        gaussian_params: Mapping with 'ksize' and 'sigma' (used as sigmaX)
            for the Gaussian blur
        sobel_params: Mapping with 'ksize' for the Sobel operator

    Returns:
        float: Mean gradient magnitude over all pixels
    """
    smoothed = cv2.GaussianBlur(
        image, gaussian_params['ksize'], sigmaX=gaussian_params['sigma']
    )

    # Derivative orders (dx, dy): horizontal gradient first, then vertical
    grad_x, grad_y = [
        cv2.Sobel(smoothed, cv2.CV_64F, dx, dy, ksize=sobel_params['ksize'])
        for dx, dy in ((1, 0), (0, 1))
    ]

    magnitude = np.sqrt(grad_x**2 + grad_y**2)
    return np.mean(magnitude)


def extract_features_from_dataset(
    dataset_csv: str,
    image_dir: str,
    output_csv: str = "image_features.csv"
) -> pd.DataFrame:
    """
    Extracts image_contrast and edge_density from images listed in a dataset CSV.

    Each row's image is expected at ``<image_dir>/<id>.<ext>``, where the
    extension is the lower-cased 'format' value ('jpeg' maps to '.jpg').
    Rows whose format is missing, whose file is absent or unreadable, or
    whose processing fails are logged and skipped; they do not abort the run.

    Args:
        dataset_csv: CSV file containing at least 'id' and 'format' columns
        image_dir: Directory where image files are stored
        output_csv: Path to save extracted features

    Returns:
        DataFrame with image_id, image_contrast, edge_density (these columns
        are present even when no image could be processed)

    Raises:
        KeyError: If the dataset CSV lacks the 'id' or 'format' column
    """
    output_columns = ["image_id", "image_contrast", "edge_density"]

    df = pd.read_csv(dataset_csv)

    # Fail early with a clear message instead of on the first loop iteration
    missing = [col for col in ("id", "format") if col not in df.columns]
    if missing:
        raise KeyError(
            f"{dataset_csv} is missing required column(s): {missing}"
        )

    results = []

    # Iterate only the two needed columns: iterrows() builds a Series per row
    # and does not preserve column dtypes across the row
    rows = zip(df["id"], df["format"])
    for image_id, raw_format in tqdm(
        rows, total=len(df), desc="Extracting features"
    ):
        # Empty CSV cells are read as NaN (a float), which has no .lower()
        if not isinstance(raw_format, str) or not raw_format.strip():
            logging.error(
                "Missing or invalid format for image ID %s", image_id
            )
            continue

        img_format = raw_format.strip().lower()
        ext = '.jpg' if img_format == 'jpeg' else f'.{img_format}'
        image_path = os.path.join(image_dir, f"{image_id}{ext}")

        if not os.path.exists(image_path):
            logging.error("File not found: %s", image_path)
            continue

        img = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)

        # cv2.imread signals an unreadable/corrupt file by returning None
        if img is None:
            logging.error("Failed to load image ID %s", image_id)
            continue

        try:
            contrast = compute_rms_contrast(img)
            edges = compute_sobel_edge_density(
                img,
                GAUSSIAN_PARAMS,
                SOBEL_PARAMS
            )
        except Exception as e:
            # Best-effort batch run: record the failure (with traceback)
            # and carry on with the remaining images
            logging.exception(
                "Error processing image ID %s: %s", image_id, e
            )
            continue

        results.append({
            "image_id": image_id,
            "image_contrast": contrast,
            "edge_density": edges
        })

    # Explicit columns keep the CSV header intact when results is empty
    df_result = pd.DataFrame(results, columns=output_columns)
    df_result.to_csv(output_csv, index=False)

    print(f"\n✅ Feature extraction complete. Saved to: {output_csv}")
    return df_result



ciao
