In [2]:
import os
import cv2
import numpy as np
import pandas as pd
from skimage.feature import local_binary_pattern
from skimage.filters import sobel, gabor
from skimage.color import rgb2hsv
from skimage.measure import shannon_entropy
from skimage.util import img_as_ubyte

In [2]:
def extract_features(image_path):
    """Compute a flat dictionary of hand-crafted image statistics for one image.

    The image is read with OpenCV and converted to RGB, grayscale and HSV
    views; every feature below is a scalar summary of one of those views.

    Parameters
    ----------
    image_path : str
        Path of the image file to read with ``cv2.imread``.

    Returns
    -------
    dict
        Maps feature name to a scalar. Keys are inserted in a fixed order
        (brightness, saturation, noise_level, blur_metric, edge_strength,
        motion_blur, LBP stats, Sobel edge stats, RGB stats,
        dark_channel_mean, edge_density, contrast, hue stats, gabor_energy).

    Raises
    ------
    ValueError
        If ``cv2.imread`` returns ``None`` for ``image_path``.
    """
    image = cv2.imread(image_path)
    if image is None:
        raise ValueError(f"Image at {image_path} could not be loaded.")

    # Working views of the same picture (cv2.imread yields BGR channel order).
    image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    image_gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    image_hsv = rgb2hsv(image_rgb)

    features = {}

    # 1-2. Brightness / saturation: means of the V and S channels of HSV.
    features['brightness'] = np.mean(image_hsv[:, :, 2])
    features['saturation'] = np.mean(image_hsv[:, :, 1])

    # 3. Noise level: Shannon entropy of the grayscale image.
    features['noise_level'] = shannon_entropy(image_gray)

    # 4. Blur metric: variance of the Laplacian (computed in float64).
    features['blur_metric'] = cv2.Laplacian(image_gray, cv2.CV_64F).var()

    # 5. Edge strength: mean of the skimage Sobel edge map.
    edge_map = sobel(image_gray)
    edge_map_mean = np.mean(edge_map)
    features['edge_strength'] = edge_map_mean

    # 6. Motion blur: ratio of horizontal to vertical Sobel-derivative variance.
    #    The small epsilon keeps the ratio finite when var_y is zero.
    sobel_x = cv2.Sobel(image_gray, cv2.CV_64F, 1, 0, ksize=3)
    sobel_y = cv2.Sobel(image_gray, cv2.CV_64F, 0, 1, ksize=3)
    var_x = np.var(sobel_x)
    var_y = np.var(sobel_y)
    features['motion_blur'] = var_x / (var_y + 1e-6)

    # 7-12. Uniform Local Binary Patterns: mean & variance for radii 1, 2, 3
    #       with 8 * radius sampling points.
    for radius in [1, 2, 3]:
        lbp = local_binary_pattern(image_gray, P=8 * radius, R=radius, method="uniform")
        features[f'lbp_mean_r{radius}'] = np.mean(lbp)
        features[f'lbp_var_r{radius}'] = np.var(lbp)

    # 13-14. Sobel edge-map statistics. 'edges_mean' is the same value as
    #        'edge_strength'; both keys are kept so the output columns do not change.
    features['edges_mean'] = edge_map_mean
    features['edges_var'] = np.var(edge_map)

    # 15-20. Per-channel mean & variance for R, G, B.
    for i, color in enumerate(['red', 'green', 'blue']):
        channel = image_rgb[:, :, i]
        features[f'{color}_mean'] = np.mean(channel)
        features[f'{color}_var'] = np.var(channel)

    # 21. Dark channel: per-pixel minimum over RGB, eroded with a 15x15 window.
    min_per_channel = np.min(image_rgb, axis=2)
    dcp = cv2.erode(min_per_channel, np.ones((15, 15)))
    features['dark_channel_mean'] = np.mean(dcp)

    # 22. Edge density: fraction of pixels marked by Canny (thresholds 100/200).
    edges_binary = cv2.Canny(image_gray, 100, 200)
    features['edge_density'] = np.sum(edges_binary > 0) / edges_binary.size

    # 23. Global contrast: standard deviation of the grayscale image.
    features['contrast'] = np.std(image_gray)

    # 24-25. Hue mean and variance.
    hue_channel = image_hsv[:, :, 0]
    features['hue_mean'] = np.mean(hue_channel)
    features['hue_var'] = np.var(hue_channel)

    # 26. Gabor energy: mean squared real-part response at frequency 0.6
    #     (default orientation). The filter is applied to a float64 copy:
    #     with the uint8 image the convolution result stays an integer array,
    #     so negative responses are lost and squaring wraps around modulo 256.
    gray_float = image_gray.astype(np.float64)
    gabor_response, _ = gabor(gray_float, frequency=0.6)
    features['gabor_energy'] = np.mean(gabor_response ** 2)

    return features

def extract_features_to_csv(image_folder, output_csv):
    """Extract features for every image under ``image_folder`` and write one CSV.

    ``image_folder`` is expected to contain one sub-directory per category;
    each file in a category directory whose name ends with a known image
    extension is passed to ``extract_features``. Every successful result
    becomes one CSV row, extended with ``image_name`` and ``category``.

    Directory listings are sorted so that the row order of the CSV is
    reproducible (``os.listdir`` itself returns entries in arbitrary order).

    Parameters
    ----------
    image_folder : str
        Root directory holding the category sub-directories.
    output_csv : str
        Path of the CSV file to write (without the DataFrame index).

    Returns
    -------
    None
        Progress and per-image errors are printed; an image that fails is
        skipped rather than aborting the whole run.
    """
    image_extensions = ('.png', '.jpg', '.jpeg', '.bmp', '.tiff')
    all_features = []

    for category in sorted(os.listdir(image_folder)):
        category_path = os.path.join(image_folder, category)
        if not os.path.isdir(category_path):
            # Loose files next to the category folders are ignored.
            continue

        for image_name in sorted(os.listdir(category_path)):
            if not image_name.lower().endswith(image_extensions):
                continue

            image_path = os.path.join(category_path, image_name)
            print(f"Processing {image_name} in category {category}...")
            try:
                features = extract_features(image_path)
                features['image_name'] = image_name
                features['category'] = category
                all_features.append(features)
            except Exception as e:
                # Best effort: report the failure and continue with the next image.
                print(f"Error processing {image_name}: {e}")

    df = pd.DataFrame(all_features)
    df.to_csv(output_csv, index=False)
    print(f"Features saved to {output_csv}")

# Example usage: build the feature table for the "artificial" dataset.
# NOTE(review): these are absolute paths on the author's D: drive — adjust per machine.
image_folder = "D:/fy_project1/data_sets/artificial"
output_csv = "D:/fy_project1/data3.csv"
extract_features_to_csv(image_folder=image_folder, output_csv=output_csv)

Processing 0025.jpg in category clear...
Processing 0039.jpg in category clear...
Processing 0043.jpg in category clear...
Processing 0058.jpg in category clear...
Processing 0062.jpg in category clear...
Processing 0070.jpg in category clear...
Processing 0074.jpg in category clear...
Processing 0077.jpg in category clear...
Processing 0078.jpg in category clear...
Processing 0080.jpg in category clear...
Processing 0122.jpg in category clear...
Processing 0123.jpg in category clear...
Processing 0129.jpg in category clear...
Processing 0138.jpg in category clear...
Processing 0140.jpg in category clear...
Processing 0141.jpg in category clear...
Processing 0176.jpg in category clear...
Processing 0179.jpg in category clear...
Processing 0190.jpg in category clear...
Processing 0202.jpg in category clear...
Processing 0207.jpg in category clear...
Processing 0208.jpg in category clear...
Processing 0213.jpg in category clear...
Processing 0215.jpg in category clear...
Processing 0227.

In [3]:
def extract_features(image_path):
    """Compute a flat dictionary of hand-crafted image statistics for one image.

    The image is read with OpenCV and converted to RGB, grayscale and HSV
    views; every feature below is a scalar summary of one of those views.

    Parameters
    ----------
    image_path : str
        Path of the image file to read with ``cv2.imread``.

    Returns
    -------
    dict
        Maps feature name to a scalar. Keys are inserted in a fixed order
        (brightness, saturation, noise_level, blur_metric, edge_strength,
        motion_blur, LBP stats, Sobel edge stats, RGB stats,
        dark_channel_mean, edge_density, contrast, hue stats, gabor_energy).

    Raises
    ------
    ValueError
        If ``cv2.imread`` returns ``None`` for ``image_path``.
    """
    image = cv2.imread(image_path)
    if image is None:
        raise ValueError(f"Image at {image_path} could not be loaded.")

    # Working views of the same picture (cv2.imread yields BGR channel order).
    image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    image_gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    image_hsv = rgb2hsv(image_rgb)

    features = {}

    # 1-2. Brightness / saturation: means of the V and S channels of HSV.
    features['brightness'] = np.mean(image_hsv[:, :, 2])
    features['saturation'] = np.mean(image_hsv[:, :, 1])

    # 3. Noise level: Shannon entropy of the grayscale image.
    features['noise_level'] = shannon_entropy(image_gray)

    # 4. Blur metric: variance of the Laplacian (computed in float64).
    features['blur_metric'] = cv2.Laplacian(image_gray, cv2.CV_64F).var()

    # 5. Edge strength: mean of the skimage Sobel edge map.
    edge_map = sobel(image_gray)
    edge_map_mean = np.mean(edge_map)
    features['edge_strength'] = edge_map_mean

    # 6. Motion blur: ratio of horizontal to vertical Sobel-derivative variance.
    #    The small epsilon keeps the ratio finite when var_y is zero.
    sobel_x = cv2.Sobel(image_gray, cv2.CV_64F, 1, 0, ksize=3)
    sobel_y = cv2.Sobel(image_gray, cv2.CV_64F, 0, 1, ksize=3)
    var_x = np.var(sobel_x)
    var_y = np.var(sobel_y)
    features['motion_blur'] = var_x / (var_y + 1e-6)

    # 7-12. Uniform Local Binary Patterns: mean & variance for radii 1, 2, 3
    #       with 8 * radius sampling points.
    for radius in [1, 2, 3]:
        lbp = local_binary_pattern(image_gray, P=8 * radius, R=radius, method="uniform")
        features[f'lbp_mean_r{radius}'] = np.mean(lbp)
        features[f'lbp_var_r{radius}'] = np.var(lbp)

    # 13-14. Sobel edge-map statistics. 'edges_mean' is the same value as
    #        'edge_strength'; both keys are kept so the output columns do not change.
    features['edges_mean'] = edge_map_mean
    features['edges_var'] = np.var(edge_map)

    # 15-20. Per-channel mean & variance for R, G, B.
    for i, color in enumerate(['red', 'green', 'blue']):
        channel = image_rgb[:, :, i]
        features[f'{color}_mean'] = np.mean(channel)
        features[f'{color}_var'] = np.var(channel)

    # 21. Dark channel: per-pixel minimum over RGB, eroded with a 15x15 window.
    min_per_channel = np.min(image_rgb, axis=2)
    dcp = cv2.erode(min_per_channel, np.ones((15, 15)))
    features['dark_channel_mean'] = np.mean(dcp)

    # 22. Edge density: fraction of pixels marked by Canny (thresholds 100/200).
    edges_binary = cv2.Canny(image_gray, 100, 200)
    features['edge_density'] = np.sum(edges_binary > 0) / edges_binary.size

    # 23. Global contrast: standard deviation of the grayscale image.
    features['contrast'] = np.std(image_gray)

    # 24-25. Hue mean and variance.
    hue_channel = image_hsv[:, :, 0]
    features['hue_mean'] = np.mean(hue_channel)
    features['hue_var'] = np.var(hue_channel)

    # 26. Gabor energy: mean squared real-part response at frequency 0.6
    #     (default orientation). The filter is applied to a float64 copy:
    #     with the uint8 image the convolution result stays an integer array,
    #     so negative responses are lost and squaring wraps around modulo 256.
    gray_float = image_gray.astype(np.float64)
    gabor_response, _ = gabor(gray_float, frequency=0.6)
    features['gabor_energy'] = np.mean(gabor_response ** 2)

    return features

def extract_features_to_csv(image_folder, output_csv):
    """Extract features for every image under ``image_folder`` and write one CSV.

    ``image_folder`` is expected to contain one sub-directory per category;
    each file in a category directory whose name ends with a known image
    extension is passed to ``extract_features``. Every successful result
    becomes one CSV row, extended with ``image_name`` and ``category``.

    Directory listings are sorted so that the row order of the CSV is
    reproducible (``os.listdir`` itself returns entries in arbitrary order).

    Parameters
    ----------
    image_folder : str
        Root directory holding the category sub-directories.
    output_csv : str
        Path of the CSV file to write (without the DataFrame index).

    Returns
    -------
    None
        Progress and per-image errors are printed; an image that fails is
        skipped rather than aborting the whole run.
    """
    image_extensions = ('.png', '.jpg', '.jpeg', '.bmp', '.tiff')
    all_features = []

    for category in sorted(os.listdir(image_folder)):
        category_path = os.path.join(image_folder, category)
        if not os.path.isdir(category_path):
            # Loose files next to the category folders are ignored.
            continue

        for image_name in sorted(os.listdir(category_path)):
            if not image_name.lower().endswith(image_extensions):
                continue

            image_path = os.path.join(category_path, image_name)
            print(f"Processing {image_name} in category {category}...")
            try:
                features = extract_features(image_path)
                features['image_name'] = image_name
                features['category'] = category
                all_features.append(features)
            except Exception as e:
                # Best effort: report the failure and continue with the next image.
                print(f"Error processing {image_name}: {e}")

    df = pd.DataFrame(all_features)
    df.to_csv(output_csv, index=False)
    print(f"Features saved to {output_csv}")

# Example usage: build the feature table for the "real" dataset.
# NOTE(review): these are absolute paths on the author's D: drive — adjust per machine.
image_folder = "D:/fy_project1/data_sets/real"
output_csv = "D:/fy_project1/data4.csv"
extract_features_to_csv(image_folder=image_folder, output_csv=output_csv)

Processing 0025.jpg in category clear...
Processing 0039.jpg in category clear...
Processing 0043.jpg in category clear...
Processing 0058.jpg in category clear...
Processing 0062.jpg in category clear...
Processing 0070.jpg in category clear...
Processing 0074.jpg in category clear...
Processing 0077.jpg in category clear...
Processing 0078.jpg in category clear...
Processing 0080.jpg in category clear...
Processing 0122.jpg in category clear...
Processing 0123.jpg in category clear...
Processing 0129.jpg in category clear...
Processing 0138.jpg in category clear...
Processing 0140.jpg in category clear...
Processing 0141.jpg in category clear...
Processing 0176.jpg in category clear...
Processing 0179.jpg in category clear...
Processing 0190.jpg in category clear...
Processing 0202.jpg in category clear...
Processing 0207.jpg in category clear...
Processing 0208.jpg in category clear...
Processing 0213.jpg in category clear...
Processing 0215.jpg in category clear...
Processing 0227.