In [1]:
import os
import cv2
import glob
import numpy as np
import pandas as pd
from skimage.filters import sobel
from skimage import img_as_float
from scipy.stats import skew, kurtosis, entropy
from concurrent.futures import ThreadPoolExecutor


In [2]:
def extract_features(img_path, class_label, resolution_level):
    """Build one feature record (a dict) for a single image file.

    The image is loaded as grayscale with ``cv2.imread``. Every numeric
    feature is rounded to 6 decimal places.

    Args:
        img_path: Path of the image file to read.
        class_label: Label stored unchanged under ``"class_label"``.
        resolution_level: Value stored unchanged under ``"resolution_level"``.

    Returns:
        dict with the keys ``file_name``, ``class_label``,
        ``resolution_level``, ``width``, ``height``, ``aspect_ratio``,
        ``file_size_kb``, ``mean_intensity``, ``std_intensity``,
        ``skewness``, ``kurtosis``, ``entropy`` and ``edge_density``
        (in that order), or ``None`` when the image cannot be read or any
        step raises. In both failure cases a message is printed instead of
        an exception being propagated.
    """

    def to_6dp(value):
        # All numeric features share the same precision.
        return round(value, 6)

    try:
        gray = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
        if gray is None:
            # cv2.imread signals an unreadable file by returning None.
            print(f"‚ö†Ô∏è Skipping unreadable image: {img_path}")
            return None

        # File-level attributes.
        height, width = gray.shape
        record = {
            "file_name": os.path.basename(img_path),
            "class_label": class_label,
            "resolution_level": resolution_level,
            "width": width,
            "height": height,
            "aspect_ratio": to_6dp(width / height),
            "file_size_kb": to_6dp(os.path.getsize(img_path) / 1024),
        }

        # Intensity statistics over all pixels; mean and std are divided by 255.
        values = gray.flatten()
        record["mean_intensity"] = to_6dp(np.mean(values) / 255)
        record["std_intensity"] = to_6dp(np.std(values) / 255)
        record["skewness"] = to_6dp(skew(values))
        record["kurtosis"] = to_6dp(kurtosis(values))

        # Entropy of a 256-bin histogram; 1 is added to every bin count
        # before the counts are handed to scipy's entropy().
        bin_counts = np.histogram(values, bins=256)[0]
        record["entropy"] = to_6dp(entropy(bin_counts + 1))

        # Fraction of pixels whose Sobel response exceeds 0.1.
        sobel_response = sobel(img_as_float(gray))
        record["edge_density"] = to_6dp(np.mean(sobel_response > 0.1))

        return record

    except Exception as e:
        # Best-effort: report the failure and let the caller skip this file.
        print(f"‚ö†Ô∏è Error processing {img_path}: {e}")
        return None

In [3]:
def process_all_images(base_folder, output_csv, max_workers=8):
    """Scan the class folders under ``base_folder`` and write image features to CSV.

    For each known class folder and each resolution subfolder ("150",
    "300"), every file matching one of the accepted extensions is passed to
    ``extract_features`` (defined elsewhere in this notebook) on a thread
    pool. Records for which ``extract_features`` returns a falsy value are
    dropped. The remaining records are written to ``output_csv`` and a
    short summary is printed.

    Args:
        base_folder: Directory containing the class folders.
        output_csv: Path of the CSV file to write (written without index).
        max_workers: Thread count passed to ``ThreadPoolExecutor``.

    Returns:
        None. Results are written to ``output_csv`` and summarised on stdout.
        If no image produced a record, an empty frame is still written and
        the "no valid images" notice is printed.
    """
    class_folders = [
        "Canon120-1",
        "Canon120-2",
        "Canon220",
        "Canon9000-1",
        "Canon9000-2",
        "EpsonV39-1",
        "EpsonV39-2",
        "EpsonV370-1",
        "EpsonV370-2",
        "EpsonV550",
        "HP"
    ]

    subfolders = ["150", "300"]
    valid_exts = ('.tif', '.tiff', '.jpg', '.jpeg', '.png')

    # Each entry is the argument tuple for one extract_features call:
    # (image path, class label, resolution level).
    all_images = []

    for class_folder in class_folders:
        class_path = os.path.join(base_folder, class_folder)
        if not os.path.exists(class_path):
            print(f"‚ö†Ô∏è Skipping missing folder: {class_folder}")
            continue

        for subfolder in subfolders:
            sub_path = os.path.join(class_path, subfolder)
            if not os.path.exists(sub_path):
                print(f"‚ö†Ô∏è Subfolder not found: {sub_path}")
                continue

            for ext in valid_exts:
                for img_path in glob.glob(os.path.join(sub_path, f"*{ext}")):
                    all_images.append((img_path, class_folder, subfolder))

    print(f"\nüñºÔ∏è Total images found: {len(all_images)}\n")

    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        # executor.map yields results in input order; failed images come
        # back as None and are filtered out here.
        results = executor.map(lambda x: extract_features(*x), all_images)
        data = [result for result in results if result]

    # Build the frame exactly once, after all results are in. Doing this
    # inside the loop re-created the frame per image and left `df` unbound
    # (UnboundLocalError) whenever no image produced a record.
    df = pd.DataFrame(data)
    df.to_csv(output_csv, index=False)
    print(f"\n‚úÖ Feature extraction complete. Saved to: {output_csv}")
    print(f"üìä Total images processed: {len(df)}")

    if not df.empty:
        print("\nüìã Sample preview of data:\n")
        print(df.head(10))
    else:
        print("‚ö†Ô∏è No valid images were processed.")

In [4]:

# === RUN ===
# Root directory holding the class folders that process_all_images scans.
base_folder = r"D:\Infosys_AI-Tracefinder\Data\Official"
# Destination CSV for the extracted feature table.
output_csv = r"D:\Infosys_AI-Tracefinder\Output\Output_for_Allfiles.csv"

# Extract features for every image found and write them to output_csv.
process_all_images(
    base_folder=base_folder,
    output_csv=output_csv,
    max_workers=8,
)

‚ö†Ô∏è Subfolder not found: D:\Infosys_AI-Tracefinder\Data\Official\Canon220\300

üñºÔ∏è Total images found: 510


‚úÖ Feature extraction complete. Saved to: D:\Infosys_AI-Tracefinder\Output\Output_for_Allfiles.csv
üìä Total images processed: 510

üìã Sample preview of data:

    file_name class_label resolution_level  width  height  aspect_ratio  \
0    s1_1.tif  Canon120-1              150   1240    1752      0.707763   
1  s1_100.tif  Canon120-1              150   1240    1752      0.707763   
2   s1_11.tif  Canon120-1              150   1240    1752      0.707763   
3   s1_16.tif  Canon120-1              150   1240    1752      0.707763   
4   s1_17.tif  Canon120-1              150   1240    1752      0.707763   
5    s1_2.tif  Canon120-1              150   1240    1752      0.707763   
6   s1_20.tif  Canon120-1              150   1240    1752      0.707763   
7   s1_25.tif  Canon120-1              150   1240    1752      0.707763   
8    s1_3.tif  Canon120-1              150   