In [2]:
import os
import cv2
import numpy as np
from PIL import Image
import fitz  # PyMuPDF

In [6]:
# Source folder and the three destination folders used for classification
input_folder = 'data_kk'
output_blur, output_noise, output_normal = 'kk_blur', 'kk_noise', 'kk_normal'

# Make sure every destination folder exists before anything is written to it
for _out_dir in (output_blur, output_noise, output_normal):
    os.makedirs(_out_dir, exist_ok=True)

def sobel_gradient(image):
    """Return the mean Sobel gradient magnitude of an image.

    The image is reduced to a single gray channel, the horizontal and
    vertical Sobel derivatives (5x5 kernel, float64 output) are computed, and
    the mean of the per-pixel magnitude ``sqrt(gx**2 + gy**2)`` is returned.
    The result is a single scalar score, not an image.

    Args:
        image: NumPy array holding either a colour image in OpenCV channel
            order (passed to ``cv2.COLOR_BGR2GRAY``) or an already-gray image
            of shape ``(H, W)`` or ``(H, W, 1)``.

    Returns:
        The mean gradient magnitude as returned by ``np.mean``.

    Raises:
        ValueError: if ``image`` is ``None`` or has no pixels. ``cv2.imread``
            returns ``None`` for files it cannot decode, so this turns an
            opaque OpenCV assertion into an actionable message.
    """
    if image is None or getattr(image, "size", 0) == 0:
        raise ValueError(
            "sobel_gradient() received an empty image; was the file readable?"
        )

    if image.ndim == 2:
        # Already single-channel: cvtColor(BGR2GRAY) would reject it.
        gray_image = image
    elif image.shape[2] == 1:
        gray_image = image[:, :, 0]
    else:
        gray_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    # CV_64F keeps negative derivatives instead of clipping them to uint8.
    sobel_x = cv2.Sobel(gray_image, cv2.CV_64F, 1, 0, ksize=5)
    sobel_y = cv2.Sobel(gray_image, cv2.CV_64F, 0, 1, ksize=5)
    sobel_magnitude = np.sqrt(sobel_x**2 + sobel_y**2)
    return np.mean(sobel_magnitude)

def process_image(file_path):
    """Yield the image(s) stored in ``file_path`` as OpenCV BGR arrays.

    A path ending in ``.pdf`` (case-insensitive) is opened with PyMuPDF and
    yields one rendered image per page, in page order. Any other path is read
    with ``cv2.imread`` and yields a single image.

    Args:
        file_path: Path to a PDF or to an image file.

    Yields:
        NumPy arrays in BGR channel order. A non-PDF file that OpenCV cannot
        decode yields nothing (a warning is emitted) rather than ``None``, so
        consumers can iterate without a ``None`` check.
    """
    if file_path.lower().endswith('.pdf'):
        # The context manager closes the document when the generator finishes
        # or is closed, instead of leaking the open file handle.
        with fitz.open(file_path) as doc:
            for page in doc:
                pix = page.get_pixmap()
                img = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
                # PIL produces RGB; the rest of the pipeline works in BGR.
                yield cv2.cvtColor(np.array(img), cv2.COLOR_RGB2BGR)
    else:
        image = cv2.imread(file_path)
        if image is None:
            # cv2.imread signals an unreadable file by returning None.
            import warnings
            warnings.warn(f"Could not read image file, skipping: {file_path}")
            return
        yield image

# Dictionary to hold image scores, keyed by file path
image_scores = {}

# Score each file in the input folder. The listing is sorted because
# os.listdir returns entries in an unspecified order.
for filename in sorted(os.listdir(input_folder)):
    file_path = os.path.join(input_folder, filename)
    if not os.path.isfile(file_path):
        continue  # sub-directories cannot be scored
    for img in process_image(file_path):
        if img is None:
            # cv2.imread returns None for files it cannot decode
            print(f"Skipping unreadable file: {file_path}")
            continue
        sobel_score = sobel_gradient(img)
        image_scores[file_path] = sobel_score
        # One score per file: use the first page of a multi-page PDF, which
        # is also the page written out when the file is saved later on.
        break

# Sort images based on Sobel scores (ascending)
sorted_images = sorted(image_scores.items(), key=lambda x: x[1])

image_scores

{'data_kk\\1253481.jpg': 273.1896166906285,
 'data_kk\\1363796.jpeg': 190.7322558700792,
 'data_kk\\anime-girl-mermaid-underwater-fantasy-2k-wallpaper-uhdpaper.com-557@2@a.jpg': 679.3636263327845,
 'data_kk\\wallpaperflare.com_wallpaper (1).jpg': 128.30303202876925,
 'data_kk\\wallpaperflare.com_wallpaper (2).jpg': 182.57190556392698,
 'data_kk\\wallpaperflare.com_wallpaper (3).jpg': 108.92580889137267,
 'data_kk\\wallpaperflare.com_wallpaper (4).jpg': 148.1001132691396,
 'data_kk\\wallpaperflare.com_wallpaper (5).jpg': 246.73412591418602,
 'data_kk\\wallpaperflare.com_wallpaper.jpg': 284.74186040999984}

In [None]:
# Assign images to their folders using `sorted_images`, the ascending Sobel
# ranking built by the scoring cell above. The scores are not recomputed here.
num_images = len(sorted_images)
for i, (file_path, score) in enumerate(sorted_images):
    if i == 0:  # Lowest Sobel score
        target_folder = output_blur
    elif i == num_images - 1:  # Highest Sobel score
        target_folder = output_noise
    else:  # Middle Sobel scores
        target_folder = output_normal
    output_path = os.path.join(target_folder, os.path.basename(file_path))

    if file_path.lower().endswith('.pdf'):
        # Only the first page of a PDF is saved; an empty PDF gives None.
        img = next(process_image(file_path), None)
        # splitext swaps the extension whatever its letter case and never
        # touches a '.pdf' that appears earlier in the path.
        output_path = os.path.splitext(output_path)[0] + '.png'
    else:
        img = cv2.imread(file_path)

    if img is None:
        # cv2.imread returns None for files it cannot decode
        print(f"Skipping unreadable file: {file_path}")
        continue

    # The original (unfiltered) image is written, not a Sobel-filtered one.
    if not cv2.imwrite(output_path, img):
        print(f"Failed to write: {output_path}")

print("Processing complete.")