### DETAILED EXPLANATION

This script generates blurred versions of each image in the dataset using three types of blur: Gaussian, motion, and box.

**Why Blurring Images?**

Blurring is a common type of image distortion that can occur due to various factors:

- Camera motion during capture (Motion Blur)
- Out-of-focus regions due to depth of field (Gaussian Blur)
- Uniform blurring due to poor image processing or resizing (Box Blur)

These blurs simulate real-world distortions, making our analysis more realistic.

**Types of Blur Used**

1. Gaussian Blur:
   - This blur applies a Gaussian kernel to each pixel, which results in a smooth, out-of-focus effect.
   - The kernel weights are largest at the center and fall off smoothly with distance, following a bell curve (Gaussian distribution).
   - We chose a sigma range of 0.5 to 5.0 (the `PARAM_RANGES` setting in the code below) because it provides visible but controlled blur.
   - Sigma defines the standard deviation of the Gaussian distribution (higher = stronger blur).

2. Motion Blur:
   - Simulates the effect of movement, either from a moving object or a moving camera.
   - The blur has a clear direction (angle) and intensity (length).
   - We chose a length of 5 to 50 pixels (the `PARAM_RANGES` setting in the code below) to allow visible motion without being too extreme.
   - An angle of 0 to 180 degrees covers every orientation of movement, because a blur line at angle θ is identical to one at θ + 180°.

3. Box Blur:
   - Applies a simple averaging of pixel values in a square region around each pixel.
   - This blur is fast and commonly used for quick blur effects.
   - We chose odd kernel sizes from 3 to 15 because they provide a visible blur without being too extreme.
   - The kernel size defines the square region used for averaging (higher = stronger blur).

**Why Save Parameters in a Parquet File?**

- Parquet is optimized for speed and storage efficiency.
- It is suitable for large datasets, ensuring fast read and write operations.
- Parquet allows us to store the blur parameters alongside the image identifiers, making it easy to track and analyze.

In [None]:
# Blur Generation for Image Deblurring Performance Analysis

import cv2
import numpy as np
import pandas as pd
import os
from pathlib import Path
import random
from tqdm import tqdm
import logging
import sys
from datasets import load_dataset
from PIL import Image

# Add the project root to the import path so the project's own modules can be
# imported. The entry is relative, so it is resolved against the current
# working directory at import time.
sys.path.append("../..")
import utils.constants as const

# Logging: create the log directory first, because basicConfig(filename=...)
# opens the log file immediately and raises FileNotFoundError when the parent
# directory does not exist.
os.makedirs('logs', exist_ok=True)
logging.basicConfig(
    filename='logs/3_blur_generation.log',
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s'
)

# Paths: each working directory is created (if missing) right after it is defined.

# Scratch directory for images saved locally before OpenCV reads them.
TEMP_DIR = Path("temp_images")
TEMP_DIR.mkdir(exist_ok=True, parents=True)

# Output directory for blurred images, taken from the project constants.
BLURRED_DIR = const.BLURRED_DATASET_PATH
BLURRED_DIR.mkdir(exist_ok=True, parents=True)

# Main dataset location, taken from the project constants.
DATASET_FILE = const.MAIN_DATASET_PATH

# Parameter intervals, as (low, high) pairs, for each blur type
PARAM_RANGES = {
    "gaussian": {
        "sigma": (0.5, 5.0),
    },
    "motion": {
        "length": (5, 50),
        "angle": (0, 180),
    },
    "box": {
        "kernel_size": (3, 15),
    },
}

# Kernel generation

def gen_gaussian_kernel(size, sigma):
    """Build a square 2-D Gaussian kernel.

    The 1-D kernel returned by ``cv2.getGaussianKernel`` is multiplied by its
    own transpose to obtain the 2-D kernel.

    Args:
        size: Kernel aperture passed to ``cv2.getGaussianKernel``.
        sigma: Standard deviation of the Gaussian.

    Returns:
        The matrix product of the 1-D kernel with its transpose.
    """
    column = cv2.getGaussianKernel(size, sigma)
    return column @ column.T

def gen_motion_kernel(size, length, angle):
    """Build a normalized linear motion-blur kernel.

    A one-pixel-thick line with the requested length and orientation is drawn
    through the middle of a ``size`` x ``size`` float32 matrix, and the matrix
    is then divided by its sum.

    Args:
        size: Side length of the square kernel.
        length: Length of the motion streak, in pixels.
        angle: Orientation of the streak, in degrees.

    Returns:
        A float32 array of shape ``(size, size)`` normalized by its sum.
    """
    kernel = np.zeros((size, size), dtype=np.float32)
    center = (size - 1) / 2
    angle_rad = np.deg2rad(angle)
    dx = length / 2 * np.cos(angle_rad)
    dy = length / 2 * np.sin(angle_rad)
    # Round to the nearest pixel rather than truncating. int() alone floors
    # both endpoints, which pushes the whole streak off-center and makes the
    # blurred image shift relative to the original.
    start_point = (int(round(center - dx)), int(round(center - dy)))
    end_point = (int(round(center + dx)), int(round(center + dy)))
    cv2.line(kernel, start_point, end_point, 1, 1)
    kernel /= np.sum(kernel)
    return kernel

def gen_box_kernel(size):
    """Build a uniform averaging kernel.

    Args:
        size: Side length of the square kernel.

    Returns:
        An array of shape ``(size, size)`` in which every entry equals
        ``1 / (size * size)``.
    """
    cell_count = size * size
    ones = np.ones((size, size), dtype=np.float32)
    return ones / cell_count

# Blur application

def apply_kernel_blur(image, kernel):
    """Filter *image* with *kernel* using ``cv2.filter2D``.

    Args:
        image: Image array to filter.
        kernel: 2-D filter kernel.

    Returns:
        The filtered image, with the same depth as the input.
    """
    same_depth_as_source = -1  # ddepth=-1 keeps the source image depth
    return cv2.filter2D(image, same_depth_as_source, kernel)

# Main routine

def _write_blurred_image(image, kernel, output_path):
    """Filter *image* with *kernel* and write the result to *output_path*.

    Returns True when OpenCV reports the file was written; otherwise logs the
    failure and returns False.
    """
    blurred = apply_kernel_blur(image, kernel)
    if cv2.imwrite(str(output_path), blurred):
        return True
    logging.error("Failed to write blurred image: %s", output_path)
    return False


def process_images_from_huggingface(sample_size=10):
    """Generate Gaussian, motion and box blurred copies of dataset images.

    Streams the first ``sample_size`` records of the HuggingFace dataset, saves
    each original image under ``TEMP_DIR``, writes three blurred variants under
    ``BLURRED_DIR`` and stores the randomly drawn blur parameters in
    ``data/image_deblurring_blur_parameters.parquet``.

    Records without an ``image`` or ``key`` field, and images OpenCV cannot
    read back, are skipped and logged. A metadata row is recorded only for
    blurred files that were actually written.

    Args:
        sample_size: Number of records to take from the start of the stream.
    """
    ds = load_dataset("slymachenko/image-deblurring-performance-analysis", split="train", streaming=True)
    ds_head = ds.take(sample_size)

    blurred_metadata = []

    for img in tqdm(ds_head, total=sample_size, desc="Processing images"):
        if "image" not in img or "key" not in img:
            logging.warning("Skipping record without 'image'/'key' field; fields present: %s", list(img))
            continue

        image_pil = img["image"]
        image_id = img["key"]
        image_path = TEMP_DIR / f"{image_id}.png"

        # Save the original image locally so OpenCV can load it from disk
        image_pil.save(image_path)

        image = cv2.imread(str(image_path))
        if image is None:
            logging.warning("Skipping %s: OpenCV could not read %s", image_id, image_path)
            continue

        # Gaussian blur: the kernel spans about 6 sigma; "| 1" forces an odd size
        sigma = random.uniform(*PARAM_RANGES['gaussian']['sigma'])
        size_gauss = int(6 * sigma) | 1
        gaussian_kernel = gen_gaussian_kernel(size_gauss, sigma)
        if _write_blurred_image(image, gaussian_kernel, BLURRED_DIR / f"{image_id}_gaussian.png"):
            blurred_metadata.append({"image_id": image_id, "blur_type": "gaussian", "gaussian_sigma": sigma})

        # Motion blur: odd kernel size of at least 3, large enough for the streak
        length = random.randint(*PARAM_RANGES['motion']['length'])
        angle = random.uniform(*PARAM_RANGES['motion']['angle'])
        size_motion = max(3, int(length) | 1)
        motion_kernel = gen_motion_kernel(size_motion, length, angle)
        if _write_blurred_image(image, motion_kernel, BLURRED_DIR / f"{image_id}_motion.png"):
            blurred_metadata.append({"image_id": image_id, "blur_type": "motion", "motion_length": length, "motion_angle": angle})

        # Box blur: step 2 from the lower bound, so only odd sizes are drawn
        box_low, box_high = PARAM_RANGES['box']['kernel_size']
        kernel_size = random.choice(range(box_low, box_high + 1, 2))
        box_kernel = gen_box_kernel(kernel_size)
        if _write_blurred_image(image, box_kernel, BLURRED_DIR / f"{image_id}_box.png"):
            blurred_metadata.append({"image_id": image_id, "blur_type": "box", "box_kernel_size": kernel_size})

    df_blur = pd.DataFrame(blurred_metadata)
    # Create the output directory first so a missing "data/" folder does not
    # discard all the work done above.
    parquet_path = Path("data/image_deblurring_blur_parameters.parquet")
    parquet_path.parent.mkdir(parents=True, exist_ok=True)
    df_blur.to_parquet(parquet_path, index=False)
    logging.info("Saved %d blur metadata rows to %s", len(df_blur), parquet_path)
    print("\nBlur generation completed and metadata saved.")

# Run the pipeline only when executed as a script or notebook cell, so that
# importing this code does not trigger a dataset download.
if __name__ == "__main__":
    process_images_from_huggingface(sample_size=10)

