# ImageRevive 2.0: AI-Powered Image Restoration
# Notebook 2: EDA and Pre-Processing

**Course**: AAI-521 Computer Vision  
**Focus**: Exploratory Data Analysis and Data Preprocessing  

---

## Table of Contents
1. [Data Loading](#loading)
2. [Exploratory Data Analysis](#eda)
3. [Image Statistics](#stats)
4. [Data Augmentation](#augmentation)
5. [Preprocessing Pipeline](#pipeline)
6. [Train/Val Split](#split)

In [1]:
# PEP 8: Imports organized in groups
# Standard library
import os
import random
from pathlib import Path
from typing import List, Tuple, Dict, Optional

# Third-party scientific computing
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm.auto import tqdm

# Image processing
import cv2
from PIL import Image
from skimage import metrics, color, exposure
from skimage.util import random_noise

# Deep learning
import torch
import torchvision
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader

# Configuration
%matplotlib inline
# Plot appearance: apply a matplotlib style sheet first, then set seaborn's
# default colour palette for the figures drawn in this notebook.
plt.style.use('seaborn-v0_8-darkgrid')
sns.set_palette("husl")

# Seed the random number generators used in this notebook so that runs are
# repeatable: Python's `random` module, NumPy's global generator and PyTorch.
# NOTE(review): no other determinism settings (e.g. backend flags) are
# configured here — confirm whether later cells need them.
RANDOM_SEED = 42
random.seed(RANDOM_SEED)
np.random.seed(RANDOM_SEED)
torch.manual_seed(RANDOM_SEED)

# Reaching this line means every import and configuration call above ran.
print("✓ Imports successful")

✓ Imports successful


<a id="loading"></a>
## 1. Data Loading

In [2]:
# Dataset locations, expressed relative to the notebook's working directory
DATA_DIR = Path('./ImageRevive/data')
RAW_DIR = DATA_DIR / 'raw'
PROCESSED_DIR = DATA_DIR / 'processed'
BENCHMARKS_DIR = DATA_DIR / 'benchmarks'

# File suffixes (lower case, including the dot) that count as images
IMAGE_EXTENSIONS = {'.png', '.jpg', '.jpeg', '.bmp', '.tiff'}


def load_images_from_directory(directory: Path,
                               max_images: Optional[int] = None) -> List[Path]:
    """
    Collect the image files located directly inside a directory.

    A file counts as an image when its suffix, compared case-insensitively,
    is one of IMAGE_EXTENSIONS. Sub-directories are not searched and are
    never returned, even if their name ends in an image suffix.

    Args:
        directory: Directory containing images
        max_images: Maximum number of paths to return; None means no limit
            and 0 yields an empty list

    Returns:
        Image paths in sorted order. When max_images is given, the result is
        the first max_images entries of that sorted list, so the selection
        is the same on every run. An empty list is returned (after printing
        a message) if `directory` is missing or is not a directory.
    """
    # is_dir() also rejects a regular file passed by mistake, which
    # exists() would have accepted.
    if not directory.is_dir():
        print(f"Directory not found: {directory}")
        return []

    # Compare the lower-cased suffix so files such as "photo.JPG" are found
    # on case-sensitive filesystems too.
    image_paths = sorted(
        path for path in directory.iterdir()
        if path.is_file() and path.suffix.lower() in IMAGE_EXTENSIONS
    )

    # Truncate only after sorting; `is not None` keeps max_images=0
    # meaningful instead of treating it as "no limit".
    if max_images is not None:
        image_paths = image_paths[:max_images]

    return image_paths

# Gather the image paths of both benchmark sets (Set5 first, then Set14)
set5_images, set14_images = (
    load_images_from_directory(BENCHMARKS_DIR / benchmark)
    for benchmark in ('Set5', 'Set14')
)

# Report how many images each benchmark set contains, one line per set
print(
    f"Set5 images: {len(set5_images)}",
    f"Set14 images: {len(set14_images)}",
    sep="\n",
)

Directory not found: ImageRevive/data/benchmarks/Set5
Directory not found: ImageRevive/data/benchmarks/Set14
Set5 images: 0
Set14 images: 0
