In [None]:
# ====================== Super-Resolution Dataset Degradation Pipeline ======================
import os, random, argparse, math
from PIL import Image, ImageFilter, ImageOps
import numpy as np
import glob
import shutil
from tqdm import tqdm


# ============================== Blur Augmentations ==============================
def random_gaussian_blur(img, radius_min=0.3, radius_max=2.5, p=0.6):
    """Apply a Gaussian blur of random radius with probability ``p``.

    Args:
        img: Image object exposing ``filter`` (a PIL image in this pipeline).
        radius_min: Lower bound of the uniformly sampled blur radius.
        radius_max: Upper bound of the uniformly sampled blur radius.
        p: Probability that the blur is applied.

    Returns:
        The blurred image, or ``img`` itself when the blur is skipped.
    """
    skip = random.random() > p
    if skip:
        return img
    radius = random.uniform(radius_min, radius_max)
    gaussian = ImageFilter.GaussianBlur(radius=radius)
    return img.filter(gaussian)


def random_motion_blur(img, max_kernel=15, p=0.3):
    """Apply one to three stacked Gaussian blurs with probability ``p``.

    NOTE(review): despite the name, no directional (motion) kernel is used;
    the effect is an isotropic blur, and ``max_kernel`` is accepted but never
    read by this implementation.

    Args:
        img: Image object exposing ``filter`` (a PIL image in this pipeline).
        max_kernel: Unused.
        p: Probability that any blur is applied.

    Returns:
        The blurred image, or ``img`` itself when the blur is skipped.
    """
    if random.random() > p:
        return img
    passes_left = random.randint(1, 3)
    while passes_left > 0:
        radius = random.uniform(0.5, 2.0)
        img = img.filter(ImageFilter.GaussianBlur(radius=radius))
        passes_left -= 1
    return img


# ============================== Noise & Compression ==============================
def random_noise_np(img_np, sigma_min=2, sigma_max=25, p=0.6):
    """Add zero-mean Gaussian noise to an image array with probability ``p``.

    Args:
        img_np: Image array; values are treated as being on a 0-255 scale.
        sigma_min: Lower bound of the uniformly sampled noise std-dev.
        sigma_max: Upper bound of the uniformly sampled noise std-dev.
        p: Probability that noise is added.

    Returns:
        A new ``uint8`` array clipped to [0, 255], or ``img_np`` itself
        (unchanged, original dtype) when the noise is skipped.
    """
    if random.random() > p:
        return img_np
    sigma = random.uniform(sigma_min, sigma_max)
    noisy = img_np.astype(np.float32)
    # Work in float32 so the sum can leave the uint8 range before clipping.
    noisy = noisy + np.random.normal(0, sigma, img_np.shape).astype(np.float32)
    return np.clip(noisy, 0, 255).astype(np.uint8)


def random_jpeg_compress(img, q_min=30, q_max=95, p=0.7):
    """Round-trip an image through in-memory JPEG encoding with probability ``p``.

    Args:
        img: PIL image to compress.
        q_min: Lowest JPEG quality that may be drawn (inclusive).
        q_max: Highest JPEG quality that may be drawn (inclusive).
        p: Probability that compression is applied.

    Returns:
        The decoded RGB image after JPEG compression, or ``img`` itself when
        compression is skipped.
    """
    if random.random() > p:
        return img
    quality = random.randint(q_min, q_max)
    import io
    stream = io.BytesIO()
    img.save(stream, format='JPEG', quality=quality)
    stream.seek(0)  # rewind so the decoder reads from the start
    decoded = Image.open(stream)
    return decoded.convert('RGB')


# ============================== Resolution Scaling ==============================
def random_downsample(img, scale=4, method=None):
    """Shrink an image by an integer factor using a (possibly random) filter.

    Args:
        img: PIL image to shrink.
        scale: Integer divisor applied to width and height (floor division).
        method: ``'bicubic'``, ``'bilinear'`` or anything else for nearest
            neighbour. When ``None`` one of the three is chosen at random.

    Returns:
        The resized image of size ``(w // scale, h // scale)``.
    """
    if method is None:
        method = random.choice(['bicubic', 'bilinear', 'nearest'])
    width, height = img.size
    target_size = (width // scale, height // scale)
    # Any name not listed here falls back to nearest-neighbour resampling.
    resamplers = {'bicubic': Image.BICUBIC, 'bilinear': Image.BILINEAR}
    return img.resize(target_size, resamplers.get(method, Image.NEAREST))


def random_upscale(img, scale=4):
    """Enlarge an image by an integer factor with bicubic resampling.

    Args:
        img: PIL image to enlarge.
        scale: Multiplier applied to both width and height.

    Returns:
        The resized image of size ``(w * scale, h * scale)``.
    """
    width, height = img.size
    enlarged_size = (width * scale, height * scale)
    return img.resize(enlarged_size, Image.BICUBIC)


# ============================== Degradation Pipeline ==============================
def degrade_image(hr_img, scale=4):
    """Produce a randomly degraded low-resolution version of an HR image.

    The steps run in a fixed order: Gaussian blur, stacked blur, downsample by
    ``scale``, additive Gaussian noise, JPEG compression, and finally an
    optional auto-contrast. Each step except the downsample is applied with
    its own probability.

    Args:
        hr_img: High-resolution PIL image; it is copied, not modified.
        scale: Integer downsampling factor.

    Returns:
        The degraded low-resolution PIL image.
    """
    blurred = random_gaussian_blur(hr_img.copy(), radius_min=0.2, radius_max=2.0, p=0.8)
    blurred = random_motion_blur(blurred, max_kernel=15, p=0.25)
    small = random_downsample(blurred, scale=scale)
    # Noise is added on the numpy representation, then converted back to PIL.
    noisy_np = random_noise_np(np.array(small), sigma_min=1, sigma_max=12, p=0.6)
    compressed = random_jpeg_compress(Image.fromarray(noisy_np), q_min=30, q_max=95, p=0.7)
    return ImageOps.autocontrast(compressed) if random.random() < 0.4 else compressed


# ============================== Dataset Preparation ==============================
def prepare(hr_dir, out_dir, scale=4, split=(0.85,0.10,0.05), force=False):
    """Build paired HR/LR train/val/test folders from a directory of HR images.

    Every regular file matching ``*.*`` in ``hr_dir`` is shuffled, assigned to
    a subset, cropped so both sides are multiples of ``scale``, saved as PNG
    under ``out_dir/HR/<subset>`` and degraded into ``out_dir/LR/<subset>``.

    Args:
        hr_dir: Directory containing the source high-resolution images.
        out_dir: Output root; ``HR`` and ``LR`` sub-trees are created in it.
        scale: Integer downsampling factor between HR and LR.
        split: Fractions for (train, val, test). Only the first two are read;
            the test subset receives whatever remains.
        force: Accepted for backward compatibility but not used by this
            implementation; existing outputs are always overwritten.

    Raises:
        ValueError: If ``hr_dir`` contains no candidate files.

    Note:
        The shuffle uses the global ``random`` state, so the split changes
        between runs unless the caller seeds it. Outputs are named after the
        source stem, so ``a.jpg`` and ``a.png`` map to the same ``a.png``.
    """
    os.makedirs(out_dir, exist_ok=True)
    processed_hr = os.path.join(out_dir, 'HR')
    processed_lr = os.path.join(out_dir, 'LR')
    for p in [processed_hr, processed_lr]:
        os.makedirs(p, exist_ok=True)

    # Directories whose name contains a dot also match '*.*'; keep only files
    # so they do not distort the split sizes.
    imgs = sorted(
        path for path in glob.glob(os.path.join(hr_dir, '*.*'))
        if os.path.isfile(path)
    )
    if len(imgs) == 0:
        raise ValueError("No images found in HR dir")
    random.shuffle(imgs)

    n = len(imgs)
    n1 = int(n * split[0])
    n2 = n1 + int(n * split[1])

    sets = {
        'train': imgs[:n1],
        'val': imgs[n1:n2],
        'test': imgs[n2:]
    }

    for subset, files in sets.items():
        subset_hr = os.path.join(processed_hr, subset)
        subset_lr = os.path.join(processed_lr, subset)
        os.makedirs(subset_hr, exist_ok=True)
        os.makedirs(subset_lr, exist_ok=True)
        print(f"Processing {subset}: {len(files)} images")
        for p in tqdm(files):
            try:
                # Context manager closes the file handle as soon as the pixel
                # data has been converted into an independent RGB image.
                with Image.open(p) as src:
                    img = src.convert('RGB')
            except Exception as e:
                print("skip", p, e)
                continue
            w, h = img.size
            # Crop to multiples of `scale` so HR is exactly `scale` x LR.
            w = (w // scale) * scale
            h = (h // scale) * scale
            if w == 0 or h == 0:
                continue
            img = img.crop((0, 0, w, h))

            base = os.path.splitext(os.path.basename(p))[0]
            hr_path = os.path.join(subset_hr, base + '.png')
            lr_path = os.path.join(subset_lr, base + '.png')

            # Degrade before writing anything so a failure cannot leave an
            # HR file without its LR counterpart.
            lr_img = degrade_image(img, scale=scale)
            img.save(hr_path, format='PNG')
            lr_img.save(lr_path, format='PNG')
    print("Done. Processed data in:", out_dir)


# ============================== Entry Point ==============================
if __name__ == "__main__":
    # Hard-coded, machine-specific locations of the HR source folder and the output root.
    hr = r"D:\Projects\Enhance photo\HQ_augmented22"
    out = r"D:\Projects\Enhance photo\HQ_augmented_Out"
    # Integer downsampling factor between the saved HR and LR images.
    scale = 4
    # Writes <out>/HR/<subset> and <out>/LR/<subset> for train, val and test.
    prepare(hr, out, scale=scale)

Processing train: 1656 images


100%|██████████| 1656/1656 [2:24:10<00:00,  5.22s/it]  


Processing val: 194 images


100%|██████████| 194/194 [17:18<00:00,  5.35s/it]


Processing test: 99 images


100%|██████████| 99/99 [07:19<00:00,  4.44s/it]

Done. Processed data in: D:\Projects\Enhance photo\HQ_augmented_Out





In [None]:
# ====================== Person-Aware Degradation Using Semantic Segmentation ======================
import os, glob
from PIL import Image, ImageFilter
import numpy as np
import torch
import torchvision.transforms as T
from torchvision.models.segmentation import deeplabv3_resnet50, DeepLabV3_ResNet50_Weights
from tqdm import tqdm
import torch.backends.cudnn as cudnn


# ============================== CUDA & Runtime Configuration ==============================
# Enable cuDNN's benchmark mode and pick the GPU when one is available.
cudnn.benchmark = True
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


# ============================== Segmentation Model Initialization ==============================
# The model is built once at cell execution, switched to eval mode and moved to `device`.
weights = DeepLabV3_ResNet50_Weights.COCO_WITH_VOC_LABELS_V1
seg_model = deeplabv3_resnet50(weights=weights).eval().to(device)
# Preprocessing pipeline that ships with the selected weights.
transform = weights.transforms()


# ============================== Person Mask Extraction ==============================
def get_person_mask(img_pil, threshold=0.5):
    """Return a binary person mask for ``img_pil`` at its original resolution.

    The image is first shrunk to 512x512 to keep segmentation fast, the
    per-class scores are turned into probabilities with a softmax over the
    class axis, and the person probability is thresholded.

    Args:
        img_pil: RGB PIL image.
        threshold: Minimum person probability for a pixel to be marked.

    Returns:
        ``uint8`` array of shape ``(height, width)`` with 1 on person pixels
        and 0 elsewhere.
    """
    # Index of "person" in the VOC label ordering used by these weights
    # (NOTE(review): confirm against weights.meta["categories"]).
    person_class = 15

    small = img_pil.resize((512, 512), Image.BILINEAR)
    batch = transform(small).unsqueeze(0).to(device)

    with torch.no_grad():
        logits = seg_model(batch)["out"][0]
        # The head emits mutually exclusive class logits, so probabilities
        # come from a softmax across classes. A sigmoid on one logit would
        # ignore competing classes and over-segment.
        person_prob = logits.softmax(dim=0)[person_class]
        mask_small = (person_prob.cpu().numpy() > threshold).astype(np.uint8)

    mask = Image.fromarray(mask_small * 255)
    # NEAREST keeps the mask strictly binary while scaling back up.
    mask = mask.resize(img_pil.size, Image.NEAREST)

    return (np.array(mask) > 128).astype(np.uint8)


# ============================== Person-Region Degradation ==============================
def degrade_person_region(img_pil):
    """Apply a mild, fixed degradation: light blur, faint noise, JPEG at quality 94.

    Args:
        img_pil: RGB PIL image to degrade.

    Returns:
        A new RGB PIL image decoded from the JPEG-compressed result.
    """
    import io

    softened = img_pil.filter(ImageFilter.GaussianBlur(radius=0.4))

    pixels = np.array(softened)
    # Adding the float noise promotes the array to float; clip before casting back.
    noisy = pixels + np.random.normal(0, 1.5, pixels.shape)
    noisy_img = Image.fromarray(np.clip(noisy, 0, 255).astype(np.uint8))

    stream = io.BytesIO()
    noisy_img.save(stream, format="JPEG", quality=94)
    stream.seek(0)

    decoded = Image.open(stream)
    return decoded.convert("RGB")


# ============================== LR Processing with HR-Guided Masking ==============================
def process_existing_lr(lr_dir, hr_dir, out_dir):
    """Add extra degradation to the person pixels of existing LR images.

    For each file in ``lr_dir`` with a same-stem ``.png`` in ``hr_dir``, a
    person mask is computed on the HR image, scaled to the LR size, and used
    to paste a degraded copy of the LR image over the person pixels only.
    Results are written as PNG into ``out_dir``. LR files without an HR
    counterpart are skipped.

    Args:
        lr_dir: Directory of low-resolution images.
        hr_dir: Directory of high-resolution images (``<stem>.png``).
        out_dir: Destination directory; created when missing.
    """
    os.makedirs(out_dir, exist_ok=True)
    lr_files = sorted(glob.glob(os.path.join(lr_dir, "*.*")))

    for lr_path in tqdm(lr_files):
        base = os.path.splitext(os.path.basename(lr_path))[0]
        hr_path = os.path.join(hr_dir, base + ".png")

        if not os.path.exists(hr_path):
            continue

        # Context managers release the file handles right after decoding.
        with Image.open(lr_path) as lr_file:
            lr_img = lr_file.convert("RGB")
        with Image.open(hr_path) as hr_file:
            hr_img = hr_file.convert("RGB")

        mask = get_person_mask(hr_img)

        # Bring the HR-sized mask down to the LR grid; NEAREST keeps it binary.
        mask_pil = Image.fromarray(mask * 255)
        mask_resized = mask_pil.resize(lr_img.size, Image.NEAREST)
        person_pixels = np.array(mask_resized) > 128

        # Degrade the complete LR image and composite afterwards. Degrading a
        # copy whose background was zeroed would let the blur and JPEG blocks
        # mix black into the silhouette edge and leave a dark halo.
        degraded_np = np.array(degrade_person_region(lr_img))

        final_np = np.array(lr_img)
        final_np[person_pixels] = degraded_np[person_pixels]

        final_img = Image.fromarray(final_np)
        final_img.save(os.path.join(out_dir, base + ".png"))

    print("DONE ✓ — Fast degradation applied to person only!")


# ============================== Entry Point ==============================
if __name__ == "__main__":
    # Hard-coded, machine-specific folders. The HR folder is searched for "<stem>.png".
    lr_dir = r"D:\Projects\Enhance photo\HQ_augmented_Out\LR"
    hr_dir = r"D:\Projects\Enhance photo\HQ_augmented_Out\HR"
    out_dir = r"D:\Projects\Enhance photo\HQ_augmented_Out\OUT"
    process_existing_lr(lr_dir, hr_dir, out_dir)


100%|██████████| 1949/1949 [2:32:30<00:00,  4.69s/it]  

DONE ✓ — Fast degradation applied to person only!





In [None]:
# ======================= Image Hashing & Duplicate Detection =======================
import os
from PIL import Image
import imagehash


# ============================== Compute Image Hashes ==============================
def hash_images(folder, hash_size=8, algo="phash"):
    """Compute a perceptual hash for every readable entry of ``folder``.

    Args:
        folder: Directory whose entries are hashed (non-recursive).
        hash_size: Hash size forwarded to the ``imagehash`` function.
        algo: ``"phash"``, ``"ahash"``, ``"dhash"`` or ``"whash"``; any other
            value falls back to ``phash``.

    Returns:
        Dict mapping each successfully hashed file path to its hash. Entries
        that cannot be opened or hashed are reported and skipped.
    """
    # Name of the imagehash function behind each supported algorithm.
    hasher_names = {
        "phash": "phash",
        "ahash": "average_hash",
        "dhash": "dhash",
        "whash": "whash",
    }
    hasher_name = hasher_names.get(algo, "phash")

    hashes = {}
    for entry in os.listdir(folder):
        path = os.path.join(folder, entry)
        try:
            img = Image.open(path)
            hasher = getattr(imagehash, hasher_name)
            digest = hasher(img, hash_size=hash_size)
        except Exception as e:
            print("Cannot open image:", path, e)
            continue
        hashes[path] = digest
    return hashes


# ============================== Find Duplicate Groups ==============================
def find_duplicates(hashes, max_distance=5):
    """Group paths whose hashes lie within ``max_distance`` of a group anchor.

    Paths are scanned in the dict's iteration order. Each path not yet
    assigned becomes the anchor of a new group and collects every later,
    still unassigned path whose hash distance to the anchor is at most
    ``max_distance``. Each path therefore appears in at most one group.

    Args:
        hashes: Mapping of path -> hash object; ``hash_a - hash_b`` must
            yield the distance between two hashes.
        max_distance: Largest distance still treated as a duplicate.

    Returns:
        List of groups (lists of paths, anchor first). Only groups with at
        least two members are returned.
    """
    paths = list(hashes)
    dup_groups = []
    visited = set()
    for i, anchor in enumerate(paths):
        if anchor in visited:
            continue
        group = [anchor]
        for candidate in paths[i + 1:]:
            # Without this check a path already claimed by an earlier anchor
            # could be added to a second group and later be deleted twice.
            if candidate in visited:
                continue
            if hashes[anchor] - hashes[candidate] <= max_distance:
                group.append(candidate)
                visited.add(candidate)
        if len(group) > 1:
            dup_groups.append(group)
    return dup_groups


# ============================== Main Execution ==============================
if __name__ == "__main__":
    # Hard-coded, machine-specific dataset folders.
    hr_folder = r"D:\Projects\Enhance photo\data\HQ"
    lr_folder = r"D:\Projects\Enhance photo\data\LQ"

    print("Hashing HR images …")
    hr_hashes = hash_images(hr_folder, hash_size=8, algo="phash")
    print("Hashing LR images …")
    lr_hashes = hash_images(lr_folder, hash_size=8, algo="phash")

    # hr_dupes and lr_dupes stay in the notebook namespace; the visualization
    # and cleanup cells read them.
    print("Finding duplicates in HR …")
    hr_dupes = find_duplicates(hr_hashes, max_distance=5)
    for g in hr_dupes:
        print("Duplicate group HR:", g)

    print("Finding duplicates in LR …")
    lr_dupes = find_duplicates(lr_hashes, max_distance=5)
    for g in lr_dupes:
        print("Duplicate group LR:", g)

    # Compare file stems (extension ignored) to report unpaired images on either side.
    hr_names = set([os.path.splitext(os.path.basename(p))[0] for p in hr_hashes.keys()])
    lr_names = set([os.path.splitext(os.path.basename(p))[0] for p in lr_hashes.keys()])
    missing_in_lr = hr_names - lr_names
    missing_in_hr = lr_names - hr_names
    print("Names in HR but not in LR:", missing_in_lr)
    print("Names in LR but not in HR:", missing_in_hr)


Hashing HR images …
Hashing LR images …
Finding duplicates in HR …
Duplicate group HR: ['D:\\Projects\\Enhance photo\\data\\HQ\\1_120.jpg', 'D:\\Projects\\Enhance photo\\data\\HQ\\1_120_aug.png']
Duplicate group HR: ['D:\\Projects\\Enhance photo\\data\\HQ\\1_52.jpg', 'D:\\Projects\\Enhance photo\\data\\HQ\\1_52_aug.png']
Duplicate group HR: ['D:\\Projects\\Enhance photo\\data\\HQ\\1_70.jpg', 'D:\\Projects\\Enhance photo\\data\\HQ\\1_70_aug.png']
Duplicate group HR: ['D:\\Projects\\Enhance photo\\data\\HQ\\1_76.jpg', 'D:\\Projects\\Enhance photo\\data\\HQ\\1_76_aug.png']
Duplicate group HR: ['D:\\Projects\\Enhance photo\\data\\HQ\\2_138.jpg', 'D:\\Projects\\Enhance photo\\data\\HQ\\2_138_aug.png']
Duplicate group HR: ['D:\\Projects\\Enhance photo\\data\\HQ\\2_382.jpg', 'D:\\Projects\\Enhance photo\\data\\HQ\\2_382_aug.png']
Duplicate group HR: ['D:\\Projects\\Enhance photo\\data\\HQ\\3_172.jpg', 'D:\\Projects\\Enhance photo\\data\\HQ\\3_172_aug.png']
Duplicate group HR: ['D:\\Projects\\

In [None]:
# ======================= Duplicate Groups Visualization =======================
import os
from PIL import Image
import matplotlib.pyplot as plt


# ============================== Display Duplicate Groups ==============================
def show_duplicate_groups(duplicate_groups, cols=3):
    """Print each duplicate group and display its images in a grid.

    Args:
        duplicate_groups: Iterable of groups, each a list of image paths.
        cols: Number of grid columns per figure.

    One figure is shown per group. Images that fail to load are reported and
    their grid cell is left empty.
    """
    group_no = 0
    for members in duplicate_groups:
        group_no += 1
        print(f"Group {group_no} ({len(members)} images):")
        for member in members:
            print("   ", member)

        count = len(members)
        rows = (count + cols - 1) // cols  # ceiling division

        plt.figure(figsize=(4 * cols, 4 * rows))
        for slot, path in enumerate(members, start=1):
            try:
                picture = Image.open(path)
                plt.subplot(rows, cols, slot)
                plt.imshow(picture)
                plt.title(os.path.basename(path))
                plt.axis("off")
            except Exception as e:
                print("Error loading:", path, e)
        plt.tight_layout()
        plt.show()


# ============================== Run Visualization ==============================
# Requires hr_dupes and lr_dupes from the duplicate-detection cell to exist in the kernel.
show_duplicate_groups(hr_dupes)
show_duplicate_groups(lr_dupes)


In [None]:
# ======================= Low-Resolution Duplicate Cleanup =======================
import os
import shutil
from PIL import Image


# ============================== Config & Directories ==============================
# Hard-coded, machine-specific dataset folders.
HR_DIR = r"D:\Projects\Enhance photo\data\HQ"
LR_DIR = r"D:\Projects\Enhance photo\data\LQ"
# Relative path: resolved against the kernel's current working directory.
BACKUP_DIR = "backup_deleted"
os.makedirs(BACKUP_DIR, exist_ok=True)


# ============================== Delete Lower-Resolution Duplicates ==============================
def _backup_and_remove(path, label, backup_root):
    """Back up ``path`` under ``backup_root/label`` (if set), then delete it.

    Returns True only when the file was removed. A failed backup leaves the
    file in place, so nothing is deleted without a copy.
    """
    if backup_root is not None:
        target_dir = os.path.join(backup_root, label)
        try:
            os.makedirs(target_dir, exist_ok=True)
            shutil.copy2(path, os.path.join(target_dir, os.path.basename(path)))
        except OSError as e:
            print(f"Backup failed, not deleting {label}:", path, e)
            return False
    try:
        os.remove(path)
    except OSError:
        print(f"Failed {label} delete:", path)
        return False
    print(f"Deleted {label}:", path)
    return True


def delete_lowres_duplicates(duplicate_groups, backup=True, lr_dir=None, backup_dir=None):
    """Keep the largest image of each duplicate group and delete the rest.

    For every deleted HR file, the LR files with the same stem and a
    ``.png``/``.jpg``/``.jpeg`` extension are deleted as well.

    Args:
        duplicate_groups: Iterable of groups (lists of HR paths).
        backup: When true, each file is copied to the backup folder before
            deletion; a file whose backup fails is not deleted.
        lr_dir: Folder holding the LR counterparts. Defaults to the
            module-level ``LR_DIR``.
        backup_dir: Backup root. Defaults to the module-level ``BACKUP_DIR``.
            HR and LR files go to separate ``HR``/``LR`` sub-folders so an LR
            file cannot overwrite the backup of an HR file with the same name.

    Note:
        Size is ``width * height``. Missing or unreadable files count as 0x0.
        When sizes tie, the last path of the group in sorted order is kept.
    """
    lr_root = LR_DIR if lr_dir is None else lr_dir
    backup_root = None
    if backup:
        backup_root = BACKUP_DIR if backup_dir is None else backup_dir

    for group in duplicate_groups:
        if len(group) <= 1:
            continue

        resolutions = []
        for p in group:
            w = h = 0
            if os.path.exists(p):
                try:
                    with Image.open(p) as img:
                        w, h = img.size
                except Exception as e:
                    # Best effort: an unreadable file is treated as the smallest.
                    print("Could not read size, treating as 0x0:", p, e)
                    w = h = 0
            resolutions.append((p, w, h))

        # Ascending by pixel count; the last entry is the one kept.
        resolutions_sorted = sorted(resolutions, key=lambda x: x[1] * x[2])
        to_delete = [p for p, w, h in resolutions_sorted[:-1]]

        keep = resolutions_sorted[-1][0]
        print(f"Keeping: {keep}")
        print(f"Deleting {len(to_delete)} lower-resolution duplicates...")

        for hr_path in to_delete:
            if not os.path.exists(hr_path):
                print("HR missing, skipping:", hr_path)
                continue

            base = os.path.splitext(os.path.basename(hr_path))[0]

            # Only touch the LR side once the HR file is really gone, so a
            # failed HR deletion does not leave an unpaired HR image.
            if not _backup_and_remove(hr_path, "HR", backup_root):
                continue

            for ext in [".png", ".jpg", ".jpeg"]:
                lr_path = os.path.join(lr_root, base + ext)
                if os.path.exists(lr_path):
                    _backup_and_remove(lr_path, "LR", backup_root)

    print("Done")


# ============================== Run Duplicate Cleanup ==============================
# Destructive: deletes files on disk. Requires hr_dupes from the duplicate-detection cell.
delete_lowres_duplicates(hr_dupes)


Keeping: D:\Projects\Enhance photo\data\HQ\1_4.jpg
Deleting 1 lower-resolution duplicates...
HR missing, skipping: D:\Projects\Enhance photo\data\HQ\pexels-sasha-kim-8483335.jpg
Keeping: D:\Projects\Enhance photo\data\HQ\1_56.jpg
Deleting 1 lower-resolution duplicates...
HR missing, skipping: D:\Projects\Enhance photo\data\HQ\pexels-rigged-3196847.jpg
Keeping: D:\Projects\Enhance photo\data\HQ\1_62.jpg
Deleting 1 lower-resolution duplicates...
HR missing, skipping: D:\Projects\Enhance photo\data\HQ\pexels-matvalina-16479601.jpg
Keeping: D:\Projects\Enhance photo\data\HQ\1_68.jpg
Deleting 1 lower-resolution duplicates...
HR missing, skipping: D:\Projects\Enhance photo\data\HQ\pexels-shvetsa-5325696.jpg
Keeping: D:\Projects\Enhance photo\data\HQ\1_8.jpg
Deleting 1 lower-resolution duplicates...
HR missing, skipping: D:\Projects\Enhance photo\data\HQ\pexels-sozutova-12937490.jpg
Keeping: D:\Projects\Enhance photo\data\HQ\2_135.jpg
Deleting 1 lower-resolution duplicates...
HR missing, skip

In [None]:
# ======================= Find LQ Images Missing in HQ =======================
import os

# ============================== Config Directories ==============================
# Hard-coded, machine-specific dataset folders compared by file stem below.
HQ_DIR = r"D:\Projects\Enhance photo\data\HQ"
LQ_DIR = r"D:\Projects\Enhance photo\data\LQ"

# ============================== Missing LQ Detection ==============================
def find_lq_missing_in_hq(hq_dir, lq_dir):
    """Report LQ file stems that have no same-stem file in the HQ folder.

    Args:
        hq_dir: Folder of high-quality images.
        lq_dir: Folder of low-quality images.

    Returns:
        List of stems (file names without extension) present in ``lq_dir``
        but absent from ``hq_dir``, in no particular order. Sub-directories
        are ignored.
    """
    def _stems(directory):
        # Stem of every regular file directly inside `directory`.
        stems = set()
        for entry in os.listdir(directory):
            if os.path.isfile(os.path.join(directory, entry)):
                stems.add(os.path.splitext(entry)[0])
        return stems

    missing_in_hq = _stems(lq_dir).difference(_stems(hq_dir))

    print(f"Found {len(missing_in_hq)} LQ images missing in HQ:\n")
    for stem in sorted(missing_in_hq):
        print(stem)

    return list(missing_in_hq)

# ============================== Run Missing Detection ==============================
# Read-only report; the resulting list of stems is kept in `missing`.
missing = find_lq_missing_in_hq(HQ_DIR, LQ_DIR)


Found 60 LQ images missing in HQ:

a-m-l22Y1DnZaZY-unsplash
a-m-l22Y1DnZaZY-unsplash (2)
alec-pasteur-KcIfpwoy-84-unsplash
alireza-dolati-h5L7zGSd_AY-unsplash1111111111
aliya-amangeldi-K0YPAxzA4AM-unsplashaliya-amangeldi-K0YPAxzA4AM-unsplash
brian-lawson-a-mtphgCGo8-unsplash
brock-wegner-6KqQac69UGE-unsplash
brock-wegner-sHPmo8RfrJE-unsplash
crosby-hinze-U4lYi3CF1f0-unsplash
dominik-day-4PScH9qmA-M-unsplash
eugene-chystiakov-Qgl-2Bgg5d8-unsplashD8D8D8D8D8D8
fadkhera-official-_vNsuW4il_k-unsplash
fares-hamouche-Pd7ZGjbEbg0-unsplash
fares-hamouche-ZvqjS4vyQz8-unsplashfares-fares-fares-fares
full_body_fashion_model_1_1775475474744
full_body_fashion_model_1_42
full_body_fashion_model_1_4875875527752972
full_body_fashion_model_1_5000000000000
full_body_fashion_model_1_51111111111111111111
gabriela-wysocka-6VfML7zLB_I-unsplash
golf-modz-4L-ZdOiSONs-unsplashgolf-golf-
golf-modz-IcbvdhmQ5B4-unsplashIcbvdhmQ5B4IcbvdhmQ5B4IcbvdhmQ5B4
gylain-omer-qEQMq_tOB2g-unsplash666878879
heber-barahona-nQVwN

In [None]:
# ======================= Delete LQ Images Missing in HQ =======================
import os
import shutil

# ============================== Config Directories ==============================
# Hard-coded, machine-specific folders; the backup folder is created when the cell runs.
HQ_DIR = r"D:\Projects\Enhance photo\data\HQ"
LQ_DIR = r"D:\Projects\Enhance photo\data\LQ"
BACKUP_DIR = r"D:\Projects\Enhance photo\backup_LQ_missing"
os.makedirs(BACKUP_DIR, exist_ok=True)

# ============================== Delete Missing LQ ==============================
def delete_lq_missing_in_hq(hq_dir, lq_dir, backup_dir=None):
    """Delete LQ images whose stem has no counterpart in the HQ folder.

    Only files with a ``.png``/``.jpg``/``.jpeg`` extension are deleted. The
    extension is matched case-insensitively against the real directory
    entries, so ``photo.PNG`` is handled on case-sensitive filesystems too.
    Files with other extensions are counted as missing but left untouched.

    Args:
        hq_dir: Folder of high-quality images.
        lq_dir: Folder of low-quality images to clean up.
        backup_dir: Optional folder that receives a copy of every file
            before deletion; it is created when it does not exist. If the
            copy raises, the file is not deleted.
    """
    image_exts = {".png", ".jpg", ".jpeg"}

    hq_stems = {
        os.path.splitext(f)[0]
        for f in os.listdir(hq_dir)
        if os.path.isfile(os.path.join(hq_dir, f))
    }
    lq_entries = [
        f for f in sorted(os.listdir(lq_dir))
        if os.path.isfile(os.path.join(lq_dir, f))
    ]

    missing_in_hq = {os.path.splitext(f)[0] for f in lq_entries} - hq_stems
    print(f"Found {len(missing_in_hq)} LQ images missing in HQ.\n")

    if backup_dir and missing_in_hq:
        os.makedirs(backup_dir, exist_ok=True)

    for fname in lq_entries:
        stem, ext = os.path.splitext(fname)
        if stem not in missing_in_hq or ext.lower() not in image_exts:
            continue
        lq_path = os.path.join(lq_dir, fname)
        if backup_dir:
            shutil.copy2(lq_path, os.path.join(backup_dir, fname))
        os.remove(lq_path)
        print(f"Deleted LQ: {lq_path}")

    print("\nDone! All missing LQ images deleted.")

# ============================== Run Missing LQ Cleanup ==============================
# Destructive: removes unpaired LQ files after copying them to BACKUP_DIR.
delete_lq_missing_in_hq(HQ_DIR, LQ_DIR, BACKUP_DIR)


Found 60 LQ images missing in HQ.

Deleted LQ: D:\Projects\Enhance photo\data\LQ\yehor-milohrodskyi-CKPvvvmN4Fw-unsplash980JHFJV.png
Deleted LQ: D:\Projects\Enhance photo\data\LQ\full_body_fashion_model_1_4875875527752972.png
Deleted LQ: D:\Projects\Enhance photo\data\LQ\mojtaba-mosayebzadeh-kYrMIBGWbp4-unsplashunsplashunsplash.png
Deleted LQ: D:\Projects\Enhance photo\data\LQ\houcine-ncib-R_x4o6SuM6Q-unsplash.png
Deleted LQ: D:\Projects\Enhance photo\data\LQ\jay-soundo-eeGqwRD4Vcg-unsplash.png
Deleted LQ: D:\Projects\Enhance photo\data\LQ\fares-hamouche-ZvqjS4vyQz8-unsplashfares-fares-fares-fares.png
Deleted LQ: D:\Projects\Enhance photo\data\LQ\pars-sahin-V1FboRuROrE-unsplash.png
Deleted LQ: D:\Projects\Enhance photo\data\LQ\mojtaba-mosayebzadeh-axz2j2_QCP0-unsplash.png
Deleted LQ: D:\Projects\Enhance photo\data\LQ\ivan-kazlouskij-cgt4WTkCycM-unsplashJDAHLDAJL;.png
Deleted LQ: D:\Projects\Enhance photo\data\LQ\pexels-dziana-hasanbekava-7275453.png
Deleted LQ: D:\Projects\Enhance phot

In [None]:
# ======================= Image Augmentation for HQ Dataset =======================
import os
from PIL import Image, ImageOps, ImageEnhance
import random


# ============================== Config Directories ==============================
# Hard-coded, machine-specific source folder and augmentation output folder (created on run).
HQ_DIR = r"D:\Projects\Enhance photo\data\HQ"
AUG_DIR = r"D:\Projects\Enhance photo\HQ_augmented22"
os.makedirs(AUG_DIR, exist_ok=True)


# ============================== Augmentation Function ==============================
def augment_image(img):
    """Return a randomly flipped, rotated and tone-adjusted copy of ``img``.

    The image is mirrored horizontally with probability 0.5, rotated by an
    angle in [-10, 10] degrees on an expanded canvas (corners filled with
    black), then brightness and contrast are each scaled by a factor in
    [0.9, 1.1].

    Args:
        img: PIL image; the black fill is given as an RGB triple.

    Returns:
        The augmented PIL image. Its size may differ from the input because
        the rotation expands the canvas.
    """
    flipped = ImageOps.mirror(img) if random.random() < 0.5 else img

    rotated = flipped.rotate(random.uniform(-10, 10), expand=True, fillcolor=(0,0,0))

    brightened = ImageEnhance.Brightness(rotated).enhance(random.uniform(0.9, 1.1))
    return ImageEnhance.Contrast(brightened).enhance(random.uniform(0.9, 1.1))


# ============================== Run Augmentation ==============================
# Create one augmented copy per image in HQ_DIR, saved as "<stem>_aug<ext>" in AUG_DIR.
for fname in os.listdir(HQ_DIR):
    if fname.lower().endswith(('.png', '.jpg', '.jpeg')):
        path = os.path.join(HQ_DIR, fname)
        try:
            img = Image.open(path).convert('RGB')
            aug_img = augment_image(img)
            base, ext = os.path.splitext(fname)
            # The original extension is kept, so PIL picks the output format from it.
            aug_path = os.path.join(AUG_DIR, f"{base}_aug{ext}")
            aug_img.save(aug_path)
        except Exception as e:
            # Best effort: report the failing file and continue with the next one.
            print("Failed:", fname, e)

print("Augmentation completed, images saved in:", AUG_DIR)

Augmentation completed, images saved in: D:\Projects\Enhance photo\HQ_augmented22


In [None]:
# ======================= Smart Train/Val/Test Split for Existing Dataset =======================
import os
import shutil
import random
import glob
from tqdm import tqdm


# ============================== Split Function ==============================
def _list_images(folder, patterns=('*.png', '*.jpg', '*.jpeg', '*.bmp')):
    """Return the sorted paths in ``folder`` that match any of ``patterns``."""
    found = []
    for pattern in patterns:
        found.extend(glob.glob(os.path.join(folder, pattern)))
    # Sorting removes the dependence on filesystem enumeration order.
    return sorted(set(found))


def split_existing_smart(hr_source, lr_source, dest_root, split=(0.8, 0.1, 0.1)):
    """Copy matching HR/LR pairs into train/val/test folders under ``dest_root``.

    HR and LR files are paired by file stem (extension ignored). Pairs are
    shuffled with a fixed seed and copied to ``dest_root/HR/<mode>`` and
    ``dest_root/LR/<mode>``. Inputs are sorted before shuffling, so the same
    set of files always yields the same split.

    Args:
        hr_source: Folder of HR images (png/jpg/jpeg/bmp).
        lr_source: Folder of LR images (png/jpg/jpeg/bmp).
        dest_root: Output root; the folder structure is always created.
        split: Fractions for (train, val, test). Only the first two are read;
            test receives the remainder.

    Returns:
        None. The function returns early after printing an error when the LR
        folder has no images or when no pairs match.

    Note:
        Calls ``random.seed(42)``, which resets the global ``random`` state.
        NOTE(review): stems are split independently, so an image and an
        augmented copy of it under another name can land in different subsets.
    """
    print("Creating folder structure...")
    for mode in ['train', 'val', 'test']:
        os.makedirs(os.path.join(dest_root, 'HR', mode), exist_ok=True)
        os.makedirs(os.path.join(dest_root, 'LR', mode), exist_ok=True)

    print("Indexing LR images...")
    lr_files = _list_images(lr_source)

    if len(lr_files) == 0:
        print("Error: No images found in LR folder!")
        return

    # Stem -> LR path. When several extensions share a stem, the last path in
    # sorted order wins.
    lr_map = {
        os.path.splitext(os.path.basename(f_path))[0]: f_path
        for f_path in lr_files
    }

    print("Matching HR with LR...")
    valid_pairs = []
    for hr_path in _list_images(hr_source):
        name_no_ext = os.path.splitext(os.path.basename(hr_path))[0]
        if name_no_ext in lr_map:
            valid_pairs.append((hr_path, lr_map[name_no_ext]))

    random.seed(42)
    random.shuffle(valid_pairs)

    total = len(valid_pairs)
    if total == 0:
        print("CRITICAL ERROR: No matching pairs found! Check folder paths.")
        return

    n_train = int(total * split[0])
    n_val = int(total * split[1])

    train_pairs = valid_pairs[:n_train]
    val_pairs = valid_pairs[n_train:n_train+n_val]
    test_pairs = valid_pairs[n_train+n_val:]

    datasets = {
        'train': train_pairs,
        'val': val_pairs,
        'test': test_pairs
    }

    print(f"\nFound {total} valid pairs.")
    print(f"Split: Train={len(train_pairs)}, Val={len(val_pairs)}, Test={len(test_pairs)}")
    print("-" * 40)

    print("Copying files to respective folders...")
    for mode, pairs in datasets.items():
        print(f"Copying {mode} set ({len(pairs)} images)...")
        for hr_p, lr_p in tqdm(pairs):
            hr_name = os.path.basename(hr_p)
            lr_name = os.path.basename(lr_p)
            shutil.copy2(hr_p, os.path.join(dest_root, 'HR', mode, hr_name))
            shutil.copy2(lr_p, os.path.join(dest_root, 'LR', mode, lr_name))

    print("-" * 40)
    print(f"DONE! Your custom dataset is ready at: {dest_root}")


# ============================== Main Execution ==============================
if __name__ == "__main__":
    # Hard-coded, machine-specific source folders and destination root.
    hr_dir = r"D:\Projects\Enhance photo\data\HQ"
    lr_dir = r"D:\Projects\Enhance photo\data\LQ" 
    out_dir = r"D:\Projects\Enhance photo\final_data"

    # Uses the function's default split of (0.8, 0.1, 0.1).
    split_existing_smart(hr_dir, lr_dir, out_dir)


Creating folder structure...
Indexing LR images...
Matching HR with LR...

Found 3898 valid pairs.
Split: Train=3118, Val=389, Test=391
----------------------------------------
Copying train set (3118 images)...


100%|██████████| 3118/3118 [01:04<00:00, 48.61it/s]


Copying val set (389 images)...


100%|██████████| 389/389 [00:07<00:00, 49.84it/s]


Copying test set (391 images)...


100%|██████████| 391/391 [00:07<00:00, 49.85it/s]

----------------------------------------
DONE! Your custom dataset is ready at: D:\Projects\Enhance photo\final_data





In [None]:
# ======================= Smart Patch Preparation (Informative Patches) =======================
import os
import random
import numpy as np
from PIL import Image
from tqdm import tqdm
import shutil


# ============================== Check Patch Informativeness ==============================
def is_patch_informative(img_pil, threshold=100):
    """Tell whether a patch has enough grayscale variance to be worth keeping.

    Args:
        img_pil: Image exposing ``convert('L')`` (a PIL image in this pipeline).
        threshold: Variance that must be strictly exceeded.

    Returns:
        Truthy when the variance of the grayscale pixels is above
        ``threshold``; flat patches yield a falsy result.
    """
    luminance = np.array(img_pil.convert('L'))
    return luminance.var() > threshold


# ============================== Prepare Smart Dataset Patches ==============================
def _find_lr_counterpart(lr_dir, stem):
    """Return the path of ``stem`` with a .png/.jpg/.jpeg extension in ``lr_dir``, or None."""
    for ext in ['.png', '.jpg', '.jpeg']:
        candidate = os.path.join(lr_dir, stem + ext)
        if os.path.exists(candidate):
            return candidate
    return None


def prepare_smart_patches(src_root, dest_root, scale=4, n_patches=10, lr_patch_size=48):
    """Cut aligned, informative HR/LR patches for train/val and copy a test subset.

    For every HR image in ``src_root/HR/<mode>`` (train and val) that has an
    LR counterpart, up to ``n_patches`` random LR patches are cropped together
    with the HR region at ``scale`` times the coordinates. Low-variance
    patches are rejected during the first attempts. The first 50 test images
    (sorted by name) are copied without patching.

    Args:
        src_root: Dataset root containing ``HR/<mode>`` and ``LR/<mode>``.
        dest_root: Output root. An existing folder is removed first.
        scale: Integer ratio between HR and LR coordinates.
        n_patches: Maximum number of patches saved per image.
        lr_patch_size: Side length of the square LR patch in pixels; the HR
            patch side is ``lr_patch_size * scale``.

    Note:
        Patch positions come from the global ``random`` state and are not
        seeded here. Images smaller than one LR patch are saved whole as
        ``<stem>_0.png``.
    """
    hr_patch_size = lr_patch_size * scale
    print(f"Target: LR Patch={lr_patch_size}px (Smart Filtering ON)")

    if os.path.exists(dest_root):
        try:
            shutil.rmtree(dest_root)
        except OSError as e:
            # Keep going as before, but say so: leftover files from an
            # earlier run may remain mixed with the new patches.
            print(f"Warning: could not fully remove {dest_root}: {e}")

    for mode in ['train', 'val']:
        print(f"\nProcessing {mode} set...")

        hr_src_dir = os.path.join(src_root, 'HR', mode)
        lr_src_dir = os.path.join(src_root, 'LR', mode)

        hr_dest_dir = os.path.join(dest_root, 'HR', mode)
        lr_dest_dir = os.path.join(dest_root, 'LR', mode)
        os.makedirs(hr_dest_dir, exist_ok=True)
        os.makedirs(lr_dest_dir, exist_ok=True)

        images = os.listdir(hr_src_dir)

        for img_name in tqdm(images):
            if not img_name.lower().endswith(('.png', '.jpg', '.jpeg')):
                continue
            name_no_ext = os.path.splitext(img_name)[0]

            hr_path = os.path.join(hr_src_dir, img_name)
            lr_path = _find_lr_counterpart(lr_src_dir, name_no_ext)
            if not lr_path:
                continue

            try:
                # Context managers release the file handles right after decoding.
                with Image.open(hr_path) as hr_file:
                    img_hr = hr_file.convert('RGB')
                with Image.open(lr_path) as lr_file:
                    img_lr = lr_file.convert('RGB')

                w_lr, h_lr = img_lr.size

                # Too small to hold a single patch: keep the whole pair.
                if w_lr < lr_patch_size or h_lr < lr_patch_size:
                    img_hr.save(os.path.join(hr_dest_dir, f"{name_no_ext}_0.png"))
                    img_lr.save(os.path.join(lr_dest_dir, f"{name_no_ext}_0.png"))
                    continue

                patches_saved = 0
                attempts = 0
                max_attempts = 50

                while patches_saved < n_patches and attempts < max_attempts:
                    attempts += 1

                    x_lr = random.randint(0, w_lr - lr_patch_size)
                    y_lr = random.randint(0, h_lr - lr_patch_size)

                    patch_lr = img_lr.crop((x_lr, y_lr, x_lr + lr_patch_size, y_lr + lr_patch_size))

                    # Reject flat patches only during the first 29 attempts;
                    # after that any patch is accepted so flat images still
                    # contribute some samples.
                    if not is_patch_informative(patch_lr, threshold=50) and attempts < 30:
                        continue

                    x_hr = x_lr * scale
                    y_hr = y_lr * scale
                    patch_hr = img_hr.crop((x_hr, y_hr, x_hr + hr_patch_size, y_hr + hr_patch_size))

                    patch_hr.save(os.path.join(hr_dest_dir, f"{name_no_ext}_{patches_saved}.png"))
                    patch_lr.save(os.path.join(lr_dest_dir, f"{name_no_ext}_{patches_saved}.png"))
                    patches_saved += 1

            except Exception as e:
                print(f"Error in {img_name}: {e}")

    print("\nCopying Test set...")
    test_src_hr = os.path.join(src_root, 'HR', 'test')
    test_src_lr = os.path.join(src_root, 'LR', 'test')

    os.makedirs(os.path.join(dest_root, 'HR', 'test'), exist_ok=True)
    os.makedirs(os.path.join(dest_root, 'LR', 'test'), exist_ok=True)

    # Only the first 50 test images (by sorted name) are copied.
    test_files = sorted(os.listdir(test_src_hr))[:50]
    for fname in test_files:
        name_no_ext = os.path.splitext(fname)[0]
        shutil.copy2(os.path.join(test_src_hr, fname), os.path.join(dest_root, 'HR', 'test', fname))
        lr_match = _find_lr_counterpart(test_src_lr, name_no_ext)
        if lr_match:
            # The copy is always named "<stem>.png", whatever the source extension.
            shutil.copy2(lr_match, os.path.join(dest_root, 'LR', 'test', name_no_ext + ".png"))

    print(f"\nDone! Smart dataset (Informative Patches) is ready at: {dest_root}")


# ============================== Main Execution ==============================
if __name__ == "__main__":
    # Hard-coded, machine-specific paths. The destination folder is removed and rebuilt.
    src = r"D:\Projects\Enhance photo\final_data"
    dest = r"D:\Projects\Enhance photo\Mini_Dataset_Smart_Kaggle"
    
    prepare_smart_patches(src, dest)


Target: LR Patch=48px (Smart Filtering ON)

Processing train set...


 18%|█▊        | 567/3119 [07:03<44:53,  1.06s/it]  

Error in 5_347.png: image file is truncated


100%|██████████| 3119/3119 [30:42<00:00,  1.69it/s] 



Processing val set...


100%|██████████| 389/389 [03:08<00:00,  2.07it/s]



Copying Test set...

Done! Smart dataset (Informative Patches) is ready at: D:\Projects\Enhance photo\Mini_Dataset_Smart_Kaggle
