In [None]:
import kagglehub
import cv2
import numpy as np
from pathlib import Path
import os
import random
from scipy.ndimage import gaussian_filter, distance_transform_edt
import matplotlib.pyplot as plt

# Fetch the "human-faces" dataset through kagglehub. The value returned by
# dataset_download is kept in `path` and handed to the generator in the
# __main__ section of this cell.
path = kagglehub.dataset_download("ashwingupta3012/human-faces")
print("Path to dataset files:", path)

class RealisticBurnGenerator:
    def __init__(self, output_dir="burn_augmented", seed=None):
        if seed is not None:
            random.seed(seed)
            np.random.seed(seed)
        self.output_dir = output_dir
        os.makedirs(output_dir, exist_ok=True)
        os.makedirs(f"{output_dir}/original", exist_ok=True)
        os.makedirs(f"{output_dir}/with_burns", exist_ok=True)

    def detect_face_region(self, img):
        """Locate the largest frontal face and return a slightly shrunken box.

        Args:
            img: Image passed to ``cv2.cvtColor(..., COLOR_BGR2GRAY)``, i.e. it
                is treated as BGR.

        Returns:
            dict with integer keys ``x``, ``y``, ``w``, ``h``, ``center_x`` and
            ``center_y``. When a face is detected the box is the detection
            shrunk by 8% on every side. When nothing is detected a fixed
            central box (40% of the width, 50% of the height) is returned.
        """
        h, w = img.shape[:2]

        # Parsing the cascade XML is costly, so load it once per instance
        # instead of once per image.
        face_cascade = getattr(self, '_face_cascade', None)
        if face_cascade is None:
            face_cascade = cv2.CascadeClassifier(
                cv2.data.haarcascades + 'haarcascade_frontalface_default.xml'
            )
            self._face_cascade = face_cascade

        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        faces = face_cascade.detectMultiScale(gray, 1.1, 5, minSize=(60, 60))

        if len(faces) > 0:
            # Keep the detection with the largest area; convert the NumPy
            # integers to plain ints so downstream code gets ordinary ints.
            largest = max(faces, key=lambda box: box[2] * box[3])
            x, y, fw, fh = (int(v) for v in largest)

            # Trim 8% from each side so the region sits inside the face.
            shrink_w = int(fw * 0.08)
            shrink_h = int(fh * 0.08)
            x0 = x + shrink_w
            y0 = y + shrink_h
            fw = fw - 2 * shrink_w
            fh = fh - 2 * shrink_h
            return {
                'x': x0,
                'y': y0,
                'w': fw,
                'h': fh,
                'center_x': x0 + fw // 2,
                'center_y': y0 + fh // 2,
            }

        # No face found: fall back to a box around the image centre.
        return {
            'x': int(w * 0.3),
            'y': int(h * 0.25),
            'w': int(w * 0.4),
            'h': int(h * 0.5),
            'center_x': w // 2,
            'center_y': h // 2,
        }

    def create_face_mask(self, img_shape, face_region):
        """Build a soft float32 mask covering the face region.

        The mask is the element-wise maximum of a blurred rectangle (scaled to
        0.7) and a blurred ellipse inscribed in the same box, blurred once more.

        Args:
            img_shape: Shape of the image; only the first two entries are used.
            face_region: dict with ``x``, ``y``, ``w``, ``h``, ``center_x`` and
                ``center_y``.

        Returns:
            2-D float32 array of shape ``img_shape[:2]``.
        """
        height, width = img_shape[:2]
        left, top = face_region['x'], face_region['y']
        box_w, box_h = face_region['w'], face_region['h']

        # Rectangle covering the box, softened at the edges.
        rect = np.zeros((height, width), dtype=np.float32)
        rect[top:top + box_h, left:left + box_w] = 1.0
        rect = cv2.GaussianBlur(rect, (0, 0), sigmaX=10, sigmaY=10)

        # Ellipse with semi-axes of half the box size, centred on the region.
        centre_y, centre_x = face_region['center_y'], face_region['center_x']
        rows, cols = np.mgrid[0:height, 0:width]
        semi_x = box_w / 2.0
        semi_y = box_h / 2.0
        inside = (((cols - centre_x) / semi_x) ** 2 + ((rows - centre_y) / semi_y) ** 2) <= 1.0
        oval = cv2.GaussianBlur(inside.astype(np.float32), (0, 0), sigmaX=8, sigmaY=8)

        blended = np.maximum(rect * 0.7, oval)
        return cv2.GaussianBlur(blended, (0, 0), sigmaX=6, sigmaY=6)

    def fractal_noise(self, shape, octaves=4, persistence=0.5):
        h, w = shape
        noise = np.zeros((h, w), dtype=np.float32)
        frequency = 1.0
        amplitude = 1.0
        for _ in range(octaves):
            n = np.random.rand(h, w)
            n = gaussian_filter(n, sigma=max(1.0, (1.0 / frequency) * 3.0))
            noise += n * amplitude
            amplitude *= persistence
            frequency *= 2.0
        noise = (noise - noise.min()) / (noise.max() - noise.min() + 1e-12)
        return noise

    def create_burn_mask(self, img_shape, face_region, face_boundary_mask, size_factor=0.33):
        """Generate an irregular uint8 burn mask inside the face region.

        A central elliptical falloff plus 4-7 smaller random blobs is modulated
        by fractal noise, limited by ``face_boundary_mask``, smoothed,
        normalised and thresholded, then multiplied by an eroded copy of the
        boundary mask.

        Args:
            img_shape: Image shape; only the first two entries are used.
            face_region: dict with ``x``, ``y``, ``w`` and ``h``.
            face_boundary_mask: 2-D mask multiplied into the burn mask.
            size_factor: Fraction of the face box used as the base radius.

        Returns:
            2-D uint8 array (values scaled by 255).
        """
        img_h, img_w = img_shape[:2]
        left, top = face_region['x'], face_region['y']
        box_w, box_h = face_region['w'], face_region['h']

        # Burn centre lies in the middle half of the box; radii are 70-100%
        # of size_factor times the box size.
        centre_x = random.randint(left + box_w // 4, left + 3 * box_w // 4)
        centre_y = random.randint(top + box_h // 4, top + 3 * box_h // 4)
        radius_x = int(box_w * size_factor * random.uniform(0.7, 1.0))
        radius_y = int(box_h * size_factor * random.uniform(0.7, 1.0))

        rows, cols = np.mgrid[0:img_h, 0:img_w]

        def falloff(px, py, rx, ry):
            # 1 at (px, py), dropping to 0 on the ellipse with semi-axes rx, ry.
            dist_sq = ((cols - px) / (rx + 1e-6)) ** 2 + ((rows - py) / (ry + 1e-6)) ** 2
            return np.clip(1.0 - dist_sq, 0.0, 1.0)

        core = falloff(centre_x, centre_y, radius_x, radius_y)

        # Smaller satellite blobs scattered around the centre.
        blob_count = random.randint(4, 7)
        satellites = np.zeros_like(core)
        for _ in range(blob_count):
            blob_x = centre_x + random.randint(-radius_x // 2, radius_x // 2)
            blob_y = centre_y + random.randint(-radius_y // 2, radius_y // 2)
            blob_rx = int(radius_x * random.uniform(0.4, 0.8))
            blob_ry = int(radius_y * random.uniform(0.4, 0.8))
            satellites += falloff(blob_x, blob_y, blob_rx, blob_ry)

        burn = np.clip(core * 1.3 + satellites * 0.7, 0.0, 1.0)

        # Break up the smooth shape with low-frequency noise.
        texture = self.fractal_noise((img_h, img_w), octaves=5, persistence=0.6)
        burn *= gaussian_filter(texture, sigma=6.0) * 1.2 + 0.3

        burn = burn * face_boundary_mask
        burn = gaussian_filter(burn, sigma=8.0)
        burn = (burn - burn.min()) / (burn.max() - burn.min() + 1e-12)
        # Discard the faint outer 13% and rescale the rest to [0, 1].
        burn = np.clip((burn - 0.13) / (1 - 0.13), 0.0, 1.0)

        # Eroded boundary mask pulls the burn further inside the face area.
        eroded_boundary = cv2.erode(face_boundary_mask, np.ones((15, 15), np.uint8), iterations=1)
        burn = burn * eroded_boundary
        return (burn * 255).astype(np.uint8)

    def apply_burn(self, img, mask, severity='severe'):
        result = img.copy().astype(np.float32)
        h, w = img.shape[:2]
        lab = cv2.cvtColor(img.astype(np.uint8), cv2.COLOR_BGR2LAB).astype(np.float32)
        mask_float = mask.astype(np.float32) / 255.0
        feathered = cv2.GaussianBlur(mask_float, (0, 0), sigmaX=15, sigmaY=15, borderType=cv2.BORDER_REFLECT)
        dil = cv2.dilate(mask, np.ones((25, 25), np.uint8), iterations=1)
        surround = (dil > 0) & (mask == 0)
        surround_pixels = lab[surround]
        if len(surround_pixels) > 0:
            base_L, base_A, base_B = np.median(surround_pixels, axis=0)
        else:
            base_L, base_A, base_B = 150.0, 128.0, 128.0
        params = {
            # Intensify parameters for a harsher effect
            'severe': {'L_mult': 0.45, 'A_shift': 38, 'B_shift': -24, 'char_factor': 0.9,'texture_amp': 0.98},
        }
        p = params['severe']
        ys, xs = np.where(mask > 0)
        if len(ys) == 0:
            return img
        y0, y1 = max(0, ys.min()-10), min(h, ys.max()+10)
        x0, x1 = max(0, xs.min()-10), min(w, xs.max()+10)
        sub_h, sub_w = y1 - y0, x1 - x0
        noise = self.fractal_noise((sub_h, sub_w), octaves=5, persistence=0.65)
        noise = gaussian_filter(noise, sigma=1.2)
        noise = (noise - noise.min()) / (noise.max() - noise.min() + 1e-12)
        mask_sub = (mask[y0:y1, x0:x1] / 255.0).astype(np.float32)
        dist = distance_transform_edt(mask_sub)
        centroid_weight = (dist.max() - dist) / (dist.max() + 1e-6)
        centroid_weight = (centroid_weight - centroid_weight.min()) / (centroid_weight.max() - centroid_weight.min() + 1e-12)
        for dy in range(sub_h):
            for dx in range(sub_w):
                my = y0 + dy
                mx = x0 + dx
                alpha = feathered[my, mx]
                if alpha < 0.02: continue
                tex = noise[dy, dx]
                center_factor = centroid_weight[dy, dx] if centroid_weight.size else 0.0
                # Strong color & luminance modifications
                L_target = base_L * (p['L_mult'] * (0.91 + tex * p['texture_amp']))
                A_target = base_A + p['A_shift'] * (0.76 + tex * 0.8) * (1 - center_factor * p['char_factor'])
                B_target = base_B + p['B_shift'] * (0.82 + tex * 0.6) - center_factor * 13.0 * p['char_factor']
                if p['char_factor'] > 0 and center_factor > 0.5:
                    char_mix = min(1.0, (center_factor - 0.5) * 2.0 * p['char_factor'])
                    L_target *= (0.33 + 0.67 * (1 - char_mix))
                    A_target = base_A + (A_target - base_A) * (1 - 0.95 * char_mix)
                    B_target -= 22.0 * char_mix
                L_target *= random.uniform(0.95, 1.05)
                A_target += random.uniform(-2.5, 2.5)
                B_target += random.uniform(-1.7, 1.7)
                lab_val = lab[my, mx]
                lab[my, mx, 0] = lab_val[0] * (1 - alpha) + L_target * alpha
                lab[my, mx, 1] = lab_val[1] * (1 - alpha) + A_target * alpha
                lab[my, mx, 2] = lab_val[2] * (1 - alpha) + B_target * alpha
        lab_clipped = np.clip(lab, 0, 255).astype(np.uint8)
        result = cv2.cvtColor(lab_clipped, cv2.COLOR_LAB2BGR).astype(np.float32)
        bumps = gaussian_filter(self.fractal_noise((sub_h, sub_w), octaves=6, persistence=0.6), sigma=1.0)
        bumps = (bumps - bumps.min()) / (bumps.max() - bumps.min() + 1e-12)
        blister_mask = (bumps > 0.75).astype(np.float32) * mask_sub
        spec = np.zeros_like(result, dtype=np.float32)
        spec_val = 42.0  # harsh specular highlights
        for c in range(3):
            spec[y0:y1, x0:x1, c] += blister_mask * spec_val
        spec = gaussian_filter(spec, sigma=1.1)
        result = np.clip(result + spec * 0.82, 0, 255)
        blur = cv2.GaussianBlur(result, (0, 0), sigmaX=15, sigmaY=15)
        mask_3ch = np.dstack([feathered, feathered, feathered])
        result = result * (1 - mask_3ch * 0.53) + blur * (mask_3ch * 0.53)
        result = np.clip(result, 0, 255).astype(np.uint8)
        hsv = cv2.cvtColor(result, cv2.COLOR_BGR2HSV).astype(np.float32)
        hsv[..., 1] = hsv[..., 1] * (1 - 0.24 * mask_3ch[..., 0])
        result = cv2.cvtColor(np.clip(hsv, 0, 255).astype(np.uint8), cv2.COLOR_HSV2BGR)
        return result

    def generate_burn(self, img, severity='severe'):
        """Run the full pipeline on one image and return the burned result.

        Detects the face region, builds the face boundary mask and a burn mask
        (with the default ``size_factor``), then applies the burn.
        """
        region = self.detect_face_region(img)
        boundary = self.create_face_mask(img.shape, region)
        return self.apply_burn(
            img,
            self.create_burn_mask(img.shape, region, boundary),
            severity=severity,
        )

    def process_all_images(self, dataset_path, max_images=3000):
        image_extensions = ['.jpg', '.jpeg', '.png', '.bmp']
        image_files = []
        for ext in image_extensions:
            image_files.extend(Path(dataset_path).rglob(f'*{ext}'))
            image_files.extend(Path(dataset_path).rglob(f'*{ext.upper()}'))
        image_files = list(dict.fromkeys([str(p) for p in image_files]))[:max_images]
        total_images = len(image_files)
        print(f"Found {total_images} images in dataset")
        print(f"Processing ONLY first {total_images} images with SEVERE burns...\n")
        stats = {'original': 0, 'burns_severe': 0, 'failed': 0}
        for idx, img_path in enumerate(image_files):
            try:
                img = cv2.imread(img_path)
                if img is None:
                    stats['failed'] += 1
                    print(f"âœ— {idx+1}/{total_images}: Could not read {Path(img_path).name}")
                    continue
                h, w = img.shape[:2]
                if h > 1000 or w > 1000:
                    scale = min(1000/h, 1000/w)
                    img = cv2.resize(img, (int(w*scale), int(h*scale)))
                base = f"img_{idx:05d}"
                cv2.imwrite(f"{self.output_dir}/original/{base}_orig.jpg", img)
                stats['original'] += 1
                burned = self.generate_burn(img.copy(), severity='severe')
                cv2.imwrite(f"{self.output_dir}/with_burns/{base}_severe.jpg", burned)
                stats['burns_severe'] += 1
                if (idx + 1) % 10 == 0 or (idx + 1) == total_images:
                    print(f"âœ“ Progress: {idx+1}/{total_images} ({(idx+1)/total_images*100:.1f}%)")
            except Exception as e:
                stats['failed'] += 1
                print(f"âœ— Error processing {Path(img_path).name}: {e}")
        print("\n" + "="*60)
        print("PROCESSING COMPLETE")
        print("="*60)
        for k, v in stats.items():
            print(f"  {k}: {v}")
        print(f"\nTotal images processed: {stats['original']}")
        print(f"Total burn variations created: {stats['burns_severe']}")
        return stats

    def create_comparison(self, dataset_path, num_samples=3):
        image_files = []
        for ext in ['.jpg', '.jpeg', '.png']:
            image_files.extend(list(Path(dataset_path).rglob(f'*{ext}'))[:num_samples])
        image_files = list(dict.fromkeys([str(p) for p in image_files]))[:num_samples]
        if not image_files:
            print("No images found")
            return
        fig, axes = plt.subplots(len(image_files), 2, figsize=(9, 4.5 * len(image_files)))
        if len(image_files) == 1:
            axes = axes.reshape(1, -1)
        for idx, img_path in enumerate(image_files):
            img = cv2.imread(img_path)
            img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            h, w = img_rgb.shape[:2]
            if h > 400:
                scale = 400 / h
                img_rgb = cv2.resize(img_rgb, (int(w*scale), int(h*scale)))
                img = cv2.cvtColor(img_rgb, cv2.COLOR_RGB2BGR)
            axes[idx, 0].imshow(img_rgb)
            axes[idx, 0].set_title('Original', fontsize=12)
            axes[idx, 0].axis('off')
            severe = self.generate_burn(img.copy(), 'severe')
            axes[idx, 1].imshow(cv2.cvtColor(severe, cv2.COLOR_BGR2RGB))
            axes[idx, 1].set_title('Severe Burn', fontsize=11)
            axes[idx, 1].axis('off')
        plt.tight_layout()
        out = f'{self.output_dir}/comparison.png'
        plt.savefig(out, dpi=150, bbox_inches='tight')
        plt.show()
        print(f"âœ“ Saved: {out}")

if __name__ == "__main__":
    # Single source of truth for the image cap used in the run and messages.
    max_images = 3000

    print("🔬 Realistic Burn Generator (Face-Constrained)")
    print("=" * 60 + "\n")
    gen = RealisticBurnGenerator(output_dir="burn_augmented", seed=42)

    print("📊 Creating comparison grid...\n")
    gen.create_comparison(path, num_samples=3)

    print(f"\n🔄 Processing ONLY first {max_images} images in dataset with SEVERE burns...\n")
    stats = gen.process_all_images(path, max_images=max_images)

    # Report the number actually written instead of assuming every image
    # was found and processed successfully.
    print(f"\n✅ Done! {stats['burns_severe']} images processed with severe and slightly larger burns strictly confined to facial regions!")


In [None]:
import shutil

def create_zip_from_folder(folder_path, zip_name='burn_output.zip'):
    """
    Compress the entire folder into a ZIP archive.

    Args:
        folder_path: Folder whose contents are archived.
        zip_name: Name of the archive; a trailing ``.zip`` is optional.

    Returns:
        The archive path returned by ``shutil.make_archive``.
    """
    # make_archive appends the extension itself, so strip only a trailing
    # ".zip" (str.replace would also remove occurrences inside the name).
    suffix = '.zip'
    base_name = zip_name[:-len(suffix)] if zip_name.endswith(suffix) else zip_name
    # Archive the folder the caller asked for rather than a hard-coded path.
    return shutil.make_archive(base_name=base_name, format='zip', root_dir=folder_path)


# Bundle the burn_augmented output folder into burn_output.zip.
create_zip_from_folder('burn_augmented', 'burn_output.zip')
print("🚀 ZIP file created: burn_output.zip")