In [None]:

!pip install pydicom tqdm opencv-python-headless

import os, pathlib
import numpy as np
import cv2
from tqdm import tqdm
from concurrent.futures import ThreadPoolExecutor

# pydicom is treated as optional: HAS_PYDICOM records whether the import
# worked so that DICOM-specific code paths can be skipped when it did not.
try:
    import pydicom
    HAS_PYDICOM = True
except Exception:
    HAS_PYDICOM = False

# Mount Google Drive at /content/drive (Colab-only import); the input and
# output folders configured further down are located under this mount point.
from google.colab import drive
drive.mount('/content/drive')



def _tri_fuzzy_memberships(n_bins=256):
    """Return normalised (dark, mid, bright) membership curves.

    Each curve is a float32 array with one value per grey level in
    ``range(n_bins)``.  The break points (127, 64, 128) are fixed constants
    and do not scale with ``n_bins``.  The three raw curves are divided by
    their common sum (plus 1e-6 to avoid a zero denominator).
    """
    levels = np.arange(n_bins, dtype=np.float32)

    # Raw (un-normalised) membership functions.
    low = np.clip(1 - levels / 127.0, 0, 1)
    middle = np.maximum(1 - np.abs(levels - 127) / 64.0, 0)
    high = np.clip((levels - 128) / 127.0, 0, 1)

    total = low + middle + high + 1e-6
    return tuple(curve / total for curve in (low, middle, high))

def _smooth_hist(h, ksize=7):
    """Smooth a 1-D histogram with a Gaussian kernel, preserving its length.

    Parameters:
        h: 1-D sequence of bin counts.
        ksize: number of kernel taps; the Gaussian sigma is ``ksize / 3``.
            Values <= 1 disable smoothing.

    Returns:
        A float64 array with the same number of bins as ``h``; bin ``i`` of
        the result corresponds to bin ``i`` of the input.
    """
    h = np.asarray(h, dtype=np.float64).ravel()
    ksize = int(ksize)
    if ksize <= 1 or h.size == 0:
        return h.copy()

    # The kernel must stay 1-D.  An outer-product (2-D) kernel flattened to
    # ksize**2 taps would make the 'valid' correlation below return far fewer
    # bins than the input and shift them relative to the grey levels.
    sigma = ksize / 3
    offsets = np.arange(ksize, dtype=np.float64) - (ksize - 1) / 2.0
    k = np.exp(-(offsets ** 2) / (2.0 * sigma ** 2))
    k /= k.sum()

    # Pad ksize - 1 samples in total so that 'valid' yields len(h) outputs,
    # also for an even ksize (where the kernel centre falls between bins).
    left = ksize // 2
    right = ksize - 1 - left
    hp = np.pad(h, (left, right), mode='reflect')
    return np.correlate(hp, k, mode='valid')

def _find_minima(h, min_gap=32, max_regions=4, edge_margin=8):
    """Pick histogram valleys to use as split points between regions.

    A bin ``i`` is a valley when it is strictly lower than both direct
    neighbours and not higher than the bins two steps away.  Valleys are
    scanned left to right and accepted greedily.

    Parameters:
        h: 1-D histogram (indexable sequence of numbers).
        min_gap: minimum index distance between two accepted split points.
        max_regions: maximum number of regions, i.e. at most
            ``max_regions - 1`` split points are returned.
        edge_margin: valleys closer than this to either end of ``h`` are
            ignored (for 256 bins the accepted range is 8..247).

    Returns:
        Ascending list of split indices (possibly empty).
    """
    n = len(h)
    max_splits = max_regions - 1
    if max_splits <= 0 or n < 5:
        # One region needs no split; fewer than 5 bins cannot hold a valley.
        return []

    upper = n - 1 - edge_margin
    picks = []
    for i in range(2, n - 2):
        # Reject edge valleys *before* selection so that they neither use up
        # a split slot nor block nearby valid valleys through min_gap.
        if i < edge_margin or i > upper:
            continue
        is_valley = (
            h[i] < h[i - 1] and h[i] < h[i + 1]
            and h[i] <= h[i - 2] and h[i] <= h[i + 2]
        )
        if not is_valley:
            continue
        if picks and i - picks[-1] < min_gap:
            continue
        picks.append(i)
        if len(picks) >= max_splits:
            break
    return picks

def _equalize_region(cdf, lo, hi):
    """Build a 256-entry uint8 LUT that equalises levels ``lo..hi`` only.

    Inside the region, level ``v`` is mapped to
    ``lo + (cdf[v] - cdf[lo]) / (cdf[hi] - cdf[lo]) * (hi - lo)`` (rounded),
    so the region keeps its own output range.  Every other entry is the
    identity mapping.

    Parameters:
        cdf: 1-D cumulative distribution indexed by grey level.
        lo, hi: inclusive region bounds; clamped to the valid index range of
            ``cdf`` and to the 0..255 LUT range.

    Returns:
        ``np.ndarray`` of shape (256,), dtype uint8.  An empty or
        single-level region yields the identity LUT.
    """
    cdf = np.asarray(cdf)
    lut = np.arange(256, dtype=np.uint8)

    lo = max(0, int(lo))
    # Cap at 255 as well: the LUT has 256 entries even if cdf is longer.
    hi = min(int(hi), len(cdf) - 1, 255)
    if lo >= hi:
        return lut

    c_lo, c_hi = cdf[lo], cdf[hi]
    den = max(c_hi - c_lo, 1e-8)  # guard against a region with no mass

    frac = (cdf[lo:hi + 1] - c_lo) / den
    mapped = np.round(frac * (hi - lo) + lo)
    # Clip while still floating point; clipping after the uint8 cast cannot
    # undo a wraparound that has already happened.
    lut[lo:hi + 1] = np.clip(mapped, 0, 255).astype(np.uint8)
    return lut


def _apply_dynamic_fuzzy_he(gray_uint8, blend=0.6, ksize=11):
    """Region-wise histogram equalisation with mean preservation.

    Steps: build the grey-level histogram, weight and smooth it, split it at
    detected valleys, equalise every region separately through a lookup
    table, shift the result so its mean matches the input mean, and finally
    mix it with the input (``blend`` = weight of the equalised image).

    Returns a uint8 array.
    """
    raw_hist = cv2.calcHist([gray_uint8], [0], None, [256], [0, 256])
    raw_hist = raw_hist.flatten().astype(np.float32)

    # NOTE(review): the three memberships are each divided by their common
    # sum, so this weight is ~1 for every level and barely changes the
    # histogram -- confirm whether a different weighting was intended.
    dark, mid, bright = _tri_fuzzy_memberships(256)
    weights = dark + mid + bright
    smooth = _smooth_hist(raw_hist * weights, ksize=ksize)

    # Valleys become region borders: [0, c0], [c0 + 1, c1], ..., [ck + 1, 255].
    cut_points = _find_minima(smooth, min_gap=32, max_regions=4)
    starts = [0] + [c + 1 for c in cut_points]
    stops = list(cut_points) + [255]

    cdf = np.cumsum(smooth / (smooth.sum() + 1e-8))

    table = np.arange(256, dtype=np.uint8)
    for lo, hi in zip(starts, stops):
        table[lo:hi + 1] = _equalize_region(cdf, lo, hi)[lo:hi + 1]

    src = gray_uint8.astype(np.uint8)
    equalized = cv2.LUT(src, table).astype(np.float32)

    # Brightness preservation: move the equalised mean back to the input mean.
    shift = float(src.mean()) - float(equalized.mean())
    shifted = np.clip(equalized + shift, 0, 255)

    mixed = (1.0 - blend) * src.astype(np.float32) + blend * shifted
    return np.clip(mixed, 0, 255).astype(np.uint8)

def bpdfhe_gray(gray_uint8, blend=0.6, ksize=11):
    """Public entry point: forward to ``_apply_dynamic_fuzzy_he`` unchanged."""
    enhanced = _apply_dynamic_fuzzy_he(gray_uint8, ksize=ksize, blend=blend)
    return enhanced

def load_cxr_as_float(path):
    """Load an image file as a float32 array.

    ``.dcm`` files are read through pydicom when it is available, and
    MONOCHROME1 data is inverted (``max - value``).  Everything else goes
    through ``cv2.imread`` with IMREAD_UNCHANGED; 3-dimensional results are
    converted with COLOR_BGR2GRAY.

    Raises:
        ValueError: if ``cv2.imread`` cannot read the file.
    """
    suffix = pathlib.Path(path).suffix.lower()
    use_dicom = HAS_PYDICOM and suffix == ".dcm"

    if not use_dicom:
        raster = cv2.imread(path, cv2.IMREAD_UNCHANGED)
        if raster is None:
            raise ValueError(f"Cannot read {path}")
        gray = cv2.cvtColor(raster, cv2.COLOR_BGR2GRAY) if raster.ndim == 3 else raster
        return gray.astype(np.float32)

    dataset = pydicom.dcmread(path)
    pixels = dataset.pixel_array.astype(np.float32)
    # A missing PhotometricInterpretation is treated as MONOCHROME2.
    if getattr(dataset, "PhotometricInterpretation", "MONOCHROME2") == "MONOCHROME1":
        pixels = np.max(pixels) - pixels
    return pixels

def robust_uint8(img_f, p_low=1.0, p_high=99.0):
    """Stretch ``img_f`` between two percentiles and convert to uint8.

    Values at or below the ``p_low`` percentile become 0, values at or above
    the ``p_high`` percentile become 255.  If the percentiles do not span a
    positive range, the min/max of the image are used instead; if those are
    equal too, a span of 1.0 is assumed.  The float-to-uint8 conversion
    truncates.
    """
    bounds = np.percentile(img_f, [p_low, p_high])
    lo, hi = bounds[0], bounds[1]

    if hi <= lo:
        # Degenerate percentiles: fall back to the full value range.
        lo = float(img_f.min())
        hi = float(img_f.max())
        if hi <= lo:
            # Constant image: any positive span avoids division by zero.
            hi = lo + 1.0

    unit = (img_f - lo) / (hi - lo)
    scaled = np.clip(unit, 0, 1) * 255.0
    return scaled.astype(np.uint8)

def preprocess_cxr_bpdfhe(path, blend=0.6, ksize=11, p_low=1.0, p_high=99.0):
    """Run the full per-image pipeline: load -> uint8 stretch -> equalise."""
    raw = load_cxr_as_float(path)
    scaled = robust_uint8(raw, p_low=p_low, p_high=p_high)
    return bpdfhe_gray(scaled, blend=blend, ksize=ksize)

def _is_supported(path):
    """Tell whether ``path`` has one of the accepted image extensions.

    The comparison is case-insensitive and looks at the final suffix only.
    """
    suffix = pathlib.Path(path).suffix.lower()
    return suffix in {".png", ".jpg", ".jpeg", ".tif", ".tiff", ".dcm"}

def collect_files(root):
    """Recursively list every regular file under ``root`` that has a
    supported extension, as ``pathlib.Path`` objects."""
    found = []
    for entry in pathlib.Path(root).rglob("*"):
        if not entry.is_file():
            continue
        if _is_supported(entry):
            found.append(entry)
    return found

def save_jpg_gray(out_path, img_u8, quality=95):
    """Write ``img_u8`` to ``out_path`` as a JPEG.

    Parameters:
        out_path: destination file (str or path-like; converted with str()).
        img_u8: image array handed to ``cv2.imwrite``.
        quality: JPEG quality passed as IMWRITE_JPEG_QUALITY.

    Raises:
        OSError: if ``cv2.imwrite`` reports that nothing was written.
    """
    params = [int(cv2.IMWRITE_JPEG_QUALITY), int(quality)]
    # cv2.imwrite signals failure through its boolean return value; ignoring
    # it would let a failed write be reported as a success.
    if not cv2.imwrite(str(out_path), img_u8, params):
        raise OSError(f"cv2.imwrite failed for {out_path}")

def process_one(src_path, src_root, dst_root, blend, ksize, p_low, p_high, quality):
    """Convert one source image and store it as JPEG under ``dst_root``.

    The output keeps the path of ``src_path`` relative to ``src_root`` with
    the extension replaced by ``.jpg``.  Note that two sources which differ
    only in extension (e.g. ``x.png`` and ``x.dcm`` in one folder) therefore
    map to the same output file.

    Returns:
        ``(True, relative_path)`` on success, or
        ``(False, "<relative_path> => <error>")`` when anything fails.
    """
    rel = src_path.relative_to(src_root)
    out_path = dst_root / rel.parent / (rel.stem + ".jpg")
    try:
        # Directory creation is inside the try block so that a filesystem
        # error is reported for this file instead of escaping the worker and
        # aborting the whole batch.
        out_path.parent.mkdir(parents=True, exist_ok=True)
        out = preprocess_cxr_bpdfhe(str(src_path), blend=blend, ksize=ksize,
                                    p_low=p_low, p_high=p_high)
        save_jpg_gray(out_path, out, quality=quality)
    except Exception as e:  # per-file boundary: report the error, keep going
        return False, f"{rel} => {e}"
    return True, str(rel)

def process_folder_cxr_to_jpg(src_root, dst_root,
                              blend=0.6, ksize=11,
                              p_low=1.0, p_high=99.0,
                              quality=95, workers=4):
    """Convert every supported image below ``src_root`` into a JPEG below
    ``dst_root`` using a thread pool of ``workers`` threads.

    Prints a summary (and up to 25 error messages) and returns the tuple
    ``(number_of_successes, number_of_failures)``.
    """
    src_root = pathlib.Path(src_root)
    dst_root = pathlib.Path(dst_root)
    sources = collect_files(src_root)

    n_ok = 0
    failures = []
    with ThreadPoolExecutor(max_workers=workers) as pool:
        pending = [
            pool.submit(process_one, path, src_root, dst_root,
                        blend, ksize, p_low, p_high, quality)
            for path in sources
        ]
        # Results are consumed in submission order.
        for job in tqdm(pending, total=len(pending), desc="BPDFHE -> JPG"):
            succeeded, message = job.result()
            if succeeded:
                n_ok += 1
            else:
                failures.append(message)

    n_fail = len(failures)
    print(f"\nDone. Success: {n_ok}, Failed: {n_fail}")
    if n_fail:
        print("Some errors (first 25):")
        for message in failures[:25]:
            print(" -", message)
    return n_ok, n_fail


# Source folder (scanned recursively) and destination folder for the JPEGs;
# both are located on the mounted Google Drive.
input_dir = "/content/drive/MyDrive/dp/data/oiiiii"
output_dir = "/content/drive/MyDrive/dp/data/sajib"
# Make sure the destination exists before the batch starts.
os.makedirs(output_dir, exist_ok=True)

# Tuning parameters
BLEND = 0.6                # weight of the equalised image in the final mix
KSIZE = 11                 # kernel size used when smoothing the histogram
P_LOW, P_HIGH = 1.0, 99.0  # percentiles mapped to 0 and 255 before equalising
JPEG_QUALITY = 95          # JPEG quality passed to cv2.imwrite
WORKERS = 4                # number of threads in the pool

# Run
ok, fail = process_folder_cxr_to_jpg(
    input_dir, output_dir,
    blend=BLEND, ksize=KSIZE,
    p_low=P_LOW, p_high=P_HIGH,
    quality=JPEG_QUALITY, workers=WORKERS
)

print("All done ✅")