In [None]:
import numpy as np
import cv2
import os
from tqdm import tqdm
import matplotlib.pyplot as plt

In [None]:
# Folder containing the chest X-ray images to process.
INPUT_DIR = "/kaggle/input/chest-xray-pneumonia/chest_xray/test/NORMAL"

# Folder where the generated weak masks are written; created if it is missing.
OUTPUT_DIR = "/kaggle/working/weak_masks"
os.makedirs(name=OUTPUT_DIR, exist_ok=True)

In [None]:
def generate_weak_mask_cxr(img_path):
    """Generate a weak binary mask for a chest X-ray with classical image processing.

    Steps: load as grayscale, resize to 512x512, whiten pixels outside the
    intensity band [50, 180], apply CLAHE and a Gaussian blur, run a series of
    openings with thin rotated elliptical structuring elements, invert and
    normalize, threshold at 127, zero out the top and side borders, smooth with
    an open/close pair, keep only sufficiently large connected components that
    do not start in the top band, and resize back to the original resolution.

    Args:
        img_path: Path of an image file readable by ``cv2.imread``.

    Returns:
        A ``np.uint8`` array with the original image's (height, width) whose
        values are 0 (background) or 1 (mask).

    Raises:
        OSError: If the file is missing or cannot be decoded as an image.
    """
    # 1. Load. cv2.imread signals failure by returning None instead of raising,
    #    so check explicitly to fail with a message that names the file.
    img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
    if img is None:
        raise OSError(f"Could not read image (missing or undecodable): {img_path!r}")
    h, w = img.shape
    img = cv2.resize(img, (512, 512))

    # 2. Turn everything outside the [50, 180] intensity band white to make it
    #    easier to obtain a good mask.
    img[(img < 50) | (img > 180)] = 255

    # 3. Contrast equalization + smoothing.
    clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
    I = clahe.apply(img)
    I = cv2.GaussianBlur(I, (7, 7), 0)

    # Openings with a thin 15x15 ellipse rotated in 30-degree steps (0..150),
    # keeping the pointwise minimum after each orientation.
    se_size = 15
    se_center = (se_size // 2, se_size // 2)
    for ang in range(0, 151, 30):
        se = np.zeros((se_size, se_size), np.uint8)
        cv2.ellipse(se, se_center, (se_size // 2, 1), ang, 0, 360, 1, -1)
        I_supp = cv2.morphologyEx(I, cv2.MORPH_OPEN, se)
        I = cv2.min(I, I_supp)

    # 4. Invert so that opacities become bright, stretched to the full 0-255 range.
    I_inv = cv2.normalize(255 - I, None, 0, 255, cv2.NORM_MINMAX)

    # 5. Fixed global threshold.
    _, bw = cv2.threshold(I_inv, 127, 255, cv2.THRESH_BINARY)

    # 6. Clear regions outside the lungs: a strip at the top and at both sides
    #    that (usually) does not contain lung.
    bw[:40, :] = 0
    bw[:, -70:] = 0
    bw[:, :70] = 0

    # 7. Morphological open/close to smooth the binary mask.
    kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (9, 9))
    bw = cv2.morphologyEx(bw, cv2.MORPH_OPEN, kernel)
    bw = cv2.morphologyEx(bw, cv2.MORPH_CLOSE, kernel)

    # 8. Keep inner regions: drop components that are very small or whose
    #    bounding box starts inside the top band.
    _, labels, stats, _ = cv2.connectedComponentsWithStats(bw)
    keep = (stats[:, cv2.CC_STAT_AREA] > 500) & (stats[:, cv2.CC_STAT_TOP] > 39)
    keep[0] = False  # label 0 is the background component
    # Use the per-label decision as a lookup table indexed by the label image.
    mask_out = np.where(keep[labels], 255, 0).astype(np.uint8)

    # 9. Back to the original size. Nearest-neighbour keeps the mask strictly
    #    binary; an interpolating resize followed by "> 0" would grow the mask
    #    at every edge.
    mask_out = cv2.resize(mask_out, (w, h), interpolation=cv2.INTER_NEAREST)
    return (mask_out > 0).astype(np.uint8)

In [None]:
# Generate binary masks (values 0 or 255), one PNG per input image.
# Only image files are processed so that stray entries in INPUT_DIR (hidden
# files, sub-folders, metadata) cannot crash the run; sorting makes the
# processing order deterministic.
IMAGE_EXTENSIONS = (".png", ".jpg", ".jpeg", ".bmp", ".tif", ".tiff")
image_files = sorted(
    name for name in os.listdir(INPUT_DIR)
    if name.lower().endswith(IMAGE_EXTENSIONS)
)

for filename in tqdm(image_files):
    img_path = os.path.join(INPUT_DIR, filename)
    mask = generate_weak_mask_cxr(img_path)

    # The mask is 0/1; scale to 0/255 so it is visible when saved as PNG.
    out_path = os.path.join(OUTPUT_DIR, filename.rsplit(".", 1)[0] + ".png")
    # cv2.imwrite reports failure through its return value, not an exception.
    if not cv2.imwrite(out_path, mask * 255):
        raise OSError(f"Could not write mask: {out_path!r}")

100%|██████████| 138/138 [00:37<00:00,  3.72it/s]


In [None]:
# Show one input image side by side with its generated weak mask.
sample = "MCUCXR_0001_0.png"

sample_img_path = os.path.join(INPUT_DIR, sample)
sample_mask_path = os.path.join(OUTPUT_DIR, sample.rsplit(".", 1)[0] + ".png")

img = cv2.imread(sample_img_path, cv2.IMREAD_GRAYSCALE)
mask = cv2.imread(sample_mask_path, cv2.IMREAD_GRAYSCALE)

# cv2.imread returns None on failure instead of raising; check here so a wrong
# sample name or a missing mask is reported clearly rather than as a plotting error.
if img is None:
    raise FileNotFoundError(f"Sample image not found or unreadable: {sample_img_path!r}")
if mask is None:
    raise FileNotFoundError(f"Mask not found or unreadable: {sample_mask_path!r}")

fig, (ax_img, ax_mask) = plt.subplots(1, 2, figsize=(12, 6))

ax_img.set_title("Imagem Original")
ax_img.imshow(img, cmap="gray")
ax_img.axis("off")

ax_mask.set_title("Weak Label Gerada")
ax_mask.imshow(mask, cmap="gray")
ax_mask.axis("off")

plt.show()