In [3]:
import os
import cv2
from tqdm import tqdm
import imutils

def crop_img(img):
    """Crop a BGR image to the bounding box of its largest bright region.

    The image is converted to grayscale, lightly blurred, and thresholded
    at intensity 45; the largest external contour of the resulting mask
    defines the crop rectangle.

    Args:
        img: Image array in BGR channel order (as returned by
            ``cv2.imread``).

    Returns:
        A copy of the sub-image covering the largest contour's bounding
        box (inclusive of its extreme pixels). If no contour is found,
        the whole image resized to 256x256 is returned instead.
    """
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    gray = cv2.GaussianBlur(gray, (3, 3), 0)

    # Binary mask of pixels brighter than 45; erode then dilate to drop
    # small speckles before contour detection.
    thresh = cv2.threshold(gray, 45, 255, cv2.THRESH_BINARY)[1]
    thresh = cv2.erode(thresh, None, iterations=2)
    thresh = cv2.dilate(thresh, None, iterations=2)

    cnts = cv2.findContours(thresh.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    cnts = imutils.grab_contours(cnts)

    if len(cnts) == 0:
        # Nothing above the threshold: fall back to the full frame.
        return cv2.resize(img, (256, 256))

    largest = max(cnts, key=cv2.contourArea)
    # Contour points are stored as (x, y) pairs along the last axis.
    xs = largest[:, :, 0]
    ys = largest[:, :, 1]
    left, right = int(xs.min()), int(xs.max())
    top, bottom = int(ys.min()), int(ys.max())

    # The extreme coordinates are pixels that belong to the contour, so the
    # slice end must be +1 to include them. This also guarantees a non-empty
    # crop when the contour is a single row or column of pixels.
    return img[top:bottom + 1, left:right + 1].copy()

if __name__ == "__main__":
    # Preprocess every image under archive/<split>/<class>/ by cropping it
    # with crop_img, resizing to IMG_SIZE x IMG_SIZE, and writing it to the
    # mirrored location under cleaned/<split>/<class>/.

    # Base path to the dataset
    base_path = r"C:\Users\karin\Documents\Federated_learning"

    IMG_SIZE = 256

    # Both splits share the same layout, so handle them with one loop.
    for split in ("Training", "Testing"):
        split_path = os.path.join(base_path, "archive", split)

        for class_name in tqdm(os.listdir(split_path), desc=f"Processing {split}"):
            input_dir = os.path.join(split_path, class_name)
            # Skip stray files in the split folder; only class
            # sub-directories contain images.
            if not os.path.isdir(input_dir):
                continue

            output_dir = os.path.join(base_path, "cleaned", split, class_name)
            os.makedirs(output_dir, exist_ok=True)

            for img_name in os.listdir(input_dir):
                img_path = os.path.join(input_dir, img_name)
                image = cv2.imread(img_path)
                if image is None:
                    # cv2.imread returns None for unreadable/non-image files.
                    print(f"Warning: could not read {img_path}")
                    continue

                cropped = crop_img(image)
                resized = cv2.resize(cropped, (IMG_SIZE, IMG_SIZE))

                out_path = os.path.join(output_dir, img_name)
                # cv2.imwrite reports failure through its return value.
                if not cv2.imwrite(out_path, resized):
                    print(f"Warning: could not write {out_path}")

    print("Preprocessing completed! All images saved under 'cleaned/'")



Processing Training: 100%|██████████| 4/4 [02:23<00:00, 35.95s/it]
Processing Testing: 100%|██████████| 4/4 [00:31<00:00,  7.90s/it]

Preprocessing completed! All images saved under 'cleaned/'



