In [1]:
import os
import re
from PIL import Image
import Augmentor

import os
import re
from PIL import Image
import Augmentor

def crop_images_in_directory(source_directory, output_directory, crop_size=768):
    """Center-crop every image in a folder and save the crops as ``.jpg`` files.

    Every ``.png``/``.jpg``/``.jpeg`` file found directly inside
    ``source_directory`` (subfolders are not visited) is cropped to a
    ``crop_size`` x ``crop_size`` window centered on the image and written to
    ``output_directory`` as ``<original stem>_1.jpg``.

    Args:
        source_directory: Folder containing the original images.
        output_directory: Folder that receives the crops; created if missing.
        crop_size: Side length in pixels of the square crop (default 768).

    Returns:
        int: Number of images cropped and saved by this call. Files that were
        already present in ``output_directory`` are not counted.
    """
    os.makedirs(output_directory, exist_ok=True)

    # Modes the JPEG writer is expected to accept directly; anything else
    # (e.g. RGBA or palette PNGs) is converted to RGB before saving.
    # NOTE(review): list taken from Pillow's JPEG plugin - confirm for the
    # installed Pillow version.
    jpeg_modes = ("1", "L", "RGB", "RGBX", "CMYK", "YCbCr")

    processed_count = 0
    for filename in os.listdir(source_directory):
        if not filename.lower().endswith(('.png', '.jpg', '.jpeg')):
            continue

        base_name = os.path.splitext(filename)[0]  # stem without extension
        file_path = os.path.join(source_directory, filename)
        output_path = os.path.join(output_directory, f"{base_name}_1.jpg")

        with Image.open(file_path) as img:
            # Integer box: the result is always exactly crop_size x crop_size,
            # independent of how fractional coordinates would be rounded.
            # NOTE(review): images smaller than crop_size yield a box that
            # extends past the image; Pillow pads that area - confirm this is
            # acceptable for the dataset.
            left = (img.width - crop_size) // 2
            top = (img.height - crop_size) // 2
            cropped_img = img.crop((left, top, left + crop_size, top + crop_size))

            if cropped_img.mode not in jpeg_modes:
                cropped_img = cropped_img.convert("RGB")

            cropped_img.save(output_path)

        processed_count += 1

    return processed_count

def setup_and_run_augmentor(source_directory, output_directory, base_image_count, augmentation_factor=10):
    """Generate augmented JPEG samples from a folder of images with Augmentor.

    The pipeline always rotates (up to 25 degrees left or right) and applies a
    horizontal and a vertical flip with probability 0.5 each. It is asked for
    ``base_image_count * augmentation_factor`` samples.

    Args:
        source_directory: Folder with the images to augment.
        output_directory: Folder where Augmentor writes the generated samples.
        base_image_count: Number of base images; scales the sample count.
        augmentation_factor: Samples to generate per base image (default 10).

    Returns:
        None. When the requested sample count is not positive, a notice is
        printed and no pipeline is created.
    """
    target_sample_size = base_image_count * augmentation_factor

    # Skip instead of calling sample(0).
    # NOTE(review): Augmentor's docs describe sample(0) as "pass every image
    # through the pipeline once", not "produce nothing" - confirm.
    if target_sample_size <= 0:
        print(f"Aviso: no hay imágenes base para {source_directory}; se omite la aumentación.")
        return

    p = Augmentor.Pipeline(
        source_directory=source_directory,
        output_directory=output_directory,
        save_format="JPEG",
    )
    p.rotate(probability=1.0, max_left_rotation=25, max_right_rotation=25)
    p.flip_left_right(probability=0.5)
    p.flip_top_bottom(probability=0.5)
    p.sample(target_sample_size)


import os
import re

def clean_augmented_filenames(output_directory):
    """Strip the ``_<uuid>`` suffix from file names and number them instead.

    Regular files directly inside ``output_directory`` are visited in sorted
    order. Any ``_xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx`` (hexadecimal UUID)
    fragment is removed from the stem, and the file is renamed to
    ``<clean stem>_<n><ext>``, where ``n`` counts from 1 for each clean stem.
    Subdirectories are left untouched.

    Args:
        output_directory: Folder whose files are renamed in place.

    Returns:
        None. If the target name already exists the file is not renamed and a
        message is printed.
    """
    # Backslash character classes: the previous pattern used "/w", which only
    # matches a literal slash followed by "w", so no UUID was ever removed.
    uuid_fragment = re.compile(
        r'_[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}'
    )
    seen_names = {}

    for filename in sorted(os.listdir(output_directory)):
        current_path = os.path.join(output_directory, filename)
        # Only rename files; a subfolder living here must keep its name.
        if not os.path.isfile(current_path):
            continue

        base_name, ext = os.path.splitext(filename)
        new_base = uuid_fragment.sub('', base_name)

        # Running counter per cleaned stem keeps the new names unique.
        seen_names[new_base] = seen_names.get(new_base, 0) + 1

        new_name = f"{new_base}_{seen_names[new_base]}"
        new_path = os.path.join(output_directory, new_name + ext)

        if not os.path.exists(new_path):
            os.rename(current_path, new_path)
        else:
            print(f"Error: el archivo {new_path} ya existe, no se puede renombrar {filename}")




# Folders holding the original histopathology images, one per magnification/class.
source_dirs = [
    'C:/Users/Matias/Desktop/Tesis/long_dataset/oral_cancer_original/First Set/100x Normal Oral Cavity Histopathological Images',
    'C:/Users/Matias/Desktop/Tesis/long_dataset/oral_cancer_original/First Set/100x OSCC Histopathological Images',
    'C:/Users/Matias/Desktop/Tesis/long_dataset/oral_cancer_original/Second Set/400x Normal Oral Cavity Histopathological Images',
    'C:/Users/Matias/Desktop/Tesis/long_dataset/oral_cancer_original/Second Set/400x OSCC Histopathological Images',
]

# Matching destination folders for the augmented images (same order as source_dirs).
output_dirs = [
    'C:/Users/Matias/Desktop/Tesis/long_dataset/oral_cancer_original/augmentation/First Set/100x Normal Oral Cavity Histopathological Images',
    'C:/Users/Matias/Desktop/Tesis/long_dataset/oral_cancer_original/augmentation/First Set/100x OSCC Histopathological Images',
    'C:/Users/Matias/Desktop/Tesis/long_dataset/oral_cancer_original/augmentation/Second Set/400x Normal Oral Cavity Histopathological Images',
    'C:/Users/Matias/Desktop/Tesis/long_dataset/oral_cancer_original/augmentation/Second Set/400x OSCC Histopathological Images',
]

# For each dataset folder: crop into "<output>/cropped", augment from those
# crops into "<output>", then tidy up the generated file names.
for source_dir, output_dir in zip(source_dirs, output_dirs):
    cropped_dir = os.path.join(output_dir, "cropped")
    base_image_count = crop_images_in_directory(source_dir, cropped_dir)
    setup_and_run_augmentor(cropped_dir, output_dir, base_image_count)
    clean_augmented_filenames(output_dir)




Initialised with 89 image(s) found.
Output directory set to C:/Users/Matias/Desktop/Tesis/long_dataset/oral_cancer_original/augmentation/First Set/100x Normal Oral Cavity Histopathological Images.

Processing <PIL.Image.Image image mode=RGB size=768x768 at 0x2C1DCA3C940>: 100%|██████████| 890/890 [00:04<00:00, 202.65 Samples/s]


Initialised with 439 image(s) found.
Output directory set to C:/Users/Matias/Desktop/Tesis/long_dataset/oral_cancer_original/augmentation/First Set/100x OSCC Histopathological Images.

Processing <PIL.Image.Image image mode=RGB size=768x768 at 0x2C1DAF1DCC0>: 100%|██████████| 4390/4390 [00:22<00:00, 196.41 Samples/s]


Initialised with 201 image(s) found.
Output directory set to C:/Users/Matias/Desktop/Tesis/long_dataset/oral_cancer_original/augmentation/Second Set/400x Normal Oral Cavity Histopathological Images.

Processing <PIL.Image.Image image mode=RGB size=768x768 at 0x2C1C279EB30>: 100%|██████████| 2010/2010 [00:09<00:00, 201.60 Samples/s]


Initialised with 495 image(s) found.
Output directory set to C:/Users/Matias/Desktop/Tesis/long_dataset/oral_cancer_original/augmentation/Second Set/400x OSCC Histopathological Images.

Processing <PIL.Image.Image image mode=RGB size=768x768 at 0x2C1DBC92F80>: 100%|██████████| 4950/4950 [00:23<00:00, 209.46 Samples/s]


In [2]:
import os
import shutil

def procesar_imagenes(ruta_origen, ruta_destino):
    """Copy every file under a source tree into a per-class destination folder.

    The tree rooted at ``ruta_origen`` is walked recursively. Files found in a
    folder whose path contains ``"Normal"`` go to ``<ruta_destino>/benigno``;
    all others go to ``<ruta_destino>/maligno``. Each copy is named
    ``oral_<clase>_<name>``, where ``<name>`` is the original file name with
    every ``cropped_original_`` and ``.jpg`` occurrence removed. The source
    files are copied, not moved.

    Args:
        ruta_origen (str): Root folder of the images to consolidate.
        ruta_destino (str): Folder that holds the ``benigno`` and ``maligno``
            subfolders; they are created on demand.

    Returns:
        None.
    """
    for carpeta_raiz, _subcarpetas, archivos in os.walk(ruta_origen):
        if not archivos:
            continue

        # The class depends only on the folder path, so compute it once per
        # folder. The check looks at the whole path, including ruta_origen.
        clase = "benigno" if "Normal" in carpeta_raiz else "maligno"
        carpeta_clase = os.path.join(ruta_destino, clase)
        # Do not rely on the caller having created the class folder.
        os.makedirs(carpeta_clase, exist_ok=True)

        for archivo in archivos:
            # NOTE(review): a file whose real extension is ".jpg" ends up
            # without an extension here - confirm that is intended.
            nuevo_nombre = f"oral_{clase}_{archivo.replace('cropped_original_', '').replace('.jpg', '')}"
            ruta_origen_completa = os.path.join(carpeta_raiz, archivo)
            ruta_destino_completa = os.path.join(carpeta_clase, nuevo_nombre)
            shutil.copy(ruta_origen_completa, ruta_destino_completa)

# Root of the augmented images and root of the consolidated dataset.
ruta_origen = r"C:/Users/Matias/Desktop/Tesis/long_dataset/oral_cancer_original/augmentation"
ruta_destino = r"C:/Users/Matias/Desktop/Tesis/dataset_consolidado"

# Create the per-class destination folders when they are missing.
for nombre_clase in ("benigno", "maligno"):
    carpeta_clase = os.path.join(ruta_destino, nombre_clase)
    if not os.path.exists(carpeta_clase):
        os.makedirs(carpeta_clase)

# Consolidate both image sets, first set before second set.
for nombre_conjunto in ("First Set", "Second Set"):
    procesar_imagenes(os.path.join(ruta_origen, nombre_conjunto), ruta_destino)

print("Proceso completado.")

Proceso completado.
