# **Proyecto Final Inteligencia Artificial**

### Autores: **Angel David Piñeros Sierra**, **Camilo Andrés Roncancio Toca**, **Kelly Johana Solano Calderón**
### Presentado a: **Darwin Eduardo Martinez Riaño**


---

## **Modelo de segmentación de imágenes para la localización de lesiones asociadas al cáncer de piel**

### *Glosario*

### *(A) Descripción de la problemática*

### *(B) Objetivo*

### *(C) Descripción del dataset*

El dataset seleccionado para la evaluación del modelo fue el denominado “Skin cancer: HAM10000” de la plataforma de Kaggle, el cual ofrece un conjunto de imágenes especiales para realizar tareas de segmentación y clasificación. Para el propósito de segmentación, el dataset incluye para cada una de las imágenes, el conjunto de máscaras qué determinan la segmentación de las lesiones de cáncer de piel. 

> El acrónimo HAM10000 significa “Human Against Machine with 10000 training images”. 

Este dataset es una recopilación de imágenes demoscópicas de diferentes poblaciones. Estas fueron originalmente publicadas inicialmente en el repositorio de Harvard Dataverse,  con el propósito de abordar la dificultad de encontrar un dataset lo suficientemente grande y diverso para realizar diagnósticos automatizados de lesiones cutáneas pigmentadas. 

El dataset se conforma de dos carpetas: images y masks. Cada una con **10015** imágenes en formato **.JPEG**. Todas las imágenes tienen una dimensión de `600px X 450px`

<img src="https://res.cloudinary.com/dlsntlruu/image/upload/v1764556079/carpeta_images_pieoyu.png" width="600px"/>

<img src="https://res.cloudinary.com/dlsntlruu/image/upload/v1764556079/carpeta_masks_taifwn.png" width="600px"/>

Las imágenes incluyen diagnósticos de:
*  Queratosis actínicas
*  Carcinoma intraepitelial
*  Carcinoma basocelular
*  Lesiones de tipo queratosis
*  Dermatofibroma
*  Melanoma
*  Lesiones vasculares

Contar con una amplia gama de diagnósticos permite qué la tarea de segmentación semántica pueda realizarse de forma óptima. 

Para mayor información: 

*  Skin cancer: HAM10000: https://www.kaggle.com/datasets/surajghuwalewala/ham1000-segmentation-and-classification/
*  The HAM10000 dataset, a large collection of multi-source dermatoscopic images of common pigmented skin lesions: https://dataverse.harvard.edu/dataset.xhtml?persistentId=doi:10.7910/DVN/DBW86T



### *(D) Importación y organización de datos*


In [2]:
%pip install kaggle pandas torch PILlow torchvision 

Note: you may need to restart the kernel to use updated packages.


In [11]:
import os
import zipfile
import pandas as pd
import subprocess
import random
import shutil
from pathlib import Path
import torch
from PIL import Image
from torchvision import transforms
import torchvision.transforms.v2 as v2
from torchvision import tv_tensors
from torchvision.transforms import InterpolationMode
from torch.utils.data import Dataset
from torch.utils.data import DataLoader


In [4]:


dataset_url = "surajghuwalewala/ham1000-segmentation-and-classification"
ruta_descarga = "ham1000_data"

if os.path.exists(ruta_descarga):
    print(f"La carpeta '{ruta_descarga}' ya existe. No se descargará de nuevo.")
else:
    os.makedirs(ruta_descarga, exist_ok=True)
    subprocess.run([
        "kaggle", "datasets", "download",
        "-d", dataset_url,
        "-p", ruta_descarga,
        "--unzip"
    ], check=True)

print("Contenido de la carpeta:", os.listdir(ruta_descarga))

images_dir = os.path.join(ruta_descarga, "images")
masks_dir = os.path.join(ruta_descarga, "masks")

if os.path.isdir(images_dir):
    print("Número de imágenes:", len(os.listdir(images_dir)))
else:
    print("No encontré la carpeta 'images'.")

if os.path.isdir(masks_dir):
    print("Número de máscaras:", len(os.listdir(masks_dir)))
else:
    print("No encontré la carpeta 'masks'.")

La carpeta 'ham1000_data' ya existe. No se descargará de nuevo.
Contenido de la carpeta: ['GroundTruth.csv', 'images', 'masks']
Número de imágenes: 10017
Número de máscaras: 10015


In [12]:
DATA_ROOT = Path('ham1000_data')
OUTPUT_ROOT = Path('ham1000_data_splits')
TRAIN_RATIO = 0.85  # train + val
VAL_FROM_TRAIN = 0.15  # porcentaje de train destinado a validación
SEED = 42
MASK_SUFFIX = '_segmentation.png'

image_dir = DATA_ROOT / 'images'
mask_dir = DATA_ROOT / 'masks'
if not image_dir.exists() or not mask_dir.exists():
    raise RuntimeError(f"No se hallaron carpetas esperadas en {DATA_ROOT}.")

random.seed(SEED)
if OUTPUT_ROOT.exists():
    shutil.rmtree(OUTPUT_ROOT)
for split in ('train', 'val', 'test'):
    (OUTPUT_ROOT / split / 'images').mkdir(parents=True, exist_ok=True)
    (OUTPUT_ROOT / split / 'masks').mkdir(parents=True, exist_ok=True)

allowed_exts = {'.png', '.jpg', '.jpeg'}
image_paths = [p for p in image_dir.iterdir() if p.suffix.lower() in allowed_exts]
if not image_paths:
    raise RuntimeError(f'No se encontraron imágenes en {image_dir}.')
image_paths.sort()
random.shuffle(image_paths)

total = len(image_paths)
train_val_count = int(total * TRAIN_RATIO)
val_count = int(train_val_count * VAL_FROM_TRAIN)
train_count = train_val_count - val_count

splits = {
    'train': image_paths[:train_count],
    'val': image_paths[train_count: train_count + val_count],
    'test': image_paths[train_count + val_count:]
}

for split_name, files in splits.items():
    dst_img = OUTPUT_ROOT / split_name / 'images'
    dst_mask = OUTPUT_ROOT / split_name / 'masks'
    for img_path in files:
        mask_path = mask_dir / f"{img_path.stem}{MASK_SUFFIX}"
        if not mask_path.exists():
            raise FileNotFoundError(f'Falta máscara: {mask_path}')
        shutil.copy2(img_path, dst_img / img_path.name)
        shutil.copy2(mask_path, dst_mask / mask_path.name)
    print(f"{split_name}: {len(files)} imágenes")
print('Splits creados en', OUTPUT_ROOT)



train: 7236 imágenes
val: 1276 imágenes
test: 1503 imágenes
Splits creados en ham1000_data_splits


In [13]:
TRAIN_IMG_DIR = Path('ham1000_data_splits/train/images')
if not TRAIN_IMG_DIR.exists():
    raise RuntimeError(f'No existe el directorio: {TRAIN_IMG_DIR}')

TARGET_SIZE = (256, 256)
resize_transform = transforms.Resize(TARGET_SIZE, interpolation=InterpolationMode.BILINEAR)

img_files = sorted(
    [p for p in TRAIN_IMG_DIR.iterdir() if p.suffix.lower() in {'.png', '.jpg', '.jpeg'}]
)
if not img_files:
    raise RuntimeError(f'No se encontraron imágenes en {TRAIN_IMG_DIR}')

means, stds = [], []
for img_path in img_files:
    img = Image.open(img_path).convert('RGB')
    img_tensor = transforms.ToTensor()(resize_transform(img))
    means.append(img_tensor.mean(dim=(1, 2)))
    stds.append(img_tensor.std(dim=(1, 2)))

IMG_MEAN = torch.stack(means).mean(dim=0)
IMG_STD = torch.stack(stds).mean(dim=0)
print(f'Media por canal (reescala {TARGET_SIZE[0]}x{TARGET_SIZE[1]}): {IMG_MEAN.tolist()}')
print(f'Desviación estándar por canal: {IMG_STD.tolist()}')



Media por canal (reescala 256x256): [0.7633206844329834, 0.5454925894737244, 0.5698098540306091]
Desviación estándar por canal: [0.08873128145933151, 0.11731848120689392, 0.13180507719516754]


In [14]:
normalize_transform = v2.Normalize(mean=IMG_MEAN, std=IMG_STD)

train_joint_transform = v2.Compose([
    v2.Resize(TARGET_SIZE, interpolation=InterpolationMode.BILINEAR, antialias=True),
    v2.RandomHorizontalFlip(p=0.5),
    v2.RandomVerticalFlip(p=0.2),
    v2.RandomRotation(degrees=(-180, 180)),
    v2.ElasticTransform(alpha=40.0, sigma=5.0, interpolation=InterpolationMode.BILINEAR),
    v2.ColorJitter(brightness=0.15, contrast=0.2, saturation=0.2, hue=0.02),
    v2.GaussianBlur(kernel_size=3, sigma=(0.1, 1.0)),
    v2.ToDtype(torch.float32, scale=True),
    normalize_transform,
])

val_test_joint_transform = v2.Compose([
    v2.Resize(TARGET_SIZE, interpolation=InterpolationMode.BILINEAR, antialias=True),
    v2.ToDtype(torch.float32, scale=True),
    normalize_transform,
])

def apply_train_transforms(image, mask):
    image_tv = tv_tensors.Image(image)
    mask_tv = tv_tensors.Mask(mask)
    image_aug, mask_aug = train_joint_transform(image_tv, mask_tv)
    return image_aug, mask_aug

def apply_val_transforms(image, mask):
    image_tv = tv_tensors.Image(image)
    mask_tv = tv_tensors.Mask(mask)
    image_val, mask_val = val_test_joint_transform(image_tv, mask_tv)
    return image_val, mask_val

def apply_test_transforms(image, mask):
    image_tv = tv_tensors.Image(image)
    mask_tv = tv_tensors.Mask(mask)
    image_test, mask_test = val_test_joint_transform(image_tv, mask_tv)
    return image_test, mask_test

print("Transformaciones definidas:")
print("train_joint_transform:", train_joint_transform)
print("val_test_joint_transform:", val_test_joint_transform)



Transformaciones definidas:
train_joint_transform: Compose(
      Resize(size=[256, 256], interpolation=InterpolationMode.BILINEAR, antialias=True)
      RandomHorizontalFlip(p=0.5)
      RandomVerticalFlip(p=0.2)
      RandomRotation(degrees=[-180.0, 180.0], interpolation=InterpolationMode.NEAREST, expand=False, fill=0)
      ElasticTransform(alpha=[40.0, 40.0], sigma=[5.0, 5.0], interpolation=InterpolationMode.BILINEAR, fill=0)
      ColorJitter(brightness=(0.85, 1.15), contrast=(0.8, 1.2), saturation=(0.8, 1.2), hue=(-0.02, 0.02))
      GaussianBlur(kernel_size=(3, 3), sigma=[0.1, 1.0])
      ToDtype(scale=True)
      Normalize(mean=[tensor(0.7633), tensor(0.5455), tensor(0.5698)], std=[tensor(0.0887), tensor(0.1173), tensor(0.1318)], inplace=False)
)
val_test_joint_transform: Compose(
      Resize(size=[256, 256], interpolation=InterpolationMode.BILINEAR, antialias=True)
      ToDtype(scale=True)
      Normalize(mean=[tensor(0.7633), tensor(0.5455), tensor(0.5698)], std=[tensor(0.0

In [15]:
from torch.utils.data import Dataset

class HAM1000SegmentationDataset(Dataset):
    def __init__(self, root_dir: Path, split: str, transform_fn=None, mask_suffix: str = '_segmentation.png'):
        self.root_dir = Path(root_dir)
        if split not in {'train', 'val', 'test'}:
            raise ValueError("split debe ser 'train', 'val' o 'test'")
        self.split = split
        self.images_dir = self.root_dir / split / 'images'
        self.masks_dir = self.root_dir / split / 'masks'
        if not self.images_dir.exists() or not self.masks_dir.exists():
            raise RuntimeError(f'No se hallan carpetas para el split {split} en {self.root_dir}')
        self.mask_suffix = mask_suffix
        self.transform_fn = transform_fn

        allowed_exts = {'.png', '.jpg', '.jpeg'}
        self.samples = []
        for img_path in sorted(self.images_dir.iterdir()):
            if img_path.suffix.lower() not in allowed_exts:
                continue
            mask_path = self.masks_dir / f"{img_path.stem}{self.mask_suffix}"
            if not mask_path.exists():
                continue
            self.samples.append((img_path, mask_path))
        if not self.samples:
            raise RuntimeError(f'No se encontraron pares imagen-máscara en {self.images_dir}')

    def __len__(self):
        return len(self.samples)

    def __getitem__(self, idx):
        img_path, mask_path = self.samples[idx]
        image = Image.open(img_path).convert('RGB')
        mask = Image.open(mask_path).convert('L')
        if self.transform_fn is not None:
            image, mask = self.transform_fn(image, mask)
        return image, mask


def get_transform_fn(split: str):
    if split == 'train':
        return apply_train_transforms
    if split == 'val':
        return apply_val_transforms
    if split == 'test':
        return apply_test_transforms
    raise ValueError('split desconocido')


def create_datasets(root_dir: Path = Path('ham1000_data_splits')):
    datasets = {}
    for split in ('train', 'val', 'test'):
        datasets[split] = HAM1000SegmentationDataset(
            root_dir=root_dir,
            split=split,
            transform_fn=get_transform_fn(split),
        )
        print(f"{split}: {len(datasets[split])} muestras")
    return datasets

ham_datasets = create_datasets()



train: 7236 muestras
val: 1276 muestras
test: 1503 muestras


In [16]:
BATCH_SIZE = 8
NUM_WORKERS = 4
PIN_MEMORY = True

train_loader = DataLoader(
    ham_datasets['train'],
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=NUM_WORKERS,
    pin_memory=PIN_MEMORY,
)
val_loader = DataLoader(
    ham_datasets['val'],
    batch_size=BATCH_SIZE,
    shuffle=False,
    num_workers=NUM_WORKERS,
    pin_memory=PIN_MEMORY,
)
test_loader = DataLoader(
    ham_datasets['test'],
    batch_size=BATCH_SIZE,
    shuffle=False,
    num_workers=NUM_WORKERS,
    pin_memory=PIN_MEMORY,
)

print('Loaders listos:')
print(f"train: {len(train_loader)} batches")
print(f"val: {len(val_loader)} batches")
print(f"test: {len(test_loader)} batches")



Loaders listos:
train: 905 batches
val: 160 batches
test: 188 batches
