# Detección de Cáncer de Mama con Mixture of Experts
Este notebook muestra un modelo CNN básico y una arquitectura MoE para el dataset BreastMNIST.

In [ ]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from medmnist import BreastMNIST, INFO
from sklearn.metrics import classification_report, confusion_matrix, f1_score, roc_auc_score

In [ ]:
# Dataset selection: registry key, loader class and the matching INFO entry.
data_flag = 'breastmnist'
DataClass = BreastMNIST
info = INFO[data_flag]

# Instantiate the three splits in order (train, val, test); download=True is
# passed for each of them.
train_ds, val_ds, test_ds = (
    DataClass(split=split_name, download=True)
    for split_name in ('train', 'val', 'test')
)

def preprocess(img, label, image_size=(64, 64)):
    """Convert one raw sample into the tensors the models consume.

    Args:
        img: Single-channel image without a channel axis (one is appended
            here). Pixel values are divided by 255, so a 0-255 range is
            assumed.
        label: Class label; cast to int32, shape left untouched.
        image_size: Target ``(height, width)``. Defaults to the 64x64 input
            declared by the models in this notebook. Pass ``None`` to keep
            the native resolution.

    Returns:
        Tuple ``(img, label)`` with ``img`` as float32 of shape
        ``(height, width, 1)`` and ``label`` as int32.
    """
    img = tf.cast(img, tf.float32) / 255.0
    img = tf.expand_dims(img, -1)
    # BreastMNIST loaded without an explicit size yields 28x28 arrays, which
    # cannot be fed to a network whose Input is (64, 64, 1). Resize only when
    # the static shape differs so already-matching data passes through
    # unchanged.
    if image_size is not None and tuple(img.shape[:2]) != tuple(image_size):
        img = tf.image.resize(img, image_size)
    label = tf.cast(label, tf.int32)
    return img, label

def _as_tf_dataset(split):
    """Wrap a split's ``imgs``/``labels`` arrays in a preprocessed tf.data pipeline."""
    pairs = tf.data.Dataset.from_tensor_slices((split.imgs, split.labels))
    return pairs.map(preprocess)


# Only the training pipeline is shuffled (buffer of 1000 elements).
train_ds_tf = _as_tf_dataset(train_ds).shuffle(1000)
val_ds_tf = _as_tf_dataset(val_ds)
test_ds_tf = _as_tf_dataset(test_ds)

In [ ]:
def get_distribution(ds):
    """Count how many samples carry each label.

    Args:
        ds: Iterable of ``(image, label)`` pairs. Each label must hold exactly
            one integer value (a scalar or a one-element array/tensor).

    Returns:
        Dict mapping each label (plain ``int``) to its number of occurrences
        (plain ``int``), with keys in ascending order. An empty iterable
        yields an empty dict.
    """
    # ``.item()`` extracts the single value whatever the label's rank; calling
    # ``int()`` directly on a one-element array is deprecated since NumPy 1.25.
    labels = [int(np.asarray(label).item()) for _, label in ds]
    unique, counts = np.unique(labels, return_counts=True)
    # Plain ints keep the printed dict readable (no NumPy scalar reprs).
    return {int(value): int(count) for value, count in zip(unique, counts)}

# Report the class balance of the training and validation pipelines.
for caption, split_pipeline in (
    ('Distribución entrenamiento:', train_ds_tf),
    ('Distribución validación:', val_ds_tf),
):
    print(caption, get_distribution(split_pipeline))


In [ ]:
# BreastMNIST encodes 0 = malignant and 1 = normal/benign
# (see INFO['breastmnist']['label']), so the display names follow that order.
class_names = {0: 'Maligno', 1: 'Benigno'}

# Walk the training pipeline once and bucket the images by class instead of
# re-iterating the (shuffled) dataset for every class and every figure.
images_by_class = {cls: [] for cls in class_names}
for img, label in train_ds_tf:
    # .item() works for scalar and one-element labels alike; int() on a
    # one-element array is deprecated since NumPy 1.25.
    cls = int(np.asarray(label).item())
    if cls in images_by_class:
        images_by_class[cls].append(img.numpy().squeeze())

# Grid of up to five examples per class: one row per class.
plt.figure(figsize=(6, 3))
for cls, name in class_names.items():
    for i, img in enumerate(images_by_class[cls][:5]):
        plt.subplot(2, 5, cls * 5 + i + 1)
        plt.imshow(img, cmap='gray')
        plt.axis('off')
        if i == 0:
            # axis('off') hides axis labels, so a ylabel would never be drawn;
            # the subplot title stays visible.
            plt.title(name, fontsize=9)
plt.suptitle('Ejemplos por clase')
plt.show()

# Pixel-wise mean image of each class.
for cls in class_names:
    if not images_by_class[cls]:
        # np.stack rejects an empty list; nothing to average for this class.
        continue
    mean_img = np.stack(images_by_class[cls]).mean(axis=0)
    plt.imshow(mean_img, cmap='gray')
    plt.title('Promedio clase %d' % cls)
    plt.axis('off')
    plt.show()

In [ ]:
def build_cnn():
    """Build the baseline CNN binary classifier.

    Architecture: two Conv2D(3x3, ReLU) + MaxPooling2D stages with 32 and 64
    filters, then Flatten, Dense(64, ReLU), Dropout(0.5) and a single sigmoid
    unit.

    Returns:
        An uncompiled ``keras.Model`` taking ``(64, 64, 1)`` inputs and
        producing one probability per sample.
    """
    image = keras.Input(shape=(64, 64, 1))

    # Convolutional feature extractor.
    features = image
    for n_filters in (32, 64):
        features = layers.Conv2D(n_filters, 3, activation='relu')(features)
        features = layers.MaxPooling2D()(features)

    # Classification head.
    hidden = layers.Flatten()(features)
    hidden = layers.Dense(64, activation='relu')(hidden)
    hidden = layers.Dropout(0.5)(hidden)
    probability = layers.Dense(1, activation='sigmoid')(hidden)
    return keras.Model(image, probability)

In [ ]:
# Random augmentation stack applied to training images only.
data_augmentation = keras.Sequential(
    [
        layers.RandomFlip('horizontal'),
        layers.RandomRotation(0.1),
        layers.RandomZoom(0.1),
    ]
)


def augment(img, label):
    """Run the image through ``data_augmentation``; the label passes through unchanged."""
    augmented = data_augmentation(img)
    return augmented, label

batch_size = 32

# Training pipeline: augment each sample, then batch and prefetch two batches.
train_aug = train_ds_tf.map(augment)
train_aug = train_aug.batch(batch_size)
train_aug = train_aug.prefetch(2)

# Validation pipeline: batched and prefetched, no augmentation.
val_ds_b = val_ds_tf.batch(batch_size)
val_ds_b = val_ds_b.prefetch(2)

In [ ]:
# Balanced class weights: weight_c = total / (2 * count_c), so both classes
# contribute equally to the loss despite the imbalance.
# .item() extracts the single label value whatever its rank; int() on a
# one-element array is deprecated since NumPy 1.25.
labels = [int(np.asarray(l).item()) for _, l in train_ds_tf]
# neg / pos are the sample counts of label 0 and label 1 respectively.
# minlength=2 keeps the unpacking valid when label 1 is absent, so the check
# below can report the real problem.
neg, pos = np.bincount(labels, minlength=2)
if neg == 0 or pos == 0:
    raise ValueError(
        f'Both classes must be present to compute class weights (counts: 0 -> {neg}, 1 -> {pos})'
    )
total = neg + pos
# Plain floats keep the displayed dict free of NumPy scalar reprs.
class_weights = {
    0: float((1 / neg) * (total / 2.0)),
    1: float((1 / pos) * (total / 2.0)),
}
class_weights

In [ ]:
# Baseline: compile the plain CNN and fit it with the class-weighted loss.
cnn = build_cnn()
cnn.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy', keras.metrics.AUC(name='auc')],
)
history = cnn.fit(
    train_aug,
    epochs=20,
    validation_data=val_ds_b,
    class_weight=class_weights,
)

In [ ]:
# Score the baseline CNN on the test split.
test_b = test_ds_tf.batch(batch_size)
probs = cnn.predict(test_b)
y_true = np.concatenate([batch_labels for _, batch_labels in test_b], axis=0)
# Hard predictions at a 0.5 probability threshold.
y_pred = (probs.ravel() > 0.5).astype(int)

# NOTE(review): f1_score uses label 1 as the positive class by default; confirm
# that is the class of interest for this dataset.
print(classification_report(y_true, y_pred))
print('F1:', f1_score(y_true, y_pred))
print('AUC:', roc_auc_score(y_true, probs))

# Confusion matrix heatmap.
cm = confusion_matrix(y_true, y_pred)
sns.heatmap(cm, annot=True, fmt='d')
plt.xlabel('Pred')
plt.ylabel('True')
plt.show()

# Training vs. validation curves, one figure per tracked quantity.
for train_key, val_key, axis_label in (
    ('loss', 'val_loss', 'Loss'),
    ('accuracy', 'val_accuracy', 'Accuracy'),
):
    plt.figure()
    plt.plot(history.history[train_key], label='train')
    plt.plot(history.history[val_key], label='val')
    plt.legend()
    plt.xlabel('Epoch')
    plt.ylabel(axis_label)
    plt.show()

In [ ]:
def build_expert():
    """Build one expert: a small CNN feature extractor.

    Two Conv2D(3x3, ReLU) + MaxPooling2D stages with 32 and 64 filters,
    followed by Flatten and Dense(64, ReLU). No classification head is
    attached here.

    Returns:
        A ``keras.Model`` mapping ``(64, 64, 1)`` images to 64-dimensional
        feature vectors.
    """
    image = keras.Input(shape=(64, 64, 1))

    features = image
    for n_filters in (32, 64):
        features = layers.Conv2D(n_filters, 3, activation='relu')(features)
        features = layers.MaxPooling2D()(features)

    embedding = layers.Dense(64, activation='relu')(layers.Flatten()(features))
    return keras.Model(image, embedding)

def build_moe(n_experts=2):
    """Build a dense mixture-of-experts binary classifier.

    Every expert (see ``build_expert``) sees the full image and emits one
    logit through its own ``Dense(1)`` head. A gating network (Flatten ->
    Dense(32, ReLU) -> Dense(n_experts, softmax)) produces per-sample mixing
    weights. The output is the sigmoid of the gate-weighted sum of the expert
    logits.

    Args:
        n_experts: Number of experts to instantiate; must be at least 1.

    Returns:
        An uncompiled ``keras.Model`` taking ``(64, 64, 1)`` inputs and
        producing one probability per sample, shape ``(batch, 1)``.

    Raises:
        ValueError: If ``n_experts`` is smaller than 1.
    """
    if n_experts < 1:
        raise ValueError(f'n_experts must be >= 1, got {n_experts}')

    inputs = keras.Input(shape=(64, 64, 1))
    experts = [build_expert()(inputs) for _ in range(n_experts)]
    # One scalar logit per expert, each of shape (batch, 1).
    expert_outputs = [layers.Dense(1)(e) for e in experts]

    # Gating network: softmax weights of shape (batch, n_experts).
    gate = layers.Flatten()(inputs)
    gate = layers.Dense(32, activation='relu')(gate)
    gate = layers.Dense(n_experts, activation='softmax')(gate)

    # Combine with Keras layers only: raw tf.* ops on symbolic Keras tensors
    # are rejected by Keras 3, while these layers work on both Keras 2 and 3.
    if n_experts > 1:
        logits = layers.Concatenate(axis=-1)(expert_outputs)  # (batch, n_experts)
    else:
        logits = expert_outputs[0]
    # Per-sample dot product == sum_i gate_i * logit_i, shape (batch, 1).
    out = layers.Dot(axes=1)([logits, gate])
    out = layers.Activation('sigmoid')(out)
    return keras.Model(inputs, out)

In [ ]:
# Train one MoE per expert count and record its history plus test F1 / AUC.
# Reuses test_b and y_true from the CNN evaluation cell.
results = {}
for n in (2, 4, 8):
    moe = build_moe(n)
    moe.compile(
        loss='binary_crossentropy',
        optimizer='adam',
        metrics=['accuracy', keras.metrics.AUC(name='auc')],
    )
    h = moe.fit(
        train_aug,
        epochs=20,
        validation_data=val_ds_b,
        class_weight=class_weights,
    )

    # Test-set scoring with a 0.5 decision threshold.
    probs = moe.predict(test_b)
    y_pred = (probs.ravel() > 0.5).astype(int)
    f1 = f1_score(y_true, y_pred)
    auc = roc_auc_score(y_true, probs)

    results[n] = dict(history=h.history, f1=f1, auc=auc)
    print(f'MoE {n} expertos - F1:{f1:.4f} AUC:{auc:.4f}')

In [ ]:
# One figure per MoE configuration: training vs. validation loss.
for n, res in results.items():
    curves = res['history']
    plt.figure()
    plt.plot(curves['loss'], label='train')
    plt.plot(curves['val_loss'], label='val')
    plt.title(f'MoE {n} expertos')
    plt.legend()
    plt.show()