In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.applications import EfficientNetB0, Xception
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
from tensorflow.keras.models import Model
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

In [2]:
IMG_SIZE = 224
BATCH_SIZE = 32
EPOCHS = 10
SEED = 42



In [3]:
original_df = pd.read_csv("BreakHis\labels.csv")
eff_kmeans_df = pd.read_csv("EfficientNetB0_KMeans.csv")

# If you have Xception-Birch file:
xcep_birch_df = pd.read_csv("Xception_Birch.csv")

In [4]:
def prepare_data(df, label_column):

    df = df.copy()

    le = LabelEncoder()
    df["label_encoded"] = le.fit_transform(df[label_column])

    num_classes = df["label_encoded"].nunique()

    X = df["image_name"].values
    y = to_categorical(df["label_encoded"], num_classes)

    X_train, X_temp, y_train, y_temp = train_test_split(
        X, y,
        test_size=0.3,
        random_state=SEED,
        stratify=df["label_encoded"]
    )

    X_val, X_test, y_val, y_test = train_test_split(
        X_temp, y_temp,
        test_size=0.5,
        random_state=SEED
    )

    return X_train, X_val, X_test, y_train, y_val, y_test, num_classes


In [5]:
import os

MIXED_FOLDER = "BreakHis\mixed_images"   

def load_image(image_name):
    full_path = os.path.join(MIXED_FOLDER, image_name)

    img = tf.keras.preprocessing.image.load_img(
        full_path,
        target_size=(IMG_SIZE, IMG_SIZE)
    )
    img = tf.keras.preprocessing.image.img_to_array(img)
    img = img / 255.0
    return img


def create_dataset(X, y):
    images = np.array([load_image(name) for name in X])
    return images, y


In [6]:
def build_efficientnet(num_classes):

    base = EfficientNetB0(
        weights=None,            # NO pretrained
        include_top=False,
        input_shape=(IMG_SIZE, IMG_SIZE, 3)
    )

    x = GlobalAveragePooling2D()(base.output)
    output = Dense(num_classes, activation="softmax")(x)

    model = Model(inputs=base.input, outputs=output)

    model.compile(
        optimizer="adam",
        loss="categorical_crossentropy",
        metrics=["accuracy"]
    )

    return model

In [7]:
def build_xception(num_classes):

    base = Xception(
        weights=None,           # NO pretrained
        include_top=False,
        input_shape=(IMG_SIZE, IMG_SIZE, 3)
    )

    x = GlobalAveragePooling2D()(base.output)
    output = Dense(num_classes, activation="softmax")(x)

    model = Model(inputs=base.input, outputs=output)

    model.compile(
        optimizer="adam",
        loss="categorical_crossentropy",
        metrics=["accuracy"]
    )

    return model

In [8]:
print(original_df.columns)


Index(['image_name', 'class', 'tumor_type', 'magnification'], dtype='object')


In [10]:
X_train, X_val, X_test, y_train, y_val, y_test, num_classes = prepare_data(original_df,"class")

X_train_img, y_train = create_dataset(X_train, y_train)
X_val_img, y_val = create_dataset(X_val, y_val)
X_test_img, y_test = create_dataset(X_test, y_test)

model_eff_original = build_efficientnet(num_classes)

model_eff_original.fit(
    X_train_img, y_train,
    validation_data=(X_val_img, y_val),
    epochs=EPOCHS,
    batch_size=BATCH_SIZE
)

loss1, acc1 = model_eff_original.evaluate(X_test_img, y_test)
print("EfficientNet trained on ORIGINAL labels Accuracy:", acc1)

Epoch 1/10
[1m173/173[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m241s[0m 1s/step - accuracy: 0.7814 - loss: 0.5392 - val_accuracy: 0.6847 - val_loss: 1.2481
Epoch 2/10
[1m173/173[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m221s[0m 1s/step - accuracy: 0.8452 - loss: 0.3743 - val_accuracy: 0.6847 - val_loss: 0.9314
Epoch 3/10
[1m173/173[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m221s[0m 1s/step - accuracy: 0.8568 - loss: 0.3390 - val_accuracy: 0.6847 - val_loss: 2.6158
Epoch 4/10
[1m173/173[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m221s[0m 1s/step - accuracy: 0.8770 - loss: 0.2840 - val_accuracy: 0.4705 - val_loss: 1.2357
Epoch 5/10
[1m173/173[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m219s[0m 1s/step - accuracy: 0.8943 - loss: 0.2542 - val_accuracy: 0.7015 - val_loss: 1.1312
Epoch 6/10
[1m173/173[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m207s[0m 1s/step - accuracy: 0.9142 - loss: 0.2132 - val_accuracy: 0.4722 - val_loss: 1.8335
Epoch 7/10
[1m173/173

In [11]:
X_train, X_val, X_test, y_train, y_val, y_test, num_classes = prepare_data(
    eff_kmeans_df,
    label_column="cluster_label"   # <-- adjust if needed
)

X_train_img, y_train = create_dataset(X_train, y_train)
X_val_img, y_val = create_dataset(X_val, y_val)
X_test_img, y_test = create_dataset(X_test, y_test)




model_eff_cluster = build_efficientnet(num_classes)

model_eff_cluster.fit(
    X_train_img, y_train,
    validation_data=(X_val_img, y_val),
    epochs=EPOCHS,
    batch_size=BATCH_SIZE
)

loss2, acc2 = model_eff_cluster.evaluate(X_test_img, y_test)

print("\nEfficientNet trained on KMeans labels Accuracy:", acc2)


Epoch 1/10
[1m173/173[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m236s[0m 1s/step - accuracy: 0.9090 - loss: 0.2906 - val_accuracy: 0.9275 - val_loss: 0.3722
Epoch 2/10
[1m173/173[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m209s[0m 1s/step - accuracy: 0.9207 - loss: 0.1763 - val_accuracy: 0.9275 - val_loss: 0.4959
Epoch 3/10
[1m173/173[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m210s[0m 1s/step - accuracy: 0.9268 - loss: 0.1612 - val_accuracy: 0.9275 - val_loss: 0.4504
Epoch 4/10
[1m173/173[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m209s[0m 1s/step - accuracy: 0.9353 - loss: 0.1454 - val_accuracy: 0.9275 - val_loss: 0.2153
Epoch 5/10
[1m173/173[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m209s[0m 1s/step - accuracy: 0.9398 - loss: 0.1358 - val_accuracy: 0.9570 - val_loss: 0.1145
Epoch 6/10
[1m173/173[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m209s[0m 1s/step - accuracy: 0.9433 - loss: 0.1330 - val_accuracy: 0.9553 - val_loss: 0.1235
Epoch 7/10
[1m173/173

In [12]:
# Prepare data (original labels)

X_train, X_val, X_test, y_train, y_val, y_test, num_classes = prepare_data(
    original_df,
    label_column="class"   # original label column
)

X_train_img, y_train = create_dataset(X_train, y_train)
X_val_img, y_val = create_dataset(X_val, y_val)
X_test_img, y_test = create_dataset(X_test, y_test)

# Build model
model_x_original = build_xception(num_classes)

# Train
model_x_original.fit(
    X_train_img, y_train,
    validation_data=(X_val_img, y_val),
    epochs=EPOCHS,
    batch_size=BATCH_SIZE
)

# Evaluate
loss3, acc3 = model_x_original.evaluate(X_test_img, y_test)

print("\nXception trained on ORIGINAL labels Accuracy:", acc3)


Epoch 1/10
[1m173/173[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m424s[0m 2s/step - accuracy: 0.8174 - loss: 0.4442 - val_accuracy: 0.6847 - val_loss: 0.6465
Epoch 2/10
[1m173/173[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m415s[0m 2s/step - accuracy: 0.8595 - loss: 0.3314 - val_accuracy: 0.6847 - val_loss: 0.6147
Epoch 3/10
[1m173/173[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m423s[0m 2s/step - accuracy: 0.8687 - loss: 0.3049 - val_accuracy: 0.7074 - val_loss: 0.7351
Epoch 4/10
[1m173/173[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m428s[0m 2s/step - accuracy: 0.8848 - loss: 0.2674 - val_accuracy: 0.7799 - val_loss: 0.7816
Epoch 5/10
[1m173/173[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m422s[0m 2s/step - accuracy: 0.8983 - loss: 0.2386 - val_accuracy: 0.7420 - val_loss: 1.2816
Epoch 6/10
[1m173/173[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m422s[0m 2s/step - accuracy: 0.9079 - loss: 0.2216 - val_accuracy: 0.6872 - val_loss: 5.3573
Epoch 7/10
[1m173/173

In [13]:
# Prepare data (Birch cluster labels)

X_train, X_val, X_test, y_train, y_val, y_test, num_classes = prepare_data(
    xcep_birch_df,
    label_column="cluster_label"   # <-- change if different
)

X_train_img, y_train = create_dataset(X_train, y_train)
X_val_img, y_val = create_dataset(X_val, y_val)
X_test_img, y_test = create_dataset(X_test, y_test)

# Build model
model_x_cluster = build_xception(num_classes)

# Train
model_x_cluster.fit(
    X_train_img, y_train,
    validation_data=(X_val_img, y_val),
    epochs=EPOCHS,
    batch_size=BATCH_SIZE
)

# Evaluate
loss4, acc4 = model_x_cluster.evaluate(X_test_img, y_test)

print("\nXception trained on Birch labels Accuracy:", acc4)


Epoch 1/10
[1m173/173[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m441s[0m 2s/step - accuracy: 0.7820 - loss: 0.4960 - val_accuracy: 0.3744 - val_loss: 0.6934
Epoch 2/10
[1m173/173[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m430s[0m 2s/step - accuracy: 0.8212 - loss: 0.3769 - val_accuracy: 0.6256 - val_loss: 0.7479
Epoch 3/10
[1m173/173[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m407s[0m 2s/step - accuracy: 0.8304 - loss: 0.3663 - val_accuracy: 0.6256 - val_loss: 1.2099
Epoch 4/10
[1m173/173[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m404s[0m 2s/step - accuracy: 0.8405 - loss: 0.3418 - val_accuracy: 0.7184 - val_loss: 1.1649
Epoch 5/10
[1m173/173[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m404s[0m 2s/step - accuracy: 0.8524 - loss: 0.3249 - val_accuracy: 0.7057 - val_loss: 1.1122
Epoch 6/10
[1m173/173[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m404s[0m 2s/step - accuracy: 0.8591 - loss: 0.3121 - val_accuracy: 0.5658 - val_loss: 2.2141
Epoch 7/10
[1m173/173

In [14]:

# =========================
# FINAL COMPARISON
# =========================

print("\n========== FINAL RESULTS ==========")
print("EfficientNet Original Accuracy:", acc1)
print("EfficientNet Cluster Accuracy :", acc2)
print("Xception Original Accuracy    :", acc3)
print("Xception Cluster Accuracy     :", acc4)


EfficientNet Original Accuracy: 0.780117928981781
EfficientNet Cluster Accuracy : 0.9081718325614929
Xception Original Accuracy    : 0.7700084447860718
Xception Cluster Accuracy     : 0.46756529808044434
