In [None]:
# ==============================
# Multi-Input, Multi-Task Model
# ==============================
# One Keras model with an image branch (DenseNet121) and a tabular branch.
# Later sections train and evaluate a single branch, chosen by `training_mode`.

### I. Setup
import os, glob
# "-1" hides every CUDA device so TensorFlow falls back to the CPU.
# It is assigned before `import tensorflow` below so that it is already in
# the environment when TensorFlow is first loaded.
os.environ["CUDA_VISIBLE_DEVICES"] = "-1"

# Numerics, data frames and plotting.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Deep-learning stack.
# NOTE(review): ModelCheckpoint is imported but not used in the cells visible
# here — confirm whether checkpointing was intended.
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import DenseNet121
from tensorflow.keras.layers import Input, Flatten, Dense, Dropout
from tensorflow.keras.models import Model
from tensorflow.keras.callbacks import ModelCheckpoint

# Tabular dataset, splitting, scaling and evaluation metrics.
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import confusion_matrix, classification_report

# Record the TensorFlow version in the notebook output for reproducibility.
print("TensorFlow:", tf.__version__)


In [58]:

# ==============================
# II. Select Training Mode
# ==============================
# "image" → BUSI dataset (3 classes)
# "textual" → breast_cancer dataset (2 classes)
#
# The training and evaluation sections branch on this value. Validate it
# here so that a typo fails immediately instead of silently skipping both.
VALID_TRAINING_MODES = ("image", "textual")

training_mode = "image"

if training_mode not in VALID_TRAINING_MODES:
    raise ValueError(
        f"Unknown training_mode {training_mode!r}; expected one of {VALID_TRAINING_MODES}"
    )


In [None]:

# ==============================
# III. Image Data (BUSI dataset)
# ==============================
# Expects `train_data/<class name>/<image file>`; the folder name is the label.
# Produces: image_size, batch_size, training_data, train_df, val_df,
# train_img_gen, val_img_gen, num_img_classes.
train_data = "Dataset_BUSI_with_GT"

image_size = (256, 256)
batch_size = 16

# Sorted so the seeded split below is reproducible: glob order depends on the
# filesystem. Files whose name contains "_mask" are skipped — presumably these
# are the ground-truth segmentation masks shipped with the "with_GT" dataset,
# which are not ultrasound images (verify against the dataset folder).
train_files = sorted(
    path
    for path in glob.glob(os.path.join(train_data, "*", "*"))
    if os.path.isfile(path) and "_mask" not in os.path.basename(path)
)
if not train_files:
    raise FileNotFoundError(
        f"No image files found under {train_data!r}; expected <class>/<image> sub-folders."
    )

# basename(dirname(...)) works with both "/" and "\\" path separators.
labels = [os.path.basename(os.path.dirname(path)) for path in train_files]
training_data = pd.DataFrame({"Path": train_files, "Label": labels})

# Stratify so both splits keep the class proportions. scikit-learn requires at
# least two samples per class for stratification, so fall back to a plain
# split otherwise.
label_counts = training_data["Label"].value_counts()
stratify_on = training_data["Label"] if label_counts.min() >= 2 else None
train_df, val_df = train_test_split(
    training_data, train_size=0.8, shuffle=True, random_state=123,
    stratify=stratify_on,
)

# Fix the class order explicitly so the train and validation generators use
# the same label -> index mapping even if a class is missing from one split.
class_names = sorted(training_data["Label"].unique())

# NOTE(review): Keras documents that DenseNet ImageNet weights expect
# tf.keras.applications.densenet.preprocess_input; plain 1/255 rescaling omits
# the mean/std normalisation. Consider switching — left unchanged here.
datagen = ImageDataGenerator(rescale=1.0/255)

train_img_gen = datagen.flow_from_dataframe(
    train_df, x_col="Path", y_col="Label",
    target_size=image_size, class_mode="categorical",
    classes=class_names,
    shuffle=True, batch_size=batch_size
)
# Validation data does not need shuffling; a fixed order keeps predictions
# aligned with `val_df` / `val_img_gen.classes`.
val_img_gen = datagen.flow_from_dataframe(
    val_df, x_col="Path", y_col="Label",
    target_size=image_size, class_mode="categorical",
    classes=class_names,
    shuffle=False, batch_size=batch_size
)

num_img_classes = len(train_img_gen.class_indices)


In [35]:

# ==============================
# IV. Textual Data (sklearn breast_cancer)
# ==============================
# Produces: dataset, X_tab (scaled full matrix), Y_tab, scaler,
# X_tab_train / X_tab_val, Y_tab_train / Y_tab_val, num_tab_features.
dataset = load_breast_cancer()
X_tab_raw, Y_tab = dataset.data, dataset.target

# Split BEFORE scaling: fitting the scaler on the full matrix would leak the
# validation rows' mean/variance into the training features.
X_tab_train_raw, X_tab_val_raw, Y_tab_train, Y_tab_val = train_test_split(
    X_tab_raw, Y_tab, test_size=0.2, random_state=42, stratify=Y_tab
)

scaler = StandardScaler()
X_tab_train = scaler.fit_transform(X_tab_train_raw)  # statistics from train only
X_tab_val = scaler.transform(X_tab_val_raw)          # reuse train statistics

# Kept for backward compatibility: the full feature matrix, scaled with the
# train-fitted scaler.
X_tab = scaler.transform(X_tab_raw)

num_tab_features = X_tab_train.shape[1]


In [None]:

# ==============================
# V. Build Multi-Task Model
# ==============================
# Two branches packaged in a single Model:
#   * image branch:   frozen DenseNet121 backbone -> Dense(512) -> softmax
#   * tabular branch: Dense(64) -> Dense(32) -> sigmoid
# Requires `training_mode` (II), `image_size` / `num_img_classes` (III) and
# `num_tab_features` (IV).

if training_mode not in ("image", "textual"):
    raise ValueError(
        f"Unknown training_mode {training_mode!r}; expected 'image' or 'textual'"
    )

# Image branch
# Input shape follows `image_size` so it cannot drift from the generators.
image_input = Input(shape=(*image_size, 3), name="image_input")
base_model = DenseNet121(weights="imagenet", include_top=False, input_tensor=image_input)
# Freeze the whole pretrained backbone; only the new head is trained.
base_model.trainable = False
x_img = Flatten()(base_model.output)
x_img = Dense(512, activation="relu")(x_img)
x_img = Dropout(0.5)(x_img)
img_output = Dense(num_img_classes, activation="softmax", name="img_output")(x_img)

# Tabular branch
tabular_input = Input(shape=(num_tab_features,), name="tabular_input")
x_tab = Dense(64, activation="relu")(tabular_input)
x_tab = Dropout(0.3)(x_tab)
x_tab = Dense(32, activation="relu")(x_tab)
txt_output = Dense(1, activation="sigmoid", name="txt_output")(x_tab)

# The training section feeds all-zero dummy inputs/targets to the branch that
# is NOT being trained. Give that branch a loss weight of 0 so the dummy
# targets neither update its weights nor inflate the reported total loss.
task_loss_weights = {
    "img_output": 1.0 if training_mode == "image" else 0.0,
    "txt_output": 1.0 if training_mode == "textual" else 0.0,
}

# Multi-task model
multi_model = Model(inputs=[image_input, tabular_input], outputs=[img_output, txt_output])
multi_model.compile(
    optimizer="adam",
    loss={"img_output": "categorical_crossentropy", "txt_output": "binary_crossentropy"},
    loss_weights=task_loss_weights,
    metrics={"img_output": "accuracy", "txt_output": "accuracy"}
)
multi_model.summary()


In [None]:

# ==============================
# VI. Training
# ==============================
# Trains `multi_model` on one branch. The other branch receives all-zero
# placeholder inputs and targets so the two-input / two-output signature is
# satisfied.


def _collect_batches(generator):
    """Materialise an indexable batch generator as `(inputs, targets)` arrays.

    Each batch is fetched exactly once, so every image is decoded a single
    time and inputs/targets are guaranteed to come from the same batch.
    """
    batches = [generator[i] for i in range(len(generator))]
    inputs = np.concatenate([batch[0] for batch in batches], axis=0)
    targets = np.concatenate([batch[1] for batch in batches], axis=0)
    return inputs, targets


def _placeholder(n_rows, *feature_shape):
    """All-zero float32 array of shape `(n_rows, *feature_shape)`.

    float32 halves the memory of NumPy's default float64, which matters for
    the dummy image tensors.
    """
    return np.zeros((n_rows, *feature_shape), dtype="float32")


if training_mode == "image":
    X_img_train, Y_img_train = _collect_batches(train_img_gen)
    X_img_val, Y_img_val = _collect_batches(val_img_gen)

    history = multi_model.fit(
        [X_img_train, _placeholder(len(X_img_train), num_tab_features)],
        {"img_output": Y_img_train, "txt_output": _placeholder(len(X_img_train), 1)},
        validation_data=(
            [X_img_val, _placeholder(len(X_img_val), num_tab_features)],
            {"img_output": Y_img_val, "txt_output": _placeholder(len(X_img_val), 1)}
        ),
        epochs=10, batch_size=batch_size
    )

elif training_mode == "textual":
    # Take the image shape from the model's own input so the placeholder
    # always matches what the model was built with.
    img_shape = tuple(image_input.shape[1:])

    history = multi_model.fit(
        [_placeholder(len(X_tab_train), *img_shape), X_tab_train],
        {"img_output": _placeholder(len(X_tab_train), num_img_classes), "txt_output": Y_tab_train},
        validation_data=(
            [_placeholder(len(X_tab_val), *img_shape), X_tab_val],
            {"img_output": _placeholder(len(X_tab_val), num_img_classes), "txt_output": Y_tab_val}
        ),
        epochs=30, batch_size=batch_size
    )

else:
    # Without this branch a misspelt mode would skip training silently and
    # leave `history` undefined.
    raise ValueError(
        f"Unknown training_mode {training_mode!r}; expected 'image' or 'textual'"
    )


In [None]:

# ==============================
# VII. Evaluation
# ==============================
# Plots a confusion matrix and prints a classification report for the branch
# selected by `training_mode`, using the validation split.


def _show_classification_results(y_true, y_pred, class_names, plot_title, report_heading):
    """Plot a labelled confusion matrix and print a classification report.

    `class_names[i]` must be the display name of integer label `i`. The label
    list is passed explicitly so the matrix keeps one row/column per class
    even when a class is absent from the validation split.
    """
    label_ids = list(range(len(class_names)))

    cm = confusion_matrix(y_true, y_pred, labels=label_ids)
    fig, ax = plt.subplots(figsize=(6, 5))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
                xticklabels=class_names, yticklabels=class_names, ax=ax)
    ax.set_xlabel("Predicted")
    ax.set_ylabel("Actual")
    ax.set_title(plot_title)
    plt.show()

    print(report_heading)
    print(classification_report(y_true, y_pred, labels=label_ids,
                                target_names=class_names, digits=4))


if training_mode == "image":
    preds = multi_model.predict(
        [X_img_val, np.zeros((len(X_img_val), num_tab_features), dtype="float32")]
    )
    y_true = np.argmax(Y_img_val, axis=1)
    y_pred = np.argmax(preds[0], axis=1)  # preds[0] is the img_output head

    # Order class names by their integer index rather than relying on dict order.
    class_to_index = train_img_gen.class_indices
    img_class_names = sorted(class_to_index, key=class_to_index.get)

    _show_classification_results(
        y_true, y_pred, img_class_names,
        "Image Confusion Matrix", "\nClassification Report (Images):"
    )

elif training_mode == "textual":
    img_shape = tuple(image_input.shape[1:])
    preds = multi_model.predict(
        [np.zeros((len(X_tab_val), *img_shape), dtype="float32"), X_tab_val]
    )
    y_true = Y_tab_val
    # preds[1] is the txt_output head with shape (n, 1); flatten to 1-D labels.
    y_pred = (preds[1] > 0.5).astype(int).ravel()

    # sklearn's breast_cancer encodes 0 = malignant, 1 = benign. Take the names
    # from the dataset so the axis labels cannot be swapped.
    tab_class_names = [str(name).capitalize() for name in dataset.target_names]

    _show_classification_results(
        y_true, y_pred, tab_class_names,
        "Textual Confusion Matrix", "\nClassification Report (Textual):"
    )

else:
    raise ValueError(
        f"Unknown training_mode {training_mode!r}; expected 'image' or 'textual'"
    )
