# Model Training

### Import Libraries & Load Metadata

In [8]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from pathlib import Path
import cv2
from tensorflow.keras.utils import to_categorical

In [12]:
meta_path = "../data/cibs-ddsm/metadata/meta.csv"
df = pd.read_csv(meta_path)

### Load Images into Memory

In [13]:
def load_images(df, size=(224, 224)):
    X, y = [], []
    for _, row in df.iterrows():
        img = cv2.imread(row["processed_path"], cv2.IMREAD_GRAYSCALE)
        if img is not None:
            X.append(img)
            y.append(row["label"])
    X = np.array(X)[..., np.newaxis] / 255.0  # Normalize to 0–1
    y = np.array(y)
    return X, y

### Split dataset

In [14]:
train_df, test_df = train_test_split(df, test_size=0.2, stratify=df["label"], random_state=42)
X_train, y_train = load_images(train_df)
X_test, y_test = load_images(test_df)

### Build CNN Model with Augmentation

In [16]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import (Input, Conv2D, MaxPooling2D, Flatten,
                                     Dense, Dropout, RandomFlip,
                                     RandomRotation, RandomZoom)

model = Sequential([
    Input(shape=(224, 224, 1)),  
    RandomFlip("horizontal"),
    RandomRotation(0.1),
    RandomZoom(0.1),
    Conv2D(32, (3, 3), activation="relu"),
    MaxPooling2D(),
    Conv2D(64, (3, 3), activation="relu"),
    MaxPooling2D(),
    Flatten(),
    Dense(128, activation="relu"),
    Dropout(0.5),
    Dense(1, activation="sigmoid")
])

In [17]:
model.compile(
    optimizer="adam",
    loss="binary_crossentropy",
    metrics=["accuracy"]
)

In [18]:
history = model.fit(
    X_train, y_train,
    validation_split=0.2,
    epochs=10,
    batch_size=32
)

Epoch 1/10
[1m72/72[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 222ms/step - accuracy: 0.5420 - loss: 0.8956 - val_accuracy: 0.5849 - val_loss: 0.7625
Epoch 2/10
[1m72/72[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 217ms/step - accuracy: 0.5842 - loss: 0.6943 - val_accuracy: 0.5849 - val_loss: 0.6794
Epoch 3/10
[1m72/72[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 220ms/step - accuracy: 0.6019 - loss: 0.6769 - val_accuracy: 0.5849 - val_loss: 0.6811
Epoch 4/10
[1m72/72[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 218ms/step - accuracy: 0.5940 - loss: 0.6795 - val_accuracy: 0.5849 - val_loss: 0.6791
Epoch 5/10
[1m72/72[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 220ms/step - accuracy: 0.5970 - loss: 0.6765 - val_accuracy: 0.5849 - val_loss: 0.6783
Epoch 6/10
[1m72/72[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 218ms/step - accuracy: 0.6002 - loss: 0.6745 - val_accuracy: 0.5849 - val_loss: 0.6791
Epoch 7/10
[1m72/72[

In [19]:
test_loss, test_acc = model.evaluate(X_test, y_test)
print(f"Test Accuracy: {test_acc:.4f}")
model.save("../results/model_weights/final_model.h5")

[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 47ms/step - accuracy: 0.5922 - loss: 0.6780




Test Accuracy: 0.5910


In [20]:
from sklearn.metrics import classification_report, roc_auc_score
import numpy as np

# Get predicted probabilities
y_probs = model.predict(X_test)

# Convert probabilities to binary predictions
y_pred = (y_probs > 0.5).astype(int)

# 1. Classification report: precision, recall, f1-score, accuracy
print(classification_report(y_test, y_pred, target_names=["Benign", "Malignant"]))

# 2. ROC-AUC score (based on raw probabilities, not thresholded)
auc = roc_auc_score(y_test, y_probs)
print(f"ROC AUC Score: {auc:.4f}")

[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 47ms/step
              precision    recall  f1-score   support

      Benign       0.59      1.00      0.74       422
   Malignant       0.00      0.00      0.00       292

    accuracy                           0.59       714
   macro avg       0.30      0.50      0.37       714
weighted avg       0.35      0.59      0.44       714

ROC AUC Score: 0.5360


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
