# MRI Brain Tumor Classification with 5-Fold Cross-Validation (CNN)
Ova bilježnica prikazuje treniranje konvolucijske neuronske mreže (CNN) za detekciju tumora mozga na MRI slikama koristeći 5-fold stratificiranu cross-validaciju. Svi ostali koraci (arhitektura, evaluacija) ostaju isti kao u originalnoj bilježnici.

In [None]:
import os
import numpy as np
import matplotlib.pyplot as plt
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.optimizers import Adam
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report, roc_curve, auc, f1_score, precision_score
import seaborn as sns
import tensorflow as tf

In [None]:
# Load every image and its label from the training directory.
from tensorflow.keras.utils import image_dataset_from_directory

all_data = image_dataset_from_directory(
    "../data/BinaryBrainTumorDataset/Training",
    label_mode="binary",
    color_mode="rgb",
    image_size=(224, 224),
    batch_size=None,  # unbatched: yields individual images instead of batches
    shuffle=False,
)

# Convert each (image, label) tensor pair to NumPy, then assemble one stacked
# image array and one integer label array.
samples = [(image.numpy(), target.numpy()) for image, target in all_data]
all_images = np.stack([image for image, _ in samples])
all_labels = np.array([target for _, target in samples]).astype(int)
del samples  # only the stacked arrays are needed from here on

In [None]:
# 5-fold stratified cross-validation
def _build_cnn(input_shape=(224, 224, 3), learning_rate=0.0005):
    """Return a freshly initialised, compiled CNN for binary classification.

    Architecture: two Conv2D + MaxPooling2D stages (32 and 64 filters), a
    128-unit dense layer with 50 % dropout, and one sigmoid output unit.
    Compiled with Adam and binary cross-entropy, tracking accuracy.
    """
    network = Sequential([
        Conv2D(32, (3, 3), activation='relu', input_shape=input_shape),
        MaxPooling2D(2, 2),
        Conv2D(64, (3, 3), activation='relu'),
        MaxPooling2D(2, 2),
        Flatten(),
        Dense(128, activation='relu'),
        Dropout(0.5),
        Dense(1, activation='sigmoid')
    ])
    network.compile(
        optimizer=Adam(learning_rate=learning_rate),
        loss='binary_crossentropy',
        metrics=['accuracy'],
    )
    return network


skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
fold_accuracies = []  # final validation accuracy of each fold
fold_losses = []      # final validation loss of each fold
fold_histories = []   # Keras History object of each fold
fold_reports = []     # classification_report dict of each fold

for fold, (train_idx, val_idx) in enumerate(skf.split(all_images, all_labels)):
    print(f"\n--- Fold {fold+1} ---")

    # A new model is created on every iteration; reset Keras' global state so
    # it does not keep accumulating across folds.
    keras.backend.clear_session()

    X_train, X_val = all_images[train_idx], all_images[val_idx]
    y_train, y_val = all_labels[train_idx], all_labels[val_idx]

    # Normalisation: scale pixel values by 1/255
    X_train = X_train / 255.0
    X_val = X_val / 255.0

    # Fresh, untrained model for this fold
    model = _build_cnn()

    history = model.fit(
        X_train, y_train,
        epochs=30,
        batch_size=32,
        validation_data=(X_val, y_val),
        verbose=1
    )
    fold_histories.append(history)

    val_loss, val_acc = model.evaluate(X_val, y_val, verbose=0)
    fold_accuracies.append(val_acc)
    fold_losses.append(val_loss)

    # Predictions and report: threshold the sigmoid output at 0.5
    y_pred_proba = model.predict(X_val).flatten()
    y_pred = (y_pred_proba > 0.5).astype(int)
    # scikit-learn expects 1-D targets; ravel is a no-op if y_val already is
    # 1-D and flattens it if the loader produced a column of shape (N, 1).
    report = classification_report(np.ravel(y_val), y_pred, output_dict=True)
    fold_reports.append(report)
    print(f"Fold {fold+1} - Validation Accuracy: {val_acc:.4f}, Loss: {val_loss:.4f}")

## Rezultati 5-fold cross-validacije

In [None]:
# Mean validation accuracy and loss over all folds
mean_accuracy = np.mean(fold_accuracies)
mean_loss = np.mean(fold_losses)
print(f"Prosječna točnost (accuracy) kroz 5 foldova: {mean_accuracy:.4f}")
print(f"Prosječni gubitak (loss) kroz 5 foldova: {mean_loss:.4f}")

# Per-fold breakdown, numbered from 1
per_fold_metrics = zip(fold_accuracies, fold_losses)
for fold_number, (fold_acc, fold_loss) in enumerate(per_fold_metrics, start=1):
    print(f"Fold {fold_number}: Accuracy = {fold_acc:.4f}, Loss = {fold_loss:.4f}")

In [None]:
# Accuracy curves, ROC curve and confusion matrix for the last fold
from sklearn.metrics import roc_curve, auc

last_history = fold_histories[-1]
last_report = fold_reports[-1]

# train_idx / val_idx still hold the split from the final iteration of the
# cross-validation loop, so the last fold's validation set can be rebuilt from
# them. Pixel values are scaled by 1/255, the same scaling used for training.
X_val = all_images[val_idx] / 255.0
# scikit-learn expects 1-D targets; ravel is a no-op if the labels already are
# 1-D and flattens them if they are stored as a column of shape (N, 1).
y_val = np.ravel(all_labels[val_idx])

# The History object references the model it was recorded on, i.e. the trained
# last-fold network. Predict with it directly instead of rebuilding the
# architecture, running a throw-away training epoch and copying the weights.
model = last_history.model

# Predictions: threshold the sigmoid output at 0.5
y_pred_proba = model.predict(X_val).flatten()
y_pred = (y_pred_proba > 0.5).astype(int)

fpr, tpr, _ = roc_curve(y_val, y_pred_proba)
roc_auc = auc(fpr, tpr)
cm = confusion_matrix(y_val, y_pred)

fig, axes = plt.subplots(1, 3, figsize=(18, 5))
# Accuracy per epoch (the title says "Točnost" = accuracy; "Preciznost" would
# mean precision, which is not what is plotted)
axes[0].plot(last_history.history['accuracy'], label='Train Accuracy')
axes[0].plot(last_history.history['val_accuracy'], label='Validation Accuracy')
axes[0].set_xlabel('Epoch')
axes[0].set_ylabel('Accuracy')
axes[0].set_title('Točnost kroz epohe (zadnji fold)')
axes[0].legend()
axes[0].grid(True, alpha=0.3)
# ROC curve, with the diagonal as the random-classifier baseline
axes[1].plot(fpr, tpr, color='darkorange', lw=2, label=f'ROC (AUC = {roc_auc:.3f})')
axes[1].plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--', label='Random')
axes[1].set_xlabel('False Positive Rate')
axes[1].set_ylabel('True Positive Rate')
axes[1].set_title('ROC krivulja (zadnji fold)')
axes[1].legend(loc="lower right")
axes[1].grid(True, alpha=0.3)
# Confusion matrix
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', ax=axes[2])
axes[2].set_title('Matrica zabune (zadnji fold)')
axes[2].set_xlabel('Predviđeno')
axes[2].set_ylabel('Stvarno')
plt.tight_layout()
plt.show()

# NOTE(review): target_names assumes class index 0 is the no-tumor class and
# index 1 the tumor class — confirm against all_data.class_names.
print(classification_report(y_val, y_pred, target_names=["No Tumor", "Tumor"]))

**Napomena:** 5-fold cross-validation na dubokim modelima može biti računalno zahtjevna i trajati duže nego standardni train/validation pristup.