<a href="https://colab.research.google.com/github/samiha-mahin/Ovarian-cancer/blob/main/HT_VGG19.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install kaggle tensorflow numpy pandas opencv-python matplotlib scikit-learn




In [2]:
from google.colab import files
files.upload()  # Upload your kaggle.json

Saving kaggle.json to kaggle.json


{'kaggle.json': b'{"username":"samihamuntahamahin","key":"1ad5aaba9143fbd38da418ad8d278398"}'}

In [3]:
!mkdir -p ~/.kaggle
!mv kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json

In [5]:
!kaggle datasets download -d bitsnpieces/ovarian-cancer-and-subtypes-dataset-histopathology --unzip

Dataset URL: https://www.kaggle.com/datasets/bitsnpieces/ovarian-cancer-and-subtypes-dataset-histopathology
License(s): CC-BY-SA-4.0


In [None]:
# ✅ Imports
import os
import cv2
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
from tensorflow.keras.applications import VGG19
from tensorflow.keras.layers import GlobalAveragePooling2D, Dense, Dropout, BatchNormalization
from tensorflow.keras.models import Model
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint
from tensorflow.keras.applications.vgg19 import preprocess_input
from sklearn.model_selection import train_test_split

# ✅ Dataset setup
DATASET_PATH = "OvarianCancer"
classes = ["Mucinous", "Non_Cancerous", "Endometri", "Serous", "Clear_Cell"]
class_mapping = {name: i for i, name in enumerate(classes)}
IMG_SIZE = 224

X, y = [], []

# ✅ Load & preprocess images
for class_name in classes:
    folder = os.path.join(DATASET_PATH, class_name)
    if not os.path.exists(folder): continue
    for image_name in os.listdir(folder):
        image_path = os.path.join(folder, image_name)
        img = cv2.imread(image_path)
        if img is None: continue
        img = cv2.resize(img, (IMG_SIZE, IMG_SIZE))
        img = preprocess_input(img)
        X.append(img)
        y.append(class_mapping[class_name])

X = np.array(X)
y = to_categorical(y, num_classes=len(classes))
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
print(f"✅ Loaded {len(X)} images.")

# ✅ Load VGG19 base
base_model = VGG19(weights="imagenet", include_top=False, input_shape=(IMG_SIZE, IMG_SIZE, 3))
for layer in base_model.layers:
    layer.trainable = False  # Freeze all layers

# ✅ Add custom top layers
x = GlobalAveragePooling2D()(base_model.output)
x = BatchNormalization()(x)
x = Dropout(0.5)(x)
x = Dense(256, activation="relu")(x)
x = Dropout(0.4)(x)
output = Dense(len(classes), activation="softmax")(x)

model = Model(inputs=base_model.input, outputs=output)

# ✅ Compile model
optimizer = Adam(learning_rate=0.0001)
model.compile(optimizer=optimizer, loss="categorical_crossentropy", metrics=["accuracy"])

# ✅ Data augmentation
datagen = ImageDataGenerator(
    rotation_range=30,
    width_shift_range=0.2,
    height_shift_range=0.2,
    zoom_range=0.3,
    shear_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest'
)
datagen.fit(X_train)

# ✅ Callbacks
early_stop = EarlyStopping(monitor="val_accuracy", patience=3, restore_best_weights=True, verbose=1)
reduce_lr = ReduceLROnPlateau(monitor="val_accuracy", factor=0.5, patience=2, verbose=1)
checkpoint = ModelCheckpoint("best_model.h5", monitor="val_accuracy", save_best_only=True, verbose=1)

# ✅ Train model
history = model.fit(
    datagen.flow(X_train, y_train, batch_size=16),
    validation_data=(X_test, y_test),
    epochs=30,
    callbacks=[early_stop, reduce_lr, checkpoint],
    verbose=1
)


✅ Loaded 497 images.
Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg19/vgg19_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m80134624/80134624[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


  self._warn_if_super_not_called()


Epoch 1/30
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12s/step - accuracy: 0.1944 - loss: 2.6598 
Epoch 1: val_accuracy improved from -inf to 0.21000, saving model to best_model.h5




[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m385s[0m 16s/step - accuracy: 0.1943 - loss: 2.6590 - val_accuracy: 0.2100 - val_loss: 2.1818 - learning_rate: 1.0000e-04
Epoch 2/30
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12s/step - accuracy: 0.2404 - loss: 2.6017 
Epoch 2: val_accuracy improved from 0.21000 to 0.27000, saving model to best_model.h5




[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m368s[0m 15s/step - accuracy: 0.2411 - loss: 2.5975 - val_accuracy: 0.2700 - val_loss: 1.8536 - learning_rate: 1.0000e-04
Epoch 3/30
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12s/step - accuracy: 0.2511 - loss: 2.3631 
Epoch 3: val_accuracy improved from 0.27000 to 0.32000, saving model to best_model.h5




[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m384s[0m 15s/step - accuracy: 0.2505 - loss: 2.3655 - val_accuracy: 0.3200 - val_loss: 1.7035 - learning_rate: 1.0000e-04
Epoch 4/30
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12s/step - accuracy: 0.2904 - loss: 2.2661 
Epoch 4: val_accuracy improved from 0.32000 to 0.39000, saving model to best_model.h5




[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m385s[0m 15s/step - accuracy: 0.2892 - loss: 2.2674 - val_accuracy: 0.3900 - val_loss: 1.5940 - learning_rate: 1.0000e-04
Epoch 5/30
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12s/step - accuracy: 0.2957 - loss: 2.0998 
Epoch 5: val_accuracy improved from 0.39000 to 0.41000, saving model to best_model.h5




[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m375s[0m 15s/step - accuracy: 0.2959 - loss: 2.1020 - val_accuracy: 0.4100 - val_loss: 1.4995 - learning_rate: 1.0000e-04
Epoch 6/30
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12s/step - accuracy: 0.2949 - loss: 2.1175 
Epoch 6: val_accuracy improved from 0.41000 to 0.42000, saving model to best_model.h5




[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m384s[0m 16s/step - accuracy: 0.2955 - loss: 2.1158 - val_accuracy: 0.4200 - val_loss: 1.4203 - learning_rate: 1.0000e-04
Epoch 7/30
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12s/step - accuracy: 0.3126 - loss: 2.1078 
Epoch 7: val_accuracy improved from 0.42000 to 0.45000, saving model to best_model.h5




[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m370s[0m 15s/step - accuracy: 0.3140 - loss: 2.1071 - val_accuracy: 0.4500 - val_loss: 1.3563 - learning_rate: 1.0000e-04
Epoch 8/30
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12s/step - accuracy: 0.3504 - loss: 1.9461 
Epoch 8: val_accuracy improved from 0.45000 to 0.46000, saving model to best_model.h5




[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m379s[0m 15s/step - accuracy: 0.3503 - loss: 1.9464 - val_accuracy: 0.4600 - val_loss: 1.2936 - learning_rate: 1.0000e-04
Epoch 9/30
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12s/step - accuracy: 0.3690 - loss: 1.8865 
Epoch 9: val_accuracy did not improve from 0.46000
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m379s[0m 15s/step - accuracy: 0.3683 - loss: 1.8873 - val_accuracy: 0.4600 - val_loss: 1.2436 - learning_rate: 1.0000e-04
Epoch 10/30
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12s/step - accuracy: 0.4055 - loss: 1.7727 
Epoch 10: val_accuracy improved from 0.46000 to 0.47000, saving model to best_model.h5




[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m370s[0m 15s/step - accuracy: 0.4053 - loss: 1.7733 - val_accuracy: 0.4700 - val_loss: 1.1985 - learning_rate: 1.0000e-04
Epoch 11/30
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12s/step - accuracy: 0.4178 - loss: 1.6875 
Epoch 11: val_accuracy improved from 0.47000 to 0.49000, saving model to best_model.h5




[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m378s[0m 15s/step - accuracy: 0.4170 - loss: 1.6906 - val_accuracy: 0.4900 - val_loss: 1.1617 - learning_rate: 1.0000e-04
Epoch 12/30
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12s/step - accuracy: 0.3720 - loss: 1.7725 
Epoch 12: val_accuracy improved from 0.49000 to 0.56000, saving model to best_model.h5




[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m374s[0m 15s/step - accuracy: 0.3722 - loss: 1.7723 - val_accuracy: 0.5600 - val_loss: 1.1266 - learning_rate: 1.0000e-04
Epoch 13/30
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12s/step - accuracy: 0.4121 - loss: 1.8437 
Epoch 13: val_accuracy improved from 0.56000 to 0.58000, saving model to best_model.h5




[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m379s[0m 15s/step - accuracy: 0.4132 - loss: 1.8373 - val_accuracy: 0.5800 - val_loss: 1.0915 - learning_rate: 1.0000e-04
Epoch 14/30
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12s/step - accuracy: 0.4311 - loss: 1.5956 
Epoch 14: val_accuracy improved from 0.58000 to 0.59000, saving model to best_model.h5




[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m380s[0m 15s/step - accuracy: 0.4318 - loss: 1.5948 - val_accuracy: 0.5900 - val_loss: 1.0629 - learning_rate: 1.0000e-04
Epoch 15/30
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13s/step - accuracy: 0.5041 - loss: 1.5446 
Epoch 15: val_accuracy did not improve from 0.59000
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m468s[0m 19s/step - accuracy: 0.5029 - loss: 1.5470 - val_accuracy: 0.5800 - val_loss: 1.0412 - learning_rate: 1.0000e-04
Epoch 16/30
[1m14/25[0m [32m━━━━━━━━━━━[0m[37m━━━━━━━━━[0m [1m2:19[0m 13s/step - accuracy: 0.4786 - loss: 1.5358

In [None]:
# ✅ Evaluate model
loss, accuracy = model.evaluate(X_test, y_test, verbose=1)
print(f"🎯 Final Test Accuracy: {accuracy:.4f}")
print(f"💾 Final Test Loss: {loss:.4f}")

# ✅ Save model
model.save("vgg19_tuned_ovarian_model.h5")
print("✅ Model saved as 'vgg19_tuned_ovarian_model.h5'")


In [None]:
# ✅ Plot metrics
plt.figure(figsize=(12, 5))
plt.subplot(1, 2, 1)
plt.plot(history.history["accuracy"], label="Train Accuracy")
plt.plot(history.history["val_accuracy"], label="Val Accuracy")
plt.title("Accuracy over Epochs")
plt.xlabel("Epochs")
plt.ylabel("Accuracy")
plt.legend()

plt.subplot(1, 2, 2)
plt.plot(history.history["loss"], label="Train Loss")
plt.plot(history.history["val_loss"], label="Val Loss")
plt.title("Loss over Epochs")
plt.xlabel("Epochs")
plt.ylabel("Loss")
plt.legend()

plt.tight_layout()
plt.show()

In [None]:
import seaborn as sns
from sklearn.metrics import confusion_matrix, classification_report

# ✅ Plot Accuracy & Loss using Seaborn
plt.figure(figsize=(14, 6))

# Accuracy plot
plt.subplot(1, 2, 1)
sns.lineplot(x=range(len(history.history['accuracy'])), y=history.history["accuracy"], label="Train Acc")
sns.lineplot(x=range(len(history.history['val_accuracy'])), y=history.history["val_accuracy"], label="Val Acc")
plt.title(" Accuracy Over Epochs")
plt.xlabel("Epoch")
plt.ylabel("Accuracy")
plt.legend()

# Loss plot
plt.subplot(1, 2, 2)
sns.lineplot(x=range(len(history.history['loss'])), y=history.history["loss"], label="Train Loss")
sns.lineplot(x=range(len(history.history['val_loss'])), y=history.history["val_loss"], label="Val Loss")
plt.title(" Loss Over Epochs")
plt.xlabel("Epoch")
plt.ylabel("Loss")
plt.legend()

plt.tight_layout()
plt.show()

# ✅ Confusion Matrix
y_pred_probs = model.predict(X_test)
y_pred = np.argmax(y_pred_probs, axis=1)
y_true = np.argmax(y_test, axis=1)

cm = confusion_matrix(y_true, y_pred)
plt.figure(figsize=(8, 6))
sns.heatmap(cm, annot=True, fmt="d", cmap="Blues", xticklabels=classes, yticklabels=classes)
plt.xlabel("Predicted Labels")
plt.ylabel("True Labels")
plt.title("🧠 Confusion Matrix")
plt.show()

# ✅ Optional: Print classification report
print("🔍 Classification Report:")
print(classification_report(y_true, y_pred, target_names=classes))
