<a href="https://colab.research.google.com/github/samiha-mahin/Ovarian-cancer/blob/main/Multimodal_VGG19.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install kaggle



In [2]:
from google.colab import files
files.upload()

Saving kaggle.json to kaggle.json


{'kaggle.json': b'{"username":"samihamuntahamahin","key":"1ad5aaba9143fbd38da418ad8d278398"}'}

In [3]:
!mkdir -p ~/.kaggle
!mv kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json

In [4]:
!kaggle datasets list -s "ovarian cancer"

ref                                                             title                                                      size  lastUpdated                 downloadCount  voteCount  usabilityRating  
--------------------------------------------------------------  --------------------------------------------------  -----------  --------------------------  -------------  ---------  ---------------  
saurabhshahane/predict-ovarian-cancer                           Predict Ovarian Cancer                                   322564  2021-02-06 08:15:47.360000           2920         47  0.7058824        
yoshifumimiya/6-ovarian-cancer-datasets                         7 ovarian cancer datasets                             239456735  2024-04-21 13:37:49.933000           3571         49  0.7647059        
sunilthite/ovarian-cancer-classification-dataset                Ovarian Cancer Subtype Classification                3554471607  2023-10-31 08:34:17.390000           1652         39  1.0          

In [5]:
!kaggle datasets download -d bitsnpieces/ovarian-cancer-and-subtypes-dataset-histopathology --unzip

Dataset URL: https://www.kaggle.com/datasets/bitsnpieces/ovarian-cancer-and-subtypes-dataset-histopathology
License(s): CC-BY-SA-4.0


In [None]:
import os
import cv2
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split
from tensorflow.keras.applications import VGG19
from tensorflow.keras.layers import GlobalAveragePooling2D, Dense, Dropout, BatchNormalization
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ReduceLROnPlateau, ModelCheckpoint
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.utils import to_categorical
from sklearn.preprocessing import StandardScaler

# --------------------------
# Define Dataset Paths and Classes
# --------------------------
IMG_SIZE = 224
DATASET_PATH = "OvarianCancer"  # Image Dataset path
classes = ["Mucinous", "Non_Cancerous", "Endometri", "Serous", "Clear_Cell"]
class_mapping = {class_name: i for i, class_name in enumerate(classes)}

# --------------------------
# Load Image Data
# --------------------------
X_images, y_labels = [], []

for class_name in classes:
    class_path = os.path.join(DATASET_PATH, class_name)

    if not os.path.exists(class_path):
        continue  # Skip if folder doesn't exist

    for img_name in os.listdir(class_path):
        img_path = os.path.join(class_path, img_name)
        img = cv2.imread(img_path)
        if img is None:
            continue  # Skip unreadable images

        img = cv2.resize(img, (IMG_SIZE, IMG_SIZE))
        img = tf.keras.applications.vgg19.preprocess_input(img)  # Preprocess for VGG19
        X_images.append(img)
        y_labels.append(class_mapping[class_name])  # Assign label

X_images = np.array(X_images)
y_labels = to_categorical(y_labels, num_classes=len(classes))  # One-hot encode labels

# --------------------------
# Load and Preprocess Tabular Data
# --------------------------
df = pd.read_csv("Ovarian_patient_data.csv")  # Replace with your actual CSV file path
df.columns = df.columns.str.strip().str.replace(" ", "_").str.lower()
features = ['age', 'ca125', 'cancerstage', 'bmi']  # Use relevant features
df = df.dropna(subset=features).reset_index(drop=True)
df = df.iloc[:len(X_images)]  # Align tabular data with image data

# Normalize the tabular data
scaler = StandardScaler()
X_tabular = scaler.fit_transform(df[features].values)

# --------------------------
# Train-test split
# --------------------------
X_train_img, X_test_img, X_train_tab, X_test_tab, y_train, y_test = train_test_split(
    X_images, X_tabular, y_labels, test_size=0.2, random_state=42
)

# --------------------------
# Define the Image Model (VGG19)
# --------------------------
base_model = VGG19(weights="imagenet", include_top=False, input_shape=(IMG_SIZE, IMG_SIZE, 3))

# Freeze all layers of VGG19
for layer in base_model.layers:
    layer.trainable = False

# Add custom layers on top of VGG19
x = GlobalAveragePooling2D()(base_model.output)
x = BatchNormalization()(x)
x = Dropout(0.5)(x)
x = Dense(256, activation="relu")(x)
x = Dropout(0.5)(x)

# --------------------------
# Define the Tabular Model
# --------------------------
tab_input = tf.keras.Input(shape=(X_tabular.shape[1],), name="tabular_input")
y = Dense(64, activation="relu")(tab_input)
y = Dense(32, activation="relu")(y)

# --------------------------
# Combine Image and Tabular Models
# --------------------------
combined = tf.keras.layers.concatenate([x, y])
z = Dense(64, activation="relu")(combined)
output = Dense(len(classes), activation="softmax")(z)

# --------------------------
# Compile the Model
# --------------------------
model = Model(inputs=[base_model.input, tab_input], outputs=output)

model.compile(optimizer=Adam(learning_rate=1e-4),
              loss="categorical_crossentropy",
              metrics=["accuracy"])

# --------------------------
# Callbacks
# --------------------------
lr_scheduler = ReduceLROnPlateau(monitor="val_accuracy", factor=0.5, patience=5, verbose=1)
checkpoint = ModelCheckpoint("best_model_multimodal.h5", monitor="val_accuracy", save_best_only=True, verbose=1)

# --------------------------
# Data Augmentation (For Image Data)
# --------------------------
datagen = ImageDataGenerator(
    rotation_range=20,
    zoom_range=0.2,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.1,
    horizontal_flip=True
)

# Train the model
history = model.fit(
    [X_train_img, X_train_tab],
    y_train,
    validation_data=([X_test_img, X_test_tab], y_test),
    epochs=20,  # Adjust the number of epochs
    batch_size=8,  # Batch size (adjust as needed)
    verbose=1,
    callbacks=[lr_scheduler, checkpoint]
)

# --------------------------
# Evaluate the Model
# --------------------------
model.load_weights("best_model_multimodal.h5")
loss, acc = model.evaluate([X_test_img, X_test_tab], y_test)
print(f"\n✅ Best Multimodal Accuracy: {acc * 100:.2f}%")

# --------------------------
# Plot accuracy and loss
# --------------------------
import matplotlib.pyplot as plt

plt.figure(figsize=(12, 5))

# Accuracy plot
plt.subplot(1, 2, 1)
plt.plot(history.history["accuracy"], label="Train Accuracy")
plt.plot(history.history["val_accuracy"], label="Val Accuracy")
plt.title("Accuracy over Epochs")
plt.xlabel("Epochs")
plt.ylabel("Accuracy")
plt.legend()

# Loss plot
plt.subplot(1, 2, 2)
plt.plot(history.history["loss"], label="Train Loss")
plt.plot(history.history["val_loss"], label="Val Loss")
plt.title("Loss over Epochs")
plt.xlabel("Epochs")
plt.ylabel("Loss")
plt.legend()

plt.tight_layout()
plt.show()


Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg19/vgg19_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m80134624/80134624[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 0us/step
Epoch 1/20
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6s/step - accuracy: 0.1584 - loss: 2.5411
Epoch 1: val_accuracy improved from -inf to 0.20000, saving model to best_model_multimodal.h5




[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m390s[0m 8s/step - accuracy: 0.1591 - loss: 2.5383 - val_accuracy: 0.2000 - val_loss: 2.1225 - learning_rate: 1.0000e-04
Epoch 2/20
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6s/step - accuracy: 0.2498 - loss: 2.1028
Epoch 2: val_accuracy did not improve from 0.20000
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m427s[0m 7s/step - accuracy: 0.2500 - loss: 2.1015 - val_accuracy: 0.2000 - val_loss: 1.8439 - learning_rate: 1.0000e-04
Epoch 3/20
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6s/step - accuracy: 0.2391 - loss: 2.0450
Epoch 3: val_accuracy improved from 0.20000 to 0.27000, saving model to best_model_multimodal.h5




[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m396s[0m 8s/step - accuracy: 0.2400 - loss: 2.0417 - val_accuracy: 0.2700 - val_loss: 1.7133 - learning_rate: 1.0000e-04
Epoch 4/20
[1m12/50[0m [32m━━━━[0m[37m━━━━━━━━━━━━━━━━[0m [1m3:49[0m 6s/step - accuracy: 0.2664 - loss: 1.9612