<a href="https://colab.research.google.com/github/samiha-mahin/Ovarian-cancer/blob/main/ResNet.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install kaggle tensorflow numpy pandas opencv-python matplotlib scikit-learn




In [2]:
from google.colab import files
files.upload()  # Upload your kaggle.json file

Saving kaggle.json to kaggle.json


{'kaggle.json': b'{"username":"samihamuntahamahin","key":"1ad5aaba9143fbd38da418ad8d278398"}'}

In [3]:
!mkdir -p ~/.kaggle
!mv kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json

In [4]:
!kaggle datasets download -d bitsnpieces/ovarian-cancer-and-subtypes-dataset-histopathology --unzip


Dataset URL: https://www.kaggle.com/datasets/bitsnpieces/ovarian-cancer-and-subtypes-dataset-histopathology
License(s): CC-BY-SA-4.0


In [5]:
import os

# List all files in the directory
os.listdir()

['.config', 'OvarianCancer', 'sample_data']

In [6]:
import os

# Check files inside the OvarianCancer folder
os.listdir("OvarianCancer")


['Non_Cancerous',
 'Clear_Cell',
 'Endometri',
 'README.txt',
 'Mucinous',
 'Serous']

In [7]:
import os
import cv2
import numpy as np
import tensorflow as tf
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.models import Model
from tensorflow.keras.layers import GlobalAveragePooling2D, Dense, Dropout, BatchNormalization
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.utils import to_categorical



In [8]:
# Define the dataset path
DATASET_PATH = "OvarianCancer"  # Folder name of unzipped dataset

# Cancer classes (based on folder names)
classes = ["Mucinous", "Non_Cancerous", "Endometri", "Serous", "Clear_Cell"]
class_mapping = {class_name: i for i, class_name in enumerate(classes)}

# Image size for ResNet50
IMG_SIZE = 224

# Lists to store images and labels
X, y = [], []

# Load and preprocess images
for class_name in classes:
    class_path = os.path.join(DATASET_PATH, class_name)

    if not os.path.exists(class_path):
        continue  # Skip if folder doesn't exist

    for img_name in os.listdir(class_path):
        img_path = os.path.join(class_path, img_name)

        img = cv2.imread(img_path)
        if img is None:
            continue  # Skip unreadable images

        img = cv2.resize(img, (IMG_SIZE, IMG_SIZE))  # Resize to fit ResNet50 input
        img = tf.keras.applications.resnet50.preprocess_input(img)  # ResNet50 preprocessing
        X.append(img)
        y.append(class_mapping[class_name])  # Assign label

# Convert lists to NumPy arrays
X = np.array(X)
y = to_categorical(y, num_classes=len(classes))  # One-hot encode labels

# Split dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

print(f"Dataset loaded: {len(X)} images")


Dataset loaded: 497 images


In [9]:
# Load pre-trained ResNet50 model (exclude the top layers)
base_model = ResNet50(weights="imagenet", include_top=False, input_shape=(IMG_SIZE, IMG_SIZE, 3))

# Unfreeze last few layers for fine-tuning
for layer in base_model.layers[-5:]:
    layer.trainable = True

# Add custom layers for our dataset
x = GlobalAveragePooling2D()(base_model.output)
x = BatchNormalization()(x)
x = Dropout(0.5)(x)  # Prevent overfitting
x = Dense(256, activation="relu")(x)
x = Dropout(0.5)(x)
output_layer = Dense(len(classes), activation="softmax")(x)

# Create model by connecting base model and custom layers
model = Model(inputs=base_model.input, outputs=output_layer)

# Compile the model
model.compile(optimizer="adam", loss="categorical_crossentropy", metrics=["accuracy"])


Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m94765736/94765736[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 0us/step


In [10]:
# Data Augmentation
datagen = ImageDataGenerator(
    rotation_range=30,
    zoom_range=0.2,
    width_shift_range=0.2,
    height_shift_range=0.2,
    horizontal_flip=True
)

# Apply augmentation to training data
train_generator = datagen.flow(X_train, y_train, batch_size=16)


In [None]:
# Train the model
history = model.fit(train_generator, validation_data=(X_test, y_test), epochs=20)

# Evaluate model performance on test set
loss, accuracy = model.evaluate(X_test, y_test)
print(f"Test Accuracy: {accuracy:.2f}")
print(f"Test Loss: {loss:.4f}")


  self._warn_if_super_not_called()


Epoch 1/20
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m338s[0m 12s/step - accuracy: 0.3467 - loss: 3.0227 - val_accuracy: 0.1600 - val_loss: 256080.9531
Epoch 2/20
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m284s[0m 11s/step - accuracy: 0.4049 - loss: 2.7939 - val_accuracy: 0.2700 - val_loss: 90604.9062
Epoch 3/20
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m287s[0m 12s/step - accuracy: 0.3875 - loss: 3.3132 - val_accuracy: 0.1000 - val_loss: 13124.0273
Epoch 4/20
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m284s[0m 11s/step - accuracy: 0.3807 - loss: 4.0748 - val_accuracy: 0.2700 - val_loss: 22643.7402
Epoch 5/20
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11s/step - accuracy: 0.4588 - loss: 3.1477 