In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Lazily enumerate every file below the Kaggle input directory, then print each full path.
input_files = (
    os.path.join(root, name)
    for root, _, names in os.walk('/kaggle/input')
    for name in names
)
for file_path in input_files:
    print(file_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import cv2
import random
import tensorflow as tf
from tensorflow.keras.models import Model ,load_model
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, Dropout, GlobalAveragePooling2D
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from tensorflow.keras.applications import ResNet101
from tensorflow.keras.optimizers import Adamax
from tensorflow.keras import regularizers
from sklearn.metrics import classification_report, confusion_matrix

# Seed the Python, NumPy and TensorFlow random generators with one shared value
SEED = 42
for seed_fn in (random.seed, np.random.seed, tf.random.set_seed):
    seed_fn(SEED)

In [None]:
# Folder that holds one sub-folder per class; later cells join class names onto it.
dataset_path = '/kaggle/input/multi-cancer/Multi Cancer/Multi Cancer/Cervical Cancer'

In [None]:
# Names of the class sub-folders expected under `dataset_path`
cervix_categories = ['cervix_dyk', 'cervix_koc', 'cervix_mep', 'cervix_pab', 'cervix_sfi']

# Map every class name to the number of directory entries in its folder
image_counts = {
    category: len(os.listdir(os.path.join(dataset_path, category)))
    for category in cervix_categories
}

# Report the per-class totals
for category, count in image_counts.items():
    print(f"Total {category} images: {count}")

def display_samples(image_paths, title, num_samples=3):
    """Show up to ``num_samples`` images side by side in a single figure.

    Args:
        image_paths: Sequence of image file paths; only the leading
            ``num_samples`` entries are drawn.
        title: Prefix of each subplot title; the 1-based position is appended.
        num_samples: Maximum number of images to draw (default 3).

    A shorter list than ``num_samples`` is accepted (the grid shrinks to the
    number of available paths), and files that OpenCV cannot decode are
    reported and skipped rather than aborting the whole figure.
    """
    shown_paths = list(image_paths[:num_samples])
    if not shown_paths:
        print(f"No images to display for {title}")
        return

    plt.figure(figsize=(15, 5))
    for i, img_path in enumerate(shown_paths):
        img = cv2.imread(img_path)  # yields None when the file cannot be read/decoded
        if img is None:
            print(f"Skipping unreadable image: {img_path}")
            continue
        img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # OpenCV loads BGR; matplotlib expects RGB
        plt.subplot(1, len(shown_paths), i + 1)
        plt.imshow(img_rgb)
        plt.title(f"{title} {i + 1}")
        plt.axis('off')
    plt.show()
# Preview the first three directory entries of every class folder
for category in cervix_categories:
    folder = os.path.join(dataset_path, category)
    preview_paths = [os.path.join(folder, name) for name in os.listdir(folder)[:3]]
    display_samples(preview_paths, f"{category} Sample Image")

In [None]:
# Collect every image path below the target folder together with its label
# (the name of the sub-folder that contains it) into a two-column DataFrame.
data_dir='/kaggle/input/multi-cancer/Multi Cancer/Multi Cancer'
target_folder='Cervical Cancer'
filepath=[]
labels=[]
all_folder_path = os.path.join(data_dir, target_folder)

# Fail here with a clear message instead of silently producing an empty frame.
if not os.path.isdir(all_folder_path):
    raise FileNotFoundError(f"Dataset folder not found: {all_folder_path}")

# os.listdir returns entries in arbitrary order; sorting fixes the row order so
# the seeded train/valid/test split selects the same files on every run.
for class_name in sorted(os.listdir(all_folder_path)):
    class_dir = os.path.join(all_folder_path, class_name)
    if not os.path.isdir(class_dir):
        continue  # a stray file beside the class folders is not a class
    for image_name in sorted(os.listdir(class_dir)):
        filepath.append(os.path.join(class_dir, image_name))
        labels.append(class_name)

f_series = pd.Series(filepath, name='filepath')
l_series = pd.Series(labels, name='labels')
df = pd.concat([f_series, l_series], axis=1)
# Display the first few rows
print(df.head())

# Check data types
print(df.dtypes)

# Check for missing values
print(df.isnull().sum())

# Show dataset statistics
print(df.describe())

In [None]:
# Bare expression: the notebook renders the filepath/labels DataFrame as the cell output.
df

In [None]:
# Number of rows (image files) per class label; shown as the cell output and reused by the bar plot.
counts=df['labels'].value_counts()
counts

In [None]:
# Seaborn theme plus one "coolwarm" colour per class
sns.set(style="whitegrid")
palette = sns.color_palette("coolwarm", n_colors=len(counts))

# Draw the class-count bars on an explicitly created Axes
fig, bar_plot = plt.subplots(figsize=(12, 8))
sns.barplot(x=counts.index, y=counts.values, palette=palette, ax=bar_plot)

# Title and axis labels
bar_plot.set_title('The Count of Category', fontsize=20, fontweight='bold')
bar_plot.set_xlabel('Labels', fontsize=16)
bar_plot.set_ylabel('Count', fontsize=16)

# Write each bar's integer height just above its top edge
for p in bar_plot.patches:
    top_center = (p.get_x() + p.get_width() / 2., p.get_height())
    bar_plot.annotate(
        f'{int(p.get_height())}',
        top_center,
        xytext=(0, 5),
        textcoords='offset points',
        ha='center',
        va='bottom',
        fontsize=12,
        color='black',
    )

# Tilt the class names, tighten the layout, and render
plt.xticks(rotation=45)
fig.tight_layout()
plt.show()


In [None]:
# First cut: 70 % train / 30 % hold-out, stratified on the label column.
strat = df['labels']
train_df, dummy_df = train_test_split(
    df,
    test_size=0.3,
    stratify=strat,
    random_state=42,
)

# Second cut: halve the hold-out into validation and test, again stratified.
strate = dummy_df['labels']
valid_df, test_df = train_test_split(
    dummy_df,
    test_size=0.5,
    stratify=strate,
    random_state=42,
)

In [None]:
# Print (rows, columns) of the train, validation and test frames, in that order
for split_frame in (train_df, valid_df, test_df):
    print(split_frame.shape)

In [None]:
# Batch and image geometry shared by all three data streams
batch_size = 64
img_size = (224, 224)
channels = 3
img_shape = (*img_size, channels)

# NOTE(review): neither generator configures rescaling or a preprocessing_function,
# so batches carry raw pixel values. Keras documents `resnet.preprocess_input` for
# ResNet inputs — confirm the omission is intentional (the prediction cell at the
# end of the notebook feeds raw pixels as well, so both would need to change together).
tr_gen = ImageDataGenerator()
ts_gen = ImageDataGenerator()


def _flow(generator, frame, shuffle):
    """Create a categorical RGB batch iterator over `frame` with the shared settings."""
    return generator.flow_from_dataframe(
        frame,
        x_col='filepath',
        y_col='labels',
        target_size=img_size,
        class_mode='categorical',
        color_mode='rgb',
        shuffle=shuffle,
        batch_size=batch_size,
    )


train_gen = _flow(tr_gen, train_df, shuffle=True)
valid_gen = _flow(ts_gen, valid_df, shuffle=True)
# Not shuffled: a later cell compares predictions against test_gen.classes
test_gen = _flow(ts_gen, test_df, shuffle=False)

In [None]:
# ImageNet-initialised ResNet101 without its classification top, used as a feature extractor
base_model = ResNet101(include_top=False, weights='imagenet', input_shape=(224, 224, 3))

# Mark every backbone layer as non-trainable so only the new head is fitted
for backbone_layer in base_model.layers:
    backbone_layer.trainable = False

# Classification head: flatten -> 256 -> 64 -> 5-way softmax, with dropout after each hidden layer
head_layers = [
    Flatten(),
    Dense(256, activation='relu'),
    Dropout(0.25),
    Dense(64, activation='relu'),
    Dropout(0.25),
    Dense(5, activation='softmax'),
]
model3 = Sequential([base_model, *head_layers])

# Adamax optimiser with categorical cross-entropy, tracking accuracy
optimizer = Adamax(learning_rate=0.002)
model3.compile(
    loss='categorical_crossentropy',
    optimizer=optimizer,
    metrics=['accuracy'],
)

In [None]:
# Stop once val_loss has not improved for 5 epochs and roll back to the best weights
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=5,
    restore_best_weights=True,
)

# Write 'best_model.keras' whenever val_loss reaches a new minimum
model_checkpoint = ModelCheckpoint(
    filepath='best_model.keras',
    monitor='val_loss',
    save_best_only=True,
    verbose=1,
)

# Train for at most 50 epochs, validating on valid_gen, with both callbacks attached
fit_options = dict(
    epochs=50,
    validation_data=valid_gen,
    shuffle=False,
    callbacks=[early_stopping, model_checkpoint],
    validation_steps=None,
)
history = model3.fit(train_gen, **fit_options)

In [None]:
# Draw the layer graph of model3, annotated with tensor shapes (show_shapes=True).
tf.keras.utils.plot_model(model3,show_shapes=True)


In [None]:
# Reload the checkpoint written during training
best_model = load_model('best_model.keras')

# Score the reloaded model on the held-out test generator
test_loss, test_accuracy = best_model.evaluate(test_gen, verbose=2)

# Report loss and accuracy to four decimals
print(
    f'Test Loss: {test_loss:.4f}',
    f'Test Accuracy: {test_accuracy:.4f}',
    sep='\n',
)

In [None]:
# Evaluate the in-memory model3 (the weights left after fit()), not the checkpoint
# reloaded as `best_model` in the previous cell. Note that this overwrites
# test_loss / test_accuracy from that cell.
test_loss, test_accuracy = model3.evaluate(test_gen)
print(f'Test Loss: {test_loss:.4f}')
# Same four-decimal precision as the checkpoint evaluation so the two are comparable
print(f'Test Accuracy: {test_accuracy:.4f}')

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import confusion_matrix, classification_report

# Predict class probabilities for the test generator and take the arg-max class per sample
y_pred_prob = best_model.predict(test_gen)
y_pred_classes = np.argmax(y_pred_prob, axis=1)

# Integer labels in generator order; they line up with the predictions because
# test_gen was created with shuffle=False
y_true = test_gen.classes

# class_indices maps class name -> integer index; order the names by index so
# that label i on every axis/report row really is class i
class_names = sorted(test_gen.class_indices, key=test_gen.class_indices.get)
class_ids = list(range(len(class_names)))

# Pin the label set so the matrix is always (n_classes x n_classes),
# even if some class never appears in y_true or the predictions
cm = confusion_matrix(y_true, y_pred_classes, labels=class_ids)

# Plot the confusion matrix with one tick label per class
plt.figure(figsize=(10, 7))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=class_names, yticklabels=class_names)
plt.ylabel('Actual')
plt.xlabel('Predicted')
plt.title('Confusion Matrix')
plt.show()

# Per-class precision / recall / F1, labelled with the real class names
class_report = classification_report(y_true, y_pred_classes, labels=class_ids, target_names=class_names)
print(class_report)

In [None]:
# Per-epoch metric lists recorded by fit()
tr_acc = history.history['accuracy']
tr_loss = history.history['loss']
val_acc = history.history['val_accuracy']
val_loss = history.history['val_loss']

# Epochs (0-based positions) with the lowest validation loss and highest validation accuracy
index_loss = np.argmin(val_loss)
val_lowest = val_loss[index_loss]
index_acc = np.argmax(val_acc)
acc_highest = val_acc[index_acc]

# 1-based epoch numbers for the x axis, and legend text for the highlighted points
Epochs = [i+1 for i in range(len(tr_acc))]
loss_label = f'best epoch= {str(index_loss + 1)}'
acc_label = f'best epoch= {str(index_acc + 1)}'

# Plot training history.
# The style sheet is applied before the figure is created so it affects the
# whole figure, not only the axes added afterwards.
plt.style.use('fivethirtyeight')
plt.figure(figsize= (20, 8))

# Left panel: loss curves with the best validation epoch marked
plt.subplot(1, 2, 1)
plt.plot(Epochs, tr_loss, 'r', label= 'Training loss')
plt.plot(Epochs, val_loss, 'g', label= 'Validation loss')
plt.scatter(index_loss + 1, val_lowest, s= 150, c= 'blue', label= loss_label)
plt.title('Training and Validation Loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()

# Right panel: accuracy curves with the best validation epoch marked
plt.subplot(1, 2, 2)
plt.plot(Epochs, tr_acc, 'r', label= 'Training Accuracy')
plt.plot(Epochs, val_acc, 'g', label= 'Validation Accuracy')
plt.scatter(index_acc + 1 , acc_highest, s= 150, c= 'blue', label= acc_label)
plt.title('Training and Validation Accuracy')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.legend()
plt.tight_layout()  # must be called; a bare attribute reference does nothing
plt.show()

In [None]:
# Save the in-memory model3 to "Cervical Cancer.h5".
# NOTE(review): this persists model3, not the `best_model` reloaded from the
# checkpoint — confirm that is the model meant to be exported.
model3.save("Cervical Cancer.h5")

In [None]:
import os
import cv2
import numpy as np
import matplotlib.pyplot as plt

# Dataset root and the class sub-folders to scan (list position doubles as the integer label)
dataset_path = '/kaggle/input/multi-cancer/Multi Cancer/Multi Cancer/Cervical Cancer'  # Change this to your dataset path
cervix_categories = ['cervix_dyk', 'cervix_koc', 'cervix_mep', 'cervix_pab', 'cervix_sfi']
num_classes = len(cervix_categories)

# Parallel lists: image_paths[k] has the integer label actual_labels[k]
image_paths = []
actual_labels = []

# Walk the class folders in list order, report each size, and record path/label pairs
for label_idx, category_name in enumerate(cervix_categories):
    folder = os.path.join(dataset_path, category_name)
    entries = os.listdir(folder)
    print(f"Total {category_name} images: {len(entries)}")

    for entry in entries:
        image_paths.append(os.path.join(folder, entry))
        actual_labels.append(label_idx)

def display_samples(image_paths, actual_labels, model, num_samples=3):
    """Plot randomly chosen images titled with their actual and predicted class.

    Args:
        image_paths: Sequence of image file paths to sample from.
        actual_labels: Integer class index for each path, used as an index
            into the module-level ``cervix_categories`` list.
        model: Object whose ``predict`` accepts a ``(1, 224, 224, 3)`` array
            and returns per-class scores; the arg-max is taken as the class.
        num_samples: Number of images to draw without replacement (default 3);
            reduced automatically when fewer paths are available.

    Images that OpenCV cannot decode are reported and skipped.
    """
    if len(image_paths) == 0:
        print("No images to display")
        return

    # Sampling without replacement needs at least num_samples candidates
    num_samples = min(num_samples, len(image_paths))
    selected_indices = np.random.choice(len(image_paths), num_samples, replace=False)
    plt.figure(figsize=(15, 5))

    for i, idx in enumerate(selected_indices):
        img_path = image_paths[idx]
        img = cv2.imread(img_path)  # yields None when the file cannot be read/decoded
        if img is None:
            print(f"Skipping unreadable image: {img_path}")
            continue
        img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # OpenCV loads BGR; convert to RGB

        # Prepare the image for prediction
        img_resized = cv2.resize(img_rgb, (224, 224))  # Resize for model input
        img_array = np.expand_dims(img_resized, axis=0)  # Add batch dimension

        # Predict with the model that was passed in, not a module-level global
        prediction = model.predict(img_array)
        predicted_label = np.argmax(prediction, axis=-1)[0]  # Get predicted class index
        actual_label = actual_labels[idx]  # Actual label

        plt.subplot(1, num_samples, i + 1)
        plt.imshow(img_rgb)
        plt.title(f"Actual: {cervix_categories[actual_label]}\nPredicted: {cervix_categories[predicted_label]}")
        plt.axis('off')
    plt.show()

# Display 3 random sample images from each category.
# Each call receives only that category's paths, so the samples really come from
# the category being iterated rather than from the whole dataset.
for category_index, category in enumerate(cervix_categories):
    category_paths = [
        path for path, label in zip(image_paths, actual_labels) if label == category_index
    ]
    if not category_paths:
        print(f"No images found for {category}")
        continue
    category_labels = [category_index] * len(category_paths)
    # Never request more samples than the category holds (sampling is without replacement)
    display_samples(category_paths, category_labels, model3,
                    num_samples=min(3, len(category_paths)))