In [None]:
import os
import pandas as pd
from PIL import Image
import numpy as np
import matplotlib.pyplot as plt

In [None]:
# Root folder of the dataset; every split is stored in its own sub-folder.
DATA_DIR = 'oral cancer.v3i.tensorflow'

# Derive the three split directories from the shared root in one pass.
TEST_DIR, TRAIN_DIR, VALID_DIR = (
    os.path.join(DATA_DIR, split_name)
    for split_name in ('test', 'train', 'valid')
)

In [None]:
# Each split directory ships its own '_annotations.csv'; read them in the
# order test -> train -> valid.
test_annotations, train_annotations, valid_annotations = (
    pd.read_csv(os.path.join(split_dir, '_annotations.csv'))
    for split_dir in (TEST_DIR, TRAIN_DIR, VALID_DIR)
)

In [None]:
# Print a concise summary (columns, dtypes, non-null counts) of the test
# annotations frame to sanity-check what was read from the CSV.
test_annotations.info()

In [None]:
def filter_annotations(annotations, img_dir):
    """
    Keeps one annotation row per image that actually exists in a directory.

    Rows whose ``filename`` does not match an entry of ``img_dir`` are dropped;
    when a filename appears several times only its first row is retained.

    Args:
        annotations (pandas.DataFrame): Annotations with a ``filename`` column.
        img_dir (str): Directory whose entries define the valid filenames.

    Returns:
        pandas.DataFrame: A new, filtered DataFrame. The input frame is left
        untouched and surviving rows keep their original index labels.
    """
    entries_on_disk = os.listdir(img_dir)
    is_present = annotations['filename'].isin(entries_on_disk)
    return annotations[is_present].drop_duplicates(subset=['filename'])

# Restrict every split's annotations to images found in that split's folder.
# The annotation frames are rebound to their filtered versions.
test_annotations = filter_annotations(test_annotations, TEST_DIR)
train_annotations = filter_annotations(train_annotations, TRAIN_DIR)
valid_annotations = filter_annotations(valid_annotations, VALID_DIR)

In [None]:
# Number of annotation rows left per split, one per line: test, train, valid.
print(
    len(test_annotations),
    len(train_annotations),
    len(valid_annotations),
    sep='\n',
)

In [None]:
def separate_images(annotations):
    """
    Splits annotated image filenames into cancerous and non-cancerous groups.

    A row belongs to the cancerous group when its ``class`` value is exactly
    ``"cancer"`` and to the non-cancerous group when it is exactly
    ``"no cancer"``; rows with any other class value are ignored. The values
    returned are the ``filename`` entries as stored in the frame (no directory
    is prepended), in their original row order.

    Args:
        annotations (pandas.DataFrame): Annotations with ``filename`` and
            ``class`` columns.

    Returns:
        tuple: A tuple containing two lists:
            cancerous (list): Filenames of rows labelled ``"cancer"``.
            non_cancerous (list): Filenames of rows labelled ``"no cancer"``.
    """
    # An empty frame (possibly without any columns) has nothing to separate;
    # returning early keeps that case from failing on the column lookups below.
    if annotations.empty:
        return [], []

    labels = annotations['class']
    filenames = annotations['filename']

    # Vectorised boolean selection replaces the per-row iterrows() loop.
    cancerous = filenames[labels == "cancer"].tolist()
    non_cancerous = filenames[labels == "no cancer"].tolist()

    return cancerous, non_cancerous

# Split each (already filtered) annotation frame into two filename lists:
# images labelled "cancer" and images labelled "no cancer".
test_cancerous, test_non_cancerous = separate_images(test_annotations)
train_cancerous, train_non_cancerous = separate_images(train_annotations)
valid_cancerous, valid_non_cancerous = separate_images(valid_annotations)

In [None]:
# Class balance per split, printed as "<cancerous> <non-cancerous>" on one
# line each, in the order test, train, valid.
print(
    f"{len(test_cancerous)} {len(test_non_cancerous)}",
    f"{len(train_cancerous)} {len(train_non_cancerous)}",
    f"{len(valid_cancerous)} {len(valid_non_cancerous)}",
    sep="\n",
)

In [None]:
# Peek at the first few cancerous test entries; these are bare filenames, so
# they must be joined with TEST_DIR before being opened.
print(test_cancerous[:5])

In [None]:
def load_image(image_path, target_size=None):
    """
    Loads an image from disk as a NumPy array, optionally resizing it first.

    Args:
        image_path (str): The path to the image file.
        target_size (tuple, optional): Size passed straight to
            ``PIL.Image.Image.resize``, which Pillow documents as a
            ``(width, height)`` 2-tuple. When ``None`` the image keeps its
            original size.

    Returns:
        numpy.ndarray: The pixel data of the (possibly resized) image. The
        array layout and dtype depend on the image mode of the file; no mode
        conversion is performed here.
    """
    # Image.open() is lazy and keeps the underlying file open. The context
    # manager guarantees the handle is released even if resizing or the array
    # conversion raises.
    with Image.open(image_path) as source:
        picture = source if target_size is None else source.resize(target_size)
        # np.array() forces the pixel data to be read while the file is open.
        return np.array(picture)

# Show the first cancerous and the first non-cancerous test image side by side.
sample_cancerous = load_image(os.path.join(TEST_DIR, test_cancerous[0]))
sample_non_cancerous = load_image(os.path.join(TEST_DIR, test_non_cancerous[0]))

fig, axes = plt.subplots(1, 2, figsize=(12, 6))

# One panel per class: image, title, and hidden axis ticks.
panels = zip(
    axes,
    (sample_cancerous, sample_non_cancerous),
    ('Cancerous', 'Non-cancerous'),
)
for panel_ax, panel_image, panel_title in panels:
    panel_ax.imshow(panel_image)
    panel_ax.set_title(panel_title)
    panel_ax.axis('off')

plt.show()

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

# Build filename/label lists per split: all cancerous entries first, followed
# by all non-cancerous ones, with a matching string label for each entry.
X_train = [*train_cancerous, *train_non_cancerous]
y_train = ["cancer"] * len(train_cancerous) + ["no cancer"] * len(train_non_cancerous)

X_valid = [*valid_cancerous, *valid_non_cancerous]
y_valid = ["cancer"] * len(valid_cancerous) + ["no cancer"] * len(valid_non_cancerous)

# Turn the string labels into integers; the encoder is fitted on the training
# labels and then reused unchanged for the validation labels.
label_encoder = LabelEncoder()
label_encoder.fit(y_train)
y_train = label_encoder.transform(y_train)
y_valid = label_encoder.transform(y_valid)

# Hold out 20% of the training entries (fixed seed for a repeatable split).
# NOTE(review): X_val/y_val are distinct from X_valid/y_valid above, and none
# of these lists appear to be used by the later cells visible in this
# notebook - confirm whether this split is still needed.
X_train, X_val, y_train, y_val = train_test_split(
    X_train,
    y_train,
    test_size=0.2,
    random_state=42,
)

In [None]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Training batches get random augmentation (shear, zoom, horizontal flip);
# validation batches are only rescaled. Both scale pixel values by 1/255.
train_datagen = ImageDataGenerator(
    horizontal_flip=True,
    zoom_range=0.2,
    shear_range=0.2,
    rescale=1.0 / 255,
)

valid_datagen = ImageDataGenerator(rescale=1.0 / 255)

# Options common to both flows: filenames come from the 'filename' column,
# labels from the 'class' column, images are resized to 224x224 and served in
# batches of 32 with a single binary label each.
flow_options = dict(
    x_col='filename',
    y_col='class',
    target_size=(224, 224),
    batch_size=32,
    class_mode='binary',
)

# NOTE(review): which class becomes label 1 is decided by Keras, not by this
# code - check `train_generator.class_indices` before interpreting outputs.
train_generator = train_datagen.flow_from_dataframe(
    dataframe=train_annotations,
    directory=os.path.join(TRAIN_DIR, ''),
    **flow_options,
)

valid_generator = valid_datagen.flow_from_dataframe(
    dataframe=valid_annotations,
    directory=os.path.join(VALID_DIR, ''),
    **flow_options,
)

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout

# Imported in this cell so the checkpoint callback below is available without
# relying on any other cell.
from tensorflow.keras.callbacks import ModelCheckpoint

# Define the CNN model: three Conv2D + MaxPooling2D stages (32, 64, 128
# filters) on 224x224 3-channel inputs, followed by a dense head with dropout
# and a single sigmoid unit for the binary decision.
model = Sequential([
    Conv2D(32, (3, 3), activation='relu', input_shape=(224, 224, 3)),
    MaxPooling2D((2, 2)),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Conv2D(128, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(1, activation='sigmoid')
])

# Compile the model
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy']
)

# A later cell loads 'best_model.keras', but nothing else in this notebook
# writes that file. Save the model here whenever the validation loss improves
# so that a fresh Restart & Run All can find it.
best_model_checkpoint = ModelCheckpoint(
    'best_model.keras',
    monitor='val_loss',
    save_best_only=True
)

# Train the model
epochs = 20
history = model.fit(
    train_generator,
    steps_per_epoch=len(train_generator),
    validation_data=valid_generator,
    validation_steps=len(valid_generator),
    epochs=epochs,
    callbacks=[best_model_checkpoint]
)

In [None]:
# Evaluate the model on the validation set
# compile() above requested a single metric ('accuracy'), so evaluate() yields
# exactly two values: the binary cross-entropy loss and the accuracy.
loss, accuracy = model.evaluate(valid_generator)
print(f"Validation Loss: {loss:.4f}")
print(f"Validation Accuracy: {accuracy:.4f}")

In [None]:
# Test images are only rescaled (no augmentation), mirroring the validation
# preprocessing.
test_datagen = ImageDataGenerator(rescale=1.0 / 255)

# shuffle=False keeps the batches in the row order of test_annotations, so
# predictions can be matched back to their source rows.
test_flow_options = dict(
    x_col='filename',
    y_col='class',
    target_size=(224, 224),
    batch_size=32,
    class_mode='binary',
    shuffle=False,
)
test_generator = test_datagen.flow_from_dataframe(
    dataframe=test_annotations,
    directory=os.path.join(TEST_DIR, ''),
    **test_flow_options,
)

# Sigmoid outputs of the in-memory model, one row per test image.
test_predictions = model.predict(test_generator)

In [None]:
# This cell runs before the notebook's later `import tensorflow as tf` cell,
# so on a fresh kernel `tf` would be undefined here; import it locally.
import tensorflow as tf

# Cap TensorFlow at 4096 MB on the first visible GPU (memory_limit is in MB).
# NOTE(review): TensorFlow only accepts this before the GPU has been
# initialised. Placed after the training cells, the call will most likely end
# in the RuntimeError branch below - consider moving this cell to the top of
# the notebook.
gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
    try:
        tf.config.experimental.set_virtual_device_configuration(
            gpus[0],
            [tf.config.experimental.VirtualDeviceConfiguration(memory_limit=4096)])
    except RuntimeError as e:
        # Raised when the devices were already initialised; report and go on.
        print(e)

In [None]:
import tensorflow as tf
# Switch TensorFlow to TF1-style graph mode for everything that follows.
# NOTE(review): TensorFlow documents disable_eager_execution() as a call to
# make at program start-up, before any graphs, ops or tensors exist. Here it
# runs only after the model above has already been built and trained - confirm
# it is really needed, and if so move it to the first cell of the notebook.
tf.compat.v1.disable_eager_execution()

In [None]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Load the model stored at 'best_model.keras'
best_model = tf.keras.models.load_model('best_model.keras')

# Evaluate on the test set.
# Both predictions and ground truth come from test_generator: it was created
# with shuffle=False, so the i-th prediction belongs to the i-th entry of
# `test_generator.classes`. reset() rewinds the generator because an earlier
# cell has already iterated over it.
test_generator.reset()
y_test = np.asarray(test_generator.classes)
y_pred = (best_model.predict(test_generator) > 0.5).astype(int).ravel()

# scikit-learn treats label 1 as the positive class by default, while the
# integer assigned to each class name is chosen by Keras. Look up the index of
# "cancer" so precision/recall/F1 describe cancer detection explicitly.
cancer_label = test_generator.class_indices['cancer']

accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred, pos_label=cancer_label)
recall = recall_score(y_test, y_pred, pos_label=cancer_label)
f1 = f1_score(y_test, y_pred, pos_label=cancer_label)

print(f"Accuracy: {accuracy*100:.2f}%")
print(f"Precision: {precision*100:.2f}%")
print(f"Recall: {recall*100:.2f}%")
print(f"F1-score: {f1*100:.2f}%")