In [None]:
# Mount Google Drive at /content/drive so the files referenced below are readable.
# `google.colab` is only available inside a Colab runtime.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import os
import numpy as np
from PIL import Image
import pandas as pd
from tensorflow.keras.preprocessing.image import img_to_array
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, Conv2D, MaxPooling2D
from tensorflow.keras.optimizers import Adam

In [None]:
# Locations on the mounted Drive.
# data_dir: folder whose files are listed and split into train/test image paths.
# train_csv_file / test_csv_file: CSVs read into train_df / test_df (their
# 'label_number' column is used as the class label further down).
data_dir = '/content/drive/MyDrive/images_patch'
train_csv_file = '/content/drive/MyDrive/sabpatch_parsed_folders.csv'
test_csv_file = '/content/drive/MyDrive/sabpatch_parsed_test.csv'

In [None]:
# Load the label tables. Rows are later paired with image paths purely by
# position, so the row order presumably follows the sorted filename order in
# data_dir -- TODO confirm against the CSV contents.
train_df = pd.read_csv(train_csv_file)
test_df = pd.read_csv(test_csv_file)

In [None]:
# Number of leading files (in sorted filename order) that form the training
# split; every remaining file in data_dir goes to the test split.
N_TRAIN_IMAGES = 3136

# Sort so the split is deterministic regardless of the order os.listdir returns.
image_files = sorted(os.listdir(data_dir))
train_image_paths = [os.path.join(data_dir, name) for name in image_files[:N_TRAIN_IMAGES]]
test_image_paths = [os.path.join(data_dir, name) for name in image_files[N_TRAIN_IMAGES:]]

# Preview only the first few paths instead of dumping the whole list.
train_image_paths[:20]

['/content/drive/MyDrive/images_patch/p0000.png',
 '/content/drive/MyDrive/images_patch/p0001.png',
 '/content/drive/MyDrive/images_patch/p0002.png',
 '/content/drive/MyDrive/images_patch/p0003.png',
 '/content/drive/MyDrive/images_patch/p0004.png',
 '/content/drive/MyDrive/images_patch/p0005.png',
 '/content/drive/MyDrive/images_patch/p0006.png',
 '/content/drive/MyDrive/images_patch/p0007.png',
 '/content/drive/MyDrive/images_patch/p0008.png',
 '/content/drive/MyDrive/images_patch/p0009.png',
 '/content/drive/MyDrive/images_patch/p0010.png',
 '/content/drive/MyDrive/images_patch/p0011.png',
 '/content/drive/MyDrive/images_patch/p0012.png',
 '/content/drive/MyDrive/images_patch/p0013.png',
 '/content/drive/MyDrive/images_patch/p0014.png',
 '/content/drive/MyDrive/images_patch/p0015.png',
 '/content/drive/MyDrive/images_patch/p0016.png',
 '/content/drive/MyDrive/images_patch/p0017.png',
 '/content/drive/MyDrive/images_patch/p0018.png',
 '/content/drive/MyDrive/images_patch/p0019.png',


In [None]:
# Seeded generator so the shuffled order (and therefore training) is reproducible.
shuffle_rng = np.random.default_rng(42)

# Labels are paired with images purely by position, so a length mismatch would
# otherwise be silently truncated by zip() and could misalign labels.
assert len(train_image_paths) == len(train_df), (
    f'{len(train_image_paths)} training images but {len(train_df)} training labels'
)
assert len(test_image_paths) == len(test_df), (
    f'{len(test_image_paths)} test images but {len(test_df)} test labels'
)

# Shuffling the training data
# sorted() restores the filename order the CSV rows are paired with, so
# re-running this cell never zips already-shuffled paths with unshuffled labels.
train_data = list(zip(sorted(train_image_paths), train_df['label_number']))
shuffle_rng.shuffle(train_data)
train_image_paths, train_labels = zip(*train_data)

# Shuffling the testing data
test_data = list(zip(sorted(test_image_paths), test_df['label_number']))
shuffle_rng.shuffle(test_data)
test_image_paths, test_labels = zip(*test_data)

In [None]:
# Training hyperparameters.
batch_size = 32        # samples per batch yielded by data_generator
img_size = (224, 224)  # size every image is resized to; matches the model's input_shape
epochs = 10            # number of epochs passed to model.fit

In [None]:
# Data generators for putting data into batches
def data_generator(image_paths, labels, img_size, batch_size, num_classes=3):
    """Yield (images, one-hot labels) batches forever, cycling over the data.

    Args:
        image_paths: Sequence of image file paths.
        labels: Sequence of integer class labels, positionally aligned with
            ``image_paths``.
        img_size: Target size passed to ``PIL.Image.resize`` (width, height).
        batch_size: Maximum number of samples per batch; the last batch of
            each pass may be smaller.
        num_classes: Number of classes used for one-hot encoding.

    Yields:
        Tuple ``(X_batch, y_batch)``: a float array of images scaled to
        [0, 1] and the matching one-hot label array.

    Raises:
        ValueError: If ``image_paths`` is empty (the loop would otherwise spin
            forever without yielding) or its length differs from ``labels``.
    """
    n_samples = len(image_paths)
    if n_samples == 0:
        raise ValueError('data_generator received no image paths')
    if len(labels) != n_samples:
        raise ValueError(
            f'got {n_samples} image paths but {len(labels)} labels'
        )

    while True:
        for start in range(0, n_samples, batch_size):
            end = min(start + batch_size, n_samples)
            X_batch = []
            for img_path in image_paths[start:end]:
                # Context manager releases the file handle promptly; convert()
                # forces 3 channels so grayscale/RGBA files still match the
                # model's (H, W, 3) input.
                with Image.open(img_path) as src:
                    img = src.convert('RGB').resize(img_size)
                X_batch.append(img_to_array(img) / 255.0)
            X_batch = np.array(X_batch)
            y_batch = to_categorical(list(labels[start:end]), num_classes=num_classes)
            yield X_batch, y_batch

# Infinite batch generators. They are stateful: every batch drawn (by fit,
# validation or a manual loop) advances their position in the data.
train_generator = data_generator(train_image_paths, train_labels, img_size, batch_size)
test_generator = data_generator(test_image_paths, test_labels, img_size, batch_size)

In [None]:
# Model definition: three Conv2D + MaxPooling2D stages followed by a dense head.
model = Sequential([
    Conv2D(32, (3, 3), activation='relu', input_shape=(224, 224, 3)),
    MaxPooling2D((2, 2)),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Conv2D(128, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Flatten(),
    Dense(128, activation='relu'),
    # 3 classes: OSCC, Leukoplakia without dysplasia, Leukoplakia with dysplasia
    Dense(3, activation='softmax'),
])

# One-hot targets from the generator pair with categorical cross-entropy.
model.compile(
    optimizer=Adam(learning_rate=0.001),
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)


In [None]:
# number of steps per epoch
# Round up so the final partial batch is included: with floor division the
# leftover samples are skipped each epoch and the cycling generators drift out
# of phase with epoch boundaries.
steps_per_epoch = int(np.ceil(len(train_image_paths) / batch_size))
validation_steps = int(np.ceil(len(test_image_paths) / batch_size))

# Training the model
# NOTE: validation_data is the test split, so the per-epoch validation metrics
# are computed on the same data used for the final evaluation.
# Keeping the History object allows later inspection/plotting of the curves.
history = model.fit(
    train_generator,
    steps_per_epoch=steps_per_epoch,
    epochs=epochs,
    validation_data=test_generator,
    validation_steps=validation_steps
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [None]:
# Evaluating the model to find overall accuracy, classification report
from sklearn.metrics import classification_report

# Use a fresh generator: `test_generator` was already advanced by model.fit
# (validation), so iterating it here would start at an arbitrary position.
eval_generator = data_generator(test_image_paths, test_labels, img_size, batch_size)
# Exactly one pass over the test set, including the final partial batch.
eval_steps = int(np.ceil(len(test_image_paths) / batch_size))

total_loss = 0.0
total_accuracy = 0.0
total_samples = 0

y_true = []
y_pred = []

for _ in range(eval_steps):
    X_test_batch, y_test_batch = next(eval_generator)
    n_batch = len(X_test_batch)

    # Weight per-batch means by batch size so the smaller last batch does not
    # skew the overall averages.
    loss, accuracy = model.evaluate(X_test_batch, y_test_batch, verbose=0)
    total_loss += loss * n_batch
    total_accuracy += accuracy * n_batch
    total_samples += n_batch

    y_true.extend(np.argmax(y_test_batch, axis=1))
    # verbose=0 avoids printing one progress bar per batch.
    y_pred.extend(np.argmax(model.predict(X_test_batch, verbose=0), axis=1))

if total_samples == 0:
    raise ValueError('No test samples were evaluated; check test_image_paths.')

overall_loss = total_loss / total_samples
overall_accuracy = total_accuracy / total_samples

print(f'Overall Test loss: {overall_loss:.4f}, Overall Test accuracy: {overall_accuracy:.4f}')

# Pin `labels` so the report still lines up with target_names when a class is
# absent from both y_true and y_pred.
print(classification_report(y_true, y_pred, labels=[0, 1, 2], target_names=['OSCC', 'Leukoplakia without dysplasia', 'Leukoplakia with dysplasia']))

Overall Test loss: 0.9792, Overall Test accuracy: 0.5965
                               precision    recall  f1-score   support

                         OSCC       0.59      0.29      0.39       118
Leukoplakia without dysplasia       0.63      0.47      0.54       187
   Leukoplakia with dysplasia       0.59      0.79      0.67       322

                     accuracy                           0.60       627
                    macro avg       0.60      0.51      0.53       627
                 weighted avg       0.60      0.60      0.58       627

