In [1]:
import os
import pandas as pd
import pickle
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
import cv2
import zipfile
import shutil
from PIL import Image
from sklearn.model_selection import train_test_split
from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras.layers import GlobalAveragePooling2D
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, Input
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.preprocessing.image import ImageDataGenerator, load_img, img_to_array, array_to_img
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.models import Sequential, load_model, Model
from tensorflow.keras.layers import RandomFlip, RandomRotation, RandomZoom, RandomContrast, Rescaling
from tensorflow.keras.preprocessing import image
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.callbacks import EarlyStopping, TensorBoard
from tensorflow.keras.optimizers import Adam
from keras import Input

In [2]:
DATA_DIR = '/kaggle/input/oral-diseases/unprocessed'
categories = ['calculus', 'caries', 'gingivitis', 'hypodontia', 'tooth_discoloration', 'ulcer']

# Define directories for train, validation, and test splits
base_dir = '/kaggle/working/dataset'
train_dir = os.path.join(base_dir, 'train')
val_dir = os.path.join(base_dir, 'val')
test_dir = os.path.join(base_dir, 'test')

# Create the train, validation, and test directories with subdirectories for each category
for category in categories:
    os.makedirs(os.path.join(train_dir, category), exist_ok=True)
    os.makedirs(os.path.join(val_dir, category), exist_ok=True)
    os.makedirs(os.path.join(test_dir, category), exist_ok=True)

# Split each category
for category in categories:
    category_dir = os.path.join(DATA_DIR, category)
    filenames = os.listdir(category_dir)
    # Split the dataset into train, val, and test sets (e.g., 80% train, 10% validation, 10% test)
    train_filenames, temp_filenames = train_test_split(filenames, test_size=0.2, random_state=42)
    val_filenames, test_filenames = train_test_split(temp_filenames, test_size=0.5, random_state=42)

    for filename in train_filenames:
        shutil.copy(os.path.join(category_dir, filename), os.path.join(train_dir, category, filename))
    for filename in val_filenames:
        shutil.copy(os.path.join(category_dir, filename), os.path.join(val_dir, category, filename))
    for filename in test_filenames:
        shutil.copy(os.path.join(category_dir, filename), os.path.join(test_dir, category, filename))

print("Data split completed!")

Data split completed!


In [6]:
# Set parameters
DIMX, DIMY = 224, 224  # Image dimensions
HIDDEN = 128           # Number of hidden units in dense layer
OUTPUT_CLASSES = 6     # Number of output classes
BATCH_SIZE = 32
INITIAL_LEARNING_RATE = 0.001
NUM_FINETUNE_EPOCHS = 10

# Data generators
train_datagen = ImageDataGenerator(rescale=1./255)
val_datagen = ImageDataGenerator(rescale=1./255)
test_datagen = ImageDataGenerator(rescale=1./255)

train_generator = train_datagen.flow_from_directory(
    os.path.join(base_dir, "train"),
    target_size=(DIMX, DIMY),
    batch_size=BATCH_SIZE,
    class_mode="categorical"
)
val_generator = val_datagen.flow_from_directory(
    os.path.join(base_dir, "val"),
    target_size=(DIMX, DIMY),
    batch_size=BATCH_SIZE,
    class_mode="categorical"
)
test_generator = test_datagen.flow_from_directory(
    os.path.join(base_dir, "test"),
    target_size=(DIMX, DIMY),
    batch_size=BATCH_SIZE,
    class_mode="categorical"
)


Found 9319 images belonging to 6 classes.
Found 1165 images belonging to 6 classes.
Found 1169 images belonging to 6 classes.


In [7]:
print("Classes detected:", train_generator.class_indices)

Classes detected: {'calculus': 0, 'caries': 1, 'gingivitis': 2, 'hypodontia': 3, 'tooth_discoloration': 4, 'ulcer': 5}


In [8]:
# Directories for train, val, and test splits
split_directories = {'train': train_dir, 'val': val_dir, 'test': test_dir}

# Count and print the number of images in each category for each split
category_counts = {category: {'train': 0, 'val': 0, 'test': 0} for category in categories}

for split, split_dir in split_directories.items():
    for category in categories:
        category_dir = os.path.join(split_dir, category)
        num_files = len(os.listdir(category_dir))
        category_counts[category][split] = num_files

# Print the total dataset count for each category
for category in categories:
    total_files = sum(category_counts[category].values())
    print(f"Category '{category}': Total files = {total_files}")
    print(f"  Train: {category_counts[category]['train']}")
    print(f"  Validation: {category_counts[category]['val']}")
    print(f"  Test: {category_counts[category]['test']}")


Category 'calculus': Total files = 1296
  Train: 1036
  Validation: 130
  Test: 130
Category 'caries': Total files = 2382
  Train: 1905
  Validation: 238
  Test: 239
Category 'gingivitis': Total files = 2349
  Train: 1879
  Validation: 235
  Test: 235
Category 'hypodontia': Total files = 1251
  Train: 1000
  Validation: 125
  Test: 126
Category 'tooth_discoloration': Total files = 1834
  Train: 1467
  Validation: 183
  Test: 184
Category 'ulcer': Total files = 2541
  Train: 2032
  Validation: 254
  Test: 255


In [9]:
def build_model(output_classes, learning_rate):
    base_model = MobileNetV2(weights="imagenet", include_top=False, input_shape=(DIMX, DIMY, 3))
    base_model.trainable = False  # Freeze base model

    inputs = Input(shape=(DIMX, DIMY, 3))
    x = base_model(inputs, training=False)
    x = GlobalAveragePooling2D()(x)
    x = Dense(HIDDEN, activation="relu")(x)
    outputs = Dense(output_classes, activation="softmax")(x)

    model = Model(inputs, outputs)
    model.compile(optimizer=Adam(learning_rate=learning_rate),
                  loss="categorical_crossentropy",
                  metrics=["accuracy"])
    return model
    
# Unfreeze layers and recompile model
def unfreeze_base_layers(model, layers, learning_rate):
    base_model = model.layers[1]  # Base model is the second layer
    for layer in base_model.layers[-layers:]:
        layer.trainable = True
    model.compile(optimizer=Adam(learning_rate=learning_rate),
                  loss="categorical_crossentropy",
                  metrics=["accuracy"])
    return model

# Training the model with gradual fine-tuning
model = build_model(OUTPUT_CLASSES, INITIAL_LEARNING_RATE)
# Callbacks
tensorboard_callback = TensorBoard(log_dir="./logs", histogram_freq=1)
early_stopping = EarlyStopping(monitor="val_loss", patience=3)

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/mobilenet_v2/mobilenet_v2_weights_tf_dim_ordering_tf_kernels_1.0_224_no_top.h5
[1m9406464/9406464[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


In [10]:
# Step 1: Train only top layers
history_step1 = model.fit(
    train_generator,
    validation_data=val_generator,
    epochs=NUM_FINETUNE_EPOCHS,
    callbacks=[tensorboard_callback, early_stopping]
)

Epoch 1/10


  self._warn_if_super_not_called()
I0000 00:00:1732976665.015524     188 service.cc:145] XLA service 0x796b6000f5b0 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1732976665.015597     188 service.cc:153]   StreamExecutor device (0): Tesla T4, Compute Capability 7.5
I0000 00:00:1732976665.015602     188 service.cc:153]   StreamExecutor device (1): Tesla T4, Compute Capability 7.5
I0000 00:00:1732976670.573793     188 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


[1m291/292[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 65ms/step - accuracy: 0.6683 - loss: 0.8799

  self._warn_if_super_not_called()


[1m292/292[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m39s[0m 97ms/step - accuracy: 0.6688 - loss: 0.8782 - val_accuracy: 0.8043 - val_loss: 0.4403
Epoch 2/10
[1m292/292[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 65ms/step - accuracy: 0.8531 - loss: 0.3600 - val_accuracy: 0.8498 - val_loss: 0.3705
Epoch 3/10
[1m292/292[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 65ms/step - accuracy: 0.8951 - loss: 0.2592 - val_accuracy: 0.8704 - val_loss: 0.2987
Epoch 4/10
[1m292/292[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 65ms/step - accuracy: 0.9140 - loss: 0.2098 - val_accuracy: 0.8936 - val_loss: 0.2720
Epoch 5/10
[1m292/292[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 62ms/step - accuracy: 0.9275 - loss: 0.1826 - val_accuracy: 0.8850 - val_loss: 0.2761
Epoch 6/10
[1m292/292[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 64ms/step - accuracy: 0.9338 - loss: 0.1534 - val_accuracy: 0.8807 - val_loss: 0.2613
Epoch 7/10
[1m292/292[0m 

In [11]:
# Evaluate after step 1
loss, accuracy = model.evaluate(test_generator)
print(f"Step 1 - Loss: {loss:.4f}, Accuracy: {accuracy:.4f}")

[1m37/37[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 141ms/step - accuracy: 0.8878 - loss: 0.2953
Step 1 - Loss: 0.2823, Accuracy: 0.8905


In [12]:
# Step 2: Fine-tune more layers
UNFREEZE_LAYERS = 20  # Number of layers to unfreeze in each step
learning_rate = INITIAL_LEARNING_RATE / 10  # Reduce learning rate
model = unfreeze_base_layers(model, layers=UNFREEZE_LAYERS, learning_rate=learning_rate)

history_step2 = model.fit(
    train_generator,
    validation_data=val_generator,
    epochs=NUM_FINETUNE_EPOCHS,
    callbacks=[tensorboard_callback, early_stopping]
)

Epoch 1/10
[1m292/292[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m37s[0m 91ms/step - accuracy: 0.8304 - loss: 0.4808 - val_accuracy: 0.8798 - val_loss: 0.3860
Epoch 2/10
[1m292/292[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 66ms/step - accuracy: 0.9390 - loss: 0.1409 - val_accuracy: 0.8850 - val_loss: 0.3578
Epoch 3/10
[1m292/292[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 66ms/step - accuracy: 0.9414 - loss: 0.1246 - val_accuracy: 0.9159 - val_loss: 0.2573


In [13]:
# Evaluate after step 2
loss, accuracy = model.evaluate(test_generator)
print(f"Step 2 - Loss: {loss:.4f}, Accuracy: {accuracy:.4f}")

[1m37/37[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 82ms/step - accuracy: 0.8718 - loss: 0.2937
Step 2 - Loss: 0.3048, Accuracy: 0.8820


In [14]:
# Step 3: Fine-tune deeper layers
UNFREEZE_LAYERS += 20  # Unfreeze more layers
learning_rate /= 2  # Further reduce learning rate
model = unfreeze_base_layers(model, layers=UNFREEZE_LAYERS, learning_rate=learning_rate)

history_step3 = model.fit(
    train_generator,
    validation_data=val_generator,
    epochs=NUM_FINETUNE_EPOCHS,
    callbacks=[tensorboard_callback, early_stopping]
)

Epoch 1/10
[1m292/292[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 93ms/step - accuracy: 0.9081 - loss: 0.2091 - val_accuracy: 0.8953 - val_loss: 0.3142
Epoch 2/10
[1m292/292[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 67ms/step - accuracy: 0.9435 - loss: 0.1192 - val_accuracy: 0.9021 - val_loss: 0.2670
Epoch 3/10
[1m292/292[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 64ms/step - accuracy: 0.9490 - loss: 0.1029 - val_accuracy: 0.9167 - val_loss: 0.2416
Epoch 4/10
[1m292/292[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 65ms/step - accuracy: 0.9510 - loss: 0.0986 - val_accuracy: 0.9219 - val_loss: 0.2090
Epoch 5/10
[1m292/292[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 67ms/step - accuracy: 0.9471 - loss: 0.1004 - val_accuracy: 0.9176 - val_loss: 0.2180
Epoch 6/10
[1m292/292[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 66ms/step - accuracy: 0.9524 - loss: 0.0948 - val_accuracy: 0.9176 - val_loss: 0.2078
Epoch 7/10
[1m2

In [15]:
# Evaluate after step 3
loss, accuracy = model.evaluate(test_generator)
print(f"Step 3 - Loss: {loss:.4f}, Accuracy: {accuracy:.4f}")

[1m37/37[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 90ms/step - accuracy: 0.8973 - loss: 0.2368
Step 3 - Loss: 0.2510, Accuracy: 0.8982


In [None]:
# Save final model
model.save("fine_tuned_model.keras")

In [None]:
# Save final model
model.save("fine_tuned_model.h5")