# Requirements

## Libraries

In [1]:
# !pip install -r requirements.txt
import numpy as np
import os
import cv2
import tensorflow as tf
from tensorflow.keras.callbacks import ModelCheckpoint, CSVLogger
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.model_selection import StratifiedKFold, RepeatedStratifiedKFold
from Cavity_detection.models import models
from Cavity_detection.src.utils import *

2023-04-15 23:08:23.135003: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-04-15 23:08:23.282526: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /root/venv/lib/python3.9/site-packages/cv2/../../lib64:
2023-04-15 23:08:23.282562: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.
2023-04-15 23:08:24.150160: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror

## Directories and definitions

In [2]:
# Project root that every data path in the next cell is joined onto.
# Alternative root, kept for reference:
# base_path='/content/drive/MyDrive/Cavity_detection'
base_path= 'Cavity_detection'

In [3]:
# Per-class image folders (their names match the class names used by load_data()).
processed_caries = os.path.join(base_path, 'data/processed/caries')
processed_no_caries = os.path.join(base_path, 'data/processed/no_caries')
# Parent of the two class folders; this is the path handed to load_data().
processed_data = os.path.join(base_path, 'data/processed')
# Folder that receives the held-out X_test.npy / y_test.npy arrays.
test_data = os.path.join(base_path, 'data/test')

In [4]:
# Image height and width in pixels. `height` is the size passed to the model
# builders in the training cells; the loaded array reports shape
# (1924, 100, 100), which matches both values.
height = 100
width = 100

# Single folder dataset

In [5]:
import csv

def move_images(src_dir, dest_dir, class_dict):
    """Copy all class images into one flat folder and write their labels to a CSV.

    Despite the name, files are copied (not moved); the source folders are left
    untouched. Images are renamed to ``0.jpg``, ``1.jpg``, ... in the order they
    are visited, and row ``i`` of ``labels.csv`` holds the class identifier of
    ``i.jpg``.

    Parameters
    ----------
    src_dir : str
        Folder that contains one sub-folder per key of ``class_dict``.
    dest_dir : str
        Output folder; created (including parents) when missing.
    class_dict : dict
        Maps class folder name -> identifier written to ``labels.csv``.
        Classes are processed in the dict's iteration order.

    Returns
    -------
    int
        Number of images copied.

    Notes
    -----
    Every copy is named ``<n>.jpg`` regardless of the source extension, so a
    PNG source keeps its PNG bytes under a ``.jpg`` name.
    """
    # Local import: the notebook's import cell does not import shutil, so the
    # function must not depend on it leaking in through a star import.
    import shutil

    os.makedirs(dest_dir, exist_ok=True)

    # Running index used as the output file name.
    count = 0

    with open(os.path.join(dest_dir, 'labels.csv'), 'w', newline='') as csvfile:
        writer = csv.writer(csvfile)

        for class_name, class_id in class_dict.items():
            class_dir = os.path.join(src_dir, class_name)

            # os.listdir() order is arbitrary; sorting makes the numbering
            # reproducible from one run/machine to the next.
            for filename in sorted(os.listdir(class_dir)):
                # Case-insensitive so that e.g. "IMG_1.JPG" is not silently dropped.
                if not filename.lower().endswith(('.jpg', '.png')):
                    continue

                src_path = os.path.join(class_dir, filename)
                dest_path = os.path.join(dest_dir, f'{count}.jpg')
                shutil.copy(src_path, dest_path)

                writer.writerow([class_id])
                count += 1

    return count


In [6]:
# class_dict = {'no_caries': 0, 'caries': 1}
# move_images(processed_data, '/work/dataset_cvs', class_dict)

# Load dataset

In [7]:
def load_data(data_dir):
    """Load the two class folders under ``data_dir`` into image and label arrays.

    Parameters
    ----------
    data_dir : str
        Folder containing the sub-folders ``no_caries`` and ``caries``.

    Returns
    -------
    tuple of (numpy.ndarray, numpy.ndarray)
        ``images`` — every readable image divided by 255.0, stacked with
        ``np.array``; ``labels`` — the matching class ids, where 0 is
        ``no_caries`` and 1 is ``caries`` (the position in ``classes``).

    Notes
    -----
    Files that OpenCV cannot decode are skipped with a warning instead of
    aborting the whole load.
    NOTE(review): dividing by 255 presumably assumes 8-bit images, and a regular
    stacked array assumes all images share one shape — confirm for new data.
    """
    import warnings

    classes = ['no_caries', 'caries']
    images = []
    labels = []
    for class_id, class_name in enumerate(classes):
        class_path = os.path.join(data_dir, class_name)
        # Sorted so sample order (and therefore any seeded split made from it)
        # does not depend on the filesystem's listing order.
        for image_name in sorted(os.listdir(class_path)):
            image_path = os.path.join(class_path, image_name)
            image = cv2.imread(image_path, cv2.IMREAD_UNCHANGED)
            if image is None:
                # imread signals failure by returning None rather than raising;
                # dividing it would fail with an unhelpful TypeError.
                warnings.warn(f'Skipping unreadable image: {image_path}')
                continue
            images.append(image / 255.0)  # pixel normalization
            labels.append(class_id)
    return np.array(images), np.array(labels)


In [8]:
# Load every image under the processed folder: X holds the pixel arrays scaled
# by 1/255, y the integer class ids (0 = no_caries, 1 = caries).
X, y = load_data(processed_data)

In [9]:
X.shape

(1924, 100, 100)

## Augmentation

In [10]:
# Switch for the flip-based augmentation cell below.
# NOTE(review): the arrays that cell builds (X_augmented / y_augmented) are not
# read by any later cell shown here; the data split uses X and y directly.
augmentation = False

In [11]:
if augmentation:
    # Flip the whole batch at once. Axis 0 is the sample axis, so image rows are
    # axis 1 and image columns are axis 2; any trailing channel axis is left
    # untouched (the earlier fixed (N, H, W) buffer could not hold one).
    X_flipped_horizontal = np.flip(X, axis=2)  # mirror left <-> right
    X_flipped_vertical = np.flip(X, axis=1)    # mirror top <-> bottom

    # Originals first, then the horizontal flips, then the vertical flips;
    # each flipped copy inherits the label of its source image.
    X_augmented = np.concatenate((X, X_flipped_horizontal, X_flipped_vertical))
    y_augmented = np.concatenate((y, y, y))

    # Shuffle images and labels with the same permutation so pairs stay aligned.
    # NOTE(review): the permutation is unseeded, so the order differs per run.
    idx = np.random.permutation(len(X_augmented))
    X_augmented = X_augmented[idx]
    y_augmented = y_augmented[idx]

## Data split

In [12]:
# Hold out 10% of the samples as the final test set; the remaining 90% are the
# pool used by the cross-validation further down (via X_data / y_data).
# NOTE(review): no `stratify=y` is passed, so the class proportions of the
# hold-out set are left to chance — confirm this is intended, given that the
# folds used later are stratified.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.10, random_state=25)

In [13]:
# np.save does not create missing folders, so make sure the target exists first.
os.makedirs(test_data, exist_ok=True)

# Save the held-out arrays to disk (the training arrays are intentionally not saved)
# np.save('X_train.npy', X_train)
np.save(os.path.join(test_data,'X_test.npy'), X_test)
# np.save('y_train.npy', y_train)
np.save(os.path.join(test_data,'y_test.npy'), y_test)

# Train

## Single train/validation/test

In [14]:
# model = create_model((height, width, 1), 16, (3,3))
# history = model.fit(X_train, y_train, epochs=50, batch_size=32, validation_split=0.1)

## Stratified kfold cross validation

### Constants

In [20]:
# Cross-validation settings shared by every training cell below.
# Number of stratified splits (n_splits) per repeat.
folds = 10
# How many times the whole k-fold procedure is repeated (n_repeats).
repeats = 1
# Epochs passed to model.fit for each fold.
epochs = 30
# random_state of the cross-validator, so the fold assignment is repeatable.
random_seed = 25
# Cross-validation runs on the training split only; X_test / y_test stay untouched.
X_data = X_train
y_data = y_train
# Name looked up in model.metrics_names to decide which fold model is kept.
metric_name = 'auc' #for best model monitor

In [18]:
def get_csv_logger_callback(model_name):
    """Return a one-element callback list that logs training history to CSV.

    The model's folder is first passed to ``empty_directory`` (helper defined
    outside this notebook — presumably it clears the folder; verify there).
    The logger is created in append mode, so a single callback instance can be
    reused across several ``fit`` calls and keep writing to the same file.

    Parameters
    ----------
    model_name : str
        Sub-folder name under ``Cavity_detection/models``.

    Returns
    -------
    list
        ``[CSVLogger]`` — ready to pass as ``callbacks=``.
    """
    # Start from a clean model folder before any history is written.
    empty_directory(f'Cavity_detection/models/{model_name}')

    history_file = f'Cavity_detection/models/{model_name}/training_history.csv'
    return [CSVLogger(filename=history_file, append=True)]

def get_best_model_callback(model_name, monitor='auc', mode='max'):
    """Return a one-element callback list that checkpoints the best model.

    Parameters
    ----------
    model_name : str
        Sub-folder name under ``Cavity_detection/models``; the checkpoint is
        written to ``best_model.h5`` inside it.
    monitor : str, optional
        Logged quantity the checkpoint watches. Defaults to ``'auc'`` (the
        previous hard-coded value). Keras prefixes validation metrics with
        ``val_``, so pass e.g. ``'val_auc'`` to track the validation score.
    mode : str, optional
        ``'max'`` (default) keeps the highest value of ``monitor``; use
        ``'min'`` for quantities such as a loss.

    Returns
    -------
    list
        ``[ModelCheckpoint]`` — ready to pass as ``callbacks=``.
    """
    best_model_callback = ModelCheckpoint(
        filepath=f'Cavity_detection/models/{model_name}/best_model.h5',
        monitor=monitor,
        save_best_only=True,
        mode=mode,
        verbose=1
    )

    return [best_model_callback]


### Train CNN

In [21]:
model_name = 'CNN'
# One CSV logger (append mode) shared by all folds of this model.
history_log = get_csv_logger_callback(model_name)

# Best held-out-fold score seen so far; the model that produced it is on disk.
metric_last_value = 0 

# Define the cross-validator
rskf = RepeatedStratifiedKFold(n_repeats=repeats, n_splits=folds, random_state=random_seed)

# Loop over the folds
for fold_number, (train_idx, test_idx) in enumerate(rskf.split(X_data, y_data)):
    print('Fold:', fold_number)

    # A fresh model is built every fold. Without clearing, Keras keeps the
    # global state of all earlier models alive, so memory grows fold after
    # fold. Clearing also resets the auto-generated name counters, keeping
    # auto-named metrics stable for the `metric_name` lookup below.
    tf.keras.backend.clear_session()

    X_train_fold, y_train_fold = X_data[train_idx], y_data[train_idx]
    X_test_fold, y_test_fold = X_data[test_idx], y_data[test_idx]

    # Model
    model = models.define_CNN(1, height)
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy','Precision', 'Recall', 'AUC'])

    # Train the model (validation_split holds back part of this fold's training data)
    model.fit(X_train_fold, y_train_fold, epochs=epochs, batch_size=32, validation_split=0.1, callbacks=history_log, verbose=0)

    # Score the model on the fold that was left out of training
    test = model.evaluate(X_test_fold, y_test_fold, verbose = 1)

    # Keep the model only if it beats the best fold score so far
    metric_actual_value = test[model.metrics_names.index(metric_name)]
    if ( metric_actual_value > metric_last_value):
        model.save(f'Cavity_detection/models/{model_name}/best_model.h5')
        metric_last_value = metric_actual_value
        print('Saved best model')


Fold: 0
Saved best model
Fold: 1
Saved best model
Fold: 2
Fold: 3
Saved best model
Fold: 4
Saved best model
Fold: 5
Fold: 6
Saved best model
Fold: 7
Fold: 8
Fold: 9


### Train DCNN1

In [22]:
model_name = 'DCNN1'
# One CSV logger (append mode) shared by all folds of this model.
history_log = get_csv_logger_callback(model_name)

# Best held-out-fold score seen so far; the model that produced it is on disk.
metric_last_value = 0 

# Define the cross-validator
rskf = RepeatedStratifiedKFold(n_repeats=repeats, n_splits=folds, random_state=random_seed)

# Loop over the folds
for fold_number, (train_idx, test_idx) in enumerate(rskf.split(X_data, y_data)):
    print('Fold:', fold_number)

    # A fresh model is built every fold. Without clearing, Keras keeps the
    # global state of all earlier models alive, so memory grows fold after
    # fold. Clearing also resets the auto-generated name counters, keeping
    # auto-named metrics stable for the `metric_name` lookup below.
    tf.keras.backend.clear_session()

    X_train_fold, y_train_fold = X_data[train_idx], y_data[train_idx]
    X_test_fold, y_test_fold = X_data[test_idx], y_data[test_idx]

    # Model
    model = models.define_DCNN1(1, height)
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy','Precision', 'Recall', 'AUC'])

    # Train the model (validation_split holds back part of this fold's training data)
    model.fit(X_train_fold, y_train_fold, epochs=epochs, batch_size=32, validation_split=0.1, callbacks=history_log, verbose=0)

    # Score the model on the fold that was left out of training
    test = model.evaluate(X_test_fold, y_test_fold, verbose = 1)

    # Keep the model only if it beats the best fold score so far
    metric_actual_value = test[model.metrics_names.index(metric_name)]
    if ( metric_actual_value > metric_last_value):
        model.save(f'Cavity_detection/models/{model_name}/best_model.h5')
        metric_last_value = metric_actual_value
        print('Saved best model')


Fold: 0
Saved best model
Fold: 1
Saved best model
Fold: 2
Saved best model
Fold: 3
Fold: 4
Fold: 5
Saved best model
Fold: 6
Saved best model
Fold: 7
Fold: 8
Fold: 9


### Train DCNN2

In [23]:
model_name = 'DCNN2'
# One CSV logger (append mode) shared by all folds of this model.
history_log = get_csv_logger_callback(model_name)

# Best held-out-fold score seen so far; the model that produced it is on disk.
metric_last_value = 0 

# Define the cross-validator
rskf = RepeatedStratifiedKFold(n_repeats=repeats, n_splits=folds, random_state=random_seed)

# Loop over the folds
for fold_number, (train_idx, test_idx) in enumerate(rskf.split(X_data, y_data)):
    print('Fold:', fold_number)

    # A fresh model is built every fold. Without clearing, Keras keeps the
    # global state of all earlier models alive, so memory grows fold after
    # fold. Clearing also resets the auto-generated name counters, keeping
    # auto-named metrics stable for the `metric_name` lookup below.
    tf.keras.backend.clear_session()

    X_train_fold, y_train_fold = X_data[train_idx], y_data[train_idx]
    X_test_fold, y_test_fold = X_data[test_idx], y_data[test_idx]

    # Model
    model = models.define_DCNN2(1, height)
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy','Precision', 'Recall', 'AUC'])

    # Train the model (validation_split holds back part of this fold's training data)
    model.fit(X_train_fold, y_train_fold, epochs=epochs, batch_size=32, validation_split=0.1, callbacks=history_log, verbose=0)

    # Score the model on the fold that was left out of training
    test = model.evaluate(X_test_fold, y_test_fold, verbose = 1)

    # Keep the model only if it beats the best fold score so far
    metric_actual_value = test[model.metrics_names.index(metric_name)]
    if ( metric_actual_value > metric_last_value):
        model.save(f'Cavity_detection/models/{model_name}/best_model.h5')
        metric_last_value = metric_actual_value
        print('Saved best model')


Fold: 0
Saved best model
Fold: 1
Saved best model
Fold: 2
Saved best model
Fold: 3
Fold: 4
Saved best model
Fold: 5
Fold: 6
Saved best model
Fold: 7
Fold: 8
Fold: 9


### Train DCNN3

In [24]:
model_name = 'DCNN3'
# One CSV logger (append mode) shared by all folds of this model.
history_log = get_csv_logger_callback(model_name)

# Best held-out-fold score seen so far; the model that produced it is on disk.
metric_last_value = 0 

# Define the cross-validator
rskf = RepeatedStratifiedKFold(n_repeats=repeats, n_splits=folds, random_state=random_seed)

# Loop over the folds
for fold_number, (train_idx, test_idx) in enumerate(rskf.split(X_data, y_data)):
    print('Fold:', fold_number)

    # A fresh model is built every fold. Without clearing, Keras keeps the
    # global state of all earlier models alive, so memory grows fold after
    # fold. Clearing also resets the auto-generated name counters, keeping
    # auto-named metrics stable for the `metric_name` lookup below.
    tf.keras.backend.clear_session()

    X_train_fold, y_train_fold = X_data[train_idx], y_data[train_idx]
    X_test_fold, y_test_fold = X_data[test_idx], y_data[test_idx]

    # Model
    model = models.define_DCNN3(1, height)
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy','Precision', 'Recall', 'AUC'])

    # Train the model (validation_split holds back part of this fold's training data)
    model.fit(X_train_fold, y_train_fold, epochs=epochs, batch_size=32, validation_split=0.1, callbacks=history_log, verbose=0)

    # Score the model on the fold that was left out of training
    test = model.evaluate(X_test_fold, y_test_fold, verbose = 1)

    # Keep the model only if it beats the best fold score so far
    metric_actual_value = test[model.metrics_names.index(metric_name)]
    if ( metric_actual_value > metric_last_value):
        model.save(f'Cavity_detection/models/{model_name}/best_model.h5')
        metric_last_value = metric_actual_value
        print('Saved best model')


Fold: 0
Saved best model
Fold: 1
Saved best model
Fold: 2
Saved best model
Fold: 3
Saved best model
Fold: 4
Saved best model
Fold: 5
Saved best model
Fold: 6
Fold: 7
Fold: 8
Fold: 9


<a style='text-decoration:none;line-height:16px;display:flex;color:#5B5B62;padding:10px;justify-content:end;' href='https://deepnote.com?utm_source=created-in-deepnote-cell&projectId=8f63fdc3-7b28-4cc7-8735-09f23d070fd3' target="_blank">
Created in <span style='font-weight:600;margin-left:4px;'>Deepnote</span></a>