# Requirements

## Libraries

In [1]:
# !pip install -r requirements.txt
import numpy as np
import os
import cv2
import tensorflow as tf
from tensorflow.keras.callbacks import ModelCheckpoint, CSVLogger
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.model_selection import StratifiedKFold, RepeatedStratifiedKFold
from Cavity_detection.models import models
from Cavity_detection.src.utils import *

2023-04-13 00:43:18.237871: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-04-13 00:43:18.347373: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /root/venv/lib/python3.9/site-packages/cv2/../../lib64:
2023-04-13 00:43:18.347403: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.
2023-04-13 00:43:19.148725: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvi

## Directories and definitions

In [2]:
# Root folder of the project; every data and model path below is built from it.
# When running on Google Colab, use the Drive copy instead:
# base_path = '/content/drive/MyDrive/Cavity_detection'
base_path = 'Cavity_detection'


In [3]:
# Folder holding the preprocessed images, one sub-folder per class
processed_data = os.path.join(base_path, 'data/processed')

# Per-class folders inside the processed dataset
processed_no_caries = os.path.join(base_path, 'data/processed/no_caries')
processed_caries = os.path.join(base_path, 'data/processed/caries')

In [4]:
# Image height and width (the images are square)
height = width = 100

# Load dataset

In [5]:
def load_data(data_dir):
    """Load every image found under ``data_dir/no_caries`` and ``data_dir/caries``.

    Parameters
    ----------
    data_dir : str
        Directory that contains one sub-folder per class
        (``no_caries`` and ``caries``).

    Returns
    -------
    tuple of np.ndarray
        ``(images, labels)`` where ``images`` holds the pixel data divided by
        255.0 and ``labels`` holds the class id of each image
        (0 for ``no_caries``, 1 for ``caries``).

    Notes
    -----
    * Files are visited in sorted order, because ``os.listdir`` returns entries
      in arbitrary order and the sample order feeds the seeded
      cross-validation splits.
    * ``cv2.imread`` returns ``None`` instead of raising when a file cannot be
      decoded; such files are skipped with a warning rather than crashing on
      the normalization step.
    """
    import warnings

    classes = ['no_caries', 'caries']
    images = []
    labels = []
    for class_id, class_name in enumerate(classes):
        class_path = os.path.join(data_dir, class_name)
        for image_name in sorted(os.listdir(class_path)):
            image_path = os.path.join(class_path, image_name)
            # IMREAD_UNCHANGED keeps the channel layout and bit depth stored on disk
            image = cv2.imread(image_path, cv2.IMREAD_UNCHANGED)
            if image is None:
                warnings.warn(f'Skipping unreadable image: {image_path}')
                continue
            images.append(image / 255.0)  # pixel normalization
            labels.append(class_id)
    return np.array(images), np.array(labels)


In [6]:
# Load the data: X holds the normalized images, y the class ids
# (0 = no_caries, 1 = caries) read from the processed dataset folder
X, y = load_data(processed_data)

In [7]:
X.shape

(1924, 100, 100)

## Augmentation

In [8]:
# Mirror the whole image stack at once instead of image by image.
# X is indexed as (image, row, column), so reversing axis 2 is a horizontal
# flip of every image and reversing axis 1 is a vertical flip.
flipped_horizontally = np.flip(X, axis=2)
flipped_vertically = np.flip(X, axis=1)

# Originals first, then the horizontal flips, then the vertical flips;
# the labels are repeated in the same order so they stay aligned.
X_augmented = np.concatenate((X, flipped_horizontally, flipped_vertically))
y_augmented = np.concatenate((y, y, y))

# Shuffle images and labels with one shared permutation so pairs stay matched
idx = np.random.permutation(len(X_augmented))
X_augmented = X_augmented[idx]
y_augmented = y_augmented[idx]

In [9]:
X_augmented.shape

(5772, 100, 100)

## Data split

In [10]:
# split the data
# X_train, X_test, y_train, y_test = train_test_split(X_augmented, y_augmented, test_size=0.1, random_state=25)

# Train

## Single train/validation/test

In [11]:
# model = create_model((height, width, 1), 16, (3,3))
# history = model.fit(X_train, y_train, epochs=50, batch_size=32, validation_split=0.1)

## Stratified k-fold cross-validation

### Constants

In [12]:
# Repeated stratified k-fold settings
folds, repeats = 10, 3  # splits per repetition, number of repetitions
random_seed = 25        # seed handed to the cross-validator

# Epochs for each model.fit call
epochs = 30

# Arrays used for cross-validation: the original images and labels
# (the augmented arrays are not used here)
X_data, y_data = X, y

### Train CNN

In [13]:
def define_callbacks(model_name):
    """Build the Keras callbacks used while training ``model_name``.

    Parameters
    ----------
    model_name : str
        Name of the sub-folder of ``{base_path}/models`` that receives the
        training artefacts.

    Returns
    -------
    list
        ``[CSVLogger, ModelCheckpoint]``, in that order.

    Notes
    -----
    The folder handed to ``empty_directory`` is derived from ``base_path``,
    exactly like the two output files, so the folder that is cleaned is always
    the one that is written to, whatever ``base_path`` is set to.
    """
    model_dir = f'{base_path}/models/{model_name}'

    # Clean model folder before new artefacts are written
    # (empty_directory comes from the project utils; presumably it removes
    # the folder's previous contents - confirm against its implementation)
    empty_directory(model_dir)

    # Log the training history; append=True keeps adding rows to the same
    # file on every fit() call that receives this callback
    csv_logger_callback = CSVLogger(
        filename=f'{model_dir}/training_history.csv',
        append=True
    )

    # Save the model only when the monitored validation AUC improves
    best_model_callback = ModelCheckpoint(
        filepath=f'{model_dir}/best_model.h5',
        monitor='val_auc',
        save_best_only=True,
        mode='max',
        verbose=0
    )

    return [csv_logger_callback, best_model_callback]

In [14]:

# Callbacks for the CNN run; the same list is reused by every fold below
callbacks_list = define_callbacks('CNN')

# Define the cross-validator
rskf = RepeatedStratifiedKFold(n_repeats=repeats, n_splits=folds, random_state=random_seed)

# Loop over the folds (folds * repeats iterations in total)
for fold_number, (train_idx, val_idx) in enumerate(rskf.split(X_data, y_data)):
    print('Fold:', fold_number)
    X_train_fold, y_train_fold = X_data[train_idx], y_data[train_idx]
    X_val_fold, y_val_fold = X_data[val_idx], y_data[val_idx]

    # A new model is built on every fold; release the global Keras state left
    # by the previous one so memory does not keep growing across the loop
    tf.keras.backend.clear_session()

    # Model (arguments are presumably channel count and image size - confirm
    # against models.define_CNN)
    model = models.define_CNN(1, height)
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy','Precision', 'Recall', 'AUC'])

    # Train the model, validating on the held-out part of this fold
    model.fit(X_train_fold, y_train_fold, epochs=epochs, batch_size=32, validation_data=(X_val_fold, y_val_fold), callbacks=callbacks_list, verbose=0)


Fold: 0
2023-04-13 00:43:24.441857: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /root/venv/lib/python3.9/site-packages/cv2/../../lib64:
2023-04-13 00:43:24.441901: W tensorflow/compiler/xla/stream_executor/cuda/cuda_driver.cc:265] failed call to cuInit: UNKNOWN ERROR (303)
2023-04-13 00:43:24.441925: I tensorflow/compiler/xla/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (p-8f63fdc3-7b28-4cc7-8735-09f23d070fd3): /proc/driver/nvidia/version does not exist
2023-04-13 00:43:24.442170: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow 

### Train DCNN1

In [16]:
# Callbacks for the DCNN1 run; the same list is reused by every fold below
callbacks_list = define_callbacks('DCNN1')

# Define the cross-validator
rskf = RepeatedStratifiedKFold(n_repeats=repeats, n_splits=folds, random_state=random_seed)

# Loop over the folds (folds * repeats iterations in total)
for fold_number, (train_idx, val_idx) in enumerate(rskf.split(X_data, y_data)):
    print('Fold:', fold_number)
    X_train_fold, y_train_fold = X_data[train_idx], y_data[train_idx]
    X_val_fold, y_val_fold = X_data[val_idx], y_data[val_idx]

    # A new model is built on every fold; release the global Keras state left
    # by the previous one so memory does not keep growing across the loop
    tf.keras.backend.clear_session()

    # Model (arguments are presumably channel count and image size - confirm
    # against models.define_DCNN1)
    model = models.define_DCNN1(1, height)
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy','Precision', 'Recall', 'AUC'])

    # Train the model, validating on the held-out part of this fold
    model.fit(X_train_fold, y_train_fold, epochs=epochs, batch_size=32, validation_data=(X_val_fold, y_val_fold), callbacks= callbacks_list, verbose=0)


Fold: 0
Fold: 1
Fold: 2
Fold: 3
Fold: 4
Fold: 5
Fold: 6
Fold: 7
Fold: 8
Fold: 9
Fold: 10
Fold: 11
Fold: 12
Fold: 13
Fold: 14
Fold: 15
Fold: 16
Fold: 17
Fold: 18
Fold: 19
Fold: 20
Fold: 21
Fold: 22
Fold: 23
Fold: 24
Fold: 25
Fold: 26
Fold: 27
Fold: 28
Fold: 29


### Train DCNN2

In [17]:
# Callbacks for the DCNN2 run; the same list is reused by every fold below
callbacks_list = define_callbacks('DCNN2')

# Define the cross-validator
rskf = RepeatedStratifiedKFold(n_repeats=repeats, n_splits=folds, random_state=random_seed)

# Loop over the folds (folds * repeats iterations in total)
for fold_number, (train_idx, val_idx) in enumerate(rskf.split(X_data, y_data)):
    print('Fold:', fold_number)
    X_train_fold, y_train_fold = X_data[train_idx], y_data[train_idx]
    X_val_fold, y_val_fold = X_data[val_idx], y_data[val_idx]

    # A new model is built on every fold; release the global Keras state left
    # by the previous one so memory does not keep growing across the loop
    tf.keras.backend.clear_session()

    # Model (arguments are presumably channel count and image size - confirm
    # against models.define_DCNN2)
    model = models.define_DCNN2(1, height)
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy','Precision', 'Recall', 'AUC'])

    # Train the model, validating on the held-out part of this fold
    model.fit(X_train_fold, y_train_fold, epochs=epochs, batch_size=32, validation_data=(X_val_fold, y_val_fold), callbacks=callbacks_list, verbose=0)


Fold: 0
Fold: 1
Fold: 2
Fold: 3
Fold: 4
Fold: 5
Fold: 6
Fold: 7
Fold: 8
Fold: 9
Fold: 10
Fold: 11
Fold: 12
Fold: 13
Fold: 14
Fold: 15
Fold: 16
Fold: 17
Fold: 18
Fold: 19
Fold: 20
Fold: 21
Fold: 22
Fold: 23
Fold: 24
Fold: 25
Fold: 26
Fold: 27
Fold: 28
Fold: 29


### Train DCNN3

In [0]:
# Callbacks for the DCNN3 run; the same list is reused by every fold below
callbacks_list = define_callbacks('DCNN3')

# Define the cross-validator
rskf = RepeatedStratifiedKFold(n_repeats=repeats, n_splits=folds, random_state=random_seed)

# Loop over the folds (folds * repeats iterations in total)
for fold_number, (train_idx, val_idx) in enumerate(rskf.split(X_data, y_data)):
    print('Fold:', fold_number)
    X_train_fold, y_train_fold = X_data[train_idx], y_data[train_idx]
    X_val_fold, y_val_fold = X_data[val_idx], y_data[val_idx]

    # A new model is built on every fold; release the global Keras state left
    # by the previous one so memory does not keep growing across the loop
    tf.keras.backend.clear_session()

    # Model (arguments are presumably channel count and image size - confirm
    # against models.define_DCNN3)
    model = models.define_DCNN3(1, height)
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy','Precision', 'Recall', 'AUC'])

    # Train the model, validating on the held-out part of this fold
    model.fit(X_train_fold, y_train_fold, epochs=epochs, batch_size=32, validation_data=(X_val_fold, y_val_fold), callbacks=callbacks_list, verbose=0)


Fold: 0
Fold: 1
Fold: 2
Fold: 3
Fold: 4
Fold: 5
Fold: 6
Fold: 7
Fold: 8
Fold: 9
Fold: 10
Fold: 11
Fold: 12
Fold: 13
Fold: 14
Fold: 15
Fold: 16
Fold: 17
Fold: 18


KernelInterrupted: Execution interrupted by the Jupyter kernel.

<a style='text-decoration:none;line-height:16px;display:flex;color:#5B5B62;padding:10px;justify-content:end;' href='https://deepnote.com?utm_source=created-in-deepnote-cell&projectId=8f63fdc3-7b28-4cc7-8735-09f23d070fd3' target="_blank">
Created in <span style='font-weight:600;margin-left:4px;'>Deepnote</span></a>