# LIBRARY

In [1]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.layers import Dense, Flatten, Dropout
from tensorflow.keras import Sequential
from tensorflow.keras.optimizers import RMSprop
from tensorflow.keras.callbacks import ModelCheckpoint
import seaborn as sns
import matplotlib.pyplot as plt
import tensorflow as tf
import os
import cv2
import matplotlib.image as mpimg
import pandas as pd
import wandb
import random
import numpy as np
from wandb.keras import WandbCallback


In [2]:
!pip install visualkeras
import visualkeras

# FOLDER TO CSV

In [3]:
# Rename the misspelled 'Trianing' folder to 'train' (not possible on Kaggle, where the input folder is read-only)
# os.rename('../input/teeth-dataset/teeth_dataset/teeth_dataset/Trianing','../input/teeth-dataset/teeth_dataset/teeth_dataset/train')

In [4]:
# FIXME: update BASE_DIR when a different dataset is attached
BASE_DIR = '../input/teethdecay/teeth_dataset' #teethdecay
train_folder = os.path.join(BASE_DIR,'train')
test_folder = os.path.join(BASE_DIR,'test')


def _folder_to_frame(split_dir, split_name):
    """Index one dataset split as a DataFrame of image paths and integer labels.

    Args:
        split_dir: directory holding one sub-folder per class.
        split_name: prefix written in front of every stored path
            (e.g. 'train' -> 'train/<class>/<file>').

    Returns:
        DataFrame with columns 'images' (relative path) and 'target'
        (0-based class id).
    """
    # Sort the class folders so the numbering is deterministic and identical
    # for every split; os.listdir() alone returns entries in arbitrary order.
    class_names = sorted(
        entry for entry in os.listdir(split_dir)
        if os.path.isdir(os.path.join(split_dir, entry))
    )
    records = [
        (os.path.join(split_name, class_name, image_name), class_id)
        for class_id, class_name in enumerate(class_names)
        for image_name in sorted(os.listdir(os.path.join(split_dir, class_name)))
    ]
    return pd.DataFrame(records, columns=['images', 'target'])


# With the 'caries' / 'no-caries' folders: caries = 0, no-caries = 1 in BOTH splits.
# TRAIN TO CSV
df_train = _folder_to_frame(train_folder, 'train')
df_train.to_csv('./train.csv')

# TEST TO CSV
df_test = _folder_to_frame(test_folder, 'test')
df_test.to_csv('./test.csv')

In [5]:
# Show the last rows of both index frames; display() renders each one as a
# table instead of the plain-text repr of a tuple.
display(df_train.tail(5), df_test.tail(5))

# ENHANCE DATASET

In [6]:
# lib
# !pip install -U albumentations

In [7]:
# os.makedirs('teeth_dataset',exist_ok=True)
# os.makedirs('teeth_dataset/train',exist_ok=True)
# os.makedirs('teeth_dataset/test',exist_ok=True)
# os.makedirs('teeth_dataset/train/caries',exist_ok=True)
# os.makedirs('teeth_dataset/train/no-caries',exist_ok=True)
# os.makedirs('teeth_dataset/test/caries',exist_ok=True)
# os.makedirs('teeth_dataset/test/no-caries',exist_ok=True)

In [8]:
# transform = A.Compose([
#     A.VerticalFlip(p=0.5),
#     A.HorizontalFlip(p=0.5),
#     A.RandomBrightnessContrast(p=0.6),
#      A.OneOf([
#                         A.MotionBlur(blur_limit=3),
#                         A.MedianBlur(blur_limit=3),
#                         A.GaussianBlur(blur_limit=(1,3)),
#                         A.GaussNoise(var_limit=(5.0, 15.0)),
#                     ], p=0.7),
#     A.OneOf([
#                         A.OpticalDistortion(distort_limit=1.0),
#                         A.GridDistortion(num_steps=5, distort_limit=1.),
#                         A.ElasticTransform(alpha=3),
#                     ], p=0.7),
#     A.CLAHE(clip_limit=4.0, p=0.7),
#     A.ColorJitter(p=0.7,hue=0.1),
#     A.HueSaturationValue(hue_shift_limit=10, sat_shift_limit=20, val_shift_limit=10, p=0.5),
#     A.ShiftScaleRotate(shift_limit=0.1, scale_limit=0.1, rotate_limit=15, border_mode=0, p=0.85),
#     A.CoarseDropout(max_holes=1, max_height=int(50 * 0.375), max_width=int(50 * 0.375), min_holes=1, min_height=None, min_width=None, fill_value=0, mask_fill_value=None, always_apply=False, p=0.7),
    
# ])

In [9]:
# path = os.path.join('../input/toothdecay/teeth_dataset','test','caries')
# path_save = os.path.join('teeth_dataset','test','caries')
# for _image in os.listdir(path):
#     for i in range(20):
#         image = cv2.imread(os.path.join(path,_image))
#         image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
#         transformed = transform(image=image)['image']
#         cv2.imwrite(path_save+'/'+_image[:-4]+"_"+str(i)+'.jpg' , transformed)

# VISUALIZE DATA

In [10]:
# Report how many images were indexed for each split.
n_train_images = len(df_train)
n_test_images = len(df_test)
print(f'Number of train images: {n_train_images}')
print(f'Number of test images: {n_test_images}')

In [11]:
# Bar chart: number of training images per class.
plt.figure(figsize=(10,10))
ax = sns.countplot(x = 'target', data = df_train)
ax.set_title('Number Caries and No-Caries Images in Training', size=20)
ax.set_xlabel("Label",fontsize=20)
ax.set_ylabel("Number",fontsize=20)
# Take each tick's name from the class folder embedded in the image path
# ('train/<class>/<file>') rather than a hard-coded list, so the text always
# matches the integer target it is drawn under (bars are in ascending target order).
class_of_target = (
    df_train['images']
    .map(lambda path: os.path.basename(os.path.dirname(path)))
    .groupby(df_train['target'])
    .first()
    .sort_index()
)
ax.set_xticklabels(list(class_of_target.str.title()),fontsize=15)
# Write the exact count above every bar.
for p in ax.patches:
    ax.annotate('{:.0f}'.format(p.get_height()), (p.get_x()+0.35, p.get_height()+0.2),fontsize=20)

In [12]:
# Bar chart: number of test images per class.
plt.figure(figsize=(10,10))
ax = sns.countplot(x = 'target', data = df_test)
ax.set_title('Number Caries and No-Caries Images in Test', size=20)
ax.set_xlabel("Label",fontsize=20)
ax.set_ylabel("Number",fontsize=20)
# Take each tick's name from the class folder embedded in the image path
# ('test/<class>/<file>') rather than a hard-coded list, so the text always
# matches the integer target it is drawn under (bars are in ascending target order).
class_of_target = (
    df_test['images']
    .map(lambda path: os.path.basename(os.path.dirname(path)))
    .groupby(df_test['target'])
    .first()
    .sort_index()
)
ax.set_xticklabels(list(class_of_target.str.title()),fontsize=15)
# Write the exact count above every bar.
for p in ax.patches:
    ax.annotate('{:.0f}'.format(p.get_height()), (p.get_x()+0.35, p.get_height()+0.2),fontsize=20)

In [13]:
# Pie chart: class share in the training split.
bar, ax = plt.subplots(figsize = (10,10))
bar.patch.set_facecolor('white')
# value_counts() orders by frequency, so sort by class id and look the names up
# from the image paths; otherwise the labels follow whichever class is larger.
class_counts = df_train['target'].value_counts().sort_index()
class_of_target = (
    df_train['images']
    .map(lambda path: os.path.basename(os.path.dirname(path)))
    .groupby(df_train['target'])
    .first()
    .sort_index()
)
plt.pie(class_counts,
        labels = list(class_of_target.str.title()),
        autopct="%.1f%%",textprops={'fontsize':20})
plt.title('% Caries and No-Caries In Train', size=20)

In [14]:
# Pie chart: class share in the test split.
bar, ax = plt.subplots(figsize = (10,10))
bar.patch.set_facecolor('white')
# value_counts() orders by frequency, so sort by class id and look the names up
# from the image paths; otherwise the labels follow whichever class is larger.
class_counts = df_test['target'].value_counts().sort_index()
class_of_target = (
    df_test['images']
    .map(lambda path: os.path.basename(os.path.dirname(path)))
    .groupby(df_test['target'])
    .first()
    .sort_index()
)
plt.pie(class_counts,
        labels = list(class_of_target.str.title()),
        autopct="%.1f%%",textprops={'fontsize':20})
plt.title('% Caries and No-Caries In Test', size=20)

In [15]:
# Print the shape (H, W, C) of the raw caries training images to choose a
# common input size. The loop is kept commented out for reference.
# for i in os.listdir(os.path.join(BASE_DIR,'train','caries')):
#     im = cv2.imread(os.path.join(BASE_DIR,'train','caries',i)) #H,W,C
#     print(im.shape)

print('Choose 224x224 rescale is best choice')

In [16]:
# Show a 2x5 grid of randomly chosen caries images from the training split.
# The folder comes from train_folder so it follows BASE_DIR instead of a second
# hard-coded dataset path.
caries_dir = os.path.join(train_folder, 'caries')
caries_files = os.listdir(caries_dir)  # list the folder once, not once per image
f , ax = plt.subplots(2,5)
f.set_size_inches(10, 10)
f.suptitle('Caries Image',x=0.5,y=0.8,fontsize=30,verticalalignment='bottom')
for axis in ax.flat:
    img = cv2.imread(os.path.join(caries_dir, random.choice(caries_files)))
    # OpenCV loads BGR; matplotlib expects RGB.
    axis.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
plt.tight_layout()

In [17]:
# Show a 2x5 grid of randomly chosen no-caries images from the training split.
# The folder comes from train_folder so it follows BASE_DIR instead of a second
# hard-coded dataset path.
no_caries_dir = os.path.join(train_folder, 'no-caries')
no_caries_files = os.listdir(no_caries_dir)  # list the folder once, not once per image
f , ax = plt.subplots(2,5)
f.set_size_inches(10, 10)
f.suptitle('No-Caries Image',x=0.5,y=0.8,fontsize=30,verticalalignment='bottom')
for axis in ax.flat:
    img = cv2.imread(os.path.join(no_caries_dir, random.choice(no_caries_files)))
    # OpenCV loads BGR; matplotlib expects RGB.
    axis.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
plt.tight_layout()

In [18]:
# Image generators: heavy augmentation for training, rescale-only for validation.
_augment_kwargs = dict(
    # geometric jitter
    rotation_range=30,
    width_shift_range=0.1,
    height_shift_range=0.1,
    shear_range=0.1,
    zoom_range=0.1,
    horizontal_flip=True,
    vertical_flip=True,
    fill_mode='nearest',
    # photometric jitter
    brightness_range=(0.5, 1.5),
    # map 8-bit pixel values into [0, 1]
    rescale=1.0/255.,
)
train_generator = ImageDataGenerator(**_augment_kwargs)
val_generator = ImageDataGenerator(rescale=1./255.)

In [19]:
# Directory iterators feeding model.fit: 224x224 images, labels taken from folder names.
val_batch_size = round(len(df_test)/2)  # 14/2 = 7
train_datagen = train_generator.flow_from_directory(
    directory=train_folder,
    target_size=(224, 224),
    batch_size=10,
)
val_datagen = val_generator.flow_from_directory(
    directory=test_folder,
    target_size=(224, 224),
    batch_size=val_batch_size,
)

In [20]:
# Preview five augmented training images (one image per batch).
# Reads from train_folder so the preview follows BASE_DIR rather than a second
# hard-coded dataset path.
pic = train_generator.flow_from_directory(
    train_folder,
    batch_size=1,
    target_size=(112, 112))
f,ax = plt.subplots(1,5)
f.set_figheight(15)
f.set_figwidth(15)
for i in range(5):
    # next(pic) uses the iterator protocol, which does not depend on the
    # generator exposing a .next() method.
    img, label = next(pic)
    ax[i].imshow(img[0])
    ax[i].set_xlabel('Caries' if np.argmax(label) == 0 else 'No-Caries',fontsize=12)

In [21]:
# Xception backbone without its classification head, randomly initialised.
# https://keras.io/api/applications/
# NOTE(review): per the Keras docs `classes` only applies when include_top=True - confirm it can be dropped.
base_model = tf.keras.applications.Xception(
    include_top=False,
    weights=None,
    input_shape=(224, 224, 3),
    classes=2,
)

In [22]:
# Classifier: backbone -> flatten -> two dense/dropout stages -> 2-way softmax.
model = Sequential([
    base_model,
    Flatten(),
    Dense(512, activation="relu"),
    Dropout(0.3),
    Dense(256, activation="relu"),
    Dropout(0.6),
    Dense(2, activation="softmax"),
])
model.summary()

In [23]:
# Render the layer graph (top-to-bottom) with shapes, dtypes and nested layers expanded.
tf.keras.utils.plot_model(
    model,
    show_shapes=True,
    show_dtype=True,
    show_layer_names=True,
    rankdir="TB",
    expand_nested=True,
    dpi=100,
)  # ,to_file='model.png')

In [24]:
# Draw the layered architecture diagram and save it to output.png.
from PIL import ImageFont

font = ImageFont.load_default()  # the font argument is optional
visualkeras.layered_view(
    model,
    legend=True,
    font=font,
    to_file='output.png',
)

In [25]:
# Training configuration.
# Adam, RMSprop and SGD can be swapped in for one another here.
model.compile(
    loss="categorical_crossentropy",
    optimizer=RMSprop(learning_rate=0.0001),
    metrics=['accuracy'],
)

In [26]:
path_checkpoint = 'tmp/efficienetb0_checkpoint'
os.makedirs(path_checkpoint,exist_ok=True)

# https://keras.io/api/callbacks/model_checkpoint/
# https://www.tensorflow.org/tensorboard/scalars_and_keras
# The model is compiled with metrics=['accuracy'], so the validation metric is
# logged as 'val_accuracy' (the key the history plots read). Monitoring the
# non-existent 'val_acc' disables both callbacks' logic.
MONITORED_METRIC = 'val_accuracy'

# save_best_only=True keeps best_model.h5 at the epoch with the highest
# validation accuracy instead of overwriting it every epoch.
Checkpoint = tf.keras.callbacks.ModelCheckpoint(filepath='best_model.h5',
                                                monitor=MONITORED_METRIC,
                                                mode='max',
                                                save_best_only=True,
                                                verbose=1)
callback = [Checkpoint,
            # stop after 5 epochs without improvement in validation accuracy
            tf.keras.callbacks.EarlyStopping(patience = 5, monitor=MONITORED_METRIC, mode='max', verbose=1),
            tf.keras.callbacks.TensorBoard(log_dir = "logs/{}".format('Toothdecay'))]

In [27]:
# Train for up to 20 epochs, validating on the test folder after every epoch.
# `callback` is already a list of callbacks, so it is passed as-is rather than
# wrapped in another list.
history = model.fit(
    train_datagen,
    validation_data=val_datagen,
    epochs=20,
    callbacks=callback)

# Plot Accuracy and Loss

In [28]:
# Load the TensorBoard notebook extension.
# %load_ext tensorboard
# %tensorboard --logdir logs/Toothdecay

In [29]:
# Accuracy per epoch: training (red) vs. validation (green).
accuracy_curves = [
    ("accuracy", 'ro-', "Training Accuracy"),
    ("val_accuracy", 'go-', "Testing Accuracy"),
]
for history_key, line_style, legend_text in accuracy_curves:
    plt.plot(history.history[history_key], line_style, label=legend_text)
plt.legend()
plt.show()

In [30]:
# Loss per epoch: training (red) vs. validation (green).
loss_curves = [
    ("loss", 'ro-', "Training Loss"),
    ("val_loss", 'go-', "Testing Loss"),
]
for history_key, line_style, legend_text in loss_curves:
    plt.plot(history.history[history_key], line_style, label=legend_text)
plt.legend()
plt.show()

In [31]:
# Rebuild the evaluation iterator with shuffle=False so predictions come back
# in the same order as val_datagen.classes.
# https://keras.io/api/preprocessing/image/
val_generator = ImageDataGenerator(rescale=1./255.)
eval_batch_size = round(len(df_test)/2)  # 14/2 = 7
val_datagen = val_generator.flow_from_directory(
    directory=test_folder,
    target_size=(224, 224),
    batch_size=eval_batch_size,
    shuffle=False,
)
# for i in range(5):
#     img, label = pic.next()
#     ax[i].imshow(img[0])
#     ax[i].set_xlabel('Caries' if np.argmax(label) == 0 else 'No-Caries',fontsize=12)

In [32]:
# Integer class id of every image in the evaluation iterator.
Y_true = val_datagen.classes
Y_true

In [33]:
# Class probabilities from the softmax layer, one row per evaluation image.
Y_prob = model.predict(val_datagen)
# One-hot encode the most probable class. Unlike .round(), this always marks
# exactly one class per row (a 0.5/0.5 tie would round to an all-zero row).
Y_pred = np.eye(Y_prob.shape[1], dtype=Y_prob.dtype)[Y_prob.argmax(axis=1)]
Y_pred[0:5]

In [34]:
# One-hot version of the true labels, matching the layout of Y_pred.
Y_true_onehot = tf.keras.utils.to_categorical(y=Y_true)
Y_true_onehot[:5]

In [35]:
# Per-class precision / recall / F1 on the evaluation set.
from sklearn.metrics import confusion_matrix , classification_report

target_names = ['caries','no-caries']
report_text = classification_report(Y_true_onehot, Y_pred, target_names=target_names)
print(report_text)

In [36]:
# Confusion matrix (rows: true class, columns: predicted class) as a heatmap.
class_labels = ['caries','no-caries']
true_ids = Y_true_onehot.argmax(axis = 1)
predicted_ids = Y_pred.argmax(axis = 1)
cm = pd.DataFrame(
    confusion_matrix(true_ids, predicted_ids),
    index=class_labels,
    columns=class_labels,
)
plt.figure(figsize = (10,10))
sns.heatmap(cm, annot=True, fmt='', cmap="Blues", linecolor='black', linewidth=1)

In [37]:
# https://scikit-learn.org/stable/modules/model_evaluation.html
from sklearn.metrics import roc_auc_score,precision_score,recall_score,f1_score,roc_curve, auc
# AUC is a ranking metric: score it on the model's class probabilities, not on
# thresholded 0/1 predictions. val_datagen is built with shuffle=False, so the
# rows line up with Y_true_onehot.
print("roc_auc_score:",roc_auc_score(Y_true_onehot,model.predict(val_datagen)))

In [38]:
# Precision for each class separately (average=None).
precision_score(y_true=Y_true_onehot, y_pred=Y_pred, average=None)

In [39]:
# Recall for each class separately (average=None).
recall_score(y_true=Y_true_onehot, y_pred=Y_pred, average=None)

In [40]:
# F1 score for each class separately (average=None).
f1_score(y_true=Y_true_onehot, y_pred=Y_pred, average=None)

In [41]:
# Per-class ROC curves and their AUC, keyed by class index.
# The curves are built from class probabilities: thresholded 0/1 predictions
# would leave each curve with a single operating point. val_datagen is built
# with shuffle=False, so the rows line up with Y_true_onehot.
Y_score = model.predict(val_datagen)
fpr = dict()
tpr = dict()
roc_auc = dict()
for i in range(2):
    fpr[i], tpr[i], _ = roc_curve(Y_true_onehot[:, i], Y_score[:, i])
    roc_auc[i] = auc(fpr[i], tpr[i])

In [42]:
# Micro-average ROC: pool every (image, class) pair into one binary problem.
# Scored on class probabilities rather than thresholded 0/1 predictions;
# val_datagen is built with shuffle=False, so the rows line up with Y_true_onehot.
fpr["micro"], tpr["micro"], _ = roc_curve(Y_true_onehot.ravel(), model.predict(val_datagen).ravel())
roc_auc["micro"] = auc(fpr["micro"], tpr["micro"])

In [43]:
# ROC curve of class index 1 against the chance diagonal.
lw = 3
roc_fig, roc_ax = plt.subplots()
roc_ax.plot(
    fpr[1], tpr[1],
    color='darkorange', lw=lw,
    label='ROC curve (area = %0.3f)' % roc_auc[1],
)
roc_ax.plot([0, 1], [0, 1], color='navy', lw=lw, linestyle='--')
roc_ax.set_xlim([0.0, 1.0])
roc_ax.set_ylim([0.0, 1.05])
roc_ax.set_xlabel('False Positive Rate')
roc_ax.set_ylabel('True Positive Rate')
roc_ax.set_title('ToothDecayClasicify characteristic')
roc_ax.legend(loc="lower right")
plt.show()

In [44]:
from itertools import cycle
# Macro-average ROC: interpolate every class curve onto a shared FPR grid and
# average them. np.interp is used directly; `scipy.interp` was only a
# deprecated alias of it and is no longer available in recent SciPy releases.

# First aggregate all false positive rates
all_fpr = np.unique(np.concatenate([fpr[i] for i in range(2)]))

# Then interpolate all ROC curves at these points
mean_tpr = np.zeros_like(all_fpr)
for i in range(2):
    mean_tpr += np.interp(all_fpr, fpr[i], tpr[i])

# Finally average it and compute AUC
mean_tpr /= 2

fpr["macro"] = all_fpr
tpr["macro"] = mean_tpr
roc_auc["macro"] = auc(fpr["macro"], tpr["macro"])

# Plot all ROC curves
plt.figure(figsize = (20,20))
plt.plot(fpr["micro"], tpr["micro"],
         label='micro-average ROC curve (area = {0:0.5f})'
               ''.format(roc_auc["micro"]),
         color='deeppink', linestyle=':', linewidth=4)

plt.plot(fpr["macro"], tpr["macro"],
         label='macro-average ROC curve (area = {0:0.5f})'
               ''.format(roc_auc["macro"]),
         color='navy', linestyle=':', linewidth=4)

colors = cycle(['aqua', 'darkorange', 'cornflowerblue'])
for i, color in zip(range(2), colors):
    plt.plot(fpr[i], tpr[i], color=color, lw=lw,
             label='ROC curve of class {0} (area = {1:0.5f})'
             ''.format(i, roc_auc[i]))
plt.plot([0, 1], [0, 1], 'k--', lw=lw)  # chance diagonal
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Some extension of Receiver operating characteristic to multi-class')
plt.legend(loc="lower right")
plt.show()

In [45]:
# Restore the weights written to best_model.h5 by the ModelCheckpoint callback.
model.load_weights('best_model.h5')

In [46]:
# Class names by output index; spelled like the dataset folders ('caries', 'no-caries').
classes = ['caries','no-caries']

In [47]:
# cv2_imshow ships with Google Colab. Outside Colab, fall back to an equivalent
# matplotlib helper instead of installing the google.colab package with pip.
try:
    from google.colab.patches import cv2_imshow
except ImportError:
    def cv2_imshow(img):
        """Display a BGR OpenCV image inline using matplotlib."""
        plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
        plt.axis('off')
        plt.show()

In [48]:
# Spot-check the trained model on the first four caries test images.
# The folder comes from test_folder so it follows BASE_DIR.
srcdir = os.path.join(test_folder, 'caries')

for temp in os.listdir(srcdir)[:4]:
    img = cv2.imread(os.path.join(srcdir, temp))
    if img is None:  # entry is not a readable image
        continue
    cv2.imwrite(temp, img)  # keep a copy in the working directory
    cv2_imshow(img)
    # Reproduce the training preprocessing: RGB channel order (OpenCV loads BGR),
    # the 224x224 input size the model was built with, and pixels scaled to [0, 1].
    rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    batch = cv2.resize(rgb, (224, 224)).astype('float32')[np.newaxis] / 255.0
    result = model.predict(batch)
    class_ind = int(np.argmax(result[0]))
    class_name = classes[class_ind]
    print(class_name)