# Machine Learning Model for Annulation Count

In [3]:
import os
import cv2
import numpy as np
from sklearn.model_selection import train_test_split

directory = './Cementum Data with Counts Bad Images Removed/'

# Resize all images
target_size = (300, 300)

images = []
counts = []

for dirpath, dirnames, files in os.walk(directory):
    if not dirnames:
        cementum_counts = int(dirpath.split(" ")[-1])
        for file in files:
            path = os.path.join(dirpath, file)
            image = cv2.imread(path, cv2.IMREAD_COLOR)
            resized_image = cv2.resize(image, target_size)
            blurred_image = cv2.GaussianBlur(resized_image, (5, 5), 0)
            equalized_image = cv2.merge([
                cv2.equalizeHist(blurred_image[:, :, 0]),
                cv2.equalizeHist(blurred_image[:, :, 1]),
                cv2.equalizeHist(blurred_image[:, :, 2])
            ])            
            
            images.append(resized_image)
            counts.append(cementum_counts)

X = np.array(images)
y = np.array(counts)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

print("Training set shape:", X_train.shape, y_train.shape)
print("Testing set shape:", X_test.shape, y_test.shape)

Training set shape: (768, 300, 300, 3) (768,)
Testing set shape: (193, 300, 300, 3) (193,)


## Explanation

The above code just goes through the specified folder and opens the images using CV2 library for Python. I went through the folder and added cementum counts based on Akacia's spreadsheet to the folder names. Therefore, the code puts the images along with the cementum counts in two arrays, which are then passed into a train_test_split.

In [7]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense

model = Sequential([
    Conv2D(32, (3, 3), activation='relu', input_shape=(300, 300, 3)),
    MaxPooling2D((2, 2)),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Flatten(),
    Dense(128, activation='relu'),
    Dense(1, activation='linear')
])

model.compile(optimizer='adam', loss='mean_squared_error', metrics=['mae'])

model.fit(X_train, y_train, epochs=6, validation_split=0.2)

test_loss, test_mae = model.evaluate(X_test, y_test)
print("Test MAE:", test_mae)

Epoch 1/6
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 569ms/step - loss: 32132792.0000 - mae: 2802.3245 - val_loss: 2194.0579 - val_mae: 43.6548
Epoch 2/6
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 566ms/step - loss: 2786.8755 - mae: 44.5068 - val_loss: 470.1296 - val_mae: 17.3353
Epoch 3/6
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 562ms/step - loss: 408.6553 - mae: 16.1222 - val_loss: 230.4533 - val_mae: 12.3777
Epoch 4/6
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 579ms/step - loss: 226.4553 - mae: 12.2911 - val_loss: 196.7703 - val_mae: 11.7355
Epoch 5/6
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 567ms/step - loss: 184.8667 - mae: 10.9853 - val_loss: 210.1182 - val_mae: 11.9526
Epoch 6/6
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 564ms/step - loss: 183.8877 - mae: 10.9530 - val_loss: 227.8824 - val_mae: 12.7923
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[

The above code uses the Sequential model for Keras for our annulation estimation model. I chose it because it was straightforward and did not want to have any issues with overfitting, since the dataset has a large variety of quality.

For future development, one could consider trying different model architectures to see how it changes the accuracy of the estimates.

# Notes

- Standardizing images to 300, 300 returns a Test MAE of 11.054 (not great)
- Using edge detection and 300, 300 returns a Test MAE of 11.071 (basically the same thing)

# Using Data Augmentation

In [48]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.model_selection import train_test_split

datagen = ImageDataGenerator(
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    brightness_range=[0.8, 1.2]
)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

input_shape = (300, 300, 3)

model = Sequential([
    Conv2D(32, (3, 3), activation='relu', input_shape=input_shape),
    BatchNormalization(),
    MaxPooling2D((2, 2)),
    Dropout(0.25),
    
    Conv2D(64, (3, 3), activation='relu'),
    BatchNormalization(),
    MaxPooling2D((2, 2)),
    Dropout(0.25),
    
    Conv2D(128, (3, 3), activation='relu'),
    BatchNormalization(),
    MaxPooling2D((2, 2)),
    Dropout(0.25),
    
    Flatten(),
    Dense(256, activation='relu'),
    BatchNormalization(),
    Dropout(0.5),
    
    Dense(1, activation='linear')
])

model.compile(optimizer='adam', loss='mean_squared_error', metrics=['mae'])

model.fit(datagen.flow(X_train, y_train, batch_size=32), epochs=50, validation_data=(X_test, y_test))

test_loss, test_mae = model.evaluate(X_test, y_test)
print("Test MAE:", test_mae)


Epoch 1/50


  self._warn_if_super_not_called()


[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m50s[0m 1s/step - loss: 1206.2736 - mae: 31.3695 - val_loss: 1249.1981 - val_mae: 32.0655
Epoch 2/50
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m48s[0m 1s/step - loss: 1167.5787 - mae: 30.8453 - val_loss: 1403.8884 - val_mae: 34.4828
Epoch 3/50
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m47s[0m 1s/step - loss: 1130.6686 - mae: 30.3678 - val_loss: 1073.3232 - val_mae: 29.4530
Epoch 4/50
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m46s[0m 1s/step - loss: 1099.0664 - mae: 29.8599 - val_loss: 608.2206 - val_mae: 19.8704
Epoch 5/50
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m46s[0m 1s/step - loss: 962.1870 - mae: 27.5318 - val_loss: 1240.6549 - val_mae: 31.9073
Epoch 6/50
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m46s[0m 1s/step - loss: 895.2390 - mae: 26.3688 - val_loss: 761.4824 - val_mae: 24.0777
Epoch 7/50
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1

The above attempt used data augmentation to further change and add variety to the training data. The test was also ran for more epochs but that did not seem to have any great effect on the test MAE. It is also worth noting that this Sequential model has more layers and is more complex, but that did not seem to have a great effect either.

# Removing Slides that were Redone

In [52]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.model_selection import train_test_split

datagen = ImageDataGenerator(
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    brightness_range=[0.8, 1.2]
)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

input_shape = (300, 300, 3)

model = Sequential([
    Conv2D(32, (3, 3), activation='relu', input_shape=input_shape),
    BatchNormalization(),
    MaxPooling2D((2, 2)),
    Dropout(0.25),
    
    Conv2D(64, (3, 3), activation='relu'),
    BatchNormalization(),
    MaxPooling2D((2, 2)),
    Dropout(0.25),
    
    Conv2D(128, (3, 3), activation='relu'),
    BatchNormalization(),
    MaxPooling2D((2, 2)),
    Dropout(0.25),
    
    Flatten(),
    Dense(256, activation='relu'),
    BatchNormalization(),
    Dropout(0.5),
    
    Dense(1, activation='linear')
])

model.compile(optimizer='adam', loss='mean_squared_error', metrics=['mae'])

model.fit(datagen.flow(X_train, y_train, batch_size=32), epochs=50, validation_data=(X_test, y_test))

test_loss, test_mae = model.evaluate(X_test, y_test)
print("Test MAE:", test_mae)

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/50


  self._warn_if_super_not_called()


[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 1s/step - loss: 1286.8789 - mae: 33.0725 - val_loss: 590.4973 - val_mae: 19.7711
Epoch 2/50
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m38s[0m 1s/step - loss: 1319.6798 - mae: 33.4466 - val_loss: 677.5323 - val_mae: 22.4370
Epoch 3/50
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m39s[0m 2s/step - loss: 1288.8495 - mae: 33.3363 - val_loss: 1089.0237 - val_mae: 30.1252
Epoch 4/50
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m37s[0m 1s/step - loss: 1255.0658 - mae: 32.8908 - val_loss: 736.2646 - val_mae: 23.3693
Epoch 5/50
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m36s[0m 1s/step - loss: 1167.0338 - mae: 31.5517 - val_loss: 1060.1381 - val_mae: 29.5870
Epoch 6/50
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m36s[0m 1s/step - loss: 1086.8112 - mae: 30.3331 - val_loss: 932.6024 - val_mae: 27.5142
Epoch 7/50
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1

This attempt only uses training data which was redone and I removed slides that were redone. It still uses a complex model which can be summarized with the following:

Convolutional Layers (Conv2D): Extract features from the images by a convolutional operation to an input image.

BatchNormalization Layers: Normalizing the layers' inputs by recentering and rescaling in order to increase stability and speed.

MaxPooling2D Layers: Reduce the spatial dimensions by selecting the maximum value in each region which retains the most important information.

Dropout Layers: Prevent overfitting by randomly dropping units during training.

Flatten Layer: Convert 2D feature maps into a 1D vector for the dense layers.

Dense Layers: Make predictions based on the extracted features. In our case, this would be the annulation estimation.

# Using RoboFlow

In [29]:
from roboflow import Roboflow
import os

rf = Roboflow(api_key="LuhaB9DHav8NHDPKM8Si")
project = rf.workspace().project("annulation-region-detection")
model = project.version(1).model

count = 0
directory = "./Flattened Images with Only Redone (RoboFlow)/"

for dirpath, dirnames, files in os.walk(directory):
    for file in files:
        if file.lower().endswith(('.png', '.jpg', '.jpeg')):
            path = os.path.join(dirpath, file)
            new_file = "prediction-" + file
            prediction = model.predict(path, confidence=90).json()
            prediction_array = prediction['predictions']
            if len(prediction_array) != 0:
                if prediction_array[0]['confidence'] > 0.90:
                    model.predict(path, confidence=90).save(os.path.join("./RoboFlow Predictions - 80 percent confident/", new_file))
                    count += 1
                    print("Processed (" + str(count) + "): " + file)

loading Roboflow workspace...
loading Roboflow project...
Processed (1): PNRP14_B.5.34_SKULL9_T46_SLIDE3_PIC1.jpg
Processed (2): PNRP14_B.7.19.56_T195_SLIDE1_PIC2.jpg
Processed (3): PNRP12_B.6.5.18_T189_SLIDE4_PIC4.jpg
Processed (4): PNRP12_B.4.22_T10_2_SLIDE6_PIC1.jpg
Processed (5): PNRP12_B.4.17_T6_SLIDE3_PIC3.jpg
Processed (6): PNRP14_B.7.33_T232_SLIDE4_PIC1.jpg
Processed (7): PNRP12_B.4.10_T2_SLIDE3_PIC2.jpg
Processed (8): PNRP14_B.7.33_T274_SLIDE2_PIC1.jpg
Processed (9): PNRP14_B.6.32_T142_SLIDE3_PIC1.jpg
Processed (10): PNRP12_B.4.22_T10_SLIDE7_PIC5.jpg
Processed (11): PNRP12_B.4.22_T10_SLIDE7_PIC4.jpg
Processed (12): PNRP12_B.5.15_T267_SLIDE2_PIC3.jpg
Processed (13): PNRP12_B.4.22_T10_SLIDE6_PIC4.jpg
Processed (14): PNRP12_B.4.10_T1_SLIDE1_PIC1.jpg
Processed (15): PNRP12_B.4.10_T2_SLIDE3_PIC3.jpg
Processed (16): PNRP14_B.5.35.17_T72_SLIDE2_PIC1.jpg
Processed (17): PNRP12_B.4.17_T6_SLIDE2_PIC2.jpg
Processed (18): PNRP14_B.7.19.56_T195_SLIDE1_PIC3.jpg
Processed (19): PNRP12_B.4.22

HTTPError: 403 Client Error: Forbidden for url: https://outline.roboflow.com/annulation-region-detection/1?api_key=LuhaB9DHav8NHDPKM8Si&confidence=90

# Self Hosted Inference - Annotates images

In [74]:
import os
import supervision as sv
from inference import get_model
from PIL import Image
import json


model = get_model(model_id="annulation-region-detection/1", api_key="LuhaB9DHav8NHDPKM8Si")

directory = "./Flattened Images with Only Redone (RoboFlow)/"

polygon_annotator = sv.PolygonAnnotator()

count = 0

for dirpath, dirnames, files in os.walk(directory):
    for file in files:
        if file.lower().endswith(('.png', '.jpg', '.jpeg')):
            path = os.path.join(dirpath, file)
            image = Image.open(path)
            new_file = "prediction-" + file
            results = model.infer(image=path)[0]
            predictions_json = results.json()
            
            predictions = json.loads(predictions_json)["predictions"]
            detections = sv.Detections.from_inference(results)
            
            confidence_threshold = 0.9
            
            if len(predictions) > 0 and predictions[0]["confidence"] > confidence_threshold:
                # annotate image with polygon
                annotated_image = polygon_annotator.annotate(scene=image.copy(), detections=detections)
                annotated_image.save(os.path.join("./Self-Hosted Inference - 90 percent confident/", new_file))
                count += 1
                print("Processed (" + str(count) + ") - " + file)

Processed (1) - PNRP14_B.5.34_SKULL9_T46_SLIDE3_PIC1.jpg
Processed (2) - PNRP14_B.7.19.56_T195_SLIDE1_PIC2.jpg
Processed (3) - PNRP12_B.6.5.18_T189_SLIDE4_PIC4.jpg
Processed (4) - PNRP12_B.4.22_T10_2_SLIDE6_PIC1.jpg
Processed (5) - PNRP12_B.4.17_T6_SLIDE3_PIC3.jpg
Processed (6) - PNRP14_B.7.33_T232_SLIDE4_PIC1.jpg
Processed (7) - PNRP12_B.4.10_T2_SLIDE3_PIC2.jpg
Processed (8) - PNRP14_B.7.33_T274_SLIDE2_PIC1.jpg
Processed (9) - PNRP14_B.6.32_T142_SLIDE3_PIC1.jpg
Processed (10) - PNRP12_B.4.22_T10_SLIDE7_PIC5.jpg
Processed (11) - PNRP12_B.4.22_T10_SLIDE7_PIC4.jpg
Processed (12) - PNRP12_B.5.15_T267_SLIDE2_PIC3.jpg
Processed (13) - PNRP12_B.4.22_T10_SLIDE6_PIC4.jpg
Processed (14) - PNRP12_B.4.10_T1_SLIDE1_PIC1.jpg
Processed (15) - PNRP12_B.4.10_T2_SLIDE3_PIC3.jpg
Processed (16) - PNRP14_B.5.35.17_T72_SLIDE2_PIC1.jpg
Processed (17) - PNRP14_B.7.19.56_T195_SLIDE1_PIC3.jpg
Processed (18) - PNRP12_B.4.22_T10_SLIDE2_PIC10.jpg
Processed (19) - PNRP14_B.5.35_T93_SLIDE6_PIC1.jpg
Processed (20) -

Processed (156) - PNRP12_B.6.10.35_T135_SLIDE4_PIC2.jpg
Processed (157) - PNRP14_B.7.33_T237_SLIDE2_PIC3.jpg
Processed (158) - PNRP14_B.7.33_T237_SLIDE3_PIC3.jpg
Processed (159) - PNRP12_B.4.17_T282_SLIDE2_PIC1.jpg
Processed (160) - PNRP14_B.5.35_T93_SLIDE5_PIC4.jpg
Processed (161) - PNRP12_B.4.17_T282_SLIDE3_PIC1.jpg
Processed (162) - PNRP14_B.7.19.56_T195_SLIDE3_PIC4.jpg
Processed (163) - PNRP14_B.7.19.56_T195_SLIDE2_PIC4.jpg
Processed (164) - PNRP14_B.7.33_T234_SLIDE1_PIC3.jpg
Processed (165) - PNRP12_B.5.15_T262_SLIDE4_PIC2.jpg
Processed (166) - PNRP14_B.5.35_T73_SLIDE3_PIC4.jpg
Processed (167) - PNRP12_B.5.15_T263_SLIDE2_PIC2.jpg
Processed (168) - PNRP14_B.7.31_T222_SLIDE2_PIC2.jpg
Processed (169) - PNRP14_B.7.31_T222_SLIDE3_PIC2.jpg
Processed (170) - PNRP12_B.4.22_T10_SLIDE4_PIC3.jpg
Processed (171) - PNRP14_B.5.32.115_T33_SLIDE4_PIC4.jpg
Processed (172) - PNRP14_B.6.42_T276_SLIDE4_PIC4.jpg
Processed (173) - PNRP14_B.7.34_T245_SLIDE1_PIC5.jpg
Processed (174) - PNRP14_B.7.33_T232_

# Self Hosted Inference - Adds Images to Folder without Annotating

In [149]:
import os
import supervision as sv
from inference import get_model
from PIL import Image
import json


model = get_model(model_id="annulation-region-detection/1", api_key="LuhaB9DHav8NHDPKM8Si")

directory = "./Flattened Images with Only Redone (RoboFlow)/"

count = 0

for dirpath, dirnames, files in os.walk(directory):
    for file in files:
        if file.lower().endswith(('.png', '.jpg', '.jpeg')):
            path = os.path.join(dirpath, file)
            image = Image.open(path)
            results = model.infer(image=path)[0]
            predictions_json = results.json()
            
            predictions = json.loads(predictions_json)["predictions"]
            detections = sv.Detections.from_inference(results)
            
            confidence_threshold = 0.7
            
            if len(predictions) > 0 and predictions[0]["confidence"] > confidence_threshold:
                image.save(os.path.join("./Identified Annulation Region - 70 percent confident/", file))
                count += 1
                print("Processed (" + str(count) + ") - " + file)



Processed (1) - TOMB2_COMMINGLED_T250_SLIDE2_PIC4.jpg
Processed (2) - PNRP14_B.5.35_T93_SLIDE6_PIC2.jpg
Processed (3) - PNRP14_B.6.36_T144_SLIDE2_PIC1.jpg
Processed (4) - PNRP14_B.5.34_SKULL9_T46_SLIDE3_PIC1.jpg
Processed (5) - PNRP14_B.5.34_SKULL9_T46_SLIDE2_PIC1.jpg
Processed (6) - PNRP14_B.5.35_T97_SLIDE4_PIC4.jpg
Processed (7) - PNRP14_B.7.19.56_T195_SLIDE1_PIC2.jpg
Processed (8) - PNRP14_B.6.44_T167_SLIDE5_PIC2.jpg
Processed (9) - PNRP12_B.6.5.18_T189_SLIDE4_PIC4.jpg
Processed (10) - PNRP12_B.4.17_T6_SLIDE2_PIC3.jpg
Processed (11) - PNRP12_B.4.22_T10_2_SLIDE6_PIC1.jpg
Processed (12) - PNRP12_B.6.5.18_T189_SLIDE5_PIC4.jpg
Processed (13) - PNRP12_B.4.17_T6_SLIDE3_PIC3.jpg
Processed (14) - PNRP14_B.7.34_T245_SLIDE2_PIC2.jpg
Processed (15) - PNRP14_B.7.33_T232_SLIDE4_PIC1.jpg
Processed (16) - PNRP14_B.6.44_T166_SLIDE3_PIC2.jpg
Processed (17) - PNRP12_B.4.10_T2_SLIDE3_PIC2.jpg
Processed (18) - PNRP14_B.6.44_T166_SLIDE2_PIC2.jpg
Processed (19) - PNRP12_B.4.17_T8_SLIDE4_PIC1.jpg
Processe

Processed (157) - PNRP12_B.6.5.18_T189_SLIDE4_PIC1.jpg
Processed (158) - PNRP12_B.6.5.18_T189_SLIDE5_PIC1.jpg
Processed (159) - PNRP14_B.7.33_T274_SLIDE3_PIC4.jpg
Processed (160) - TOMB2_COMMINGLED_T251_SLIDE4_PIC1.jpg
Processed (161) - PNRP14_B.6.31_T140_SLIDE3_PIC1.jpg
Processed (162) - PNRP14_B.7.31_T222_SLIDE1_PIC1.jpg
Processed (163) - PNRP16_F.1.28.97.69_T312_SLIDE2_PIC2.jpg
Processed (164) - PNRP12_B.4.22_T10_SLIDE7_PIC1.jpg
Processed (165) - PNRP12_B.4.22_T10_SLIDE6_PIC1.jpg
Processed (166) - PNRP14_B.7.34_T245_SLIDE3_PIC6.jpg
Processed (167) - PNRP14_B.5.33_T39_SLIDE5_PIC2.jpg
Processed (168) - PNRP14_B.7.19.56_T192_SLIDE1_PIC2.jpg
Processed (169) - PNRP14_B.5.35_T90_SLIDE4_PIC4.jpg
Processed (170) - PNRP14_B.5.35_T83_SLIDE1_PIC1.jpg
Processed (171) - PNRP16_F.1.28.98.S1_T307_SLIDE1_PIC3.jpg
Processed (172) - PNRP14_B.7.33_T234_SLIDE3_PIC1.jpg
Processed (173) - PNRP14_B.7.33_T237_SLIDE1_PIC3.jpg
Processed (174) - PNRP12_B.4.22_T11_SLIDE1_PIC1.jpg
Processed (175) - PNRP14_B.6.4

Processed (311) - PNRP16_F.1.28.90.S1.BO_T305_SLIDE2_PIC5.jpg
Processed (312) - PNRP14_B.7.34_T248_SLIDE1_PIC1.jpg
Processed (313) - PNRP12_B.4.10_T2_SLIDE4_PIC1.jpg
Processed (314) - PNRP14_B.7.19.56_T194_SLIDE1_PIC1.jpg
Processed (315) - PNRP12_B.4.10_T2_SLIDE5_PIC1.jpg
Processed (316) - PNRP14_B.6.44_T165_SLIDE3_PIC8.jpg
Processed (317) - PNRP14_B.7.20_T202_SLIDE1_PIC2.jpg
Processed (318) - PNRP14_B.5.35_T85_SLIDE1_PIC3.jpg
Processed (319) - PNRP14_B.5.34_SKULL9_T47_SLIDE2_PIC3.jpg
Processed (320) - PNRP14_B.7.33_T232_SLIDE2_PIC3.jpg
Processed (321) - PNRP12_B.4.17_T6_SLIDE4_PIC1.jpg
Processed (322) - PNRP16_F.1.28.97.69_T312_SLIDE4_PIC5.jpg
Processed (323) - PNRP14_B.7.33_T274_SLIDE4_PIC3.jpg
Processed (324) - PNRP12_B.4.23_T12_SLIDE5_PIC4.jpg
Processed (325) - PNRP12_B.4.17_T8_SLIDE3_PIC3.jpg
Processed (326) - PNRP12_B.4.17_T8_SLIDE2_PIC3.jpg
Processed (327) - PNRP14_B.7.34_T247_SLIDE1_PIC2.jpg
Processed (328) - PNRP14_B.6.36_T144_SLIDE4_PIC3.jpg
Processed (329) - PNRP14_B.5.35_T9

Processed (464) - PNRP14_B.7.33_T232_SLIDE6_PIC2.jpg
Processed (465) - PNRP14_B.7.34_T245_SLIDE1_PIC1.jpg
Processed (466) - PNRP14_B.5.35_T85_SLIDE4_PIC2.jpg
Processed (467) - PNRP12_B.4.22_T10_SLIDE4_PIC6.jpg
Processed (468) - PNRP12_B.5.15_T267_SLIDE1_PIC1.jpg
Processed (469) - PNRP14_B.5.32.115_T33_SLIDE5_PIC1.jpg
Processed (470) - PNRP12_B.4.22_T10_SLIDE5_PIC6.jpg
Processed (471) - PNRP16_F.1.28.115.S34_T298_SLIDE2_PIC1.jpg
Processed (472) - PNRP14_B.5.35_T105_SLIDE4_PIC2.jpg
Processed (473) - PNRP14_B.6.44_T165_SLIDE2_PIC3.jpg
Processed (474) - PNRP12_B.4.10_T1_SLIDE3_PIC3.jpg
Processed (475) - PNRP14_B.7.33_T228_SLIDE4_PIC1.jpg
Processed (476) - PNRP12_B.4.10_T1_SLIDE2_PIC3.jpg
Processed (477) - PNRP14_B.6.44_T165_SLIDE3_PIC2.jpg
Processed (478) - PNRP12_B.4.10_T1_SLIDE2_PIC2.jpg
Processed (479) - PNRP12_B.4.10_T1_SLIDE3_PIC2.jpg
Processed (480) - PNRP14_B.6.37_T147_SLIDE4_PIC4.jpg
Processed (481) - PNRP14_B.5.35_T105_SLIDE4_PIC3.jpg
Processed (482) - PNRP12_B.4.22_T10_SLIDE5_PIC

Processed (618) - PNRP14_B.7.34_T245_SLIDE3_PIC8.jpg
Processed (619) - PNRP12_B.4.22_T10_SLIDE3_PIC5.jpg
Processed (620) - PNRP14_B.5.35_T105_SLIDE3_PIC1.jpg
Processed (621) - PNRP14_B.5.35_T105_SLIDE2_PIC1.jpg
Processed (622) - PNRP14_B.7.33_T228_SLIDE3_PIC2.jpg
Processed (623) - PNRP14_B.7.33_T228_SLIDE2_PIC2.jpg
Processed (624) - PNRP14_B.7.19.56_T194_SLIDE2_PIC2.jpg
Processed (625) - PNRP14_B.7.34_T248_SLIDE2_PIC2.jpg
Processed (626) - PNRP14_B.7.19.56_T194_SLIDE3_PIC2.jpg
Processed (627) - PNRP12_B.5.15_T26_SLIDE2_PIC1.jpg
Processed (628) - PNRP12_B.5.15_T26_SLIDE3_PIC1.jpg
Processed (629) - PNRP14_B.6.42_T276_SLIDE2_PIC3.jpg
Processed (630) - PNRP14_B.5.35_T85_SLIDE3_PIC1.jpg
Processed (631) - PNRP14_B.5.35_T85_SLIDE2_PIC1.jpg
Processed (632) - PNRP14_B.7.33_T232_SLIDE1_PIC1.jpg
Processed (633) - PNRP14_B.7.20_T202_SLIDE2_PIC1.jpg
Processed (634) - PNRP14_B.5.34_T59_SLIDE3_PIC1.jpg
Processed (635) - PNRP14_B.6.42_T276_SLIDE2_PIC2.jpg
Processed (636) - PNRP14_B.7.34_T248_SLIDE2_PI

# Function to create CSV file with image and number of annulations

In [3]:
import os
import csv

data = [
    ["File Name", "Actual Annulations"]
]

directory = "./Training Folder/"

for dirpath, dirnames, files in os.walk(directory):
    if not dirnames:
        cementum_counts = int(dirpath.split(" ")[-1])
        for file in files:
            csv_row = [file, cementum_counts]
            data.append(csv_row)
            
filename = "image_annulation.csv"

with open(filename, 'w', newline='') as csvfile:
    csvwriter = csv.writer(csvfile)
    csvwriter.writerows(data)

print(f"CSV file '{filename}' created successfully.")

CSV file 'image_annulation.csv' created successfully.


# Model with only redone images, data augmentation, resized images to (500,500), and early stopping

In [3]:
import os
import cv2
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from sklearn.metrics import mean_squared_error, mean_absolute_percentage_error

csv_info = './image_annulation.csv'
df = pd.read_csv(csv_info)

filename_to_count = dict(zip(df['File'], df['Number of Annulations']))

directory = './Flattened Images with Only Redone/'

target_size = (500, 500)

images = []
counts = []

for dirpath, dirnames, files in os.walk(directory):
    for file in files:
        if file in filename_to_count:
                path = os.path.join(dirpath, file)
                image = cv2.imread(path, cv2.IMREAD_COLOR)
                resized_image = cv2.resize(image, target_size)
                blurred_image = cv2.GaussianBlur(resized_image, (5, 5), 0)
                equalized_image = cv2.merge([
                    cv2.equalizeHist(blurred_image[:, :, 0]),
                    cv2.equalizeHist(blurred_image[:, :, 1]),
                    cv2.equalizeHist(blurred_image[:, :, 2])
                ])

                images.append(resized_image)
                counts.append(filename_to_count[file])
                
X = np.array(images)
y = np.array(counts)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

early_stopping = EarlyStopping(monitor='val_mae', patience=5, restore_best_weights=True)

datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    brightness_range=[0.8, 1.2]
)

model = Sequential([
    Conv2D(32, (3, 3), activation='relu', input_shape=(500, 500, 3)),
    MaxPooling2D((2, 2)),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Flatten(),
    Dense(128, activation='relu'),
    Dense(1, activation='linear')
])

model.compile(optimizer='adam', loss='mean_squared_error', metrics=['mae'])

model.fit(X_train, y_train, epochs=25, validation_split=0.2, callbacks=[early_stopping])

test_loss, test_mae = model.evaluate(X_test, y_test)
print("Test MAE:", test_mae)

model.save('./model-only-redone-images.h5')

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/25
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m50s[0m 3s/step - loss: 255046224.0000 - mae: 7553.1694 - val_loss: 51951.2656 - val_mae: 226.1686
Epoch 2/25
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m50s[0m 3s/step - loss: 27711.4844 - mae: 144.6861 - val_loss: 1721.8029 - val_mae: 39.0944
Epoch 3/25
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m53s[0m 3s/step - loss: 1487.0386 - mae: 32.6333 - val_loss: 206.7856 - val_mae: 12.0920
Epoch 4/25
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m42s[0m 2s/step - loss: 371.1442 - mae: 15.5812 - val_loss: 258.1730 - val_mae: 13.4116
Epoch 5/25
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m43s[0m 2s/step - loss: 206.4132 - mae: 12.0924 - val_loss: 202.2568 - val_mae: 11.9912
Epoch 6/25
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m43s[0m 2s/step - loss: 175.3393 - mae: 11.0531 - val_loss: 227.9288 - val_mae: 12.6901
Epoch 7/25
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━



Test MAE: 10.903106689453125


This model is the one that was used for the applications built. It employs early stopping, which is a way to monitor if the model is getting better or not with each epoch. It waits a specified number of epochs for the model to improve with the validation data. If it does not, it will stop progressing and return to the best settings for the best model.

# Model with Region Identifiable and Cropped - 70% Confident

In [7]:
import os
import cv2
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from sklearn.metrics import mean_squared_error, mean_absolute_percentage_error

csv_info = './image_annulation.csv'
df = pd.read_csv(csv_info)

filename_to_count = dict(zip(df['File'], df['Number of Annulations']))

directory = './Identified Annulation Region - 70 percent confident/'

target_size = (600, 600)

images = []
counts = []

for dirpath, dirnames, files in os.walk(directory):
    for file in files:
        if file in filename_to_count:
                path = os.path.join(dirpath, file)
                image = cv2.imread(path, cv2.IMREAD_COLOR)
                resized_image = cv2.resize(image, target_size)
                blurred_image = cv2.GaussianBlur(resized_image, (5, 5), 0)
                equalized_image = cv2.merge([
                    cv2.equalizeHist(blurred_image[:, :, 0]),
                    cv2.equalizeHist(blurred_image[:, :, 1]),
                    cv2.equalizeHist(blurred_image[:, :, 2])
                ])

                images.append(resized_image)
                counts.append(filename_to_count[file])
                
X = np.array(images)
y = np.array(counts)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

early_stopping = EarlyStopping(monitor='val_mae', patience=10, restore_best_weights=True)

model = Sequential([
    Conv2D(32, (3, 3), activation='relu', input_shape=(600, 600, 3)),
    MaxPooling2D((2, 2)),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Flatten(),
    Dense(128, activation='relu'),
    Dense(1, activation='linear')
])

model.compile(optimizer='adam', loss='mean_squared_error', metrics=['mae'])

model.fit(X_train, y_train, epochs=50, validation_split=0.2, callbacks=[early_stopping])

test_loss, test_mae = model.evaluate(X_test, y_test)
print("Test MAE:", test_mae)

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/50
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m66s[0m 4s/step - loss: 1672187136.0000 - mae: 19988.0020 - val_loss: 381714.8750 - val_mae: 605.6277
Epoch 2/50
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m61s[0m 4s/step - loss: 279170.2188 - mae: 472.1573 - val_loss: 62312.8203 - val_mae: 247.6470
Epoch 3/50
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m63s[0m 4s/step - loss: 48397.5156 - mae: 195.6571 - val_loss: 11520.8135 - val_mae: 104.0881
Epoch 4/50
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m65s[0m 4s/step - loss: 7808.0767 - mae: 74.8265 - val_loss: 2516.3037 - val_mae: 46.6569
Epoch 5/50
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m67s[0m 4s/step - loss: 1691.2031 - mae: 35.6881 - val_loss: 359.2148 - val_mae: 15.5476
Epoch 6/50
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m67s[0m 4s/step - loss: 394.9589 - mae: 15.8382 - val_loss: 399.5191 - val_mae: 16.0602
Epoch 7/50
[1m16/16[0m [32m━━━━

# Notes

- First run of only using annulation regions that were identifiable at 80% confidence - Test MAE of 12.06 (Not good)
- Second run using Early Stopping, normalizing pixel values, etc - Using Early Stopping yieled a Test MAE of 126.1 (Terrible)
- First run of only using annulation regions that were identifiable at 90% confidence - Test MAE of 23.08 (Not great but adding in reduce_lr)
- Second run using reduce_lr led to terrible results - Test MAE of 490.2
- Third run using equalized image and removing reduce_lr - Test MAE of 310.25
- Fourth run increasing image size to 500,500 and going back to resized image - Test MAE of 184.16

- Run with images with annulation region identified 70%. Early Stopping 10 epoch with returning best weight. Reached 31/50 Epoch with test MAE of 11.07. Image dimensions are 600,600. 

# Cropping the area around an image using prediction

In [151]:
import os
import supervision as sv
from inference import get_model
from PIL import Image
import json

model = get_model(model_id="annulation-region-detection/1", api_key="LuhaB9DHav8NHDPKM8Si")

directory = "./Flattened Images with Only Redone (RoboFlow)/"

bounding_box_annotator = sv.BoxAnnotator()

count = 0

for dirpath, dirnames, files in os.walk(directory):
    for file in files:
        if file.lower().endswith(('.png', '.jpg', '.jpeg')):
            path = os.path.join(dirpath, file)
            image = Image.open(path)
            results = model.infer(image=path)[0]
            predictions_json = results.json()
            
            predictions = json.loads(predictions_json)["predictions"]
            detections = sv.Detections.from_inference(results)
            
            confidence_threshold = 0.7
            
            if len(predictions) > 0 and predictions[0]["confidence"] > confidence_threshold:
                # Get the bounding box coordinates
                x1, y1, x2, y2 = detections.xyxy[0].astype(int)

                # Crop the image
                cropped_image = image.crop((x1, y1, x2, y2))
                
                # Save the cropped image
                cropped_image.save(os.path.join("./Inference Crop 70 Percent/", file))
                count += 1
                print("Processed (" + str(count) + ") - " + file)



Processed (1) - TOMB2_COMMINGLED_T250_SLIDE2_PIC4.jpg
Processed (2) - PNRP14_B.5.35_T93_SLIDE6_PIC2.jpg
Processed (3) - PNRP14_B.6.36_T144_SLIDE2_PIC1.jpg
Processed (4) - PNRP14_B.5.34_SKULL9_T46_SLIDE3_PIC1.jpg
Processed (5) - PNRP14_B.5.34_SKULL9_T46_SLIDE2_PIC1.jpg
Processed (6) - PNRP14_B.5.35_T97_SLIDE4_PIC4.jpg
Processed (7) - PNRP14_B.7.19.56_T195_SLIDE1_PIC2.jpg
Processed (8) - PNRP14_B.6.44_T167_SLIDE5_PIC2.jpg
Processed (9) - PNRP12_B.6.5.18_T189_SLIDE4_PIC4.jpg
Processed (10) - PNRP12_B.4.17_T6_SLIDE2_PIC3.jpg
Processed (11) - PNRP12_B.4.22_T10_2_SLIDE6_PIC1.jpg
Processed (12) - PNRP12_B.6.5.18_T189_SLIDE5_PIC4.jpg
Processed (13) - PNRP12_B.4.17_T6_SLIDE3_PIC3.jpg
Processed (14) - PNRP14_B.7.34_T245_SLIDE2_PIC2.jpg
Processed (15) - PNRP14_B.7.33_T232_SLIDE4_PIC1.jpg
Processed (16) - PNRP14_B.6.44_T166_SLIDE3_PIC2.jpg
Processed (17) - PNRP12_B.4.10_T2_SLIDE3_PIC2.jpg
Processed (18) - PNRP14_B.6.44_T166_SLIDE2_PIC2.jpg
Processed (19) - PNRP12_B.4.17_T8_SLIDE4_PIC1.jpg
Processe

Processed (157) - PNRP12_B.6.5.18_T189_SLIDE4_PIC1.jpg
Processed (158) - PNRP12_B.6.5.18_T189_SLIDE5_PIC1.jpg
Processed (159) - PNRP14_B.7.33_T274_SLIDE3_PIC4.jpg
Processed (160) - TOMB2_COMMINGLED_T251_SLIDE4_PIC1.jpg
Processed (161) - PNRP14_B.6.31_T140_SLIDE3_PIC1.jpg
Processed (162) - PNRP14_B.7.31_T222_SLIDE1_PIC1.jpg
Processed (163) - PNRP16_F.1.28.97.69_T312_SLIDE2_PIC2.jpg
Processed (164) - PNRP12_B.4.22_T10_SLIDE7_PIC1.jpg
Processed (165) - PNRP12_B.4.22_T10_SLIDE6_PIC1.jpg
Processed (166) - PNRP14_B.7.34_T245_SLIDE3_PIC6.jpg
Processed (167) - PNRP14_B.5.33_T39_SLIDE5_PIC2.jpg
Processed (168) - PNRP14_B.7.19.56_T192_SLIDE1_PIC2.jpg
Processed (169) - PNRP14_B.5.35_T90_SLIDE4_PIC4.jpg
Processed (170) - PNRP14_B.5.35_T83_SLIDE1_PIC1.jpg
Processed (171) - PNRP16_F.1.28.98.S1_T307_SLIDE1_PIC3.jpg
Processed (172) - PNRP14_B.7.33_T234_SLIDE3_PIC1.jpg
Processed (173) - PNRP14_B.7.33_T237_SLIDE1_PIC3.jpg
Processed (174) - PNRP12_B.4.22_T11_SLIDE1_PIC1.jpg
Processed (175) - PNRP14_B.6.4

Processed (311) - PNRP16_F.1.28.90.S1.BO_T305_SLIDE2_PIC5.jpg
Processed (312) - PNRP14_B.7.34_T248_SLIDE1_PIC1.jpg
Processed (313) - PNRP12_B.4.10_T2_SLIDE4_PIC1.jpg
Processed (314) - PNRP14_B.7.19.56_T194_SLIDE1_PIC1.jpg
Processed (315) - PNRP12_B.4.10_T2_SLIDE5_PIC1.jpg
Processed (316) - PNRP14_B.6.44_T165_SLIDE3_PIC8.jpg
Processed (317) - PNRP14_B.7.20_T202_SLIDE1_PIC2.jpg
Processed (318) - PNRP14_B.5.35_T85_SLIDE1_PIC3.jpg
Processed (319) - PNRP14_B.5.34_SKULL9_T47_SLIDE2_PIC3.jpg
Processed (320) - PNRP14_B.7.33_T232_SLIDE2_PIC3.jpg
Processed (321) - PNRP12_B.4.17_T6_SLIDE4_PIC1.jpg
Processed (322) - PNRP16_F.1.28.97.69_T312_SLIDE4_PIC5.jpg
Processed (323) - PNRP14_B.7.33_T274_SLIDE4_PIC3.jpg
Processed (324) - PNRP12_B.4.23_T12_SLIDE5_PIC4.jpg
Processed (325) - PNRP12_B.4.17_T8_SLIDE3_PIC3.jpg
Processed (326) - PNRP12_B.4.17_T8_SLIDE2_PIC3.jpg
Processed (327) - PNRP14_B.7.34_T247_SLIDE1_PIC2.jpg
Processed (328) - PNRP14_B.6.36_T144_SLIDE4_PIC3.jpg
Processed (329) - PNRP14_B.5.35_T9

Processed (464) - PNRP14_B.7.33_T232_SLIDE6_PIC2.jpg
Processed (465) - PNRP14_B.7.34_T245_SLIDE1_PIC1.jpg
Processed (466) - PNRP14_B.5.35_T85_SLIDE4_PIC2.jpg
Processed (467) - PNRP12_B.4.22_T10_SLIDE4_PIC6.jpg
Processed (468) - PNRP12_B.5.15_T267_SLIDE1_PIC1.jpg
Processed (469) - PNRP14_B.5.32.115_T33_SLIDE5_PIC1.jpg
Processed (470) - PNRP12_B.4.22_T10_SLIDE5_PIC6.jpg
Processed (471) - PNRP16_F.1.28.115.S34_T298_SLIDE2_PIC1.jpg
Processed (472) - PNRP14_B.5.35_T105_SLIDE4_PIC2.jpg
Processed (473) - PNRP14_B.6.44_T165_SLIDE2_PIC3.jpg
Processed (474) - PNRP12_B.4.10_T1_SLIDE3_PIC3.jpg
Processed (475) - PNRP14_B.7.33_T228_SLIDE4_PIC1.jpg
Processed (476) - PNRP12_B.4.10_T1_SLIDE2_PIC3.jpg
Processed (477) - PNRP14_B.6.44_T165_SLIDE3_PIC2.jpg
Processed (478) - PNRP12_B.4.10_T1_SLIDE2_PIC2.jpg
Processed (479) - PNRP12_B.4.10_T1_SLIDE3_PIC2.jpg
Processed (480) - PNRP14_B.6.37_T147_SLIDE4_PIC4.jpg
Processed (481) - PNRP14_B.5.35_T105_SLIDE4_PIC3.jpg
Processed (482) - PNRP12_B.4.22_T10_SLIDE5_PIC

Processed (618) - PNRP14_B.7.34_T245_SLIDE3_PIC8.jpg
Processed (619) - PNRP12_B.4.22_T10_SLIDE3_PIC5.jpg
Processed (620) - PNRP14_B.5.35_T105_SLIDE3_PIC1.jpg
Processed (621) - PNRP14_B.5.35_T105_SLIDE2_PIC1.jpg
Processed (622) - PNRP14_B.7.33_T228_SLIDE3_PIC2.jpg
Processed (623) - PNRP14_B.7.33_T228_SLIDE2_PIC2.jpg
Processed (624) - PNRP14_B.7.19.56_T194_SLIDE2_PIC2.jpg
Processed (625) - PNRP14_B.7.34_T248_SLIDE2_PIC2.jpg
Processed (626) - PNRP14_B.7.19.56_T194_SLIDE3_PIC2.jpg
Processed (627) - PNRP12_B.5.15_T26_SLIDE2_PIC1.jpg
Processed (628) - PNRP12_B.5.15_T26_SLIDE3_PIC1.jpg
Processed (629) - PNRP14_B.6.42_T276_SLIDE2_PIC3.jpg
Processed (630) - PNRP14_B.5.35_T85_SLIDE3_PIC1.jpg
Processed (631) - PNRP14_B.5.35_T85_SLIDE2_PIC1.jpg
Processed (632) - PNRP14_B.7.33_T232_SLIDE1_PIC1.jpg
Processed (633) - PNRP14_B.7.20_T202_SLIDE2_PIC1.jpg
Processed (634) - PNRP14_B.5.34_T59_SLIDE3_PIC1.jpg
Processed (635) - PNRP14_B.6.42_T276_SLIDE2_PIC2.jpg
Processed (636) - PNRP14_B.7.34_T248_SLIDE2_PI

# Creating bins and finding if accuracy of the model improves as confidence of region increases

In [36]:
import pandas as pd
import numpy as np

predict_data = pd.read_csv('annulation_predictions.csv')
actual_data = pd.read_csv('image_annulation.csv')

combined_data = pd.merge(predict_data, actual_data, on='File Name', how='left')

bin_ranges = [[0.50, 0.60], [0.60, 0.70],[0.70, 0.80], [0.80, 0.90], [0.90, 1]]

results = []

for range in bin_ranges:
    bin_data = combined_data[combined_data['Region Confidence'] >= range[0]]
    bin_data = bin_data[bin_data['Region Confidence'] < range[1]]
    errors = np.abs(bin_data['Predicted Annulation Count'] - bin_data['Actual Annulations'])

    mae = errors.mean()
    rmse = np.sqrt((errors ** 2).mean())

    accuracy_within_1 = (errors <= 1).mean()  # Percentage of predictions within ±1
    accuracy_within_2 = (errors <= 2).mean()
    accuracy_within_5 = (errors <= 5).mean()
    accuracy_within_7 = (errors <= 7).mean()
    accuracy_within_10 = (errors <= 10).mean()
    accuracy_within_15 = (errors <= 15).mean()
    accuracy_within_20 = (errors <= 20).mean()

    accuracy_between_1_and_5 = ((errors > 1) & (errors <= 5)).mean()
    accuracy_between_5_and_7 = ((errors > 5) & (errors <= 7)).mean()
    accuracy_between_7_and_10 = ((errors > 7) & (errors <= 10)).mean()
    
    results.append({
        'Threshold Range': range,
        'MAE': mae,
        'RMSE': rmse,
        'Accuracy ±1': accuracy_within_1,
        'Accuracy 1-5': accuracy_between_1_and_5,
        'Accuracy 5-7': accuracy_between_5_and_7,
        'Accuracy 7-10': accuracy_between_7_and_10,
        'Accuracy within 10': accuracy_within_10,
        'Accuracy within 15': accuracy_within_15,
        'Accuracy within 20': accuracy_within_20
    })

results_df = pd.DataFrame(results)
results_df

Unnamed: 0,Threshold Range,MAE,RMSE,Accuracy ±1,Accuracy 1-5,Accuracy 5-7,Accuracy 7-10,Accuracy within 10,Accuracy within 15,Accuracy within 20
0,"[0.5, 0.6]",9.18328,11.339231,0.052632,0.263158,0.131579,0.157895,0.605263,0.842105,0.921053
1,"[0.6, 0.7]",9.767079,12.603365,0.040816,0.306122,0.061224,0.183673,0.591837,0.755102,0.897959
2,"[0.7, 0.8]",9.462636,12.008867,0.065421,0.233645,0.102804,0.233645,0.635514,0.785047,0.934579
3,"[0.8, 0.9]",9.581816,11.837985,0.068063,0.230366,0.109948,0.196335,0.604712,0.798429,0.918848
4,"[0.9, 1]",10.386872,13.240447,0.094276,0.205387,0.13468,0.13468,0.569024,0.723906,0.845118


## Results of Above Statistics

It seems that all of the threshold ranges of annulation region confidence return a similar estimation accuracy. When the confidence is between 0.9 and 1, the percentage of estimations within 1 annulation is around 10%, which is the highest among the rest in that area. From there, it does not seem that the highest region confidence produces the best estimation. At the end, around 60% of all estimations within each respective threshold range are accurate within 10 annulations.

The test MAE for the model was 10.90 which means that on average the model is wrong by 10.9 annulations. This means that some estimations will be more close to the true number than 10.9, some will be less close than 10.9 as well. Just the average error is 10.9. We can see above that 9.4% of image estimations with high confidence annulation regions produce estimations with good precision.

I would recommend that this code be re-run with unseen data of similar quality since the model has seen some of the data during training.