In [1]:
import os
import tensorflow as tf
from tensorflow.keras.layers import Resizing, Rescaling, Dense, Conv2D, MaxPooling2D, Flatten, RandomFlip, RandomContrast, RandomRotation, Dropout, GlobalMaxPooling2D, GlobalAveragePooling2D, Input
from tensorflow.keras import Sequential
from tensorflow.image import random_flip_up_down, random_flip_left_right, random_contrast, random_brightness, rot90, resize
from tensorflow.keras.applications import ResNet50, DenseNet121
from PIL import Image
from sklearn.preprocessing import LabelEncoder
import matplotlib.pyplot as plt
import random
import numpy as np
from tensorflow.keras.utils import image_dataset_from_directory
from typing import Literal
import numpy as np
from PIL import Image

In [17]:
[file for file in os.listdir('data/model/training/notumor') if '.jpg' not in file]


[]

In [11]:
image_dataset_from_directory(directory = 'data/model/training/tumor', color_mode = 'rgb', labels = [1.0]*myNum)

ValueError: Expected the lengths of `labels` to match the number of files in the target directory. len(labels) is 4065 while we found 4064 files in directory data/model/training/tumor.

In [4]:
# change directory to parent directory and confirm
os.chdir('..')
os.getcwd()

'/Users/tjurjevich/Desktop/personalProjects/tumor_detection'

In [11]:
# Set seed for reproduceability
SEED = 42

# Size to convert images to (pixels)
RESIZE_HEIGHT = 256
RESIZE_WIDTH = 256

In [12]:
notumor_train_image_count = len(os.listdir('data/model/training/notumor')) # notumor (minority class) with 1595 images
tumor_train_image_count = len(os.listdir('data/model/training/tumor')) # tumor (majority) class with  4117 images

notumor_test_image_count = len(os.listdir('data/model/validation/notumor'))
tumor_test_image_count = len(os.listdir('data/model/validation/tumor'))

In [13]:
# Load photo data
# total = tf.constant(1.0, shape = (tumor_train_images,)).numpy()
train_tumor = image_dataset_from_directory(directory = 'data/model/training/tumor', color_mode = "rgb", labels = [1.0]*tumor_train_image_count)
train_notumor = image_dataset_from_directory(directory = 'data/model/training/notumor', color_mode = "rgb", labels = [0.0]*notumor_train_image_count)

test_tumor = image_dataset_from_directory(directory = 'data/model/validation/tumor', color_mode="rgb", labels = [1.0]*tumor_test_image_count)
test_notumor = image_dataset_from_directory(directory = 'data/model/validation/notumor', color_mode="rgb", labels = [0.0]*notumor_test_image_count)

Found 4117 files belonging to 1 classes.
Found 1595 files belonging to 1 classes.
Found 906 files belonging to 1 classes.
Found 405 files belonging to 1 classes.


In [14]:
def process_images(image, label, transformation = Literal['preprocess_only','preprocess_and_augment']):
    print(type(image), type(label))
    def random_val():
        return float(tf.random.uniform(shape=[], minval = 0, maxval = 1, dtype = tf.float32))
    
    # always resize
    image = resize(image, size = (RESIZE_HEIGHT, RESIZE_WIDTH), method = 'bilinear')

    if transformation == 'preprocess_and_augment':
        if random_val() > 0.5:
            image = random_flip_left_right(image)
        if random_val() > 0.5:
            image = random_flip_up_down(image)
        # if random_val() > 0.5:
        #     image = random_contrast(image, 0.2, 0.5)
        # if random_val() > 0.5:
        #     image = random_brightness(image, 0.2)
    
    return image, label

In [15]:
notumor_original = train_notumor.map(
    lambda img, lab: process_images(img, lab, 'preprocess_only')
)
notumor_augmented = train_notumor.map(
    lambda img, lab: process_images(img, lab, 'preprocess_and_augment')
).repeat(4)


tumor_original = train_tumor.map(
    lambda img, lab: process_images(img, lab, 'preprocess_only')
)
tumor_augmented = train_tumor.map(
    lambda img, lab: process_images(img, lab, 'preprocess_and_augment')
).repeat(1)

<class 'tensorflow.python.framework.ops.SymbolicTensor'> <class 'tensorflow.python.framework.ops.SymbolicTensor'>
<class 'tensorflow.python.framework.ops.SymbolicTensor'> <class 'tensorflow.python.framework.ops.SymbolicTensor'>
<class 'tensorflow.python.framework.ops.SymbolicTensor'> <class 'tensorflow.python.framework.ops.SymbolicTensor'>
<class 'tensorflow.python.framework.ops.SymbolicTensor'> <class 'tensorflow.python.framework.ops.SymbolicTensor'>


In [8]:
final_tumor_train = tumor_original.concatenate(tumor_augmented)
final_notumor_train = notumor_original.concatenate(notumor_augmented)

final_train = final_tumor_train.concatenate(final_notumor_train)


test_tumor = test_tumor.map(
    lambda img, lab: process_images(img, lab, 'preprocess_only')
)
test_notumor = test_notumor.map(
    lambda img, lab: process_images(img, lab, 'preprocess_only')
)
final_test = test_tumor.concatenate(test_notumor)

In [9]:
# Runtime optimization
AUTOTUNE = tf.data.AUTOTUNE 
final_train = final_train.shuffle(buffer_size=1000).prefetch(buffer_size = AUTOTUNE)
final_train = final_train.prefetch(buffer_size = AUTOTUNE)

In [10]:
# Custom classifier method
'''
Inputs -> Preprocessing -> Augmentation (training) -> Convolution ->
Max Pooling -> Convolution -> Max Pooling -> Flatten -> Dense ->
Dense -> Dropout -> Dense (classifier)
'''
class v1TumorClassifier(tf.keras.Model):
    def __init__(self):
        super().__init__()

        self.rescale = Rescaling(scale = 1./255)
        self.conv1 = Conv2D(16, (3,3), activation = 'relu')
        self.pool1 = MaxPooling2D()
        self.conv2 = Conv2D(32, (3,3), activation = 'relu')
        self.pool2 = MaxPooling2D()

        self.dropout = Dropout(0.3)
        self.flatten = Flatten()

        self.dense1 = Dense(128, activation = 'relu')
        self.dense2 = Dense(64, activation = 'relu')

        self.classifier = Dense(1, activation = 'sigmoid')

    def call(self, inputs, training = False):
        # x = self.input(x)
        x = self.rescale(inputs)
        x = self.conv1(x)
        x = self.pool1(x)
        x = self.conv2(x)
        x = self.pool2(x)
        x = self.flatten(x)
        x = self.dense1(x)
        x = self.dense2(x)
        x = self.dropout(x, training = training)
        output = self.classifier(x)
        return output


In [11]:
custom_model = v1TumorClassifier()

custom_model.compile(
    optimizer = 'adam',
    loss = 'binary_crossentropy',
    metrics = ['accuracy','recall']  
)

custom_model_history = custom_model.fit(final_train, validation_data = final_test, epochs = 5, batch_size = 32)

Epoch 1/5
[1m508/508[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m56s[0m 104ms/step - accuracy: 0.7338 - loss: 0.9522 - recall: 0.7445 - val_accuracy: 0.9588 - val_loss: 0.1157 - val_recall: 0.9636
Epoch 2/5
[1m508/508[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m57s[0m 105ms/step - accuracy: 0.9691 - loss: 0.0927 - recall: 0.9663 - val_accuracy: 0.9855 - val_loss: 0.0545 - val_recall: 0.9812
Epoch 3/5
[1m508/508[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m57s[0m 106ms/step - accuracy: 0.9877 - loss: 0.0426 - recall: 0.9875 - val_accuracy: 0.9939 - val_loss: 0.0368 - val_recall: 0.9912
Epoch 4/5
[1m508/508[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m56s[0m 105ms/step - accuracy: 0.9908 - loss: 0.0306 - recall: 0.9918 - val_accuracy: 0.9847 - val_loss: 0.0714 - val_recall: 0.9779
Epoch 5/5
[1m508/508[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m57s[0m 106ms/step - accuracy: 0.9941 - loss: 0.0197 - recall: 0.9946 - val_accuracy: 0.9954 - val_loss: 0.0370 - val_recal

In [13]:
# Transfer learning classifier method
'''
Inputs -> Preprocessing -> Augmentation (training) -> Pretrained Base ->
Global Max/Avg Pooling -> Dense -> Dropout -> Dense (classifier)
'''
class v2TumorClassifier(tf.keras.Model):
    def __init__(self, base_model = Literal['resnet','densenet'], pool_type = Literal['max','avg']):
        super().__init__()

        # Base model assignment
        if base_model == 'resnet':
            self.base_model = ResNet50(
                weights = 'imagenet',
                include_top = False,
                input_shape = (RESIZE_HEIGHT, RESIZE_WIDTH, 3)
            )

        if base_model == 'densenet':
            self.base_model = DenseNet121(
                weights = 'imagenet',
                include_top = False,
                input_shape = (RESIZE_HEIGHT, RESIZE_WIDTH, 3)
            )

        # Base model should not be getting retrained
        self.base_model.trainable = False

        # Standardization layer that resizes to and scales  
        self.rescale = Rescaling(scale = 1./255)

        # Global pooling layer
        if pool_type == 'max':
            self.pool_layer = GlobalMaxPooling2D()
        if pool_type == 'avg':
            self.pool_layer = GlobalAveragePooling2D()

        # Dropout, dense, classifier
        self.dropout = Dropout(0.3)
        self.dense = Dense(32, activation = 'relu')
        self.classifier = Dense(1, activation = 'sigmoid')
    
    def call(self, inputs, training = False):
        x = self.rescale(inputs)
        x = self.base_model(x, training = training)
        x = self.pool_layer(x)
        x = self.dense(x)
        x = self.dropout(x, training = training)
        output = self.classifier(x)
        return output



In [14]:
resnet_1 = v2TumorClassifier(base_model = 'resnet', pool_type = 'avg')

resnet_1.compile(
    optimizer = 'adam',
    loss = 'binary_crossentropy',
    metrics = ['accuracy','recall']
)

resnet_1_history = resnet_1.fit(final_train, validation_data = final_test, epochs = 5, batch_size = 32)

Epoch 1/5
[1m508/508[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m249s[0m 481ms/step - accuracy: 0.6720 - loss: 0.6373 - recall: 0.7538 - val_accuracy: 0.7780 - val_loss: 0.5760 - val_recall: 0.7837
Epoch 2/5
[1m 97/508[0m [32m━━━[0m[37m━━━━━━━━━━━━━━━━━[0m [1m3:02[0m 444ms/step - accuracy: 0.8012 - loss: 0.4481 - recall: 0.8474

KeyboardInterrupt: 

In [13]:
resnet_2 = v2TumorClassifier(base_model = 'resnet', pool_type = 'max')

resnet_2.compile(
    optimizer = 'adam',
    loss = 'binary_crossentropy',
    metrics = ['accuracy','recall']
)

resnet_2_history = resnet_2.fit(v2_train_images, validation_data = v2_test_images, epochs = 5, batch_size = 256)

Epoch 1/5
[1m179/179[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m97s[0m 532ms/step - accuracy: 0.8472 - loss: 0.4166 - recall: 0.9444 - val_accuracy: 0.7506 - val_loss: 0.5429 - val_recall: 0.9890
Epoch 2/5
[1m179/179[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m95s[0m 533ms/step - accuracy: 0.8903 - loss: 0.3181 - recall: 0.9700 - val_accuracy: 0.7559 - val_loss: 0.4848 - val_recall: 0.9845
Epoch 3/5
[1m179/179[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m95s[0m 532ms/step - accuracy: 0.8990 - loss: 0.2895 - recall: 0.9768 - val_accuracy: 0.7429 - val_loss: 0.6418 - val_recall: 0.9923
Epoch 4/5
[1m179/179[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m97s[0m 540ms/step - accuracy: 0.8998 - loss: 0.2811 - recall: 0.9719 - val_accuracy: 0.8009 - val_loss: 0.4662 - val_recall: 0.9702
Epoch 5/5
[1m179/179[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m96s[0m 536ms/step - accuracy: 0.9086 - loss: 0.2606 - recall: 0.9720 - val_accuracy: 0.8009 - val_loss: 0.4463 - val_recal

In [14]:
densenet_1 = v2TumorClassifier(base_model = 'densenet', pool_type = 'avg')

densenet_1.compile(
    optimizer = 'adam',
    loss = 'binary_crossentropy',
    metrics = ['accuracy','recall']
)

densenet_1_history = densenet_1.fit(v2_train_images, validation_data = v2_test_images, epochs = 5, batch_size = 256)

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/densenet/densenet121_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m29084464/29084464[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 0us/step
Epoch 1/5
[1m179/179[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m114s[0m 621ms/step - accuracy: 0.8363 - loss: 0.4020 - recall: 0.9195 - val_accuracy: 0.9512 - val_loss: 0.1680 - val_recall: 0.9757
Epoch 2/5
[1m179/179[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m110s[0m 614ms/step - accuracy: 0.9583 - loss: 0.1260 - recall: 0.9827 - val_accuracy: 0.9504 - val_loss: 0.1603 - val_recall: 0.9757
Epoch 3/5
[1m179/179[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m109s[0m 610ms/step - accuracy: 0.9703 - loss: 0.1024 - recall: 0.9848 - val_accuracy: 0.9535 - val_loss: 0.1469 - val_recall: 0.9735
Epoch 4/5
[1m179/179[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m109s[0m 609ms/step - accuracy: 0.9690 - loss: 0.0991 - recall: 0.9845 - val_accuracy: 0.

In [22]:
def process_test_image(image):
    image = resize(image, size = (256, 256), method = 'bilinear')
    return image

In [23]:
raw_img = tf.keras.preprocessing.image.load_img('data/testing/tumor/y0.jpg')
test_img = process_test_image(raw_img)

In [None]:
from src.utils.models import CustomTumorClassifier
os.getcwd()
mod = tf.keras.models.load_model(
    "saved_models/custom_model.keras"
)

'/Users/tjurjevich/Desktop/personalProjects/tumor_detection'

In [41]:
import sys
from pathlib import Path

# Current notebook directory
NOTEBOOK_DIR = Path.cwd()  # e.g., project_root/notebooks
PROJECT_ROOT = NOTEBOOK_DIR.parent
SRC_DIR = PROJECT_ROOT / "src"  # must point to project_root/src

print(SRC_DIR.exists())  # should print True
sys.path.insert(0, str(SRC_DIR))



False


In [45]:
from pathlib import Path
print(Path.cwd())



/Users/tjurjevich/Desktop/personalProjects
