In [1]:
'''
autoreload works on IPython versions 6.0 and higher. It reloads modules after
you save them.

https://ipython.readthedocs.io/en/stable/config/extensions/autoreload.html
'''
%reload_ext autoreload
'''
Mark modules for autoreload explicitly.
Note, we have to use 1 instead of explicit. For some reason the explicit parameter
never autoreloads aimport modules.
'''
%autoreload 1

%aimport final_project_group_13_utlis
from final_project_group_13_utlis import *

# 1. Configuration


In [2]:
import os

dataset_path = Path(
    os.environ.get('BREAKHIS_DATASET_PATH')
    or r'C:\Users\Michael Metz\Documents\Education\MEng Software\2023-01 - 2023-04 Winter\ENEL 645\BreakHis\BreaKHis_v1\BreaKHis_v1\histology_slides\breast'
)
'''
The path where the two class folders (benign, malignant) of images reside.

Set the BREAKHIS_DATASET_PATH environment variable to point at your own copy
of the dataset. When it is unset or empty, the hard-coded fallback path above
is used.

Raw strings can be used in case you use a Windows path with `\`.

If you want any other paths in this script to be cross platform, you *must* use
the forward slash `/` to make the paths work on Linux or Mac. But if you are
just using a path on only a Windows machine (like the fallback path) you can
use `\`.

Also note you can't end a raw string with a `\` (and don't need to in this case
as we just need the path up to the folder).
'''

class_list = ['benign', 'malignant']
'''
List of expected class subfolders in the dataset folder.
'''

train_split = 0.6
'''
Set the train split. Train, validation, test split must add up to approximately 1.0.
'''

validation_split = 0.1
'''
Set the validation split. Train, validation, test split must add up to approximately 1.0.
'''

test_split = 1.0 - validation_split - train_split
'''
The test split is the remainder after the train and validation splits, so the
three splits always add up to approximately 1.0.
'''

# Because test_split is a remainder, the only way to misconfigure the splits is
# to make one of them negative (e.g. train + validation > 1.0). Fail here with a
# clear message instead of deep inside the splitting helper.
if min(train_split, validation_split, test_split) < 0.0:
    raise ValueError(
        'train_split and validation_split must be non-negative and sum to at '
        f'most 1.0; got train={train_split}, validation={validation_split}, '
        f'test={test_split}'
    )

random_seed = 154
'''
Used for configuring a consistent random_seed where we need randomness with
reproducible results, like when shuffling the order of images.
'''

batch_size = 128
'''
The batch_size for training.
'''

image_size = (224, 340)
'''
The image size of all images in the dataset.
'''

crop_size = (224, 224)
'''
The size to randomly crop all images to during preprocessing (including train, validation, test).
'''

# Enable memory growth on every detected GPU instead of assigning all of the
# device memory during runtime initialization.
# - Iterating (rather than indexing [0]) keeps the notebook runnable on
#   CPU-only machines, where the device list is empty.
# - TensorFlow requires the memory-growth setting to be the same across GPUs,
#   so it is applied to all of them rather than only the first.
physical_devices = tf.config.list_physical_devices('GPU')
try:
    for gpu in physical_devices:
        tf.config.experimental.set_memory_growth(gpu, True)
except RuntimeError as error:
    # Memory growth must be set before the GPUs have been initialized; this can
    # trigger when the cell is re-run in a kernel that has already used them.
    print(f'Could not change GPU memory growth: {error}')


# 2. Stratified split of the data into development and test datasets


In [3]:
# Split the dataset folder into training / validation / test folders using the
# split fractions and seed from the configuration cell.
# move=False: presumably copies rather than moves the images (the run log
# below talks about copying) -- confirm in the utils module.
(
    training_dataset_path,
    validation_dataset_path,
    test_dataset_path,
) = stratified_train_val_test_split_into_folders(
    dataset_path,
    move=False,
    train_split=train_split,
    validation_split=validation_split,
    test_split=test_split,
    random_seed=random_seed,
)

# Show the three resulting folder paths.
display([training_dataset_path, validation_dataset_path, test_dataset_path])


Not copying files as C:\Users\Michael Metz\Documents\Education\MEng Software\2023-01 - 2023-04 Winter\ENEL 645\BreakHis\BreaKHis_v1\BreaKHis_v1\histology_slides\breast\split_data already esists
Development benign set length: 1736
Test benign set length: 744
Development benign image count: 1736
Test benign image count: 744
Training benign set length: 1488
Validation benign set length: 248
Training benign image count: 1488
Validation benign image count: 248

Development malignant set length: 3800
Test malignant set length: 1629
Development malignant image count: 3800
Test malignant image count: 1629
Training malignant set length: 3257
Validation malignant set length: 543
Training malignant image count: 3257
Validation malignant image count: 543



['C:\\Users\\Michael Metz\\Documents\\Education\\MEng Software\\2023-01 - 2023-04 Winter\\ENEL 645\\BreakHis\\BreaKHis_v1\\BreaKHis_v1\\histology_slides\\breast\\split_data\\training',
 'C:\\Users\\Michael Metz\\Documents\\Education\\MEng Software\\2023-01 - 2023-04 Winter\\ENEL 645\\BreakHis\\BreaKHis_v1\\BreaKHis_v1\\histology_slides\\breast\\split_data\\validation',
 'C:\\Users\\Michael Metz\\Documents\\Education\\MEng Software\\2023-01 - 2023-04 Winter\\ENEL 645\\BreakHis\\BreaKHis_v1\\BreaKHis_v1\\histology_slides\\breast\\split_data\\test']

In [4]:
# Build the training and validation datasets from their split folders.
train_ds, val_ds = preprocess_train_val(
    training_dataset_path, validation_dataset_path,
    image_size=image_size, crop_size=crop_size, batch_size=batch_size,
)

# Build the test dataset with the same image, crop and batch settings.
test_ds = preprocess_test(
    test_dataset_path,
    image_size=image_size, crop_size=crop_size, batch_size=batch_size,
)

# Display the three dataset objects (their element specs appear in the output).
display([train_ds, val_ds, test_ds])


Found 4745 files belonging to 2 classes.
Found 791 files belonging to 2 classes.
Found 2373 files belonging to 2 classes.


[<PrefetchDataset element_spec=(TensorSpec(shape=(None, 224, 224, 3), dtype=tf.float32, name=None), TensorSpec(shape=(None, 2), dtype=tf.float32, name=None))>,
 <PrefetchDataset element_spec=(TensorSpec(shape=(None, 224, 224, 3), dtype=tf.float32, name=None), TensorSpec(shape=(None, 2), dtype=tf.float32, name=None))>,
 <PrefetchDataset element_spec=(TensorSpec(shape=(None, 224, 224, 3), dtype=tf.float32, name=None), TensorSpec(shape=(None, 2), dtype=tf.float32, name=None))>]

In [18]:
# File name handed to train_validate for the best model; the test cell reads
# the same variable afterwards.
best_model_file = 'vgginnet_best_model.h5'
model = vgginnet_builder()

# Train on train_ds while validating on val_ds.
train_validate(model, train_ds, val_ds, best_model_file=best_model_file)


Model: "model_11"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 Image_Input (InputLayer)       [(None, 224, 224, 3  0           []                               
                                )]                                                                
                                                                                                  
 VGG_Preprocess (Lambda)        (None, 224, 224, 3)  0           ['Image_Input[0][0]']            
                                                                                                  
 vgg16_features (Functional)    (None, 14, 14, 512)  7635264     ['VGG_Preprocess[0][0]']         
                                                                                                  
 conv2d_15 (Conv2D)             (None, 14, 14, 64)   32832       ['vgg16_features[0][0]']  

In [17]:
# Run the project `test` helper with the best-model file and the test dataset;
# presumably it loads that saved model and evaluates it -- confirm in the utils module.
# NOTE(review): this cell's execution count (17) is lower than the training
# cell's (18), so the metrics shown may predate the latest training run --
# re-run this cell after training to confirm them.
metrics = test(best_model_file, test_ds)
# Bare last expression so the notebook renders the returned metrics.
metrics




{'loss': 1.5145031213760376,
 'accuracy': 0.9254108667373657,
 'f1_score_benign_malignant': array([0.8852884 , 0.94473934], dtype=float32),
 'mae': 0.07449477910995483}