## Phase 2: Find Processing Techniques

In [1]:
# %reload_ext autoreload
# %autoreload 2

In [2]:
# Smoke test: confirm the notebook can import from the Utils package by
# exercising one class and one function from Utils.test_class_func.
from Utils.test_class_func import Test_py, test_py

class_check = Test_py("My_Test_Class").print_()
print(class_check, "\n")

function_check = test_py("My_Test_Function")
print(function_check)

Class -> Try if python utils connects to notebook: My_Test_Class 

Function -> Try if python utils connects to notebook: My_Test_Function


In [None]:
# import functions 
from Utils.preporcessing_utils import data_loading 
from Utils.preporcessing_utils import labels_encoding
from Utils.preporcessing_utils import split_data
from Utils.preporcessing_utils import image_iterators
from Utils.preporcessing_utils import ablation
from Utils.models_utils import Basic_Custom_CNN
from Utils.evaluation_utils import Evaluation
from Utils.save_data_utils import Save_Data

In [None]:
# import libraries
import numpy as np
from tensorflow.keras import backend as K
from tensorflow.keras.utils import set_random_seed

### Pipeline Workflow

#### Data Preparation and Preprocessing

In [None]:
# Load the train and test data with data_loading, passing the two CSV file names.
# NOTE(review): how these file names are resolved to a location on disk is decided
# inside data_loading — confirm there before moving the notebook or the data.
train_df, test_df = data_loading("train_full.csv", "test_full.csv")

In [None]:
# Encode the pathology labels of the train and test frames with labels_encoding.
# NOTE(review): the result is assigned back to train_df/test_df, so re-running this
# cell passes the already-processed frames to labels_encoding again. Re-run the
# data-loading cell first if this cell has to be executed a second time.
train_df, test_df = labels_encoding(train_df, test_df)

In [None]:
# Show the column names of the training frame after label encoding.
train_df.columns

In [None]:
# Produce the train / validation / test sets with split_data.
# NOTE(review): 0.11 is presumably the fraction held out for validation — confirm
# against split_data and consider giving the value a named constant.
train_data, val_data, test_data = split_data(train_df, test_df, 0.11)

#### Iteration 1: Finding the best preprocessing technique using a custom CNN

In [None]:
# Phase tag handed to Save_Data.add_model_data for every trained model
project_phase = "P2"

# Preprocessing switches passed to ablation() to build the technique groups
options = [
    "apply_background_removal",
    "apply_crop",
    "apply_noise_reduction",
    "apply_contrast_enhancement",
    "apply_edge_enhancement",
    "apply_lbp_texturizer",
]

# Ground-truth labels of the test set; the training loop compares them with the
# predictions made on the test iterator.
# NOTE(review): this assumes the test iterator yields samples in the same order as
# the rows of test_data (i.e. no shuffling) — confirm in image_iterators.
y_true = test_data["label"]

In [None]:
# Peek at the first five ground-truth labels.
y_true[:5]

In [None]:
# Build the groups of preprocessing techniques to compare from `options`.
# The training loop iterates over the result with .items(), so it is used as a
# mapping of group name -> techniques.
techniques_groups = ablation(options)

In [None]:
# Display the generated technique groups.
techniques_groups

In [None]:
# Ablation run: train, save and evaluate one Custom CNN per group of
# preprocessing techniques.
#
# Reads:  techniques_groups, train_data, val_data, test_data, y_true, project_phase
# Writes: metrics_by_technique / probs_by_technique (one entry per group), plus
#         metrics, y_probs, history, model_path, ... which are reassigned on every
#         iteration and therefore hold the values of the LAST group only.
SEED = 42  # identical seed for every group, so runs differ only in preprocessing

metrics_by_technique = {}
probs_by_technique = {}

# iterate through techniques groups for training a model with each group
for technique_name, techniques in techniques_groups.items():

    # create model name
    model_name = "Custom CNN - " + technique_name
    print("Training " + model_name)

    # reset state left over from the previous model, then reseed so that every
    # group starts from the same RNG state
    K.clear_session()
    set_random_seed(SEED)

    # create image iterators that apply this group's preprocessing techniques
    train_generator, val_generator, test_generator = image_iterators(
        (train_data, val_data, test_data),
        is_resnet_vgg=False,
        preprocessing_techniques=techniques,
    )

    # initiate model class
    model_instance = Basic_Custom_CNN(input_shape=(256, 256, 1), num_classes=2, epochs=10)

    # create model architecture
    model_instance.architecture()

    # train model
    history = model_instance.train_model(train_generator, val_gen=val_generator)

    # save model and get path; the file name is derived from the group name
    name = technique_name.lower().replace(" ", "_") + ".keras"
    model_path = model_instance.save_model(models_directory="Models", model_file=name)

    # evaluate model by making predictions on the test iterator
    evaluation = Evaluation(model_instance.get_model())
    y_probs = evaluation.predict(test_generator)

    # a model that returns the same output for every sample has not learned
    # anything; flag it so the run is not mistaken for a valid result
    probs = np.asarray(y_probs)
    if len(probs) > 1 and np.allclose(probs, probs[0]):
        print("WARNING: " + model_name + " returned identical probabilities for every test sample")

    # calculate metrics
    metrics = evaluation.calculate_metrics(y_true, y_probs)

    # keep the results of every group, not only the last one
    metrics_by_technique[technique_name] = metrics
    probs_by_technique[technique_name] = y_probs

    # get labels dictionary
    y_labels = evaluation.get_labels()

    # save data
    # NOTE(review): Save_Data is re-created on every iteration; whether it could be
    # built once before the loop depends on how it handles models_data.json — confirm.
    save_data = Save_Data(file_name="models_data.json", out_directory="Outputs")
    save_data.add_model_data(model_name, model_path, history, metrics, y_labels, project_phase, comments="")
    save_data.save_model_data()


In [None]:
# Metrics of the most recently evaluated model: the training loop reassigns
# `metrics` on every iteration, so this shows the last technique group only.
metrics

In [None]:
import numpy as np

# Count how many test samples carry each label value.
unique, counts = np.unique(test_data["label"], return_counts=True)
print({label: count for label, count in zip(unique, counts)})

In [None]:
# First ten predictions of the last model evaluated in the training loop.
y_probs[:10]

In [None]:
# Label values and their frequencies for the train and test splits, in that order.
for _split in (train_data, test_data):
    print(np.unique(_split["label"], return_counts=True))

In [12]:
# Metrics of the most recently evaluated model: the training loop reassigns
# `metrics` on every iteration, so this shows the last technique group only.
# NOTE(review): the recorded output below (recall 1.0, specificity 0.0, roc_auc 0.5,
# empty first confusion-matrix column) shows every test sample predicted as class 1.
metrics

{'confusion_matrix': array([[  0, 302],
        [  0, 260]]),
 'accuracy': 0.4626334519572954,
 'precision': 0.4626334519572954,
 'recall': 1.0,
 'f1_score': 0.6326034063260341,
 'roc_auc': 0.5,
 'specificity': np.float64(0.0),
 'fpr': np.float64(1.0),
 'fnr': np.float64(0.0)}

In [15]:
import numpy as np

# Count how many test samples carry each label value.
unique, counts = np.unique(test_data["label"], return_counts=True)
print({label: count for label, count in zip(unique, counts)})

{np.int32(0): np.int64(302), np.int32(1): np.int64(260)}


In [16]:
# First ten predictions of the last model evaluated in the training loop.
# NOTE(review): the recorded output below repeats the same probability pair for
# every sample, i.e. the model output is constant — investigate before trusting
# the metrics of this run.
y_probs[:10]

array([[0.4814933, 0.5185067],
       [0.4814933, 0.5185067],
       [0.4814933, 0.5185067],
       [0.4814933, 0.5185067],
       [0.4814933, 0.5185067],
       [0.4814933, 0.5185067],
       [0.4814933, 0.5185067],
       [0.4814933, 0.5185067],
       [0.4814933, 0.5185067],
       [0.4814933, 0.5185067]], dtype=float32)

In [17]:
# Label values and their frequencies for the train and test splits, in that order.
for _split in (train_data, test_data):
    print(np.unique(_split["label"], return_counts=True))

(array([0, 1], dtype=int32), array([912, 977]))
(array([0, 1], dtype=int32), array([302, 260]))
