## Phase 2: Find Preprocessing Techniques

In [3]:
# %reload_ext autoreload
# %autoreload 2

In [4]:
# Smoke test: confirm the notebook can import from the local Utils package
# by exercising one test class and one test function from it.
from Utils.test_class_func import Test_py, test_py

print(Test_py("My_Test_Class").print_(), "\n")
print(test_py("My_Test_Function"))

Class -> Try if python utils connects to notebook: My_Test_Class 

Function -> Try if python utils connects to notebook: My_Test_Function


In [24]:
# import functions 
from Utils.preporcessing_utils import data_loading 
from Utils.preporcessing_utils import labels_encoding
from Utils.preporcessing_utils import split_data
from Utils.preporcessing_utils import image_iterators
from Utils.preporcessing_utils import ablation
from Utils.models_utils import Basic_Custom_CNN
from Utils.evaluation_utils import Evaluation
from Utils.save_data_utils import Save_Data

In [25]:
# import libraries
from tensorflow.keras import backend as K

### Setup Pipeline Workflow

#### Data Preparation and Preprocessing

In [10]:
# Load the training and test tables from their two CSV files.
# NOTE(review): where data_loading resolves these file names is not visible
# here — confirm the expected data directory.
train_df, test_df = data_loading("train_full.csv", "test_full.csv")

In [11]:
# Encode the pathology labels for both the training and the test frame.
# NOTE(review): the results overwrite train_df/test_df, so re-running this
# cell feeds already-processed frames back into labels_encoding — confirm
# that a second pass is harmless.
train_df, test_df = labels_encoding(train_df, test_df)

In [23]:
# Inspect the column names of the training frame.
train_df.columns

Index(['image_id', 'image_type', 'image_path', 'series_uid', 'subject_id',
       'study_uid', 'breast_density', 'breast_side', 'image_view',
       'abnormality_type', 'pathology', 'split', 'label'],
      dtype='object')

In [12]:
# Split into train / validation / test sets; split_data prints the size of each.
# The third argument (0.11) is presumably the fraction of the training frame
# held out for validation — the printed summary shows 234 of the 2123 non-test
# cases in the validation set. TODO confirm against split_data's signature.
train_data, val_data, test_data = split_data(train_df, test_df, 0.11)

Train set: 1889 cases, 70.35 %
Validation set: 234 cases, 8.72 %
Test set: 562 cases, 20.93 %


#### Iteration 1: Finding the best preprocessing technique using a custom CNN

In [13]:
# Run configuration for this iteration.

# Phase tag passed along when each model's results are recorded.
project_phase = "P2"

# Names of the preprocessing switches handed to ablation().
options = [
    "apply_background_removal",
    "apply_crop",
    "apply_noise_reduction",
    "apply_contrast_enhancement",
    "apply_edge_enhancement",
    "apply_lbp_texturizer",
]

# Ground-truth labels of the test split, used when computing metrics.
y_true = test_data["label"]

In [14]:
# Build the groups of techniques to try: a dict mapping a group name to a
# {technique flag: bool} dict. The displayed result includes a baseline with
# every flag off, a group with every flag on, and groups that switch a single
# technique off.
techniques_groups = ablation(options)

In [19]:
# Display the generated groups (group name -> {technique flag: bool}).
techniques_groups

{'Baseline Basic Preporcessing': {'apply_background_removal': False,
  'apply_crop': False,
  'apply_noise_reduction': False,
  'apply_contrast_enhancement': False,
  'apply_edge_enhancement': False,
  'apply_lbp_texturizer': False},
 'All Preporcessing Techniques': {'apply_background_removal': True,
  'apply_crop': True,
  'apply_noise_reduction': True,
  'apply_contrast_enhancement': True,
  'apply_edge_enhancement': True,
  'apply_lbp_texturizer': True},
 'No Background removal': {'apply_background_removal': False,
  'apply_crop': True,
  'apply_noise_reduction': True,
  'apply_contrast_enhancement': True,
  'apply_edge_enhancement': True,
  'apply_lbp_texturizer': True},
 'No Crop': {'apply_background_removal': True,
  'apply_crop': False,
  'apply_noise_reduction': True,
  'apply_contrast_enhancement': True,
  'apply_edge_enhancement': True,
  'apply_lbp_texturizer': True},
 'No Noise reduction': {'apply_background_removal': True,
  'apply_crop': True,
  'apply_noise_reduction': F

In [22]:
# Ablation loop: for every group of preprocessing techniques, train a fresh
# custom CNN, save it, evaluate it on the test split, and record the results.
# Reads techniques_groups, the three data splits, y_true and project_phase
# from earlier cells.
for technique_name, techniques in techniques_groups.items():
    
    # Human-readable model name used in the progress message and in the saved record
    model_name = "Custom CNN - " + technique_name
    print("Training " + model_name)
    
    # Reset Keras' global state so this run does not build on the previous model
    K.clear_session()
    
    # Create the train/val/test image iterators, passing this group's technique flags
    train_generator, val_generator, test_generator = image_iterators((train_data, val_data, test_data), 
                                                    is_resnet_vgg=False,
                                                    preporcessing_techniques=techniques
                                                  )
    
    # Instantiate the model wrapper
    model_instance = Basic_Custom_CNN(input_shape=(256, 256, 1), num_classes=2, epochs=10)
    
    # Build the model architecture
    model = model_instance.architecture()
    
    # Train the model, validating on the validation iterator
    # NOTE(review): train_model is called on the object returned by
    # architecture(), while save_model below is called on model_instance —
    # confirm that architecture() returns an object exposing train_model.
    history = model.train_model(train_generator, val_gen=val_generator)
    
    # Save the model and keep its path; the file is named after the technique
    # group (technique_name), not after model_name
    model_path = model_instance.save_model(models_directory="Models", model_file=technique_name)

    # Evaluate the model by predicting on the test iterator
    evaluation = Evaluation(model)
    y_probs = evaluation.predict(test_generator)

    # Calculate metrics against the test labels
    # NOTE(review): assumes test_generator yields samples in the same order as
    # test_data["label"] (i.e. it is not shuffled) — TODO confirm.
    metrics = evaluation.calculate_metrics(y_true, y_probs)

    # Get the labels dictionary
    y_labels = evaluation.get_labels()

    # Record this run's results
    # NOTE(review): a new Save_Data is constructed on every iteration for the
    # same file — presumably add_model_data/save_model_data append to the
    # existing models_data.json rather than overwrite it; verify.
    save_data = Save_Data(file_name="models_data.json", out_directory="Outputs")
    save_data.add_model_data(model_name, model_path, history, metrics, y_labels, project_phase, comments="")
    save_data.save_model_data()


NameError: name 'tectechniques_groups' is not defined