In [1]:
# Notebook setup: imports, TensorFlow configuration and the experiment name.
from keras_preprocessing.image import ImageDataGenerator
from ml.util import NeighborhoodImageDataGenerator
from ml.models.layers import MyTransformerBlock, MyAddPositionEmbs, MyClassToken
import os
import datetime
from pathlib import Path
# Ask TensorFlow's native logging to stay quiet.
# NOTE(review): this variable is only honoured if it is set before TensorFlow is
# first imported; the ml.* imports above may already have imported it — confirm.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

from keras.callbacks import TensorBoard
from livelossplot import PlotLossesKerasTF
from tensorflow.keras.optimizers import schedules
from tensorflow.keras.optimizers import Adam

from cfg import LOG_DIR

# Show the directory the kernel started in, then change the working directory so
# that the relative 'data/...' and 'segmentations' paths used in later cells resolve.
# NOTE(review): the absolute path is machine-specific; presumably the project root.
print(os.getcwd())
os.chdir('/home/jelinek/recetox/')

from ml.pipeline import FeitDataPipelineEncoderDecoder
import tensorflow
from tensorflow import keras

# Switch TensorFlow to graph (non-eager) execution for everything built below.
tensorflow.compat.v1.disable_eager_execution()

# Experiment name: stored as the pipeline's params.name and reused by later cells
# to load and evaluate the pipeline.
name = 'MySimpleCNN_FeatureVector_ViT-pos-emb-t128-n2'

class MyAutoencoder(FeitDataPipelineEncoderDecoder):
    """Pipeline that classifies a tile from the features of its neighbouring tiles.

    Training (``_train_model_ae_neighborhood``) runs in two stages:

    1. a small CNN (``get_encoder``) is trained as an 11-class tile classifier;
    2. its flattened feature output is frozen and replicated once per tile of the
       ``(2 * neighborhood_tiles + 1) ** 2`` neighbourhood grid, and the resulting
       feature vectors are fed to a transformer-style model
       (``get_combinator_model``) that emits the 11-way class probabilities.

    Despite the class name no decoder is built; ``get_decoder`` is a stub.
    """

    # Width of the softmax output of both the tile CNN and the combinator.
    _NUM_CLASSES = 11
    # Filters of the successive Conv -> MaxPool -> BatchNorm stages of the tile CNN.
    _ENCODER_FILTERS = (4, 8, 16, 32, 64, 128)
    # Number of stacked transformer blocks in the combinator model.
    _NUM_TRANSFORMER_BLOCKS = 8

    def _train_model(self, data_train, data_valid):
        """Disabled: this pipeline trains through ``_train_model_ae_neighborhood``."""
        raise NotImplementedError("This method is intentionally not implemented in this instance")

    def __init__(self, *args, **kwargs):
        """Forward all arguments to the parent pipeline and set the experiment parameters."""
        # tile_size is assigned both before and after the parent initialiser.
        # NOTE(review): presumably the parent reads it during construction — confirm.
        self.params.tile_size = 128
        super().__init__(*args, **kwargs)

        self.params.latent_representation_size = 2048
        self.params.tile_size = 128
        self.params.name = name
        self.params.epochs = 200
        # NOTE(review): the neighbourhood loaders and the training routine read
        # self.params.batch_size, not self.batch_size — confirm which one is meant.
        self.batch_size = 16
        self.model = MyAutoencoder.get_model(self.params.latent_representation_size)
        self.params.neighborhood_tiles = 2

    def get_optimizer(self):
        """Return Adam driven by a staircase exponential learning-rate decay.

        The rate starts at 1e-2 and is multiplied by 0.1 every 100 steps.
        NOTE(review): the training routine in this class compiles with the string
        'adam', so this optimizer is not used by the code visible here.
        """
        lr_schedule = schedules.ExponentialDecay(
            initial_learning_rate=1e-2,
            decay_steps=100,
            decay_rate=0.1,
            staircase=True)

        return Adam(learning_rate=lr_schedule, beta_1=0.99, beta_2=0.9999)

    @staticmethod
    def get_encoder(encoder_vector_length: int):
        """Build the per-tile CNN classifier.

        A 128x128x3 input is zero-padded by 64 pixels on every side (256x256) and
        passed through six Conv(5x5, 'same') -> MaxPool(2x2) -> BatchNorm stages.
        The 4x4x128 result is flattened to 2048 values and classified by an
        11-way softmax layer.

        Args:
            encoder_vector_length: accepted for interface compatibility; unused.

        Returns:
            The uncompiled ``keras.Model`` named 'MySimpleCnnFewerLayers'.
        """
        inputs = keras.Input(shape=(128, 128, 3))
        x = keras.layers.ZeroPadding2D(padding=(64, 64))(inputs)

        # Layers are created in the same order as a hand-unrolled version would
        # create them, so the automatically generated layer names are unchanged.
        for filters in MyAutoencoder._ENCODER_FILTERS:
            x = keras.layers.Conv2D(filters=filters, kernel_size=5, strides=(1, 1), padding='same')(x)
            x = keras.layers.MaxPooling2D(padding='same', pool_size=(2, 2))(x)
            x = keras.layers.BatchNormalization(axis=3, epsilon=1.001e-5)(x)

        x = keras.layers.Flatten()(x)
        outputs = keras.layers.Dense(units=MyAutoencoder._NUM_CLASSES, activation='softmax')(x)

        return keras.Model(inputs, outputs, name='MySimpleCnnFewerLayers')

    @staticmethod
    def get_decoder(encoder_vector_length: int):
        """Stub: this pipeline has no decoder, so nothing is built (returns None)."""
        pass

    @staticmethod
    def get_model(encoder_vector_length: int):
        """Return the model assigned to ``self.model`` at construction: the tile CNN."""
        return MyAutoencoder.get_encoder(encoder_vector_length)

    def _flow_autoencoder_tiles(self, directory):
        """Create the single-tile iterator shared by both autoencoder loaders.

        Images are sample-wise centred and std-normalised, passed through the
        parent's ``_divide`` preprocessing function, resized to
        ``tile_size x tile_size`` and yielded in shuffled batches of 128 with
        ``class_mode='input'``.
        """
        generator = ImageDataGenerator(horizontal_flip=False, vertical_flip=False,
                                       samplewise_center=True,
                                       samplewise_std_normalization=True,
                                       preprocessing_function=FeitDataPipelineEncoderDecoder._divide)
        edge = self.params.tile_size
        return generator.flow_from_directory(directory=directory,
                                             color_mode='rgb',
                                             class_mode='input', batch_size=128,
                                             shuffle=True,
                                             target_size=(edge, edge))

    def get_data_loader_training_autoencoder(self):
        """Return the single-tile iterator over ``self.data_train_autoencoder``."""
        return self._flow_autoencoder_tiles(self.data_train_autoencoder)

    def get_data_loader_validation_autoencoder(self):
        """Return the single-tile iterator over ``self.data_valid_autoencoder``."""
        return self._flow_autoencoder_tiles(self.data_valid_autoencoder)

    def _flow_neighborhood_tiles(self, directory):
        """Create the neighbourhood iterator shared by both neighbourhood loaders.

        Each image is resized to ``tile_size * (2 * neighborhood_tiles + 1)`` pixels
        per side and yielded with one-hot ('categorical') labels in shuffled
        batches of ``params.batch_size``, with random horizontal/vertical flips.

        NOTE(review): the validation loader goes through the same flips and
        shuffling as the training loader — confirm this is intended, in
        particular for evaluation code that relies on a fixed sample order.
        """
        radius = self.params.neighborhood_tiles
        generator = NeighborhoodImageDataGenerator(radius, horizontal_flip=True,
                                                   vertical_flip=True, samplewise_center=True,
                                                   samplewise_std_normalization=True,
                                                   preprocessing_function=FeitDataPipelineEncoderDecoder._divide)
        edge = self.params.tile_size * (radius * 2 + 1)
        return generator.flow_from_directory(directory=directory, color_mode='rgb',
                                             class_mode='categorical',
                                             batch_size=self.params.batch_size,
                                             shuffle=True,
                                             target_size=(edge, edge))

    def get_data_loader_training_neighborhood(self):
        """Return the neighbourhood iterator over ``params.data_train_neighborhood``."""
        return self._flow_neighborhood_tiles(self.params.data_train_neighborhood)

    def get_data_loader_validation_neighborhood(self):
        """Return the neighbourhood iterator over ``params.data_valid_neighborhood``."""
        return self._flow_neighborhood_tiles(self.params.data_valid_neighborhood)

    def get_combinator_model(self):
        """Build the model that combines per-tile feature vectors into class probabilities.

        It takes one input of length ``latent_representation_size`` per tile of the
        neighbourhood grid. The inputs are stacked along a new axis 1, a class token
        and position embeddings are added, and the sequence goes through eight
        transformer blocks and a layer normalisation. Element 0 along axis 1 is then
        extracted and mapped through a 512-unit tanh layer to an 11-way softmax.
        """
        feature_size = self.params.latent_representation_size
        tile_count = (self.params.neighborhood_tiles * 2 + 1) ** 2

        inputs = [keras.Input(shape=(feature_size,)) for _ in range(tile_count)]
        reshaped = [keras.layers.Reshape((1, feature_size))(_input) for _input in inputs]
        x = keras.layers.Concatenate(axis=1)(reshaped)

        x = MyClassToken(name="class_token")(x)
        x = MyAddPositionEmbs(name="Transformer/posembed_input")(x)

        for _block in range(self._NUM_TRANSFORMER_BLOCKS):
            # The second value returned by the block is not needed here.
            x, _ = MyTransformerBlock(num_heads=8, mlp_dim=feature_size, dropout=0.1)(x)

        x = keras.layers.LayerNormalization(epsilon=1e-6, name="Transformer/encoder_norm")(x)
        x = keras.layers.Lambda(lambda v: v[:, 0], name="ExtractToken")(x)
        x = keras.layers.Dense(512, name="pre_logits", activation="tanh")(x)

        output = keras.layers.Dense(units=self._NUM_CLASSES, activation='softmax')(x)

        return keras.Model(inputs=inputs, outputs=output)

    def _train_model_ae_neighborhood(self, data_train_autoencoder, data_valid_autoencoder,
                                     data_train_classifier, data_valid_classifier,
                                     data_train_neighborhood, data_valid_neighborhood):
        """Train the tile CNN, then the neighbourhood classifier built on top of it.

        Args:
            data_train_autoencoder, data_valid_autoencoder: unused by this method.
            data_train_classifier, data_valid_classifier: data for the tile CNN.
            data_train_neighborhood, data_valid_neighborhood: data for the
                combined neighbourhood model.

        Side effects:
            Sets ``self.model`` (full neighbourhood model), ``self.model_encoder``
            (frozen tile feature extractor) and ``self.model_combinator``.
        """
        # --- Stage 1: train the tile CNN as a plain classifier -----------------
        feature_extractor = MyAutoencoder.get_encoder(encoder_vector_length=0)
        # Softmax output with one-hot labels: use categorical cross-entropy
        # (binary cross-entropy would score the 11 outputs as independent labels).
        feature_extractor.compile(loss='categorical_crossentropy', optimizer='adam',
                                  metrics=[keras.metrics.CategoricalAccuracy()])

        feature_extractor.fit(data_train_classifier, epochs=200, steps_per_epoch=250,
                              validation_data=data_valid_classifier,
                              validation_steps=self.params.batch_size * 5, validation_freq=10)

        # --- Stage 2: frozen feature extractor = CNN up to the Flatten layer ----
        encoder_model = keras.Model(feature_extractor.input,
                                    feature_extractor.layers[-2].output,
                                    name=(self.params.name + '_encoder'))

        for layer in encoder_model.layers:
            layer.trainable = False

        # clone_model() creates new layers with newly initialised weights, so the
        # trained weights must be copied into every clone explicitly; otherwise
        # the neighbourhood model would sit on frozen, untrained encoders.
        trained_weights = encoder_model.get_weights()
        tile_count = (self.params.neighborhood_tiles * 2 + 1) ** 2
        neighborhood_networks = []
        for model_idx in range(tile_count):
            tile_encoder = keras.models.clone_model(encoder_model)
            tile_encoder.set_weights(trained_weights)
            neighborhood_networks.append(MyAutoencoder._get_basic_layers(tile_encoder, model_idx))

        # Each entry is indexed as (input, output) of one per-tile encoder.
        encoders_inputs = [network[0] for network in neighborhood_networks]
        encoders_outputs = [network[1] for network in neighborhood_networks]

        parallel_encoder_model = keras.Model(inputs=encoders_inputs, outputs=encoders_outputs)

        # --- Stage 3: combinator on top of the per-tile encoders ---------------
        neighbourhood_model = self.get_combinator_model()
        neighbourhood_model.summary()
        output_probs = neighbourhood_model(encoders_outputs)

        neighborhood_feature_extractor_model = keras.Model(inputs=parallel_encoder_model.inputs,
                                                           outputs=output_probs)
        neighborhood_feature_extractor_model.compile(loss='categorical_crossentropy', optimizer='adam',
                                                     metrics=[keras.metrics.CategoricalAccuracy()])

        # NOTE(review): plain string concatenation — assumes LOG_DIR is a str that
        # already ends with a path separator (or is meant as a prefix); confirm.
        tensorboard = TensorBoard(log_dir=LOG_DIR + datetime.datetime.now().strftime("%Y%m%d-%H%M%S"))
        neighborhood_feature_extractor_model.fit(data_train_neighborhood,
                                                 steps_per_epoch=250,
                                                 epochs=100,
                                                 shuffle=True,
                                                 validation_data=data_valid_neighborhood,
                                                 validation_steps=100,
                                                 validation_freq=5,
                                                 verbose=1,
                                                 callbacks=[tensorboard, PlotLossesKerasTF()])

        self.model = neighborhood_feature_extractor_model
        self.model_encoder = encoder_model
        # Re-wrap the last layer of the full model as a standalone model.
        # NOTE(review): this relies on the nested combinator being layers[-1].
        self.model_combinator = keras.Model(inputs=self.model.layers[-1].input,
                                            outputs=self.model.layers[-1].output)

/home/jelinek/recetox/src/notebooks


In [2]:
# Directories with single tiles (shared by the autoencoder and classifier arguments)
# and with neighbourhood images.
tiles_train_dir = 'data/Feit_colon-annotation-tiles-128/data_train/'
tiles_valid_dir = 'data/Feit_colon-annotation-tiles-128/data_valid/'
neighborhood_train_dir = 'data/Feit_colon-annotation-tiles-128-2-neighbourhood/data_train'
neighborhood_valid_dir = 'data/Feit_colon-annotation-tiles-128-2-neighbourhood/data_valid'

pipeline = MyAutoencoder(
    data_train_autoencoder=tiles_train_dir,
    data_valid_autoencoder=tiles_valid_dir,
    data_train_neighborhood=neighborhood_train_dir,
    data_valid_neighborhood=neighborhood_valid_dir,
    train_data_dir=tiles_train_dir,
    valid_data_dir=tiles_valid_dir,
)

# Run the pipeline with the validation and test-segmentation steps switched off.
pipeline.execute_pipeline(perform_validation=False, perform_test_segmentation=False)

Instructions for updating:
Colocations handled automatically by placer.
Found 132437 images belonging to 11 classes.
Found 14721 images belonging to 11 classes.
Found 132437 images belonging to 11 classes.
Found 14721 images belonging to 11 classes.
Epoch 1/200

KeyboardInterrupt: 

In [None]:
# Persist the pipeline built above through its own save_pipeline() method.
# NOTE(review): presumably stored under params.name, since the next cell loads it by `name` — confirm.
pipeline.save_pipeline()

In [None]:
# Replace the in-memory pipeline with the one loaded by experiment name.
# Rebinding `pipeline` here means the cells below operate on the loaded pipeline.
pipeline = FeitDataPipelineEncoderDecoder.load_pipeline(pipeline_name=name)

In [None]:
from ml.eval import eval_model

# Evaluate the full neighbourhood model on the neighbourhood validation data,
# printing the confusion matrix and saving the misclassified samples.
validation_loader = pipeline.get_data_loader_validation_neighborhood()

eval_model(
    pipeline.model,
    validation_loader,
    pipeline_name=name,
    print_confusion_matrix=True,
    save_misclassified=True,
)

In [None]:
from ml.eval import evaluate_segmentation_on_feit_annotation

# Evaluation input: one sub-directory of the validation annotations.
# Alternative: Path('data/Feit_colon-annotation_valid/') points at the parent directory.
evaluation_path = Path('data/Feit_colon-annotation_valid/ns-adenoca-colon-15071-2019-20x-he-4/')

# Segmentations are written under a directory named after the pipeline.
segmentation_dir = Path('segmentations') / pipeline.params.name

segmenter = pipeline.build_segmenter()

evaluate_segmentation_on_feit_annotation(
    evaluation_path,
    segmenter,
    32,  # third positional argument, passed through unchanged
    pipeline.params.class_names,
    save_segmentations=True,
    segmentations_dir=segmentation_dir,
    neighbourhood_size=pipeline.params.neighborhood_tiles,
    combinator_model=pipeline.model_combinator,
    combination_procedure='neural_networks',
)