In [1]:
from IPython.display import display, HTML

import numpy as np
import pandas as pd

from tqdm.notebook import tqdm

from pathlib import Path

from time import sleep

from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix, ConfusionMatrixDisplay, multilabel_confusion_matrix
from sklearn.utils import shuffle

import tensorflow as tf
from tensorflow.keras import backend as K
from tensorflow.keras.layers import *
from tensorflow.keras.optimizers import Adam, RMSprop, Nadam, SGD, AdamW
from tensorflow.keras.models import load_model, Model
from tensorflow.keras.initializers import glorot_uniform

2024-06-23 13:32:41.514807: I external/local_tsl/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2024-06-23 13:32:41.606598: I external/local_tsl/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2024-06-23 13:32:41.979041: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
# Names of every available band/channel, and the positions (within `all_bands`)
# of the ones selected for training.
all_bands = [*(f'B{x}' for x in range(2, 9)), 'B8A', 'B11', 'B12', 'TCI_R', 'TCI_G', 'TCI_B']
selected_bands = all_bands
bands = list(map(all_bands.index, selected_bands))

In [3]:
# Directory that receives the saved models; created on first run.
model_dir = Path('models')
model_dir.mkdir(parents=True, exist_ok=True)

shards_dir = Path.home() / 'sentinel_data' / 'shards'

# NOTE(review): this value is not referenced by the other cells visible in this
# notebook - confirm whether it is still needed.
feature_type = 'zscore_features' # zscore_features minmax_features

label_type = 'selected_labels'


def sort_key(x):
    'Integer parsed from the part of the file stem after the last underscore.'
    return int(x.stem.split('_')[-1])


# Label shards ordered by their numeric suffix.
label_filepaths = sorted(shards_dir.joinpath(label_type).glob('label_*.npy'), key=sort_key)

# Shard IDs in a fixed pseudo-random order (random_state pins the permutation).
IDs = shuffle([sort_key(f) for f in label_filepaths], random_state=42)

In [4]:
class DataGenerator(tf.keras.utils.PyDataset):
    '''
    Keras ``PyDataset`` that assembles batches from per-sample ``.npy`` shards.

    Every keyword argument is copied onto the instance. The methods below read
    ``batch_size``, ``dim``, ``bands``, ``n_classes``, ``shards_dir`` and
    ``label_type``, so those keys must be supplied.
    '''
    def __init__(self, list_IDs, shuffle, **kwargs):
        '''
        Args:
            list_IDs: shard IDs served by this generator.
            shuffle: when truthy, the sample order is reshuffled in ``on_epoch_end``.
            **kwargs: configuration stored as instance attributes (see class docstring).
        '''
        super().__init__()
        vars(self).update(kwargs)
        self.list_IDs = list_IDs
        self.shuffle = shuffle

        # Loading settings read by Keras from the PyDataset instance.
        self.use_multiprocessing = True
        self.workers = 4
        self.max_queue_size = 2
        self.on_epoch_end()

    def __len__(self):
        'Number of full batches per epoch (a trailing partial batch is dropped).'
        return len(self.list_IDs) // self.batch_size

    def __getitem__(self, index):
        '''
        Get one batch of data.

        Raises:
            IndexError: if ``index`` is outside ``[0, len(self))``. Raising here
                also lets a plain ``for batch in generator`` loop terminate.
        '''
        if not 0 <= index < len(self):
            raise IndexError(f'batch index {index} out of range for {len(self)} batches')
        indexes = self.indexes[index*self.batch_size:(index+1)*self.batch_size]
        list_IDs_temp = [self.list_IDs[k] for k in indexes]
        X, y = self.data_generation(list_IDs_temp)
        return X, y

    def on_epoch_end(self):
        'Reset the sample order and shuffle it when shuffling is enabled.'
        self.indexes = np.arange(len(self.list_IDs))
        if self.shuffle:
            np.random.shuffle(self.indexes)

    def data_generation(self, list_IDs_temp):
        '''
        Load the feature and label arrays for the given IDs.

        Returns:
            ``(X, y)`` where ``X`` has shape ``(len(list_IDs_temp), *dim, len(bands))``
            and ``y`` has shape ``(len(list_IDs_temp), n_classes)``.
        '''
        # Size the arrays by the IDs actually loaded so no row is left uninitialised.
        n_samples = len(list_IDs_temp)
        X = np.empty((n_samples, *self.dim, len(self.bands)))
        y = np.empty((n_samples, self.n_classes))

        # NOTE(review): the feature directory name is the literal 'features' -
        # confirm this matches the shard layout on disk.
        feature_dir = self.shards_dir.joinpath('features')
        label_dir = self.shards_dir.joinpath(self.label_type)

        for i, ID in enumerate(list_IDs_temp):
            # Select channels with the instance's own band indices (the same list
            # that sizes X), not a notebook-level global.
            X[i, ...] = np.load(feature_dir.joinpath(f'feature_{ID}.npy'))[..., self.bands]
            y[i] = np.load(label_dir.joinpath(f'label_{ID}.npy'))

        return X, y

In [5]:
class KerasModelCreator:
    def __init__(self, **kwargs):
        vars(self).update(kwargs)
        self.kwargs = kwargs
            
    def display_logger(self, log_file, metrics):
        metric_names = [m if isinstance(m, str) else m.name for m in metrics]
        if log_file.is_file():
            val_metrics = ['val_loss'] + ['val_' + x for x in metric_names]
            df = pd.read_csv(log_file)[['epoch', 'loss'] + metric_names + val_metrics]
            df['epoch'] += 1
            print('Previous training:')
            display(HTML(df.to_html(index=False)))
    
    def define_callbacks_and_logger(self, model_path, model_savepoint, log_file, metrics):
        '''
        Build the callback list passed to ``fit``.

        Args:
            model_path: file the best model is checkpointed to.
            model_savepoint: backup directory used by ``BackupAndRestore``.
            log_file: CSV file that per-epoch logs are appended to.
            metrics: unused here; kept so existing callers keep working.

        Returns:
            List of Keras callbacks.
        '''
        # Every callback monitoring 'val_recall' states mode='max' explicitly.
        # Without it, Keras' 'auto' inference may treat the metric as something
        # to minimise (version dependent), inverting checkpointing and LR decay.
        monitor = 'val_recall'

        callbacks = [
            tf.keras.callbacks.BackupAndRestore(
                model_savepoint, save_freq='epoch', delete_checkpoint=False
            ),
            tf.keras.callbacks.CSVLogger(log_file, append=True),
            tf.keras.callbacks.ModelCheckpoint(
                model_path, monitor=monitor, mode='max', save_best_only=True,
                save_freq='epoch', initial_value_threshold=0.4,
                verbose=0,
            ),
            tf.keras.callbacks.ReduceLROnPlateau(
                monitor=monitor, mode='max', factor=0.9, patience=2, min_lr=3e-6,
                verbose=1,
            ),
            tf.keras.callbacks.EarlyStopping(
                monitor=monitor,
                verbose=1,
                patience=10,
                mode='max',
                restore_best_weights=True
            ),
        ]
        return callbacks
    
    def get_bias_and_weights(self):
        all_labels = []
        for ID in self.IDs:
            f = self.shards_dir.joinpath(self.label_type).joinpath(f'label_{ID}.npy')
            all_labels.append(np.load(f))
        all_labels = np.vstack(all_labels)
        neg, pos = np.bincount(all_labels.astype(int).flatten())
        initial_bias = np.log([pos/neg])
        class_weights = 0
        return initial_bias, class_weights

    def get_metrics(self):
        '''
        Return the metric list passed to ``compile``.

        Contains four metrics given by name, a precision-recall AUC named
        ``prc`` and three ``F1Score`` variants (micro, macro, weighted) that
        threshold predictions at 0.5.
        '''
        pr_auc = tf.keras.metrics.AUC(name='prc', curve='PR')
        f1_variants = [
            tf.keras.metrics.F1Score(average=mode, threshold=0.5, name=f'{mode}f1score')
            for mode in ('micro', 'macro', 'weighted')
        ]
        return ['accuracy', 'recall', 'precision', 'auc', pr_auc, *f1_variants]
        
    def run(self):
        '''
        Train the model stored at ``self.model_path`` and return it together
        with the generator over the held-out test IDs.

        When ``self.overwrite`` is set, the saved model, its CSV log and the
        files inside the backup directory are deleted first. Otherwise an
        existing model file is loaded and passed to ``fit`` again with
        ``self.epochs``; a new model is built only when no model file exists.

        The train/validation/test split uses fixed ``random_state`` values in
        the two ``train_test_split`` calls below. Keep them unchanged so the
        splits stay the same across continuations / evaluations.

        Returns:
            ``(model, testing_generator)``.
        '''
        # Use the path this instance was configured with rather than whatever
        # `model_path` happens to exist in the notebook namespace.
        model_path = self.model_path
        model_savepoint = model_path.parent.joinpath(model_path.stem)
        log_file = model_path.with_suffix('.log')

        metrics = self.get_metrics()

        if self.overwrite:
            for f in [model_path, log_file] + list(model_savepoint.glob('*')):
                f.unlink(missing_ok=True)

        self.display_logger(log_file, metrics)

        callbacks = self.define_callbacks_and_logger(
            model_path, model_savepoint, log_file, metrics)

        # 10% of the IDs are held out; of that hold-out, 10% becomes the
        # validation set and the remaining 90% becomes the test set.
        training_ids, test_ids = train_test_split(self.IDs, test_size=0.1, random_state=42)
        validation_ids, test_ids = train_test_split(test_ids, test_size=0.9, random_state=42)

        training_generator = DataGenerator(training_ids, shuffle=True, **self.kwargs)
        testing_generator = DataGenerator(test_ids, shuffle=False, **self.kwargs)
        validation_generator = DataGenerator(validation_ids, shuffle=False, **self.kwargs)

        if model_path.is_file():
            print('Loading model...')
            model = load_model(model_path)
        else:
            print('Calculating initial bias...', end=' ')
            # The second return value is a placeholder and is not used here.
            initial_bias, _ = self.get_bias_and_weights()
            print(initial_bias)
            print('Building model...')
            model = self.build_model(
                self.n_classes, (*self.dim, len(self.bands)), metrics,
                self.architecture, self.loss,
                output_bias=initial_bias,
            )
        print('Fitting...')
        model.fit(
            x=training_generator,
            validation_data=validation_generator,
            epochs=self.epochs,
            callbacks=callbacks
        )
        return model, testing_generator

    def build_vgg(self, input_layer):
        '''
        Stack four convolutional stages followed by two dense layers.

        Each stage is a run of 3x3 'same' ReLU convolutions, then 2x2 max
        pooling and batch normalisation. The stages use 1x, 2x, 4x and 8x
        ``base_filters`` with 2, 2, 3 and 3 convolutions respectively. The
        flattened result passes through two ``Dense(base_filters*64)`` layers,
        each followed by 50% dropout.
        '''
        stages = (
            (self.base_filters, 2),
            (self.base_filters*2, 2),
            (self.base_filters*4, 3),
            (self.base_filters*8, 3),
        )

        x = input_layer
        for filters, n_convs in stages:
            for _ in range(n_convs):
                x = Conv2D(
                    filters=filters, kernel_size=3, padding='same', activation='relu',
                )(x)
            x = MaxPooling2D(pool_size=2, strides=2)(x)
            x = BatchNormalization()(x)

        x = Flatten()(x)

        for _ in range(2):
            x = Dense(self.base_filters*64, activation='relu')(x)
            x = Dropout(0.5)(x)

        return x
    
    def res_block(self, x, filters):
        '''
        Pre-activation residual block with a stride-2 convolutional shortcut.

        The residual branch is BN -> ReLU -> 3x3 conv (stride 2) -> BN -> ReLU
        -> 3x3 conv (stride 1) -> 1x1 conv with a single filter. The shortcut
        is a 3x3 stride-2 convolution applied to ``x``. Both are summed.

        NOTE(review): the residual branch ends with ``filters=1`` while the
        shortcut has ``filters`` channels, so the ``Add`` presumably relies on
        broadcasting over the channel axis - confirm this is intended.
        '''
        def conv3x3(strides):
            # A fresh initializer instance per layer, all with the same seed.
            return Conv2D(
                filters=filters, kernel_size=3, strides=strides, padding='same',
                kernel_initializer=glorot_uniform(seed=42)
            )

        residual = BatchNormalization()(x)
        residual = Activation('relu')(residual)
        residual = conv3x3(2)(residual)

        residual = BatchNormalization()(residual)
        residual = Activation('relu')(residual)
        residual = conv3x3(1)(residual)

        residual = Conv2D(
            filters=1, kernel_size=1, strides=1, padding='valid'
        )(residual)

        shortcut = conv3x3(2)(x)

        return Add()([shortcut, residual])

    def build_simple(self, input_layer):
        '''
        Minimal backbone: one 3x3 'same' ReLU convolution with ``base_filters``
        filters, batch normalisation, flatten, then ``Dense(base_filters*8)``.
        '''
        features = Conv2D(
            filters=self.base_filters, kernel_size=3, padding='same', activation='relu',
        )(input_layer)
        features = BatchNormalization()(features)
        flat = Flatten()(features)
        return Dense(self.base_filters*8, activation='relu')(flat)

    def build_resnet(self, input_layer):
        '''
        Chain four ``res_block`` calls with 1x, 2x, 4x and 8x ``base_filters``,
        then apply ReLU, flatten and ``Dense(base_filters*8)``.
        '''
        x = input_layer
        for multiplier in (1, 2, 4, 8):
            x = self.res_block(x, self.base_filters*multiplier)

        x = Activation('relu')(x)
        x = Flatten()(x)

        return Dense(self.base_filters*8, activation='relu')(x)

    def build_unet(self, input_layer):
        '''
        Nested U-Net style backbone with dense skip connections.

        The encoder has five levels (32, 64, 128, 256, 512 filters), each built
        from two 3x3 ELU convolutions with 50% dropout after each one. The two
        convolutions of the first level use 'valid' padding; all others use
        'same'. Every decoder node upsamples the node one level below with a
        stride-2 transposed convolution and concatenates it with all earlier
        nodes of its own level before another pair of convolutions.

        The last top-level node is flattened and fed to ``Dense(base_filters*8)``.
        '''
        nb_filter = [32, 64, 128, 256, 512]

        def double_conv(tensor, filters, padding='same'):
            # Two conv + dropout pairs applied back to back.
            for _ in range(2):
                tensor = Conv2D(
                    filters, 3, activation='elu', kernel_initializer='he_normal', padding=padding
                )(tensor)
                tensor = Dropout(0.5)(tensor)
            return tensor

        def downsample(tensor):
            return MaxPooling2D(2, strides=2)(tensor)

        def upsample_and_merge(lower, filters, skips):
            # Upsample the lower-level node, then concatenate it ahead of the skips.
            upsampled = Conv2DTranspose(filters, 2, strides=2, padding='same')(lower)
            return concatenate([upsampled, *skips], axis=3)

        # Levels 1 and 2 of the encoder.
        conv1 = double_conv(input_layer, nb_filter[0], padding='valid')
        pool1 = downsample(conv1)

        conv2 = double_conv(pool1, nb_filter[1])
        pool2 = downsample(conv2)

        # First decoder node on level 1.
        conv3 = double_conv(upsample_and_merge(conv2, nb_filter[0], [conv1]), nb_filter[0])

        # Level 3 of the encoder and the nodes it unlocks.
        conv3_1 = double_conv(pool2, nb_filter[2])
        pool3 = downsample(conv3_1)

        conv2_2 = double_conv(upsample_and_merge(conv3_1, nb_filter[1], [conv2]), nb_filter[1])
        conv1_3 = double_conv(
            upsample_and_merge(conv2_2, nb_filter[0], [conv1, conv3]), nb_filter[0])

        # Level 4 of the encoder and the nodes it unlocks.
        conv4_1 = double_conv(pool3, nb_filter[3])
        pool4 = downsample(conv4_1)

        conv3_2 = double_conv(upsample_and_merge(conv4_1, nb_filter[2], [conv3_1]), nb_filter[2])
        conv2_3 = double_conv(
            upsample_and_merge(conv3_2, nb_filter[1], [conv2, conv2_2]), nb_filter[1])
        conv1_4 = double_conv(
            upsample_and_merge(conv2_3, nb_filter[0], [conv1, conv3, conv1_3]), nb_filter[0])

        # Level 5 (bottom) and the final decoder path.
        conv5_1 = double_conv(pool4, nb_filter[4])

        conv4_2 = double_conv(upsample_and_merge(conv5_1, nb_filter[3], [conv4_1]), nb_filter[3])
        conv3_3 = double_conv(
            upsample_and_merge(conv4_2, nb_filter[2], [conv3_1, conv3_2]), nb_filter[2])
        conv2_4 = double_conv(
            upsample_and_merge(conv3_3, nb_filter[1], [conv2, conv2_2, conv2_3]), nb_filter[1])
        conv1_5 = double_conv(
            upsample_and_merge(conv2_4, nb_filter[0], [conv1, conv3, conv1_3, conv1_4]),
            nb_filter[0])

        x = Flatten()(conv1_5)
        x = Dense(self.base_filters*8, activation='relu')(x)
        return x

    def topless_resnet50(self, input_shape, input_layer):
        '''
        Apply an untrained (``weights=None``) ResNet50 without its top and
        without pooling to ``input_layer``, then flatten and project with
        ``Dense(base_filters*8)``.
        '''
        backbone = tf.keras.applications.ResNet50(
            include_top=False,
            weights=None,
            input_shape=input_shape,
            pooling=None
        )
        flat = Flatten()(backbone(input_layer))
        return Dense(self.base_filters*8, activation='relu')(flat)

    def build_model(self, output_shape, input_shape, metrics, architecture, loss, output_bias=None):
        if output_bias is not None:
            output_bias = tf.keras.initializers.Constant(output_bias)
         
        input_layer = Input(input_shape)

        match architecture.lower():
            case 'simple':
                x = self.build_simple(input_layer)
            case 'unet':
                x = self.build_unet(input_layer)
            case 'resnet50':
                x = self.topless_resnet50(input_shape, input_layer)
            case 'resnet':
                x = self.build_resnet(input_layer)
            case 'vgg':
                x = self.build_vgg(input_layer)
        
        outputs = Dense(output_shape, activation='sigmoid', bias_initializer=output_bias)(x)
        
        m = Model(inputs=input_layer, outputs=outputs)

        adam = Adam(
            learning_rate=0.001,
            beta_1=0.9,
            beta_2=0.999,
            epsilon=1e-07,
        )

        m.compile(optimizer=adam, loss=loss, metrics=metrics)
        
        return m

In [6]:
# Backbone and loss for this experiment; both are encoded in the model file name.
architecture = 'unet'
loss = 'binary_focal_crossentropy'
model_path = model_dir / f'{architecture}-{loss}-{label_type}.keras'

In [None]:
# Configuration shared by KerasModelCreator and the DataGenerators it creates.
params = {
    'dim': (100, 100),
    'shards_dir': shards_dir,
    'label_type': label_type,
    'IDs': IDs,
    'model_path': model_path,
    # Number of classes is read from the first label shard.
    'n_classes': np.load(label_filepaths[0]).shape[0],
    'bands': bands,
    'architecture': architecture,
    'loss': loss,
    'batch_size': 64,
    'base_filters': 8,
    'epochs': 30,
    # overwrite=True makes run() delete the saved model and its log first.
    'overwrite': True,
}
model, testing_generator = KerasModelCreator(**params).run()

Calculating initial bias... [-2.57281661]
Building model...
Fitting...
Epoch 1/30
[1m 246/3344[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m15:35:47[0m 18s/step - accuracy: 0.0864 - auc: 0.5552 - loss: 5.0320 - macrof1score: 0.0394 - microf1score: 0.0485 - prc: 0.0886 - precision: 0.0714 - recall: 0.0529 - weightedf1score: 0.0492

In [None]:
# Evaluate on the test generator, or reuse the scores cached next to the model.
# NOTE(review): the cache is keyed only by the model file name, so it is not
# refreshed when the model is retrained - delete the file to re-evaluate.
eval_path = model_path.with_suffix('.eval.csv')
if eval_path.is_file():
    # index_col=0 restores the metric names as the index, so the cached table
    # matches the one printed right after a fresh evaluation.
    df = pd.read_csv(eval_path, index_col=0)
else:
    r = model.evaluate(x=testing_generator, verbose=1, return_dict=True)
    df = pd.DataFrame.from_dict(r, orient='index', columns=['score'])
    df.to_csv(eval_path)
print(df)

In [None]:
# Predict on the test generator once and cache the result next to the model.
preds_path = model_path.with_suffix('.preds.npy')
if not preds_path.is_file():
    y_pred = model.predict(x=testing_generator, verbose=1)
    np.save(preds_path, y_pred)
else:
    y_pred = np.load(preds_path)

In [None]:
# Collect the true labels of every test batch, or reuse the cached array.
labels_path = model_path.with_suffix('.true.npy')
total_batches = len(testing_generator)
if labels_path.is_file():
    y_true = np.load(labels_path)
else:
    # Index the generator explicitly: the batch count is known up front, so no
    # manual break is needed and the progress bar total is exact.
    y_true = np.vstack([
        testing_generator[batch_index][1]
        for batch_index in tqdm(range(total_batches), total=total_batches)
    ])
    np.save(labels_path, y_true)

In [None]:
# Sum of the true labels next to the sum of the rounded predictions.
y_true.sum(), np.round(y_pred, 0).sum()

In [None]:
# Confusion matrix over all (sample, class) entries pooled together.
cm = confusion_matrix(
    y_true.astype(int).ravel(),
    y_pred.round(0).astype(int).ravel(),
)
ConfusionMatrixDisplay(confusion_matrix=cm).plot()

In [None]:
# tf.keras.utils.plot_model(model, show_shapes=True)
# model.summary()

In [None]:
# Small scratch array for the experiments in the cells below. numpy is already
# imported in the first cell, so it is not re-imported here.
a = np.array([[1, 1, 1], [0, 1, 0]])
a

In [None]:
# Column-wise sums of the scratch array.
np.sum(a, axis=0)

In [None]:
# Sum of the first row divided by (number of rows * per-value counts of that row).
# NOTE(review): looks like a class-weight experiment - confirm the intended formula.
np.divide(a[0].sum(), a.shape[0] * np.bincount(a[0]))

In [None]:
# Second row of the scratch array.
a[1, :]

In [None]:
# Per-value counts of the second row, doubled.
2 * np.bincount(a[1])

In [None]:
# import subprocess
# subprocess.run(['sudo', 'shutdown', 'now'])