In [12]:
from IPython.core.display import display, HTML

import numpy as np
import pandas as pd

from pathlib import Path

from time import sleep

from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.utils import shuffle

import tensorflow as tf
from tensorflow.keras import backend as K
from tensorflow.keras.layers import *
from tensorflow.keras.optimizers import Adam, RMSprop, Nadam, SGD, AdamW

  from IPython.core.display import display, HTML


In [2]:
selected_bands = [f'B{x}' for x in range(2, 9)] + ['B8A', 'B11', 'B12', 'TCI_R', 'TCI_G', 'TCI_B']

In [3]:
model_dir = Path('models')
model_dir.mkdir(parents=True, exist_ok=True)

shards_dir = Path('/sentinel_data').joinpath('shards')

sort_key = lambda x: int(x.stem.split('_')[-1])
feature_filepaths = sorted(list(shards_dir.glob('feature_*.npy')), key=sort_key)

Adapted from https://stanford.edu/~shervine/blog/keras-how-to-generate-data-on-the-fly

In [4]:
class DataGenerator(tf.keras.utils.Sequence):
    'Generates data for Keras'
    def __init__(self, list_IDs, data_dir, batch_size=32, dim=(100,100), n_channels=13,
                 n_classes=242, shuffle=True, **kwargs):
        super().__init__(**kwargs)
        self.dim = dim
        self.batch_size = batch_size
        self.data_dir = data_dir
        self.list_IDs = list_IDs
        self.n_channels = n_channels
        self.n_classes = n_classes
        self.shuffle = shuffle
        self.use_multiprocessing = True
        self.workers = 8
        self.on_epoch_end()

    def __len__(self):
        'Denotes the number of batches per epoch'
        return int(np.floor(len(self.list_IDs) / self.batch_size))

    def __getitem__(self, index):
        'Generate one batch of data'
        # Generate indexes of the batch
        indexes = self.indexes[index*self.batch_size:(index+1)*self.batch_size]

        # Find list of IDs
        list_IDs_temp = [self.list_IDs[k] for k in indexes]

        # Generate data
        X, y = self.__data_generation(list_IDs_temp)

        return X, y

    def on_epoch_end(self):
        'Updates indexes after each epoch'
        self.indexes = np.arange(len(self.list_IDs))
        if self.shuffle == True:
            np.random.shuffle(self.indexes)

    def __data_generation(self, list_IDs_temp):
        'Generates data containing batch_size samples'
        # Initialization
        X = np.empty((self.batch_size, *self.dim, self.n_channels))
        y = np.empty((self.batch_size, self.n_classes))

        # Generate data
        for i, ID in enumerate(list_IDs_temp):
            # Store sample
            X[i,] = np.load(self.data_dir.joinpath(f'feature_{ID}.npy'))
            # Store class
            y[i] = np.load(self.data_dir.joinpath(f'label_{ID}.npy'))
        
        return X, y

In [5]:
class SleepCallback(tf.keras.callbacks.Callback):
    def on_epoch_end(self, epoch, logs=None):
        sleep(300)
    def on_epoch_begin(self, epoch, logs=None):
        if epoch > 1:
            sleep(300)  

In [6]:
class KerasModelCreator:
    def display_logger(self, log_file, metrics):
        metric_names = [m if isinstance(m, str) else m.name for m in metrics]
        if log_file.is_file():
            val_metrics = ['val_loss'] + ['val_' + x for x in metric_names]
            df = pd.read_csv(log_file)[['epoch', 'loss'] + metric_names + val_metrics]
            df['epoch'] += 1
            print('Previous training:')
            display(HTML(df.to_html(index=False)))
    
    def define_callbacks_and_logger(self, model_path, model_savepoint, log_file, metrics):
        metric_names = [m if isinstance(m, str) else m.name for m in metrics]

        callbacks = [
            tf.keras.callbacks.BackupAndRestore(
                model_savepoint, save_freq='epoch', delete_checkpoint=False
            ),
            tf.keras.callbacks.CSVLogger(log_file, append=True),
            tf.keras.callbacks.ModelCheckpoint(
                model_path, monitor='val_prc', save_best_only=True, 
                save_freq='epoch', initial_value_threshold=0.4,
                verbose=1,
            ),
            tf.keras.callbacks.ReduceLROnPlateau(
                monitor='val_prc', factor=0.6, patience=2, min_lr=3e-6,
                verbose=1,
            ),
            tf.keras.callbacks.EarlyStopping(
                monitor='val_prc', 
                verbose=1,
                patience=10,
                mode='max',
                restore_best_weights=True
            ),
            SleepCallback()
        ]
        return callbacks
    
    def get_initial_bias(self, shards_dir, IDs):
        all_labels = []
        for ID in IDs:
            f = shards_dir.joinpath(f'label_{ID}.npy')
            all_labels.append(np.load(f))
        all_labels = np.vstack(all_labels)
        neg, pos = np.bincount(all_labels.astype(int).flatten())
        return np.log([pos/neg])
        
    def build_model(self, output_shape, input_shape, metrics, output_bias=None):
        if output_bias is not None:
            output_bias = tf.keras.initializers.Constant(output_bias)
            
        initial_units = 16
            
        m = tf.keras.Sequential()
        m.add(Input(input_shape))
        
        for filters in [initial_units, initial_units*2]:
            for _ in range(2):
                m.add(Conv2D(
                    filters=filters, kernel_size=3, padding='same', activation='relu',
                ))
            m.add(MaxPooling2D(pool_size=2,strides=2))
            m.add(BatchNormalization())

        for filters in [initial_units*4, initial_units*8]:
            for _ in range(3):
                m.add(Conv2D(
                    filters=filters, kernel_size=3, padding='same', activation='relu',
                ))
            m.add(MaxPooling2D(pool_size=2,strides=2))
            m.add(BatchNormalization())

        m.add(Flatten())

        for n_layers in range(2):
            m.add(Dense(initial_units*64, activation='relu'))
            m.add(Dropout(0.5))
        
        m.add(Dense(output_shape, activation='sigmoid'))

        m.compile(optimizer='adam', loss='binary_crossentropy', metrics=metrics)
        
        return m
    
    def run(self, shards_dir, IDs, model_path, batch_size=10, epochs=10, overwrite=False):
        ''' 
        If not overwrite and there's an existing model, the model will 
        continue trainingw if the given epoch is bigger than the previous,
        else just evaluate.
        Ensure train splits are the same across continuations / evaluations
        by not modifying the random_state in split_and_normalise.
        '''
        model_savepoint = model_path.parent.joinpath(model_path.stem)
        log_file = model_path.with_suffix('.log')
        
        prc = tf.keras.metrics.AUC(name='prc', curve='PR')
        metrics = [
            'accuracy', 'recall', 'precision',
            'mse', 'r2_score', 'auc', prc
        ]       

        if overwrite:
            for f in [model_path, log_file] + list(model_savepoint.glob('*')):
                f.unlink(missing_ok=True)
                
        self.display_logger(log_file, metrics)
        
        callbacks = self.define_callbacks_and_logger(
            model_path, model_savepoint, log_file, metrics)
        
        params = {
            'dim': (100, 100),
            'batch_size': batch_size,
            'n_classes': 242,
            'n_channels': 13,
            'shuffle': True
        }
        
        training_ids, test_ids = train_test_split(IDs, test_size=0.1, random_state=42)
        validation_ids, test_ids = train_test_split(test_ids, test_size=0.9, random_state=42)
        
        training_generator = DataGenerator(training_ids, shards_dir, **params)
        validation_generator = DataGenerator(validation_ids, shards_dir, **params)
        
        print('Loading model...')
        if model_path.is_file():
            model = tf.keras.models.load_model(model_path)
        else:
            initial_bias = self.get_initial_bias(shards_dir, IDs[:2000])
            model = self.build_model(
                params['n_classes'], (*params['dim'], params['n_channels']), metrics,
                output_bias=initial_bias,
            )
        print('Fitting...')
        model.fit(
            x=training_generator,
            validation_data=validation_generator,
            epochs=epochs,
            callbacks=callbacks
        )
        
        return model

In [7]:
parts = [f.stem.split('_')[-1] for f in feature_filepaths]
model_name = f'conv_parts_{parts[0]}_to_{parts[-1]}.keras'
model_path = model_dir.joinpath(model_name)
print(model_path)

IDs = [int(f.stem.split('_')[-1]) for f in feature_filepaths]
IDs = shuffle(IDs, random_state=42)

models/conv_parts_0_to_111999.keras


In [13]:
KerasModelCreator().run(
    shards_dir,
    IDs, 
    model_path, 
    batch_size=100, 
    epochs=6,
    overwrite=False
)

Previous training:


epoch,loss,accuracy,recall,precision,mse,r2_score,auc,prc,val_loss,val_accuracy,val_recall,val_precision,val_mse,val_r2_score,val_auc,val_prc
1,0.003693,0.743482,0.951721,0.870834,0.001016,0.18577,0.996507,0.977162,8.235919e-08,0.866364,1.0,1.0,1.323611e-11,0.206612,1.0,1.0


Loading model...
Fitting...
Epoch 2/5


  trackable.load_own_variables(weights_store.get(inner_path))
2024-06-11 18:02:18.669758: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:450] ShuffleDatasetV3:13: Filling up shuffle buffer (this may take a while): 3 of 8
2024-06-11 18:02:37.470363: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:450] ShuffleDatasetV3:13: Filling up shuffle buffer (this may take a while): 7 of 8
2024-06-11 18:02:41.893396: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:480] Shuffle buffer filled.


[1m1008/1008[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4s/step - accuracy: 0.8345 - auc: 0.9995 - loss: 3.0170e-04 - mse: 6.6617e-05 - prc: 0.9984 - precision: 0.9953 - r2_score: 0.2049 - recall: 0.9934
Epoch 2: val_prc did not improve from 0.40000
[1m1008/1008[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4870s[0m 5s/step - accuracy: 0.8345 - auc: 0.9995 - loss: 3.0156e-04 - mse: 6.6586e-05 - prc: 0.9984 - precision: 0.9953 - r2_score: 0.2049 - recall: 0.9934 - val_accuracy: 0.9900 - val_auc: 1.0000 - val_loss: 8.9853e-08 - val_mse: 1.2535e-10 - val_prc: 1.0000 - val_precision: 1.0000 - val_r2_score: 0.2066 - val_recall: 1.0000 - learning_rate: 0.0010
Epoch 3/5


2024-06-11 19:28:25.514774: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:450] ShuffleDatasetV3:13: Filling up shuffle buffer (this may take a while): 3 of 8
2024-06-11 19:28:43.968832: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:450] ShuffleDatasetV3:13: Filling up shuffle buffer (this may take a while): 7 of 8
2024-06-11 19:28:48.771932: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:480] Shuffle buffer filled.


[1m1008/1008[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5s/step - accuracy: 0.9465 - auc: 0.9996 - loss: 2.1272e-04 - mse: 3.6859e-05 - prc: 0.9986 - precision: 0.9971 - r2_score: 0.2054 - recall: 0.9968
Epoch 3: val_prc did not improve from 0.40000
[1m1008/1008[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4984s[0m 5s/step - accuracy: 0.9465 - auc: 0.9996 - loss: 2.1270e-04 - mse: 3.6854e-05 - prc: 0.9986 - precision: 0.9971 - r2_score: 0.2054 - recall: 0.9968 - val_accuracy: 0.9936 - val_auc: 1.0000 - val_loss: 1.2751e-09 - val_mse: 3.9243e-15 - val_prc: 1.0000 - val_precision: 1.0000 - val_r2_score: 0.2066 - val_recall: 1.0000 - learning_rate: 0.0010
Epoch 4/5


2024-06-11 20:56:29.703626: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:450] ShuffleDatasetV3:13: Filling up shuffle buffer (this may take a while): 3 of 8
2024-06-11 20:56:47.779886: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:450] ShuffleDatasetV3:13: Filling up shuffle buffer (this may take a while): 7 of 8
2024-06-11 20:56:52.574747: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:480] Shuffle buffer filled.


[1m1008/1008[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4s/step - accuracy: 0.9862 - auc: 0.9998 - loss: 1.0244e-04 - mse: 1.4390e-05 - prc: 0.9993 - precision: 0.9988 - r2_score: 0.2062 - recall: 0.9989
Epoch 4: val_prc did not improve from 0.40000
[1m1008/1008[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4893s[0m 5s/step - accuracy: 0.9862 - auc: 0.9998 - loss: 1.0245e-04 - mse: 1.4392e-05 - prc: 0.9993 - precision: 0.9988 - r2_score: 0.2062 - recall: 0.9989 - val_accuracy: 0.9336 - val_auc: 0.9944 - val_loss: 0.0058 - val_mse: 5.2993e-04 - val_prc: 0.9585 - val_precision: 0.9467 - val_r2_score: 0.1984 - val_recall: 0.9634 - learning_rate: 0.0010
Epoch 5/5


2024-06-11 22:23:02.642590: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:450] ShuffleDatasetV3:13: Filling up shuffle buffer (this may take a while): 3 of 8
2024-06-11 22:23:20.277602: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:450] ShuffleDatasetV3:13: Filling up shuffle buffer (this may take a while): 7 of 8
2024-06-11 22:23:24.898573: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:480] Shuffle buffer filled.


[1m1008/1008[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4s/step - accuracy: 0.9910 - auc: 0.9997 - loss: 1.2980e-04 - mse: 1.7773e-05 - prc: 0.9990 - precision: 0.9986 - r2_score: 0.2061 - recall: 0.9986
Epoch 5: val_prc did not improve from 0.40000
[1m1008/1008[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4818s[0m 5s/step - accuracy: 0.9910 - auc: 0.9997 - loss: 1.2976e-04 - mse: 1.7767e-05 - prc: 0.9990 - precision: 0.9986 - r2_score: 0.2061 - recall: 0.9986 - val_accuracy: 1.0000 - val_auc: 1.0000 - val_loss: 2.2048e-07 - val_mse: 5.0616e-11 - val_prc: 1.0000 - val_precision: 1.0000 - val_r2_score: 0.2066 - val_recall: 1.0000 - learning_rate: 0.0010
Restoring model weights from the end of the best epoch: 2.


<Sequential name=sequential_1, built=True>

In [9]:
# tf.keras.models.load_model(model_path).summary()

In [10]:
# import subprocess
# subprocess.run(['sudo', 'shutdown', 'now'])