In [1]:
%env TF_CPP_MIN_LOG_LEVEL=3

import itertools
from importlib import reload
from pathlib import Path

import numpy as np
import pandas as pd

import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

from tqdm.notebook import tqdm

from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix, ConfusionMatrixDisplay, multilabel_confusion_matrix
from sklearn.utils import shuffle

import tensorflow as tf

import sentinel_utils
import keras_model_creator
from data_generator import DataGenerator
import plot_utils

# Turn on pandas copy-on-write for the whole notebook session.
pd.set_option('mode.copy_on_write', True)

env: TF_CPP_MIN_LOG_LEVEL=3


In [2]:
# Band names B2..B8 followed by B8A, B11 and B12. The position of a name in
# this list is what `sentinel_bands.index(...)` turns into a band index later.
sentinel_bands = [*(f'B{n}' for n in range(2, 9)), 'B8A', 'B11', 'B12']

In [3]:
# Re-import sentinel_utils so edits to the module are picked up without
# restarting the kernel.
reload(sentinel_utils)

# Settings shared by every model in the sweep. `loss`, `batch_size`,
# `base_filters` and `seasons` are also embedded in each model directory name.
seasons = ['06']
loss = 'binary_crossentropy'
batch_size = 64
base_filters = 32
shards_dir = Path.home() / 'sentinel_data' / 'shards'

# NOTE(review): min_occurrences presumably drops labels seen fewer than
# 20000 times - confirm against SentinelUtils.
utils = sentinel_utils.SentinelUtils(min_occurrences=20000)
selected_classes = utils.get_processed_labels()
data_summary = utils.get_data_summary(shards_dir, seasons, selected_classes)

# Keyword arguments that stay constant across all KerasModelCreator runs; the
# per-run arguments are merged on top of these inside the training loop.
fixed_params = {
    'seasons': seasons,
    'data_summary': data_summary,
    'shards_dir': shards_dir,
    'loss': loss,
    'batch_size': batch_size,
    'base_filters': base_filters,
    'dropout': 0.2,
    'epochs': 10,
    'overwrite': False,
    'verbose': 1,
    'print_log': 0,
}

In [None]:
# Re-import keras_model_creator so edits to the module are picked up without
# restarting the kernel.
reload(keras_model_creator)

# Band subsets are enumerated inside each group only; bands from different
# groups are never combined in this sweep.
band_groups = [
    ['B2', 'B3', 'B4', 'B5'],
    ['B6', 'B7', 'B8', 'B8A'],
    ['B11', 'B12'],
]

model_parent_dir = Path('models', 'band_selection')

for band_group in tqdm(band_groups):
    # Every non-empty subset of the group, smallest subsets first.
    band_combinations = (
        subset
        for size in range(1, len(band_group) + 1)
        for subset in itertools.combinations(band_group, size)
    )
    # Materialise the subsets so the inner progress bar knows its total.
    pbar = tqdm(list(band_combinations), leave=False)
    for band_combination in pbar:
        pbar.set_description('-'.join(band_combination))

        # Positions of the chosen bands within `sentinel_bands`.
        band_indices = [sentinel_bands.index(band) for band in band_combination]

        # One directory per run; the name encodes the run settings and ends
        # with the '_'-joined band combination.
        model_dir = model_parent_dir / (
            f'{loss}-{len(selected_classes.index)}'
            f'-{selected_classes.shape[1]}-{len(band_combination)}'
            f'-{"_".join(seasons)}-{batch_size}-{base_filters}'
            f'-{"_".join(band_combination)}'
        )
        model_dir.mkdir(parents=True, exist_ok=True)

        # Per-run arguments take precedence over the shared ones on key clashes.
        changing_params = {
            'selected_classes': selected_classes,
            'model_dir': model_dir,
            'band_indices': band_indices,
        }
        params = {**fixed_params, **changing_params}

        # run() returns the model and a testing generator; both names are
        # overwritten on every iteration, so only the last run's objects remain.
        model, testing_generator = keras_model_creator.KerasModelCreator(**params).run()


  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/15 [00:00<?, ?it/s]

Epoch 9/10


I0000 00:00:1722196774.169384     289 service.cc:145] XLA service 0xb94fb80 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1722196774.169421     289 service.cc:153]   StreamExecutor device (0): NVIDIA GeForce RTX 4070 Ti, Compute Capability 8.9
I0000 00:00:1722196789.568237     289 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


[1m3550/3550[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 146ms/step - accuracy: 0.5121 - auc: 0.9203 - loss: 0.2980 - macrof1score: 0.5357 - microf1score: 0.6995 - prc: 0.7964 - precision: 0.7666 - recall: 0.6431 - weightedf1score: 0.6713






Epoch 9: val_recall improved from 0.50000 to 0.61785, saving model to models/band_selection/binary_crossentropy-237212-7-1-06-64-32-B3/model.keras
[1m3550/3550[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m551s[0m 150ms/step - accuracy: 0.5121 - auc: 0.9203 - loss: 0.2980 - macrof1score: 0.5357 - microf1score: 0.6995 - prc: 0.7964 - precision: 0.7666 - recall: 0.6431 - weightedf1score: 0.6713 - val_accuracy: 0.4972 - val_auc: 0.9099 - val_loss: 0.3170 - val_macrof1score: 0.5260 - val_microf1score: 0.6811 - val_prc: 0.7758 - val_precision: 0.7588 - val_recall: 0.6178 - val_weightedf1score: 0.6557 - learning_rate: 5.0000e-05
Epoch 10/10
[1m3550/3550[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 156ms/step - accuracy: 0.5158 - auc: 0.9239 - loss: 0.2917 - macrof1score: 0.5487 - microf1score: 0.7071 - prc: 0.8051 - precision: 0.7699 - recall: 0.6537 - weightedf1score: 0.6808
Epoch 10: val_recall did not improve from 0.61785
[1m3550/3550[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m

For each trained band combination, read the training log (`model.log`) saved in the model's directory and keep the epoch(s) with the best validation weighted F1 score.

In [None]:
# Gather, for every trained band combination, the row(s) of its training log
# with the highest value of `best_metric`, tagged with one flag column per band.

# The groups have different lengths (4, 4, 2), so np.array(band_groups) cannot
# form a regular 2-D array to flatten; concatenating the groups yields the
# intended flat 1-D array of band names.
all_bands = np.concatenate(band_groups)
best_metric = 'val_weightedf1score'
best_rows = []

for band_group in tqdm(band_groups):
    band_combinations = itertools.chain.from_iterable(
        itertools.combinations(band_group, r) for r in range(1, len(band_group)+1)
    )
    for band_combination in band_combinations:
        # Model directory names END with the '_'-joined band combination, so
        # the pattern is anchored at the end of the name. Requiring a leading
        # '-' keeps e.g. 'B3' from matching a directory ending in 'B2_B3'.
        # Sorted because glob order is filesystem-dependent.
        model_dirs = sorted(
            model_parent_dir.glob(f'*-{"_".join(band_combination)}')
        )

        for model_dir in reversed(model_dirs):
            # model.log is read as CSV with one row per epoch and a column
            # named after each logged metric - TODO confirm it is written by
            # KerasModelCreator in this format.
            df = pd.read_csv(model_dir.joinpath('model.log'))
            # Keeps every row tied for the maximum, not just the first one.
            best = df[df[best_metric] == df[best_metric].max()]

            # One column per band: 2 if the band is part of this combination,
            # otherwise 0. NOTE(review): the factor 2 is presumably tied to how
            # plot_utils.line_heatmap colours cells - confirm.
            best[all_bands] = [int(b in band_combination)*2 for b in all_bands]

            best_rows.append(best)

if not best_rows:
    # pd.concat would raise an opaque "No objects to concatenate" here.
    raise ValueError(
        f'No model logs found under {model_parent_dir}; '
        'run the training cell first.'
    )

best_df = pd.concat(best_rows).round(2).reset_index(drop=True)

In [None]:
# plot_utils is imported with the other modules at the top of the notebook;
# reloading here picks up edits to the module without restarting the kernel.
reload(plot_utils)

# Draw the band-selection heatmap from the collected best rows.
# NOTE(review): the meaning of the [0.5, 0.5] argument is defined by
# PlotUtils.line_heatmap and is not visible here - confirm before changing it.
plot_utils.PlotUtils().line_heatmap(
    best_df, all_bands, [0.5, 0.5]
)