In [1]:
%env TF_CPP_MIN_LOG_LEVEL=3

import numpy as np
import pandas as pd

import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

from tqdm.notebook import tqdm
from pathlib import Path
import itertools

from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix, ConfusionMatrixDisplay, multilabel_confusion_matrix
from sklearn.utils import shuffle

import tensorflow as tf

from importlib import reload
import sentinel_utils
import keras_model_creator
from data_generator import DataGenerator

# Enable pandas copy-on-write for the whole notebook, so assigning columns on a
# filtered frame works on its own copy instead of the frame it was sliced from.
pd.set_option('mode.copy_on_write', True)

env: TF_CPP_MIN_LOG_LEVEL=3


In [2]:
# Band names B2..B8 followed by B8A, B11 and B12. The position of a name in this
# list is what the training loop uses as that band's index.
sentinel_bands = [f'B{suffix}' for suffix in (*range(2, 9), '8A', 11, 12)]

In [3]:
# Re-import sentinel_utils so edits to the module are picked up without
# restarting the kernel.
reload(sentinel_utils)

# Settings shared by every model trained in this notebook.
seasons = ['06']
loss = 'binary_crossentropy'
batch_size = 64
base_filters = 32
shards_dir = Path.home() / 'sentinel_data' / 'shards'

# Label table and data summary come from the project helper.
# NOTE(review): min_occurrences presumably filters out rare classes - confirm
# against SentinelUtils.
utils = sentinel_utils.SentinelUtils(min_occurrences=20000)
selected_classes = utils.get_processed_labels()
data_summary = utils.get_data_summary(shards_dir, seasons, selected_classes)

# Keyword arguments that stay the same for every KerasModelCreator run; the
# per-run arguments are merged on top of these in the training loop.
fixed_params = {
    'seasons': seasons,
    'data_summary': data_summary,
    'shards_dir': shards_dir,
    'loss': loss,
    'batch_size': batch_size,
    'base_filters': base_filters,
    'dropout': 0.2,
    'epochs': 10,
    'overwrite': False,
    'verbose': 1,
    'print_log': 0,
}

In [None]:
# Re-import keras_model_creator so edits to the module are picked up without
# restarting the kernel.
reload(keras_model_creator)

# Band subsets are enumerated within each group separately, not across all bands.
band_groups = [
    ['B2', 'B3', 'B4', 'B5'],
    ['B6', 'B7', 'B8', 'B8A'],
    ['B11', 'B12'],
]

model_parent_dir = Path('models', 'band_selection')

for i, band_group in enumerate(band_groups):
    # Every non-empty subset of the group, smallest subsets first.
    band_combinations = itertools.chain.from_iterable(
        itertools.combinations(band_group, size)
        for size in range(1, len(band_group) + 1)
    )
    pbar = tqdm(list(band_combinations), leave=False)
    for band_combination in pbar:
        pbar.set_description('-'.join(band_combination))
        # Translate band names into their positions in sentinel_bands.
        band_indices = list(map(sentinel_bands.index, band_combination))

        # One directory per configuration; the name ends with the '_'-joined
        # band list, which is how a run can be located again afterwards.
        name_parts = [
            loss,
            len(selected_classes.index),
            selected_classes.shape[1],
            len(band_combination),
            '_'.join(seasons),
            batch_size,
            base_filters,
            '_'.join(band_combination),
        ]
        model_dir = model_parent_dir / '-'.join(str(part) for part in name_parts)
        model_dir.mkdir(parents=True, exist_ok=True)

        # Per-run arguments are layered over the shared ones.
        changing_params = {
            'selected_classes': selected_classes,
            'model_dir': model_dir,
            'band_indices': band_indices,
        }
        params = {**fixed_params, **changing_params}
        keras_model_creator.KerasModelCreator(**params).run()
        # Reset Keras' global state before the next model is built.
        tf.keras.backend.clear_session()


  0%|          | 0/15 [00:00<?, ?it/s]

Epoch 1/10


I0000 00:00:1722259222.605668   24725 service.cc:145] XLA service 0x7fb9680223f0 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1722259222.605709   24725 service.cc:153]   StreamExecutor device (0): NVIDIA GeForce RTX 4070 Ti, Compute Capability 8.9
I0000 00:00:1722259235.454367   24725 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


[1m3550/3550[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 61ms/step - accuracy: 0.3356 - auc: 0.8060 - loss: 0.4391 - macrof1score: 0.2560 - microf1score: 0.4381 - prc: 0.5504 - precision: 0.6126 - recall: 0.3428 - weightedf1score: 0.3943






Epoch 1: val_recall did not improve from 0.50000
[1m3550/3550[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m242s[0m 64ms/step - accuracy: 0.3356 - auc: 0.8060 - loss: 0.4391 - macrof1score: 0.2560 - microf1score: 0.4381 - prc: 0.5504 - precision: 0.6126 - recall: 0.3428 - weightedf1score: 0.3943 - val_accuracy: 0.3441 - val_auc: 0.8398 - val_loss: 0.4135 - val_macrof1score: 0.3539 - val_microf1score: 0.5496 - val_prc: 0.6107 - val_precision: 0.6369 - val_recall: 0.4834 - val_weightedf1score: 0.5027 - learning_rate: 1.0000e-04
Epoch 2/10
[1m3550/3550[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 61ms/step - accuracy: 0.3873 - auc: 0.8543 - loss: 0.3911 - macrof1score: 0.3685 - microf1score: 0.5537 - prc: 0.6337 - precision: 0.6633 - recall: 0.4751 - weightedf1score: 0.5153
Epoch 2: val_recall improved from 0.50000 to 0.53986, saving model to models/band_selection/binary_crossentropy-237212-7-1-06-64-32-B2/model.keras
[1m3550/3550[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m 

In [None]:
# Collect, for each trained band combination, the log row(s) with the highest
# value of `best_metric`, tagged with one indicator column per band.
all_bands = utils.all_bands[:10]
best_metric = 'val_weightedf1score'
best_rows = []

# Only the first band group and its first four combinations are inspected here.
for band_group in band_groups[:1]:
    band_combinations = itertools.chain.from_iterable(
        itertools.combinations(band_group, r) for r in range(1, len(band_group)+1)
    )
    for band_combination in list(band_combinations)[:4]:
        # Model directory names END with the '_'-joined band list, so the
        # pattern must not carry a trailing wildcard: '*-B2*' would also match
        # the 'B2_B3', 'B2_B3_B4', ... runs (and '*-B8*' would match 'B8A'),
        # and their scores would be labelled with this combination's bands.
        band_suffix = '_'.join(band_combination)
        # Sorted so the row order of best_df does not depend on the order in
        # which the filesystem happens to list the directories.
        model_dirs = sorted(model_parent_dir.glob(f'*-{band_suffix}'))
        for model_dir in reversed(model_dirs):
            df = pd.read_csv(model_dir.joinpath('model.log'))
            # All rows that reach the maximum of the metric (several on a tie).
            best = df[df[best_metric] == df[best_metric].max()]

            # One column per band: 2 if the band is in this combination, else 0.
            # With copy-on-write enabled in the first cell, this writes to
            # `best` only and leaves `df` untouched.
            # NOTE(review): the factor 2 is presumably the value line_heatmap
            # expects for a "selected" band - confirm against plot_utils.
            best[all_bands] = [int(b in band_combination)*2 for b in all_bands]

            best_rows.append(best)

best_df = pd.concat(best_rows).round(2).reset_index(drop=True)

In [None]:
# plot_utils is imported and immediately reloaded so edits to the module are
# picked up without restarting the kernel.
import plot_utils
reload(plot_utils)

# Draw the combined line/heatmap figure from the best rows and band indicators.
# NOTE(review): the meaning of the [0.5, 0.5] argument is not visible here -
# confirm against PlotUtils.line_heatmap.
heatmap_plotter = plot_utils.PlotUtils()
heatmap_plotter.line_heatmap(best_df, all_bands, [0.5, 0.5])