In [2]:
from IPython.display import display, HTML
import numpy as np
import pandas as pd

from tqdm.notebook import tqdm

from pathlib import Path

from time import sleep

from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix, ConfusionMatrixDisplay, multilabel_confusion_matrix
from sklearn.utils import shuffle

from importlib import reload
import sentinel_utils
import keras_model_creator

import itertools

import tensorflow as tf

from data_generator import DataGenerator

In [3]:
# Sentinel band names: B2 through B8, followed by B8A, B11 and B12.
sentinel_bands = [*(f'B{number}' for number in range(2, 9)), 'B8A', 'B11', 'B12']

# SoilGrids layer names.
soilgrids_band = [
    'bdod', 'cec', 'cfvo', 'clay', 'nitrogen', 'ocd',
    'ocs', 'phh2o', 'sand', 'silt', 'soc',
]

# Full band list; a band's position here is what later cells look up with
# all_bands.index(...), so the order matters.
all_bands = [*sentinel_bands, 'Elevation', *soilgrids_band]

In [6]:
# Re-execute sentinel_utils so edits made to the module since it was first
# imported take effect in this kernel.
reload(sentinel_utils)

# Settings shared by every model trained in this notebook.
seasons = ['06']
loss = 'binary_crossentropy'
batch_size = 64
base_filters = 32
shards_dir = Path.home() / 'sentinel_data' / 'shards'

# Labels and data summary come from the project helper.
# NOTE(review): min_occurrences presumably filters out rare classes —
# confirm against SentinelUtils.
utils = sentinel_utils.SentinelUtils(min_occurrences=20000)
selected_classes = utils.get_processed_labels()
data_summary = utils.get_data_summary(shards_dir, seasons, all_bands, selected_classes)

# Keyword arguments that stay the same for every training run; per-run
# values are merged on top of these in a later cell.
fixed_params = {
    'seasons': seasons,
    'data_summary': data_summary,
    'shards_dir': shards_dir,
    'loss': loss,
    'batch_size': batch_size,
    'base_filters': base_filters,
    'dropout': 0.2,
    'epochs': 11,
    'overwrite': False,
    'verbose': 1,
}

In [9]:
# Re-execute keras_model_creator so edits made to the module since it was
# first imported take effect in this kernel.
reload(keras_model_creator)

# One model is trained for every non-empty subset of bands *within* each
# group; subsets that span several groups are not explored here.
band_groups = [
    ['B2', 'B3', 'B4', 'B5'],
    ['B6', 'B7', 'B8', 'B8A'],
    ['B11', 'B12']
]
for band_group in tqdm(band_groups):
    # Every combination of size 1..len(band_group), smallest sizes first.
    band_combinations = itertools.chain.from_iterable(
        itertools.combinations(band_group, r) for r in range(1, len(band_group)+1))
    # Materialise the combinations so the inner progress bar knows its total.
    for band_combination in (pbar := tqdm(list(band_combinations), leave=False)):
        pbar.set_description('-'.join(band_combination))
        # Positions of the chosen bands within all_bands.
        band_indices = [all_bands.index(b) for b in band_combination]

        # The directory name encodes the run configuration: loss, number of
        # label rows, number of label columns, number of bands, seasons,
        # batch size, base filters and the band names.
        # The band count is taken from band_combination; the previous
        # `len(bands)` referred to a name that no visible cell defines, so it
        # raised NameError on a fresh kernel (or used stale kernel state).
        model_dir = Path('models', 'band_selection',
            f'{loss}-{len(selected_classes.index)}'
            f'-{selected_classes.shape[1]}-{len(band_combination)}'
            f'-{"_".join(seasons)}-{batch_size}-{base_filters}'
            f'-{"_".join(band_combination)}'
        )
        model_dir.mkdir(parents=True, exist_ok=True)

        # Per-run values, merged over the shared settings (right side wins).
        changing_params = dict(
            selected_classes=selected_classes,
            model_dir=model_dir,
            band_indices=band_indices,
        )
        params = fixed_params | changing_params
        model, testing_generator = keras_model_creator.KerasModelCreator(**params).run()


  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/15 [00:00<?, ?it/s]

models/binary_crossentropy-237212-7-1-06-64-32-B2
models/binary_crossentropy-237212-7-1-06-64-32-B3
models/binary_crossentropy-237212-7-1-06-64-32-B4
models/binary_crossentropy-237212-7-1-06-64-32-B5
models/binary_crossentropy-237212-7-2-06-64-32-B2_B3
models/binary_crossentropy-237212-7-2-06-64-32-B2_B4
models/binary_crossentropy-237212-7-2-06-64-32-B2_B5
models/binary_crossentropy-237212-7-2-06-64-32-B3_B4
models/binary_crossentropy-237212-7-2-06-64-32-B3_B5
models/binary_crossentropy-237212-7-2-06-64-32-B4_B5
models/binary_crossentropy-237212-7-3-06-64-32-B2_B3_B4
models/binary_crossentropy-237212-7-3-06-64-32-B2_B3_B5
models/binary_crossentropy-237212-7-3-06-64-32-B2_B4_B5
models/binary_crossentropy-237212-7-3-06-64-32-B3_B4_B5
models/binary_crossentropy-237212-7-4-06-64-32-B2_B3_B4_B5


  0%|          | 0/15 [00:00<?, ?it/s]

models/binary_crossentropy-237212-7-1-06-64-32-B6
models/binary_crossentropy-237212-7-1-06-64-32-B7
models/binary_crossentropy-237212-7-1-06-64-32-B8
models/binary_crossentropy-237212-7-1-06-64-32-B8A
models/binary_crossentropy-237212-7-2-06-64-32-B6_B7
models/binary_crossentropy-237212-7-2-06-64-32-B6_B8
models/binary_crossentropy-237212-7-2-06-64-32-B6_B8A
models/binary_crossentropy-237212-7-2-06-64-32-B7_B8
models/binary_crossentropy-237212-7-2-06-64-32-B7_B8A
models/binary_crossentropy-237212-7-2-06-64-32-B8_B8A
models/binary_crossentropy-237212-7-3-06-64-32-B6_B7_B8
models/binary_crossentropy-237212-7-3-06-64-32-B6_B7_B8A
models/binary_crossentropy-237212-7-3-06-64-32-B6_B8_B8A
models/binary_crossentropy-237212-7-3-06-64-32-B7_B8_B8A
models/binary_crossentropy-237212-7-4-06-64-32-B6_B7_B8_B8A


  0%|          | 0/3 [00:00<?, ?it/s]

models/binary_crossentropy-237212-7-1-06-64-32-B11
models/binary_crossentropy-237212-7-1-06-64-32-B12
models/binary_crossentropy-237212-7-2-06-64-32-B11_B12


Evaluate the model for each of the given years and save the predictions in the model's directory.

In [None]:
# for eval_year in [2017, 2020, 2021, 2022, 2023]:
    # eval_generator = DataGenerator(
        # utils.selected_classes.index, shuffle=False, year=eval_year, **params)
#     preds_path = model_dir.joinpath(f"preds_{params['year']}.npy")
#     if preds_path.is_file():
#         y_pred = np.load(preds_path)
#     else:
#         y_pred = model.predict(x=eval_generator, verbose=1)
#         np.save(preds_path, y_pred)

#     y_true = utils.selected_classes.iloc[:y_pred.shape[0]].to_numpy()
#     cm = confusion_matrix(y_true.flatten(), (y_pred > 0.5).flatten().astype(int))
#     plot = ConfusionMatrixDisplay(confusion_matrix=cm).plot()
#     display(plot)

Visualise the confusion matrix for each class.

In [None]:
# import matplotlib.pyplot as plt

# class_names = utils.selected_classes.columns

# f, axes = plt.subplots(4, 2, figsize=(25, 30))
# axes = axes.ravel()
# for label in range(y_true.shape[1]):
#     cm = confusion_matrix(y_true[..., label].astype(int), (y_pred[..., label] > 0.5).astype(int))
#     disp = ConfusionMatrixDisplay(cm)
#     disp.plot(ax=axes[label], values_format='.4g')
#     disp.ax_.set_title(f'{class_names[label]}')
#     if label < 25:
#         disp.ax_.set_xlabel('')
#     if label % 5 != 0:
#         disp.ax_.set_ylabel('')
#     disp.im_.colorbar.remove()

# plt.subplots_adjust(wspace=0.2, hspace=0.001)
# f.colorbar(disp.im_, ax=axes)
# plt.show()


In [None]:
# import tensorflow
# tensorflow.keras.utils.plot_model(model, show_shapes=True)
# model.summary()

Visualise the model.

In [None]:
# import tensorflow
# tensorflow.keras.utils.plot_model(
#     model, to_file=model_dir.joinpath('model.png'),
#     show_shapes=True, show_layer_activations=True,
# )

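Correlate year-on-year changes in the model's predictions with changes in climate variables. (TODO: decide whether this section should be moved elsewhere.)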

In [None]:
# import plotly.express as px

# class_names = utils.selected_classes.columns
# indices = utils.selected_classes.index

# preds_path = model_dir.joinpath(f'preds_2020.npy')
# y_prev = np.load(preds_path)

# weather_prev = pd.read_csv(Path('weather_data', 'era5_2020.csv'))
# eval_years = [2021, 2022, 2023]
# corrs = []

# for eval_year in eval_years:
#     preds_path = model_dir.joinpath(f'preds_{eval_year}.npy')
#     y_pred = np.load(preds_path)
#     y_diff = pd.DataFrame(y_pred - y_prev, columns=class_names)
    
#     weather = pd.read_csv(Path('weather_data', f'era5_{eval_year}.csv'))
#     weather_diff = ((weather - weather_prev)
#                     .loc[indices]
#                     .iloc[:y_pred.shape[0]]
#                     .iloc[y_diff.index])
    
#     corr = y_diff.join(weather_diff).corr(method='pearson').round(2)
#     corrs.append(corr)

#     y_prev = y_pred
#     weather_prev = weather

# fig = px.imshow(
#     np.array(corrs),
#     animation_frame=0,
#     labels=dict(color="Corr coef"),
#     x=corrs[0].index,
#     y=corrs[0].columns,
#     title='Annual correlation heatmap',
#     text_auto=True, aspect='auto', zmin=0, height=500
# )
# fig.layout.sliders[0]['currentvalue']['prefix'] = ''
# for year, step in zip(eval_years, fig.layout.sliders[0].steps):
#     step.label = str(year)

# fig