In [1]:
import sys
sys.dont_write_bytecode = True

import numpy as np
import pandas as pd

from tqdm.notebook import tqdm

from pathlib import Path

from time import sleep

from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix, ConfusionMatrixDisplay, multilabel_confusion_matrix
from sklearn.utils import shuffle

from sentinel_utils import SentinelUtils
from keras_model_creator import KerasModelCreator

2024-06-27 15:57:58.799385: I external/local_tsl/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2024-06-27 15:57:58.802834: I external/local_tsl/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2024-06-27 15:57:58.838519: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
# Band names available in each feature shard, in channel order.
# NOTE(review): presumably Sentinel-2 bands plus true-colour channels — confirm
# the order against how the shards were written.
numbered_bands = [f'B{n}' for n in range(2, 9)]
named_bands = ['B8A', 'B11', 'B12', 'TCI_R', 'TCI_G', 'TCI_B']
all_bands = numbered_bands + named_bands

# Use every band by default; narrow `selected_bands` to train on a subset.
selected_bands = all_bands

# Channel indices (positions within `all_bands`) of the selected bands.
bands = list(map(all_bands.index, selected_bands))

In [3]:
# Root folder holding the per-sample label and feature shards.
shards_dir = Path.home() / 'sentinel_data' / 'shards'


def sort_key(x):
    """Return the integer after the last underscore of the file stem.

    Used so shards sort numerically rather than lexicographically.
    """
    return int(x.stem.split('_')[-1])


labels_dir = shards_dir.joinpath('labels_2017')
features_dir = shards_dir.joinpath('features_2017')
label_shards = sorted(labels_dir.glob('label_*.npy'), key=sort_key)
feature_shards = sorted(features_dir.glob('feature_*.npy'), key=sort_key)

# Deterministically shuffled subset of at most 50000 feature shards.
# NOTE(review): the recorded model directory name reports 237797 samples, which
# exceeds this cap — presumably `keep_shards` is read from a cached selection
# because overwrite_selection=False; confirm in SentinelUtils.
sampled_feature_shards = shuffle(feature_shards, random_state=42)[:50000]

p = SentinelUtils(
    sampled_feature_shards,
    label_shards,
    min_occurrences=5000,
    overwrite_summary=False,
    overwrite_selection=False,
)

# Results exposed by SentinelUtils that the following cells rely on.
data_summary = p.data_summary
IDs = p.keep_shards
keep_classes = p.keep_classes

# IDs = shuffle([int(f.stem.split('_')[-1]) for f in label_shards], random_state=42)

In [4]:
# Run configuration; each value below also becomes part of the model directory name.
data_tag = '2017'
architecture = 'simple'
loss = 'binary_crossentropy'

# First two axes of one feature shard.
# NOTE(review): assumed to be the spatial (height, width) dims — confirm.
sample_shape = np.load(feature_shards[0]).shape
dim = sample_shape[:2]

n_samples = len(IDs)

# models/<architecture>-<loss>-<n_samples>-<n_classes>-<data_tag>
run_name = '-'.join(
    [architecture, loss, str(n_samples), str(len(keep_classes)), data_tag]
)
model_dir = Path('models') / run_name
model_dir.mkdir(parents=True, exist_ok=True)

model_dir

PosixPath('models/simple-binary_crossentropy-237797-30-2017')

In [11]:
# Re-import the local module so edits to keras_model_creator.py are picked up
# without restarting the kernel.
from importlib import reload
import keras_model_creator
reload(keras_model_creator)
from keras_model_creator import KerasModelCreator

# Keyword arguments forwarded verbatim to KerasModelCreator.
params = dict(
    dim=dim,
    shards_dir=shards_dir,
    # Reuse the configured tag (also baked into `model_dir`) instead of
    # repeating the literal, so the two can never drift apart.
    data_tag=data_tag,
    IDs=IDs,
    keep_classes=keep_classes,
    model_dir=model_dir,
    bands=bands,
    architecture=architecture,
    loss=loss,
    batch_size=64,
    base_filters=32,
    dropout=0.2,
    epochs=5,
    overwrite=False
)

# NOTE(review): the recorded run of this cell stopped during fitting with
# "IndexError: too many indices for array: array is 1-dimensional, but 2 were
# indexed". The cause is inside KerasModelCreator (or the generator it builds)
# and is not visible from this notebook — investigate there.
model, testing_generator = KerasModelCreator(**params).run()

Building model...
Fitting...


IndexError: too many indices for array: array is 1-dimensional, but 2 were indexed

In [None]:
# Evaluate the model on the test generator once and cache the scores as CSV;
# later runs read the cached file instead of re-evaluating.
eval_path = model_dir.joinpath('eval.csv')
if eval_path.is_file():
    # index_col=0 restores the row labels written by to_csv() as the index;
    # without it they come back as an extra "Unnamed: 0" column and the cached
    # frame no longer matches the freshly computed one.
    df = pd.read_csv(eval_path, index_col=0)
else:
    r = model.evaluate(x=testing_generator, verbose=1, return_dict=True)
    # One row per key of the returned dict, with a single 'score' column.
    df = pd.DataFrame.from_dict(r, orient='index', columns=['score'])
    df.to_csv(eval_path)
print(df)

In [None]:
# Predictions for the test generator, cached on disk next to the model so the
# (potentially slow) predict pass only runs when no cache file exists.
preds_path = model_dir / 'preds.npy'
if not preds_path.is_file():
    y_pred = model.predict(x=testing_generator, verbose=1)
    np.save(preds_path, y_pred)
else:
    y_pred = np.load(preds_path)

In [None]:
# Collect the ground-truth labels of the test generator batch by batch and
# cache the stacked array next to the model.
# NOTE(review): assumes the generator yields batches in the same order as it
# did for model.predict(), so rows of `y_true` line up with `y_pred` — confirm.
labels_path = model_dir.joinpath('true.npy')
total_batches = len(testing_generator)
if labels_path.is_file():
    y_true = np.load(labels_path)
else:
    # Pairing with range() stops after exactly `total_batches` batches, even if
    # the generator would keep yielding past its declared length, and lets the
    # progress bar show the true batch count.
    batches = zip(range(total_batches), testing_generator)
    y_true = np.vstack([y for _, (_, y) in tqdm(batches, total=total_batches)])
    np.save(labels_path, y_true)

In [None]:
# Pool every (sample, label) entry into one confusion matrix; predictions are
# counted as positive when they exceed 0.3.
true_flat = y_true.astype(int).ravel()
pred_flat = (y_pred > 0.3).astype(int).ravel()
cm = confusion_matrix(true_flat, pred_flat)
ConfusionMatrixDisplay(confusion_matrix=cm).plot()

In [None]:
import matplotlib.pyplot as plt

# Display names for the label columns, in column order.
# NOTE(review): hard-coded; presumably mirrors `keep_classes` — confirm.
cols = ['Abies alba', 'Acer campestre', 'Acer pseudoplatanus', 'Alnus glutinosa', 'Alnus incana', 
        'Betula pendula', 'Betula pubescens', 'Carpinus betulus', 'Castanea sativa', 
        'Fagus sylvatica', 'Fraxinus excelsior', 'Larix decidua', 'Picea abies', 
        'Picea sitchensis', 'Pinus halepensis', 'Pinus nigra', 'Pinus pinaster', 
        'Pinus sylvestris', 'Populus tremula', 'Prunus avium', 'Pseudotsuga menziesii', 
        'Quercus faginea', 'Quercus ilex', 'Quercus petraea', 'Quercus pubescens', 'Quercus pyrenaica', 
        'Quercus robur', 'Quercus suber', 'Salix caprea', 'Sorbus aucuparia']

n_labels = y_true.shape[1]
# Fail loudly rather than mislabel panels if the class selection changes.
assert len(cols) == n_labels, (
    f'{len(cols)} class names given for {n_labels} label columns'
)

threshold = 0.5  # predictions above this count as positive for each label
n_cols = 5
n_rows = -(-n_labels // n_cols)  # ceiling division

# 5x5-inch panels; squeeze=False keeps `axes` 2-D even for a single row.
f, axes = plt.subplots(n_rows, n_cols, figsize=(5 * n_cols, 5 * n_rows), squeeze=False)
axes = axes.ravel()
for label in range(n_labels):
    cm = confusion_matrix(
        y_true[..., label].astype(int),
        (y_pred[..., label] > threshold).astype(int),
    )
    disp = ConfusionMatrixDisplay(cm)
    # colorbar=False: a single shared colour bar is added after the loop.
    disp.plot(ax=axes[label], values_format='.4g', colorbar=False)
    disp.ax_.set_title(f'{cols[label]}')
    # Keep x labels on the bottom row and y labels on the first column only.
    if label // n_cols < n_rows - 1:
        disp.ax_.set_xlabel('')
    if label % n_cols != 0:
        disp.ax_.set_ylabel('')

# Hide any spare panels when the label count does not fill the grid.
for spare_ax in axes[n_labels:]:
    spare_ax.set_visible(False)

plt.subplots_adjust(wspace=0.2, hspace=0.001)
# NOTE(review): the colour bar is built from the last panel's image only, so
# its scale is presumably just indicative for the other panels — confirm.
f.colorbar(disp.im_, ax=axes)
plt.show()


In [None]:
# tf.keras.utils.plot_model(model, show_shapes=True)
# model.summary()

In [None]:
# import subprocess
# subprocess.run(['sudo', 'shutdown', 'now'])