In [1]:
import logging
import os

# Both variables are written to the environment before `import tensorflow`
# further down in this cell.
# NOTE(review): presumably TF_CPP_MIN_LOG_LEVEL='3' silences TensorFlow's
# native log output and TF_FORCE_GPU_ALLOW_GROWTH='true' makes GPU memory be
# allocated on demand -- confirm against the TensorFlow documentation.
os.environ.update({
    'TF_CPP_MIN_LOG_LEVEL': '3',
    'TF_FORCE_GPU_ALLOW_GROWTH': 'true',
})

# Drop every Python log record at WARNING severity or lower, process-wide.
logging.disable(logging.WARNING)

from pathlib import Path

import tensorflow as tf
import pandas as pd
import numpy as np

from importlib import reload
import sentinel_utils
import best_model_creator
from data_generator import DataGenerator

In [2]:
# Re-import sentinel_utils so that edits made to the module on disk are picked
# up without restarting the kernel.
reload(sentinel_utils)

# NOTE(review): min_occurrences presumably drops classes with fewer than
# 20000 labelled samples -- confirm in sentinel_utils.SentinelUtils.
utils = sentinel_utils.SentinelUtils(
    min_occurrences=20000,
)

# Label table: later cells use its index as sample IDs and its columns as the
# class names.
selected_classes = utils.get_processed_labels()

training_years = '2017_2018_2019'
data_summary = utils.get_data_summary(
    selected_classes,
    training_years=training_years,
    overwrite_existing=False,
)

# Single source of truth for the model directory. (A previous, unused
# assignment to 'models/selected_model' was removed: it was overwritten here
# before anything read it.)
model_parent_dir = Path('models')
model_dir = model_parent_dir.joinpath('best_model')
model_dir.mkdir(parents=True, exist_ok=True)

# Keyword arguments forwarded verbatim to best_model_creator.KerasModelCreator.
params = dict(
    selected_classes=selected_classes,
    data_summary=data_summary,
    model_dir=model_dir,
    batch_size=64,
    years=training_years,
    epochs=30,
    overwrite=False,
    verbose=1,
    print_log=0,
)

In [3]:
# Reload first so that local edits to best_model_creator take effect without
# a kernel restart.
reload(best_model_creator)

# run() hands back the model together with the generator that the following
# cells predict on and evaluate against.
# NOTE(review): with overwrite=False this presumably reuses an existing model
# in model_dir instead of retraining -- confirm in best_model_creator.
KerasModelCreator = best_model_creator.KerasModelCreator
model, testing_generator = KerasModelCreator(**params).run()

In [4]:
# Predictions for the testing generator are cached next to the model so the
# (potentially slow) predict pass only runs when no cache file exists yet.
# NOTE(review): the cache is keyed on the file name only; delete the .npy file
# after retraining or the stale predictions will be reused.
save_path = model_dir / 'testing_gen_eval.npy'

if not save_path.is_file():
    y_pred = model.predict(testing_generator)
    np.save(save_path, y_pred)
else:
    y_pred = np.load(save_path)

In [27]:
# One probability cut-off shared by every metric below so they stay comparable.
DECISION_THRESHOLD = 0.5

# F-beta metric instances; average=None keeps one score per class instead of
# collapsing them into a single number.
fbeta_metrics = [
    tf.keras.metrics.FBetaScore(
        average=None,
        beta=beta,
        threshold=DECISION_THRESHOLD,
        name=metric_name,
    )
    for metric_name, beta in (('f2_score', 2.0), ('f1_score', 1.0))
]

# Ground-truth rows for exactly the samples that were predicted: y_pred may be
# shorter than the generator's ID list, hence the slice.
# NOTE(review): this assumes the generator yielded samples in all_IDs order
# (no shuffling) -- confirm in data_generator.DataGenerator.
y_true_indices = testing_generator.all_IDs[:y_pred.shape[0]].tolist()
y_true = selected_classes.filter(items=y_true_indices, axis=0)

# filter() silently skips IDs that are missing from selected_classes, which
# would misalign labels and predictions; fail loudly instead.
assert len(y_true) == len(y_pred), (
    f'label/prediction row mismatch: {len(y_true)} != {len(y_pred)}'
)

# Metric name -> per-class values (one entry per column of selected_classes).
results = {}
for m in fbeta_metrics:
    m.update_state(y_true, y_pred)
    results[m.name] = m.result().numpy()

# Recall and Precision are evaluated one class at a time via class_id, so each
# class needs a fresh metric instance.
metrics = {
    'recall': tf.keras.metrics.Recall,
    'precision': tf.keras.metrics.Precision,
}

for name, metric in metrics.items():
    m_results = []
    for i in range(y_pred.shape[1]):
        m = metric(class_id=i, thresholds=DECISION_THRESHOLD)
        # Reuse y_true rather than re-filtering the label table per class.
        m.update_state(y_true, y_pred)
        m_results.append(m.result().numpy())
    results[name] = m_results

In [28]:
# Every labelled sample that was not part of the evaluated slice.
# NOTE(review): 'train_count' therefore covers all non-evaluated rows; if a
# separate validation split exists it is included here -- confirm.
non_eval_ids = selected_classes.index.difference(y_true_indices)

# One row per class: metric columns first, then the number of positive labels
# in the evaluated and the non-evaluated samples.
df = pd.DataFrame(results, index=selected_classes.columns).assign(
    eval_count=y_true.sum().astype(int),
    train_count=selected_classes.loc[non_eval_ids].sum().astype(int),
)

# Displayed view only: round, then rank classes by F2 (best first).
df.round(2).sort_values('f2_score', ascending=False)

Unnamed: 0,f2_score,f1_score,recall,precision,eval_count,train_count
Pinus,0.82,0.81,0.82,0.79,4982,113811
Picea,0.79,0.78,0.79,0.78,3389,75100
Quercus,0.75,0.75,0.74,0.75,3681,83306
Fagus,0.59,0.62,0.58,0.68,1528,34325
Betula,0.49,0.53,0.46,0.62,1840,40407
Fraxinus,0.25,0.3,0.22,0.45,872,20016
Acer,0.11,0.15,0.09,0.41,866,19588
