In [None]:
import pandas as pd
import tensorflow as tf
import dataset_ops
import functools
from transfer_learning import evaluate_model
import numpy as np  # noqa
import matplotlib.pyplot as plt  # noqa
from model_helper import make_model
from metrics import F1, Precision, Recall, soft_dice_loss, remove_clutter_one_sample, ClassPrecision, ClassRecall
import datetime
import cuda
import pandas_format  # noqa
from pathlib import Path
from tensorboard.plugins.hparams import api as hp
try:
    from tqdm import notebook as tqdm
except ImportError:
    tqdm = None

%matplotlib inline
%load_ext tensorboard
%load_ext autoreload
%autoreload 2

# Run the project-local CUDA setup helper (see the `cuda` module).
cuda.initialize()


# Test-run source: the MicroPilot manager reading from the `h5` directory with
# `runs.hdf` as its runs file. The Paparazzi alternative is kept for reference.
dataset_manager = dataset_ops.MicroPilotTestsManager(
    runs_filename='runs.hdf',
    dataset_dir=Path('h5'),
)
# dataset_manager = dataset_ops.PaparazziTestManager(dataset_dir=Path('pprz_h5'), runs_filename='pprz_runs.hdf')
all_runs = dataset_manager.get_all_available_tests()


# Keep only the runs whose 'Test Length' is strictly between 200 and 20000.
test_lengths = all_runs['Test Length']
longer_than_minimum = test_lengths > 200
shorter_than_maximum = test_lengths < 20000
selected_runs = all_runs.loc[longer_than_minimum & shorter_than_maximum]
# selected_runs = selected_runs.iloc[:40]
# selected_runs = all_runs.sample(frac=1, axis=1, random_state=55)
# tl_plot = selected_runs['Test Length'].plot(kind='hist', bins=25, figsize=[10,5])
# tl_plot.tick_params(labelsize=14)
# tl_plot.set_xlim([10,18000])
# tl_plot.set_xlabel('Test Length ($l_k$)', fontsize=15)
# tl_plot.set_ylabel('Number of Tests', fontsize=15)
# tl_plot.figure.savefig('paper_data/test_lengths.png')
# #selected_runs
# print(all_runs.shape, selected_runs.shape)
# selected_runs['Test Length'].mean()

In [None]:
# Feature names requested from the dataset. `inputs` and `outputs` are also
# handed to the model builder in a later cell.
inputs = ('SpeedFts', 'Pitch', 'Roll', 'Yaw', 'current_altitude', )
outputs = ('elev', 'ai', 'rdr', 'throttle', 'Flaps')

# Maximum sequence length passed to both the dataset and the model builder.
max_length = 18000

tfdataset = dataset_ops.TensorflowDataset(dataset_manager)
train_dataset, test_dataset, validation_dataset = dataset_ops.split_dataset(
    tfdataset.get_dataset(selected_runs, features=inputs + outputs, max_length=max_length),
    split_proportion=(6, 1, 3)
)  # proportions 6:1:3 -> presumably 60% / 10% / 30% in the order unpacked above

# Batch first, shuffle the batches, and prefetch LAST so that complete batches
# are prepared in the background while the previous one is being consumed.
# (Prefetching before `batch` only buffers single, un-batched elements.)
train_dataset, test_dataset, validation_dataset = (
    dataset.batch(25)
           .shuffle(buffer_size=15)
           .prefetch(buffer_size=tf.data.experimental.AUTOTUNE)
    for dataset in (train_dataset, test_dataset, validation_dataset)
)

# Sanity check: the manager must know about at least one state/class.
assert dataset_manager.count_states() > 0

train_dataset.element_spec

In [None]:
def create_prec_recall_f1(tolerance):
    """Build a linked precision / recall / F1 metric trio for one tolerance.

    Args:
        tolerance: Value forwarded to ``Precision`` and ``Recall`` as their
            ``tolerance`` argument; it is also embedded in the metric names
            (``prec_<tolerance>`` and ``recall_<tolerance>``).

    Returns:
        A list ``[precision, recall, f1]`` where ``f1`` is an ``F1`` metric
        constructed from the same precision and recall objects.
    """
    precision_metric = Precision(tolerance=tolerance, name=f'prec_{tolerance}')
    recall_metric = Recall(tolerance=tolerance, name=f'recall_{tolerance}')
    f1_metric = F1(precision_metric, recall_metric)

    # ClassPrecision() / ClassRecall() are intentionally not part of this trio.
    return [precision_metric, recall_metric, f1_metric]


# Metrics Keras tracks while training `full_model` (tolerance 25 only).
evaluation_metrics = create_prec_recall_f1(25)

# Metrics used for the reported tables: precision and recall at tolerances
# 5, 15 and 25 (the trailing F1 entry of every trio is dropped) followed by the
# class-level precision and recall.
metrics_reporting = [
    metric
    for tolerance in (5, 15, 25)
    for metric in create_prec_recall_f1(tolerance)[:-1]
] + [ClassPrecision(), ClassRecall()]

LEARNING_RATE = 3e-5


def new_optimizer():
    """Return a fresh Adam optimizer configured with ``LEARNING_RATE``.

    `learning_rate` is the supported keyword; `lr` is a deprecated alias.
    """
    return tf.keras.optimizers.Adam(learning_rate=LEARNING_RATE)


# Every model gets its OWN optimizer and metric objects. Both are stateful
# (step counter / per-variable moments, metric accumulators), and recent Keras
# releases refuse to apply an optimizer to variables it was not built with.
optimizer = new_optimizer()  # optimizer of `full_model`

# MP:
mp_model_builder = functools.partial(make_model, inputs, outputs, max_length, n_states=dataset_manager.count_states())

# Full model: convolutional stack followed by two GRU layers.
full_model = mp_model_builder(convs=[(64, 3), (64, 5), (64, 10), (64, 15), (64, 20)], grus=[128, 128], name='mp_model')
full_model.summary()
full_model.compile(loss=soft_dice_loss, optimizer=optimizer, metrics=evaluation_metrics)

# Baseline 1: same convolutional stack, no recurrent layers.
cnn_baseline_model = mp_model_builder(convs=[(64, 3), (64, 5), (64, 10), (64, 15), (64, 20)], grus=[], name='convolutional_baseline')
cnn_baseline_model.summary()
cnn_baseline_model.compile(loss=soft_dice_loss, optimizer=new_optimizer(), metrics=create_prec_recall_f1(25))

# Baseline 2: same GRU layers behind a single (1, 1) convolution entry.
rnn_baseline_model = mp_model_builder(convs=[(1, 1)], grus=[128, 128], name='recurrent_baseline')
rnn_baseline_model.summary()
rnn_baseline_model.compile(loss=soft_dice_loss, optimizer=new_optimizer(), metrics=create_prec_recall_f1(25))

# Filled by the training cell: model name -> result of `evaluate_model`.
evaluation_results = {}

In [None]:
epochs = 500
# epochs = 5

# Saved models live here; create the folder up front so `model.save` cannot
# fail on a missing directory.
models_dir = Path('models')
models_dir.mkdir(parents=True, exist_ok=True)

for model_name, model in zip(('full', 'rnn', 'cnn',), (full_model, rnn_baseline_model, cnn_baseline_model)):
    # Look for an earlier save of this model with the same epoch budget. The
    # file name embeds the training start time, so an exact-name check against
    # "now" can never match; glob over the timestamp part instead. Timestamps
    # are formatted %Y%m%d-%H%M%S, so lexical order is chronological order.
    previous_saves = sorted(models_dir.glob(f'mp_cameraready-{model_name}-*-{epochs}.h5'))
    if previous_saves:
        model.load_weights(str(previous_saves[-1]))
    else:
        training_start_time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
        log_dir = "logs/fit/" + training_start_time
        file_name = f'models/mp_cameraready-{model_name}-{training_start_time}-{epochs}.h5'
        tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)

        history = model.fit(train_dataset,
                            epochs=epochs,
                            validation_data=validation_dataset,
                            callbacks=[
                                tensorboard_callback,
                                # Stop once val_loss has not improved for 5 epochs.
                                tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=5),
                            ])
        model.save(file_name)
        tf.keras.utils.plot_model(model, show_shapes=True, to_file=file_name.replace('.h5', '.png'))

    evaluation_results[model_name] = evaluate_model(model, validation_dataset)

In [None]:
# One row per trained model, one column per reported metric. The metric names
# are assigned positionally to whatever `evaluate_model` returned per model.
_rq3_metric_names = pd.Index([
    'prec_5', 'recall_5',
    'prec_15', 'recall_15',
    'prec_25', 'recall_25',
    'class_precision', 'class_recall',
])
rq3 = pd.DataFrame(evaluation_results).set_index(_rq3_metric_names).T

# Add an F1 column right after each recall column, derived from the matching
# precision / recall pair.
for _tau in (5, 15, 25):
    _recall_column = f'recall_{_tau}'
    p = rq3[f'prec_{_tau}']
    r = rq3[_recall_column]
    f1 = (2 * p * r) / (p + r)
    _f1_position = list(rq3.columns).index(_recall_column) + 1
    rq3.insert(_f1_position, f"F1_{_tau}", f1)

# Class-level F1 is appended as the last column.
p = rq3['class_precision']
r = rq3['class_recall']
rq3['class_F1'] = (2 * p * r) / (p + r)

In [None]:
# Emit the model comparison as a LaTeX table: values scaled by 100, metrics as
# rows, rendered while the project's float formatter is active.
with pandas_format.PandasFloatFormatter('{:,.2f}%'):
    rq3_percent = rq3 * 100
    rq3_latex = rq3_percent.T.to_latex()
    print(rq3_latex)

In [None]:
# For every data split: run the model batch by batch, record the reporting
# metrics for each batch, and save one figure per batch that compares the
# de-cluttered prediction with the ground truth for each run.
#
# NOTE(review): `model` is whatever the training loop bound last (with the loop
# order used there, the 'cnn' baseline). Confirm this is the model intended for
# these plots and for the table built from `results`.

# 15 x 2 grid = 30 axes, one per run of a 30-element batch; axes are reused.
fig, axs = plt.subplots(15, 2, figsize=(15, 8), sharex=True)
flat_axes = axs.reshape(-1)
# N = 2500
N = 1000  # plot at most the first N time steps of each run

folder_name = Path('plots') / 'output_compare' / ('categorical_' + datetime.datetime.now().strftime("%Y%m%d-%H%M%S"))
folder_name.mkdir(parents=True, exist_ok=True)

print('Plotting in', folder_name)

result_rows = []
for set_name, data_set in (('Training', train_dataset), ('Test', test_dataset), ('Validation', validation_dataset)):
    for bi, data in data_set.unbatch().batch(30).enumerate():
        ins, ground_truth = data
        prediction = model.predict_on_batch(ins)
        mask = tf.squeeze(ins['mask'], axis=-1)
        # NOTE(review): squeezing without an axis also drops the batch dimension
        # when the final batch holds a single run — confirm the expected shape.
        ground_truth = tf.squeeze(ground_truth)

        # Metrics are reset for every batch, so each row describes one batch.
        for metric in metrics_reporting:
            metric.reset_states()
            metric.update_state(ground_truth, prediction)

        prediction = tf.math.argmax(prediction, axis=-1)
        no_clutter = tf.map_fn(remove_clutter_one_sample, prediction)
        ground_truth = tf.math.argmax(ground_truth, axis=-1)

        # Position of the first minimum of the mask, capped at N.
        # Presumably the mask is 1 on real samples and 0 on padding — TODO confirm.
        run_length = tf.math.minimum(tf.argmin(mask, axis=-1), N)
        # A result of 0 is treated as "plot N steps". Apply that substitution
        # BEFORE taking the maximum; otherwise a batch made only of such runs
        # yields an x-range of [1, 0] and mixed batches are cropped too early.
        run_length = tf.where(tf.equal(run_length, 0), tf.cast(N, run_length.dtype), run_length)
        max_run_length_in_batch = int(tf.math.reduce_max(run_length))

        result_rows.append(
            [set_name] + [float(metric.result()) for metric in metrics_reporting]
        )

        for prednc, truth, idx, ax in zip(no_clutter, ground_truth, run_length, flat_axes):
            truth, prednc = truth[:idx], prednc[:idx]

            # Row 0: prediction, row 1: ground truth.
            concat = tf.stack((prednc, truth), axis=0)
            # ax.imshow(concat, aspect='auto', interpolation='nearest', vmin=0, vmax=dataset_manager.count_states())
            ax.imshow(concat, aspect='auto', interpolation='nearest')#, vmin=0, vmax=dataset_manager.count_states())
            # Pin the tick positions so the labels always sit on the two rows
            # instead of depending on matplotlib's automatic tick locations.
            ax.set_yticks([0, 1])
            ax.set_yticklabels(['$\\hat{O}$', '$O$'])
            ax.set_xlim([1, max_run_length_in_batch])

        plt.tight_layout()
        # int(bi) keeps the file name a plain integer whatever the tensor's
        # string formatting is.
        fig.savefig(folder_name / f'{set_name}_{int(bi)}.png')
        for ax in flat_axes:
            ax.clear()
# Close exactly the figure created above rather than "the current figure".
plt.close(fig)

columns = ['Dataset'] + [metric.name for metric in metrics_reporting]
results = pd.DataFrame(result_rows, columns=columns)

# results['class_precision'] *= 100
# results['class_recall'] *= 100
results['class_F1'] = 2*results['class_precision']*results['class_recall'] / (results['class_precision']+results['class_recall'])

results

In [None]:
# Average the per-batch metrics within each data split and scale by 100.
per_dataset_mean = results.groupby('Dataset').agg('mean')
df = per_dataset_mean * 100

# Insert an F1 column directly after each recall column, computed from the
# averaged precision and recall of that tolerance.
for _tau in (5, 15, 25):
    _recall_column = f'recall_{_tau}'
    p = df[f'prec_{_tau}']
    r = df[_recall_column]
    f1 = (2 * p * r) / (p + r)
    _f1_position = list(df.columns).index(_recall_column) + 1
    df.insert(_f1_position, f"F1_{_tau}", f1)

df

In [None]:
# `paper_results` is another name for the aggregated frame (no copy is made).
paper_results = df

# LaTeX table of the validation split only, metrics as rows.
with pandas_format.PandasFloatFormatter('{:,.2f}%'):
    validation_only = paper_results.loc[['Validation']]
    print(validation_only.T.to_latex())

In [None]:
# LaTeX table of all three splits, leaving out every column whose name
# contains 'class_'.
with pandas_format.PandasFloatFormatter('{:,.2f}%'):
    split_order = ['Validation', 'Test', 'Training']
    not_class_level = ~paper_results.columns.str.contains('class_')
    print(paper_results.loc[split_order, not_class_level].to_latex())

In [None]:
# Rich display of the validation split, metrics as rows, with the project's
# float formatter active.
with pandas_format.PandasFloatFormatter('{:,.2f}%'):
    validation_transposed = paper_results.loc[['Validation']].T
    display(validation_transposed)