# Ensembler Evaluation 


In [None]:
import sys
# Add the parent directory to the module search path — presumably so the
# project-local `gerumo` package imported below can be found; TODO confirm.
sys.path.append('..')

In [None]:
import os
# TensorFlow flag: grow GPU memory on demand instead of reserving it all up front.
os.environ['TF_FORCE_GPU_ALLOW_GROWTH'] = '1'
# An empty device list hides every CUDA GPU from this process.
# NOTE(review): with no visible GPU the allow-growth flag above has no effect —
# confirm that CPU-only evaluation is intended.
os.environ['CUDA_VISIBLE_DEVICES'] = ''

In [None]:
from tqdm import tqdm
import pandas as pd
import numpy as np

from gerumo.data.dataset import describe_dataset
from gerumo.data.generators import build_generator
from gerumo.utils.engine import (
    setup_cfg, setup_environment, setup_experiment, build_dataset
)
from gerumo.utils.structures import Event, Task
from gerumo.models.base import build_ensembler
from gerumo.visualization.samples import event_regression
from gerumo.data.constants import TELESCOPES

class dotdict(dict):
    """Dictionary whose keys can also be read, set and deleted as attributes.

    Reading a key that is absent through attribute access yields ``None``
    instead of raising; deleting an absent key raises ``KeyError``.
    """

    def __getattr__(self, name):
        # Only reached when regular attribute lookup fails, so dict methods
        # such as ``keys`` keep working; unknown names fall back to None.
        return dict.get(self, name)

    def __setattr__(self, name, value):
        # Attribute assignment stores the value as a dictionary item.
        dict.__setitem__(self, name, value)

    def __delattr__(self, name):
        # Attribute deletion removes the dictionary item.
        dict.__delitem__(self, name)


# Notebook-wide container for the evaluation arguments
args = dotdict()

## Select experiment

In [None]:
# Configuration file of the trained ensembler to evaluate
args.config_file = '/home/asuka/projects/gerumo2/config/regression/umonne/umonne_ensembler.yml'

# Evaluate on the validation dataset (True) or on the test dataset (False)
args.use_validation = True

# Optional name for the results subfolder (e.g. on axis / off axis);
# None falls back to the name of the evaluated dataset split
args.dataset_name = None

# Minimum number of observations an event needs in order to be kept
args.min_obs = 3

# Extra options handed to the configuration setup — presumably key/value
# overrides of the config file; TODO confirm against `setup_cfg`
args.opts = [
    #'DATASETS.TEST.EVENTS', '',
    #'DATASETS.TEST.TELESCOPES', ''
]

## Setup

In [None]:
# Load the configurations
cfg = setup_cfg(args)
# Directory where the ensemble evaluation outputs are written; it is used below
# with `/` and `.mkdir`, so presumably a pathlib.Path — TODO confirm.
evaluation_dir = setup_experiment(cfg, ensemble=True)
# The returned logger is passed to `describe_dataset` further down.
logger = setup_environment(cfg)

## Load evaluation dataset

In [None]:
# Pick the dataset split to evaluate on
evaluation_dataset_name = 'validation' if args.use_validation else 'test'

# Results go into a subfolder named after the explicit dataset name when one
# is given, otherwise after the split; the observation threshold is appended
# as a suffix whenever it actually filters something (min_obs > 1)
evaluation_subfolder = (
    evaluation_dataset_name if args.dataset_name is None else args.dataset_name
)
if args.min_obs > 1:
    evaluation_subfolder = f'{evaluation_subfolder}_min_obs_{args.min_obs}'
evaluation_dir = evaluation_dir / evaluation_subfolder
evaluation_dir.mkdir(exist_ok=True)

# Build evaluation dataset
evaluation_dataset = build_dataset(cfg, evaluation_dataset_name)

# Keep only the rows of events that have at least `min_obs` observations
observations_per_event = (
    evaluation_dataset
    .groupby('event_unique_id')
    .event_id
    .transform('size')
)
evaluation_dataset = evaluation_dataset[observations_per_event >= args.min_obs]

# Log a summary of the filtered dataset and save it next to the results
describe_dataset(evaluation_dataset, logger, save_to=evaluation_dir / 'description.txt')

In [None]:
# Display the filtered evaluation dataset
evaluation_dataset

## Build generator

In [None]:
# Batch generator over the filtered dataset; the evaluation loop below iterates
# it as (inputs, true events) pairs.
evaluation_generator = build_generator(cfg, evaluation_dataset)

## Load Ensembler

In [None]:
#%%capture
# Input shapes reported by the generator are needed to build the ensembler.
input_shapes = evaluation_generator.get_input_shape()
# Build the ensembler described by the configuration.
ensembler = build_ensembler(cfg, input_shapes)

## Start evaluation

In [None]:
# Run the ensembler over every batch, collecting the events with their
# predictions attached and the per-sample uncertainties
events, uncertainties = [], []
for X, event_true in tqdm(evaluation_generator):
    predictions, y_pred, uncertainty = ensembler(X, uncertainty=True)
    events.extend(Event.add_prediction_list(event_true, predictions, ensembler.task))
    # Iterating the array yields one entry per element along its first axis
    uncertainties.extend(uncertainty.numpy())

# Turn the events into a table, add the uncertainty column and save to disk
evaluation_results = Event.list_to_dataframe(events)
evaluation_results['uncertainty'] = uncertainties
results_path = evaluation_dir / 'results.csv'
evaluation_results.to_csv(results_path, index=False)

In [None]:
# Display the results table produced by the evaluation loop
evaluation_results

## Plot results

In [None]:
from gerumo.visualization import metrics

In [None]:
# Reload the saved results from disk — presumably so the plots below can be
# produced without re-running the evaluation loop; TODO confirm.
evaluation_results = pd.read_csv(evaluation_dir / 'results.csv')
evaluation_results

In [None]:
if ensembler.task is Task.REGRESSION:
    # Each configured target name is reduced to its second '_'-separated token
    targets = [
        target_name.split('_')[1]
        for target_name in cfg.OUTPUT.REGRESSION.TARGETS
    ]
    # Predicted vs. true values for every target
    metrics.targets_regression(evaluation_results, targets)
    # Reconstruction resolution, with the y axis limited to (0, 2)
    metrics.reconstruction_resolution(evaluation_results, targets, ylim=(0, 2))
    # Theta2 distribution
    metrics.theta2_distribution(evaluation_results, targets)

In [None]:
if ensembler.task is Task.CLASSIFICATION:
    # Class labels come from the generator's output mapper
    labels = evaluation_generator.output_mapper.classes
    predicted_class_ids = evaluation_results.pred_class_id
    true_class_ids = evaluation_results.true_class_id
    # NOTE(review): predictions are passed before the ground truth — confirm
    # this matches the argument order expected by `gerumo.visualization.metrics`.
    # Classification report
    metrics.classification_report(predicted_class_ids, true_class_ids, labels=labels)
    # Confusion matrix
    metrics.confusion_matrix(predicted_class_ids, true_class_ids, labels=labels)


# Sample Visualization

In [None]:
# Number of samples drawn from one random batch for the visualization below
n_samples = 4

In [None]:
# Select random batch
batch_i = np.random.randint(len(evaluation_generator))
X, event_true = evaluation_generator[batch_i]

# Select samples from batch
# Indices are drawn independently, so the same sample can be picked more than once.
# NOTE(review): assumes len(X) is the number of samples in the batch and that
# X is indexable per sample — confirm for multi-input batches.
samples_j = np.random.randint(len(X), size=n_samples)
X = [X[j] for j in samples_j]
event_true = [event_true[j] for j in samples_j]

# Prediction
# NOTE: this rebinds `uncertainties`, replacing the list collected during the
# evaluation loop earlier in the notebook.
predictions, y, uncertainties = ensembler(X, uncertainty=True)
event_predictions = Event.add_prediction_list(event_true, predictions, ensembler.task)

In [None]:
# Per-sample regression plots are currently disabled: the branch is a no-op and
# the commented-out sketch below is kept as a starting point.
if ensembler.task is Task.REGRESSION:
    pass
    # output_type = ensembler.models[ensembler.telescopes[0]].REGRESSION_OUTPUT_TYPE
    # targets = cfg.OUTPUT.REGRESSION.TARGETS
    # targets_domains = cfg.OUTPUT.REGRESSION.TARGETS_DOMAINS
    # for j in range(n_samples):
    #     # Plot input
    #     input_observation = X[j]
    #     # Plot event prediction
    #     event_prediction = event_predictions[j]
    #     model_output = y[j]
    #     event_regression(event_prediction, model_output, output_type, targets, targets_domains)
    #     # Plot uncertainty
    #     uncertainty = uncertainties[j]

In [None]:
# Placeholder: no per-sample visualization is implemented for classification.
if ensembler.task is Task.CLASSIFICATION:
    pass