In [None]:
# Enable IPython's autoreload extension in mode 2, so imported modules are
# reloaded before each cell executes and edits to project code are picked up
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

Declaration of parameters (the following cell must also be tagged `parameters`)

In [None]:
# Substep parameters used for an interactive run.
# During a job run this cell is replaced with the parameters from the json
# within the params subfolder.
# "threshold_accuracy" is the value the test-set accuracy is compared against
# at the end of this notebook.
substep_params = {"threshold_accuracy": 0.5}

In [None]:
# load pipeline and step parameters - do not edit
# Both helpers are called with pprint=True; their results are handed to
# NotebookSubstep together with substep_params when the substep is defined.
from sinara.substep import get_pipeline_params, get_step_params
pipeline_params = get_pipeline_params(pprint=True)
step_params = get_step_params(pprint=True)

In [None]:
# define substep interface
from sinara.substep import NotebookSubstep, ENV_NAME, PIPELINE_NAME, ZONE_NAME, STEP_NAME, RUN_ID, ENTITY_NAME, ENTITY_PATH, SUBSTEP_NAME

# The substep object is built from the pipeline, step and substep parameters loaded above.
substep = NotebookSubstep(pipeline_params, step_params, substep_params)

# This notebook declares a single temporary input entity, "inference_result_dataset";
# its location is later obtained through substep.tmp_inputs().
substep.interface(    
    tmp_inputs =
    [
        { ENTITY_NAME: "inference_result_dataset" },
    ]
)

substep.print_interface_info()

# NOTE(review): presumably stops the notebook at this point when it is executed only
# to visualize the interface - confirm against the Sinara documentation.
substep.exit_in_visualize_mode()

In [None]:
# specify all notebook wide libraries imports here
# Sinara lib imports are left at the place of their usage
import json
import os
import os.path as osp
import glob
import matplotlib.pyplot as plt
import numpy as np

from sklearn.metrics import multilabel_confusion_matrix, confusion_matrix, ConfusionMatrixDisplay
from sklearn.metrics import classification_report

### Load inference_result_dataset

In [None]:
# Resolve the temporary input entity that holds the inference results
tmp_inputs = substep.tmp_inputs()
inference_result_dir = tmp_inputs.inference_result_dataset

# Per-sample prediction records for the test dataset
predict_test_dataset_file = osp.join(inference_result_dir, 'predict_test_dataset.json')
with open(predict_test_dataset_file, 'r') as predictions_fp:
    predict_test_dataset = json.load(predictions_fp)

# Category mapping; only its values are used further on, as the class names
categories_file = osp.join(inference_result_dir, 'categories.json')
with open(categories_file, 'r') as categories_fp:
    categories = json.load(categories_fp)

category_names = [*categories.values()]
n_categories = len(category_names)

### Get ground truth classification and prediction

In [None]:
# Collect the ground-truth class index, predicted class index and prediction
# score of every sample into separate arrays (one entry per sample).
ground_truth_labels = np.array([row["ground_true_class_index"] for row in predict_test_dataset])
predict_labels = np.array([row["predict_class_index"] for row in predict_test_dataset])
predict_scores = np.array([row["predict_class_score"] for row in predict_test_dataset])

# One-hot encode the ground truth: picking row `i` of the identity matrix
# yields a vector with a single 1 at class index `i`.
ground_truth_labels_one_hot = np.eye(n_categories)[ground_truth_labels]

### Evaluate the test dataset 

#### Eval Precision-Recall Curve

In [None]:
# Project-local plotting helper, imported at the place of its usage.
from utils.metrics import plot_precision_recall

# Draw precision-recall curves from the one-hot ground truth and the prediction scores.
# NOTE(review): presumably predict_scores holds one score per category for each sample,
# matching the columns of the one-hot array - confirm against utils.metrics.
plot_precision_recall(ground_truth_labels_one_hot, predict_scores, class_names=category_names)

#### Eval Confusion Matrix

In [None]:
# Confusion matrix over the test dataset (rows: ground-truth class, columns: predicted class).
#
# `labels` is passed explicitly so the matrix always has exactly one row/column per
# category. Without it, sklearn builds the matrix only from the classes that occur in
# the data; if a category is absent from both the ground truth and the predictions,
# the matrix is smaller than `category_names` and plotting with
# `display_labels=category_names` fails on the tick-label count mismatch.
all_class_indices = np.arange(n_categories)
val_confusion_matrix = confusion_matrix(ground_truth_labels, predict_labels, labels=all_class_indices)

disp = ConfusionMatrixDisplay(confusion_matrix=val_confusion_matrix, display_labels=category_names)
disp.plot(include_values=True, cmap="viridis", ax=None, xticks_rotation="vertical")
# Give the figure a title so it stands alone when the notebook is skimmed.
disp.ax_.set_title("Confusion matrix (test dataset)")
plt.show()

#### Eval Average Precision, Recall Metrics

In [None]:
# Per-class precision / recall / F1 and overall accuracy on the test dataset.
#
# `labels` is passed explicitly: when it is omitted, sklearn derives the class list from
# the data and raises ValueError if the number of observed classes differs from
# len(target_names), i.e. whenever some category does not occur in the test split.
# Because the labels cover every class index, the report keeps its "accuracy" entry,
# which the accuracy check at the end of the notebook reads.
report_kwargs = dict(labels=np.arange(n_categories), target_names=category_names)

# Dict form is kept for programmatic access; the text form is printed for the reader.
val_report = classification_report(ground_truth_labels, predict_labels, output_dict=True, **report_kwargs)
print(classification_report(ground_truth_labels, predict_labels, **report_kwargs))

### Check the accuracy metric against the threshold

In [None]:
# Quality gate: the notebook fails unless the accuracy measured on the test
# dataset is strictly greater than the configured threshold.
accuracy = val_report["accuracy"]
threshold_accuracy = substep_params["threshold_accuracy"]

print(f"accuracy = {accuracy}")
assert accuracy > threshold_accuracy, (
    f"The calculated Accuracy metric on the test dataset is less than the acceptable value <{threshold_accuracy}"
)