# Example of model analysis for multi-class multi-label classification

#### Package imports and variable definitions

In [None]:
import sys
sys.path.insert(0, '../../')

from odin.classes import TaskType, Metrics, Curves, CustomMetric, DatasetClassification, AnalyzerClassification

# define the path of the ground-truth (GT) .json file
dataset_gt_param = "../../test-data/classification-ml/gt_art.json"

# define the path of the folder that contains the predictions .txt files for each model
path_to_detections = "../../test-data/classification-ml/predictions"
# alternative form: a list of (model_name, predictions_folder) tuples
# (presumably used to load the predictions of several models -- confirm against the library docs)
# path_to_detections = [("Model_A", "../../test-data/classification-ml/predictions"),
#                       ("Model_B", "../../test-data/classification-ml/predictions")]

# define the classification task (CLASSIFICATION_BINARY, CLASSIFICATION_SINGLE_LABEL, CLASSIFICATION_MULTI_LABEL)
classification_type = TaskType.CLASSIFICATION_MULTI_LABEL

# [OPTIONAL] define groups of categories which are similar to each other (useful for the error analysis)
# NOTE(review): category 6 appears in two groups ([2, 6, 10] and [3, 6, 9]) -- confirm the overlap is intended
similar_classes=[[1, 4, 7], [2, 6, 10], [5, 8], [3, 6, 9]]

# [OPTIONAL] define the file name for the meta-annotations
properties_file = "properties_art.json"

## Dataset

In [None]:
# Build the dataset object from the variables defined in the setup cell:
# GT file, task type, predictions location, similar-category groups and
# meta-annotations file. Graphs are not saved as PNG files here.
my_dataset = DatasetClassification(dataset_gt_param, 
                                   classification_type, 
                                   proposals_paths=path_to_detections,
                                   similar_classes=similar_classes,
                                   properties_file=properties_file, 
                                   save_graphs_as_png=False)

#### Reload the properties

In [None]:
# my_dataset.reload_properties(from_file=False)

# If you have directly modified the '[properties].json' file
# my_dataset.reload_properties(from_file=True)

#### Co-occurrence matrix

In [None]:
my_dataset.show_co_occurrence_matrix()

#### Categories distribution

In [None]:
my_dataset.show_distribution_of_categories()

#### Properties distribution

In [None]:
my_dataset.show_distribution_of_properties()


# [OPTIONAL] define specific properties to be analyzed
# properties_to_be_analyzed = ['characters', 'color']

# my_dataset.show_distribution_of_properties(properties=properties_to_be_analyzed)

In [None]:
my_dataset.show_distribution_of_property('color')


# [OPTIONAL] define property values to be included in the analysis
# values = ["wikiart", "gallerix"]

# my_dataset.show_distribution_of_property('source', property_values=values)

#### Per-category property distribution

In [None]:
my_dataset.show_distribution_of_property_for_categories('color')

# [OPTIONAL] define the categories to be analyzed
# categories = ["Paul", "Peter"]

# [OPTIONAL] define property values to be included in the analysis
# values = ["wikiart", "gallerix"]

# my_dataset.show_distribution_of_property_for_categories('source', property_values=values, categories=categories)

In [None]:
my_dataset.show_distribution_of_property_for_category('color', 'Paul')

# [OPTIONAL] define property values to be included in the analysis
# values = ["wikiart", "gallerix"]

# my_dataset.show_distribution_of_property_for_category('source', 'Paul', property_values=values)

## Analyzer

In [None]:
# Create the analyzer for one model on top of the dataset; F1 score is the
# evaluation metric passed here, and graphs are not saved as PNG files.
my_analyzer = AnalyzerClassification('Model_A',  # N.B. the name must match one of the model names in the list of proposals
                                     my_dataset,
                                     metric=Metrics.F1_SCORE,
                                     save_graphs_as_png=False)

#### Load custom display names

In [None]:
# if you change the display names of the categories in the properties file
my_dataset.load_categories_display_names()

# if you change the display names of the properties in the properties file
my_dataset.load_properties_display_names()

### Performance analysis based on meta-annotations

The evaluation metric is computed by considering only ground truth subsets with a specific meta-annotation value.

In [None]:
my_analyzer.analyze_properties()

# [OPTIONAL] define only specific properties to be analyzed
# meta_annotations = ['color', 'source']

# [OPTIONAL] define only specific categories to be analyzed
# categories = ['Paul', 'Peter']

# [OPTIONAL] set the evaluation metric to be used in the analysis
# eval_metric = Metrics.PRECISION_SCORE

# [OPTIONAL] set the split method (by categories or by meta-annotations)
# split_by = "categories"

# my_analyzer.analyze_properties(properties=meta_annotations, categories=categories, metric=eval_metric, split_by=split_by)

In [None]:
my_analyzer.analyze_property("color")

# [OPTIONAL] define only specific values to be analyzed
# meta_annotation_values = ["rgb"]

# [OPTIONAL] define only specific categories to be analyzed
# categories = ['Paul', 'Peter']

# [OPTIONAL] set the evaluation metric to be used in the analysis
# eval_metric = Metrics.PRECISION_SCORE

# [OPTIONAL] set the split method (by categories or by meta-annotations)
# split_by = "categories"

# my_analyzer.analyze_property("color", possible_values= meta_annotation_values, categories=categories, metric=eval_metric,  split_by=split_by)

In [None]:
my_analyzer.analyze_sensitivity_impact_of_properties()

# [OPTIONAL] define specific meta-annotations to be included in the analysis
# meta_annotations = ['characters', 'color']

# [OPTIONAL] set the evaluation metric to be used in the analysis
# eval_metric = Metrics.PRECISION_SCORE

# my_analyzer.analyze_sensitivity_impact_of_properties(properties=meta_annotations, metric=eval_metric)

### False positive error analysis
For each category, FP errors are categorized into "similar", "background", and "other", and the impact of each type of error on the evaluation metric score is calculated.

In [None]:
my_analyzer.analyze_false_positive_errors()

# [OPTIONAL] define specific categories to be analyzed
# categories = ['Paul', 'Peter']

# [OPTIONAL] set the evaluation metric to be used in the analysis
# eval_metric = Metrics.PRECISION_SCORE

# my_analyzer.analyze_false_positive_errors(categories=categories, metric=eval_metric)

In [None]:
my_analyzer.analyze_false_positive_errors_for_category('Paul', metric=Metrics.ACCURACY)

# [OPTIONAL] set the evaluation metric to be used in the analysis
# eval_metric = Metrics.PRECISION_SCORE

# my_analyzer.analyze_false_positive_errors_for_category('Paul', metric=eval_metric)

In [None]:
my_analyzer.analyze_false_positive_trend()

# [OPTIONAL] define specific categories to be analyzed
# categories = ['Peter', 'Jerome']

# [OPTIONAL] define whether the correct predictions should be included
# include_correct = False

# my_analyzer.analyze_false_positive_trend(categories=categories, include_correct_predictions=include_correct)

In [None]:
my_analyzer.analyze_false_positive_trend_for_category("Jerome")

# [OPTIONAL] define whether the correct predictions should be included
# include_correct = False

# my_analyzer.analyze_false_positive_trend_for_category("Paul", include_correct_predictions=include_correct)

### False negative error analysis
For each category, FN errors are categorized into "similar" and "other".

In [None]:
my_analyzer.analyze_false_negative_errors()

# [OPTIONAL] define specific categories to be analyzed
# categories = ['Paul', 'Peter']

# my_analyzer.analyze_false_negative_errors(categories=categories)

In [None]:
my_analyzer.analyze_false_negative_errors_for_category("Paul")

### Performance analysis based on PR curve, ROC curve and F1 curve

In [None]:
my_analyzer.analyze_curve()

# [OPTIONAL] set the curve to be used in the analysis (default is PRECISION_RECALL_CURVE)
# eval_curve = Curves.ROC_CURVE 

# [OPTIONAL] set the averaging method to be used in the analysis (default is "macro")
# avg_method = "micro"

# my_analyzer.analyze_curve(curve=eval_curve, average=avg_method)

In [None]:
my_analyzer.analyze_curve_for_categories()

# [OPTIONAL] define specific categories to be analyzed
# categories = ['Paul', 'Peter']

# [OPTIONAL] set the curve to be used in the analysis (default is PRECISION_RECALL_CURVE)
# eval_curve = Curves.ROC_CURVE 

# my_analyzer.analyze_curve_for_categories(categories=categories, curve=eval_curve)

### Reliability analysis

In [None]:
my_analyzer.analyze_reliability()

# [OPTIONAL] define the number of bins for the analysis (default is 10)
# bins = 20
# my_analyzer.analyze_reliability(num_bins=bins)

In [None]:
my_analyzer.analyze_reliability_for_categories()

# [OPTIONAL] define specific categories to be analyzed
# categories = ['Paul', 'Peter']

# [OPTIONAL] define the number of bins for the analysis (default is 10)
# bins = 20

# my_analyzer.analyze_reliability_for_categories(categories=categories, num_bins=bins)

### TP, FP, FN, TN analysis

In [None]:
my_analyzer.show_confusion_matrix()


# [OPTIONAL] define specific categories to be analyzed
# categories = ['Paul', 'Peter']

# my_analyzer.show_confusion_matrix(categories=categories)

# [OPTIONAL] filter the output by meta-annotations values
# meta_annotations = ['color', 'characters']
# meta_annotations_values = [['rgb'], ['0-1', '5+']]

# my_analyzer.show_confusion_matrix(properties_names=meta_annotations, properties_values=meta_annotations_values)

In [None]:
my_analyzer.show_true_positive_distribution()


# [OPTIONAL] define specific categories to be analyzed
# categories = ['Paul', 'Peter']

# my_analyzer.show_true_positive_distribution(categories=categories)

In [None]:
my_analyzer.show_true_negative_distribution()


# [OPTIONAL] define specific categories to be analyzed
# categories = ['Paul', 'Peter']

# my_analyzer.show_true_negative_distribution(categories=categories)

In [None]:
my_analyzer.show_false_negative_distribution()


# [OPTIONAL] define specific categories to be analyzed
# categories = ['Paul', 'Peter']

# my_analyzer.show_false_negative_distribution(categories=categories)

In [None]:
my_analyzer.show_false_positive_distribution()


# [OPTIONAL] define specific categories to be analyzed
# categories = ['Paul', 'Peter']

# my_analyzer.show_false_positive_distribution(categories=categories)

##### Per-property distributions

In [None]:
my_analyzer.show_true_positive_distribution_for_categories_for_property("color")

# [OPTIONAL] define only specific values to be analyzed
# meta_annotation_values = ["rgb"]

# [OPTIONAL] define specific categories to be analyzed
# categories = ['Paul', 'Peter']

# my_analyzer.show_true_positive_distribution_for_categories_for_property("color", property_values=meta_annotation_values, categories=categories)

In [None]:
my_analyzer.show_true_negative_distribution_for_categories_for_property("color")

# [OPTIONAL] define only specific values to be analyzed
# meta_annotation_values = ["rgb"]

# [OPTIONAL] define specific categories to be analyzed
# categories = ['Paul', 'Peter']

# my_analyzer.show_true_negative_distribution_for_categories_for_property("color", property_values=meta_annotation_values, categories=categories)

In [None]:
my_analyzer.show_false_negative_distribution_for_categories_for_property("color")

# [OPTIONAL] define only specific values to be analyzed
# meta_annotation_values = ["rgb"]

# [OPTIONAL] define specific categories to be analyzed
# categories = ['Paul', 'Peter']

# my_analyzer.show_false_negative_distribution_for_categories_for_property("color", property_values=meta_annotation_values, categories=categories)

In [None]:
my_analyzer.show_false_positive_distribution_for_categories_for_property("color")

# [OPTIONAL] define only specific values to be analyzed
# meta_annotation_values = ["rgb"]

# [OPTIONAL] define specific categories to be analyzed
# categories = ['Paul', 'Peter']

# my_analyzer.show_false_positive_distribution_for_categories_for_property("color", property_values=meta_annotation_values, categories=categories)

### Report of the performance at all levels of granularity (overall, per-category, per-meta-annotation)

In [None]:
my_analyzer.base_report()


# [OPTIONAL] define specific evaluation metrics to be included in the report
# eval_metrics = [Metrics.ACCURACY, Metrics.F1_SCORE]

# [OPTIONAL] define specific categories to be included in the report
# categories = ['Paul', 'Peter']

# [OPTIONAL] define specific meta-annotations to be included in the report
# meta_annotations = ['characters', 'color']

# [OPTIONAL] do not include categories and/or meta-annotations in the report
# show_categories = False
# show_meta_annotations = False

# my_analyzer.base_report(metrics=eval_metrics, categories=categories, properties=meta_annotations, show_categories=show_categories, show_properties=show_meta_annotations)

### Use your custom evaluation metric

#### Define custom metrics

In [None]:
from sklearn.metrics import f1_score, average_precision_score

import numpy as np

class SklearnF1(CustomMetric):
    """Custom metric that delegates to scikit-learn's ``f1_score``.

    Prediction scores are binarized with a fixed 0.5 threshold before scoring.
    """

    def evaluate_metric(self, gt, predictions, matching, is_micro_required=False):
        """Return ``(f1, None)`` for the given ground truth and prediction scores.

        ``matching`` is accepted but not used. When ``is_micro_required`` is
        true (requested by the base report for the micro average), the score
        is computed with ``average='micro'``; otherwise (calls coming from the
        analyses) scikit-learn's default averaging is used.
        """
        # scores at or above 0.5 count as positive predictions
        binarized = np.array(predictions) >= 0.5

        # micro-averaged variant, used by the base report
        if is_micro_required:
            return f1_score(gt, binarized, zero_division=0, average='micro'), None

        # default variant, used by the analyses
        return f1_score(gt, binarized, zero_division=0), None
        
class SklearnAP(CustomMetric):
    """Custom metric that delegates to scikit-learn's ``average_precision_score``.

    Prediction scores are passed through unchanged (no thresholding).
    """

    def evaluate_metric(self, gt, predictions, matching, is_micro_required=False):
        """Return ``(average_precision, None)`` for the given inputs.

        ``matching`` is accepted but not used. ``is_micro_required`` selects
        ``average='micro'`` (requested by the base report for the micro
        average); otherwise scikit-learn's default averaging applies.
        """
        if is_micro_required:
            # micro-averaged variant, used by the base report
            score = average_precision_score(gt, predictions, average='micro')
        else:
            # default variant, used by the analyses
            score = average_precision_score(gt, predictions)
        return score, None

#### Add custom metrics to analyzer

In [None]:
# Instantiate the custom metrics with their display names.
# F1 is flagged as a single-threshold metric, AP is not.
sklearn_f1 = SklearnF1("sklearn F1", is_single_threshold_metric=True)
sklearn_ap = SklearnAP("sklearn AP", is_single_threshold_metric=False)

# Register both metrics with the analyzer so they can be selected in the analyses
my_analyzer.add_custom_metric(sklearn_f1)
my_analyzer.add_custom_metric(sklearn_ap)

#### Use your custom metrics for analysis

The **CustomMetric** instances just created have been added to the _Metrics_ enum. To evaluate the model performance with your custom evaluation metric, use 'Metrics.MY_CUSTOM_METRIC_NAME', where 'MY_CUSTOM_METRIC_NAME' is the name of the custom metric in uppercase, with every whitespace character replaced by an underscore.

Example: "sklearn F1" -> Metrics.SKLEARN_F1

In [None]:
my_analyzer.analyze_property("color", metric=Metrics.SKLEARN_F1)

In [None]:
my_analyzer.analyze_sensitivity_impact_of_properties(metric=Metrics.SKLEARN_F1)

In [None]:
my_analyzer.analyze_false_positive_errors(metric=Metrics.SKLEARN_AP)

In [None]:
my_analyzer.base_report()