# Aprendizaje Multietiqueta de Patrones Geométricos en Objetos de Herencia Cultural
# Calculadora de métricas
## Seminario de Tesis II, Primavera 2022
### Master of Data Science. Universidad de Chile.
#### Prof. guía: Benjamín Bustos - Prof. coguía: Iván Sipirán
#### Autor: Matías Vergara

Este notebook es totalmente auxiliar. Su objetivo es, dado un archivo de predicciones (matriz de ceros y unos), calcular métricas utilizando la clase KunischMetrics (utils.py) y guardar los resultados en un archivo de metadata.

Algunos puntos a notar:
- Actualmente, solo los métodos de C2AE, CNN-RNN y TDNN generan archivos de predicciones compatibles (extraer las predicciones desde scikit-learn fue más complejo).
- Los notebooks correspondientes a dichos métodos crean el archivo metadata.csv por sí solos, por lo cual no es necesario ejecutar este notebook.
- Sin embargo, este notebook sí resulta muy útil cuando se agrega una nueva métrica a KunischMetrics y se quiere aplicar dicha métrica a todos los resultados previos sin volver a calcularlos.

## Imports

In [1]:
import os
import pickle
import numpy as np
import pandas as pd
import random
import matplotlib.pyplot as plt

from PIL import Image
import math
from sklearn.metrics import accuracy_score,hamming_loss, accuracy_score, f1_score, precision_score, recall_score

%matplotlib inline
from matplotlib import pyplot as plt
import seaborn as sns

from sklearn.metrics import multilabel_confusion_matrix, ConfusionMatrixDisplay
from sklearn.metrics import classification_report

from textwrap import wrap


from utils import KunischMetrics
from utils import KunischPruner
from utils import DataExplorer
from utils import KunischPlotter

## Configuración de archivos a leer

In [3]:
# Which experiments to read prediction files for.
# K is the number of folds iterated per experiment (fold directories 0..K-1).
K = 4
output_dir = os.path.join('..', 'outputs', 'cnn-rnn')
experimentos = ['base',  'crop1',  'blur_rain_ref_rot_crop1_randaug1_elastic1']
# Suffixes appended to the '<n>L' run-directory name.
apellidos = ['_weighted_1']

# Which label-set sizes to read prediction files for.
nlabels = [5, 14, 26, 34, 54, 63, 72, 82, 91, 107, 131, 169, 281]

# Load the pickled top-label object for every label-set size; these are
# later handed to KunischPruner.set_top_labels.
labels_dir = os.path.join('..', 'labels')
dic_toplabels = {}
for label_count in nlabels:
    pickle_path = os.path.join(labels_dir, f'top_{label_count}L.pickle')
    with open(pickle_path, 'rb') as handle:
        dic_toplabels[label_count] = pickle.load(handle)

## Cálculo y guardado de scores

In [10]:
scores = {}  # NOTE(review): not used in this cell; kept in case a later cell reads it

# Names of the zero-argument KunischMetrics methods to evaluate. The same names
# are used as row labels in metadata.csv. To apply a newly added metric to all
# previous results, append its method name here -- nothing else has to change.
metric_names = ('f1', 'f2', 'recall', 'precision', 'acc', 'hl', 'emr', 'hs',
                'mr1', 'mr2', 'mr3', 'mr4', 'mr5')

# Walk every (label count, experiment, suffix) combination, average each
# metric over the K folds and write the averages to that run's metadata.csv.
for n in nlabels:
    pruner = KunischPruner(n)
    pruner.set_top_labels(dic_toplabels[n])
    for exp in experimentos:
        for apellido in apellidos:
            run_dir = os.path.join(output_dir, exp, f'{n}L{apellido}')
            path_metadata = os.path.join(run_dir, 'metadata.csv')
            print("Path metadata:{}".format(path_metadata))

            # Reuse an existing metadata file so rows other than the metric
            # rows are preserved; otherwise a fresh one is built below.
            df = None
            if os.path.isfile(path_metadata):
                df = pd.read_csv(path_metadata, index_col = 0)

            # Per-metric running sums across folds.
            totals = dict.fromkeys(metric_names, 0)
            for fold in range(0, K):
                # Ground truth for this fold, restricted by the pruner.
                ytrue = pd.read_json(os.path.join(labels_dir, exp, str(fold), 'test_df.json'), orient='index')
                ytrue = pruner.filter_df(ytrue).values

                fold_dir = os.path.join(run_dir, str(fold))
                print('--' + fold_dir)
                ypred = pd.read_csv(os.path.join(fold_dir, 'predictions.csv'),
                                    index_col = 0)
                ypred = ypred.values

                metrics = KunischMetrics(ytrue, ypred)
                for name in metric_names:
                    totals[name] += getattr(metrics, name)()

            # Fold average of every metric, rounded to 4 decimals.
            averages = {name: round(totals[name] / K, 4) for name in metric_names}

            if df is not None:
                # Overwrite (or append) one row per metric in the existing file.
                for name, value in averages.items():
                    df.loc[name] = [value]
            else:
                # No previous metadata: create it with the run identification
                # rows followed by the metric rows.
                metadata = {'data_flags': exp, 'num_labels': n, **averages}
                df = pd.DataFrame.from_dict(metadata, orient='index')
            df.to_csv(path_metadata)



..\outputs\cnn-rnn\base\5L_weighted_1\metadata.csv
..\outputs\cnn-rnn\base\5L_weighted_1\0
..\outputs\cnn-rnn\base\5L_weighted_1\1
..\outputs\cnn-rnn\base\5L_weighted_1\2
..\outputs\cnn-rnn\base\5L_weighted_1\3
..\outputs\cnn-rnn\crop1\5L_weighted_1\metadata.csv
..\outputs\cnn-rnn\crop1\5L_weighted_1\0
..\outputs\cnn-rnn\crop1\5L_weighted_1\1
..\outputs\cnn-rnn\crop1\5L_weighted_1\2
..\outputs\cnn-rnn\crop1\5L_weighted_1\3
..\outputs\cnn-rnn\blur_rain_ref_rot_crop1_randaug1_elastic1\5L_weighted_1\metadata.csv
..\outputs\cnn-rnn\blur_rain_ref_rot_crop1_randaug1_elastic1\5L_weighted_1\0
..\outputs\cnn-rnn\blur_rain_ref_rot_crop1_randaug1_elastic1\5L_weighted_1\1


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


..\outputs\cnn-rnn\blur_rain_ref_rot_crop1_randaug1_elastic1\5L_weighted_1\2
..\outputs\cnn-rnn\blur_rain_ref_rot_crop1_randaug1_elastic1\5L_weighted_1\3
..\outputs\cnn-rnn\base\14L_weighted_1\metadata.csv
..\outputs\cnn-rnn\base\14L_weighted_1\0


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


..\outputs\cnn-rnn\base\14L_weighted_1\1
..\outputs\cnn-rnn\base\14L_weighted_1\2
..\outputs\cnn-rnn\base\14L_weighted_1\3
..\outputs\cnn-rnn\crop1\14L_weighted_1\metadata.csv
..\outputs\cnn-rnn\crop1\14L_weighted_1\0
..\outputs\cnn-rnn\crop1\14L_weighted_1\1
..\outputs\cnn-rnn\crop1\14L_weighted_1\2
..\outputs\cnn-rnn\crop1\14L_weighted_1\3
..\outputs\cnn-rnn\blur_rain_ref_rot_crop1_randaug1_elastic1\14L_weighted_1\metadata.csv
..\outputs\cnn-rnn\blur_rain_ref_rot_crop1_randaug1_elastic1\14L_weighted_1\0
..\outputs\cnn-rnn\blur_rain_ref_rot_crop1_randaug1_elastic1\14L_weighted_1\1
..\outputs\cnn-rnn\blur_rain_ref_rot_crop1_randaug1_elastic1\14L_weighted_1\2


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


..\outputs\cnn-rnn\blur_rain_ref_rot_crop1_randaug1_elastic1\14L_weighted_1\3
..\outputs\cnn-rnn\base\26L_weighted_1\metadata.csv
..\outputs\cnn-rnn\base\26L_weighted_1\0
..\outputs\cnn-rnn\base\26L_weighted_1\1
..\outputs\cnn-rnn\base\26L_weighted_1\2
..\outputs\cnn-rnn\base\26L_weighted_1\3
..\outputs\cnn-rnn\crop1\26L_weighted_1\metadata.csv
..\outputs\cnn-rnn\crop1\26L_weighted_1\0
..\outputs\cnn-rnn\crop1\26L_weighted_1\1
..\outputs\cnn-rnn\crop1\26L_weighted_1\2
..\outputs\cnn-rnn\crop1\26L_weighted_1\3
..\outputs\cnn-rnn\blur_rain_ref_rot_crop1_randaug1_elastic1\26L_weighted_1\metadata.csv
..\outputs\cnn-rnn\blur_rain_ref_rot_crop1_randaug1_elastic1\26L_weighted_1\0
..\outputs\cnn-rnn\blur_rain_ref_rot_crop1_randaug1_elastic1\26L_weighted_1\1


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


..\outputs\cnn-rnn\blur_rain_ref_rot_crop1_randaug1_elastic1\26L_weighted_1\2
..\outputs\cnn-rnn\blur_rain_ref_rot_crop1_randaug1_elastic1\26L_weighted_1\3
..\outputs\cnn-rnn\base\34L_weighted_1\metadata.csv
..\outputs\cnn-rnn\base\34L_weighted_1\0


  _warn_prf(average, modifier, msg_start, len(result))


..\outputs\cnn-rnn\base\34L_weighted_1\1
..\outputs\cnn-rnn\base\34L_weighted_1\2
..\outputs\cnn-rnn\base\34L_weighted_1\3
..\outputs\cnn-rnn\crop1\34L_weighted_1\metadata.csv
..\outputs\cnn-rnn\crop1\34L_weighted_1\0
..\outputs\cnn-rnn\crop1\34L_weighted_1\1
..\outputs\cnn-rnn\crop1\34L_weighted_1\2
..\outputs\cnn-rnn\crop1\34L_weighted_1\3
..\outputs\cnn-rnn\blur_rain_ref_rot_crop1_randaug1_elastic1\34L_weighted_1\metadata.csv
..\outputs\cnn-rnn\blur_rain_ref_rot_crop1_randaug1_elastic1\34L_weighted_1\0


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


..\outputs\cnn-rnn\blur_rain_ref_rot_crop1_randaug1_elastic1\34L_weighted_1\1
..\outputs\cnn-rnn\blur_rain_ref_rot_crop1_randaug1_elastic1\34L_weighted_1\2
..\outputs\cnn-rnn\blur_rain_ref_rot_crop1_randaug1_elastic1\34L_weighted_1\3


  _warn_prf(average, modifier, msg_start, len(result))


..\outputs\cnn-rnn\base\54L_weighted_1\metadata.csv
..\outputs\cnn-rnn\base\54L_weighted_1\0
..\outputs\cnn-rnn\base\54L_weighted_1\1
..\outputs\cnn-rnn\base\54L_weighted_1\2
..\outputs\cnn-rnn\base\54L_weighted_1\3
..\outputs\cnn-rnn\crop1\54L_weighted_1\metadata.csv
..\outputs\cnn-rnn\crop1\54L_weighted_1\0
..\outputs\cnn-rnn\crop1\54L_weighted_1\1
..\outputs\cnn-rnn\crop1\54L_weighted_1\2
..\outputs\cnn-rnn\crop1\54L_weighted_1\3
..\outputs\cnn-rnn\blur_rain_ref_rot_crop1_randaug1_elastic1\54L_weighted_1\metadata.csv
..\outputs\cnn-rnn\blur_rain_ref_rot_crop1_randaug1_elastic1\54L_weighted_1\0
..\outputs\cnn-rnn\blur_rain_ref_rot_crop1_randaug1_elastic1\54L_weighted_1\1
..\outputs\cnn-rnn\blur_rain_ref_rot_crop1_randaug1_elastic1\54L_weighted_1\2
..\outputs\cnn-rnn\blur_rain_ref_rot_crop1_randaug1_elastic1\54L_weighted_1\3
..\outputs\cnn-rnn\base\63L_weighted_1\metadata.csv


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


..\outputs\cnn-rnn\base\63L_weighted_1\0
..\outputs\cnn-rnn\base\63L_weighted_1\1
..\outputs\cnn-rnn\base\63L_weighted_1\2
..\outputs\cnn-rnn\base\63L_weighted_1\3
..\outputs\cnn-rnn\crop1\63L_weighted_1\metadata.csv
..\outputs\cnn-rnn\crop1\63L_weighted_1\0
..\outputs\cnn-rnn\crop1\63L_weighted_1\1
..\outputs\cnn-rnn\crop1\63L_weighted_1\2
..\outputs\cnn-rnn\crop1\63L_weighted_1\3
..\outputs\cnn-rnn\blur_rain_ref_rot_crop1_randaug1_elastic1\63L_weighted_1\metadata.csv
..\outputs\cnn-rnn\blur_rain_ref_rot_crop1_randaug1_elastic1\63L_weighted_1\0
..\outputs\cnn-rnn\blur_rain_ref_rot_crop1_randaug1_elastic1\63L_weighted_1\1


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


..\outputs\cnn-rnn\blur_rain_ref_rot_crop1_randaug1_elastic1\63L_weighted_1\2
..\outputs\cnn-rnn\blur_rain_ref_rot_crop1_randaug1_elastic1\63L_weighted_1\3
..\outputs\cnn-rnn\base\72L_weighted_1\metadata.csv
..\outputs\cnn-rnn\base\72L_weighted_1\0


  _warn_prf(average, modifier, msg_start, len(result))


..\outputs\cnn-rnn\base\72L_weighted_1\1
..\outputs\cnn-rnn\base\72L_weighted_1\2
..\outputs\cnn-rnn\base\72L_weighted_1\3
..\outputs\cnn-rnn\crop1\72L_weighted_1\metadata.csv
..\outputs\cnn-rnn\crop1\72L_weighted_1\0
..\outputs\cnn-rnn\crop1\72L_weighted_1\1
..\outputs\cnn-rnn\crop1\72L_weighted_1\2
..\outputs\cnn-rnn\crop1\72L_weighted_1\3
..\outputs\cnn-rnn\blur_rain_ref_rot_crop1_randaug1_elastic1\72L_weighted_1\metadata.csv
..\outputs\cnn-rnn\blur_rain_ref_rot_crop1_randaug1_elastic1\72L_weighted_1\0
..\outputs\cnn-rnn\blur_rain_ref_rot_crop1_randaug1_elastic1\72L_weighted_1\1
..\outputs\cnn-rnn\blur_rain_ref_rot_crop1_randaug1_elastic1\72L_weighted_1\2


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


..\outputs\cnn-rnn\blur_rain_ref_rot_crop1_randaug1_elastic1\72L_weighted_1\3
..\outputs\cnn-rnn\base\82L_weighted_1\metadata.csv
..\outputs\cnn-rnn\base\82L_weighted_1\0
..\outputs\cnn-rnn\base\82L_weighted_1\1
..\outputs\cnn-rnn\base\82L_weighted_1\2
..\outputs\cnn-rnn\base\82L_weighted_1\3
..\outputs\cnn-rnn\crop1\82L_weighted_1\metadata.csv
..\outputs\cnn-rnn\crop1\82L_weighted_1\0
..\outputs\cnn-rnn\crop1\82L_weighted_1\1
..\outputs\cnn-rnn\crop1\82L_weighted_1\2
..\outputs\cnn-rnn\crop1\82L_weighted_1\3
..\outputs\cnn-rnn\blur_rain_ref_rot_crop1_randaug1_elastic1\82L_weighted_1\metadata.csv
..\outputs\cnn-rnn\blur_rain_ref_rot_crop1_randaug1_elastic1\82L_weighted_1\0
..\outputs\cnn-rnn\blur_rain_ref_rot_crop1_randaug1_elastic1\82L_weighted_1\1


  _warn_prf(average, modifier, msg_start, len(result))


..\outputs\cnn-rnn\blur_rain_ref_rot_crop1_randaug1_elastic1\82L_weighted_1\2
..\outputs\cnn-rnn\blur_rain_ref_rot_crop1_randaug1_elastic1\82L_weighted_1\3
..\outputs\cnn-rnn\base\91L_weighted_1\metadata.csv
..\outputs\cnn-rnn\base\91L_weighted_1\0
..\outputs\cnn-rnn\base\91L_weighted_1\1
..\outputs\cnn-rnn\base\91L_weighted_1\2
..\outputs\cnn-rnn\base\91L_weighted_1\3
..\outputs\cnn-rnn\crop1\91L_weighted_1\metadata.csv
..\outputs\cnn-rnn\crop1\91L_weighted_1\0
..\outputs\cnn-rnn\crop1\91L_weighted_1\1
..\outputs\cnn-rnn\crop1\91L_weighted_1\2
..\outputs\cnn-rnn\crop1\91L_weighted_1\3
..\outputs\cnn-rnn\blur_rain_ref_rot_crop1_randaug1_elastic1\91L_weighted_1\metadata.csv
..\outputs\cnn-rnn\blur_rain_ref_rot_crop1_randaug1_elastic1\91L_weighted_1\0


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


..\outputs\cnn-rnn\blur_rain_ref_rot_crop1_randaug1_elastic1\91L_weighted_1\1
..\outputs\cnn-rnn\blur_rain_ref_rot_crop1_randaug1_elastic1\91L_weighted_1\2
..\outputs\cnn-rnn\blur_rain_ref_rot_crop1_randaug1_elastic1\91L_weighted_1\3
..\outputs\cnn-rnn\base\107L_weighted_1\metadata.csv
..\outputs\cnn-rnn\base\107L_weighted_1\0


  _warn_prf(average, modifier, msg_start, len(result))


..\outputs\cnn-rnn\base\107L_weighted_1\1
..\outputs\cnn-rnn\base\107L_weighted_1\2
..\outputs\cnn-rnn\base\107L_weighted_1\3
..\outputs\cnn-rnn\crop1\107L_weighted_1\metadata.csv
..\outputs\cnn-rnn\crop1\107L_weighted_1\0
..\outputs\cnn-rnn\crop1\107L_weighted_1\1
..\outputs\cnn-rnn\crop1\107L_weighted_1\2
..\outputs\cnn-rnn\crop1\107L_weighted_1\3
..\outputs\cnn-rnn\blur_rain_ref_rot_crop1_randaug1_elastic1\107L_weighted_1\metadata.csv
..\outputs\cnn-rnn\blur_rain_ref_rot_crop1_randaug1_elastic1\107L_weighted_1\0
..\outputs\cnn-rnn\blur_rain_ref_rot_crop1_randaug1_elastic1\107L_weighted_1\1
..\outputs\cnn-rnn\blur_rain_ref_rot_crop1_randaug1_elastic1\107L_weighted_1\2
..\outputs\cnn-rnn\blur_rain_ref_rot_crop1_randaug1_elastic1\107L_weighted_1\3
..\outputs\cnn-rnn\base\131L_weighted_1\metadata.csv
..\outputs\cnn-rnn\base\131L_weighted_1\0
..\outputs\cnn-rnn\base\131L_weighted_1\1
..\outputs\cnn-rnn\base\131L_weighted_1\2
..\outputs\cnn-rnn\base\131L_weighted_1\3
..\outputs\cnn-rnn\cr