In [135]:
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
import os,glob
from tqdm import tqdm

In [136]:
# Event class names; the position in this list is the class index used by
# the prediction arrays, the label files and the per-class report.
cat = [
    "Female speech, woman speaking",  # 0
    "Male speech, man speaking",      # 1
    "Clapping",                       # 2
    "Telephone",                      # 3
    "Laughter",                       # 4
    "Domestic sounds",                # 5
    "Walk, footsteps",                # 6
    "Door, open or close",            # 7
    "Music",                          # 8
    "Musical instrument",             # 9
    "Water tap, faucet",              # 10
    "Bell",                           # 11
    "Knock",                          # 12
]

# Number of output tracks scanned per frame, and number of event classes.
n_track = 3
n_class = 13

# Dataset locations (label directories live under the evaluation root).
dir_root = "/home/data/kbh/DCASE_eval/"
dir_dev  = "/home/data/kbh/DCASE_eval/dev_label/"
dir_eval = "/home/data/kbh/DCASE_eval/eval_label/"

In [262]:
def eval_dir(dir_pred) :
    """Evaluate every ``*.npy`` prediction in ``dir_pred`` and print a report.

    For each prediction file the matching label file is looked up in
    ``dir_eval`` as ``<prefix>.csv``, where ``<prefix>`` is the part of the
    file name before the first underscore (e.g. ``mix036_pred_sed.npy`` ->
    ``mix036.csv``). Each pair is scored with ``eval_thr``.

    The report contains the file-averaged f1 / accuracy / recall, the total
    number of positive predictions, the best and worst file by f1, and
    per-class f1 / accuracy / recall computed from the counts summed over
    all files.

    Parameters
    ----------
    dir_pred : str
        Directory containing the ``*.npy`` prediction files.

    Returns
    -------
    None
        Results are printed, not returned.
    """

    def _ratio(num, den) :
        # Undefined ratios (no samples for a class) are reported as nan
        # instead of emitting a numpy divide warning.
        return num/den if den != 0 else float("nan")

    # Sorted so that the run order and the max/min tie-breaking are reproducible.
    list_pred = sorted(glob.glob(os.path.join(dir_pred,"*.npy")))
    n_target = len(list_pred)

    if n_target == 0 :
        # Nothing to average over; avoid dividing by zero further down.
        print(dir_pred)
        print("n_target : {}".format(n_target))
        print("no prediction files (*.npy) found")
        return

    recall = 0.0
    f1 = 0.0
    acc = 0.0

    min_f1 = float("inf")
    max_f1 = float("-inf")
    min_path = ""
    max_path = ""

    TP_cat = np.zeros(n_class)
    FP_cat = np.zeros(n_class)
    FN_cat = np.zeros(n_class)
    TN_cat = np.zeros(n_class)

    n_P = 0

    for path in tqdm(list_pred) :
        # "<prefix>_....npy" -> "<dir_eval>/<prefix>.csv"
        name_pred = os.path.basename(path)
        id_target = name_pred.split('_')[0]
        path_target = os.path.join(dir_eval,id_target+".csv")

        t_recall, t_f1, t_acc, t_TP, t_FP, t_FN, t_TN, t_n_P = eval_thr(path,path_target)

        n_P += t_n_P

        if t_f1 > max_f1 :
            max_f1 = t_f1
            max_path = path

        if t_f1 < min_f1 :
            min_f1 = t_f1
            min_path = path

        TP_cat += t_TP
        FP_cat += t_FP
        FN_cat += t_FN
        TN_cat += t_TN

        recall += t_recall
        f1 += t_f1
        acc += t_acc

    print(dir_pred)
    print("n_target : {}".format(n_target))
    print("f1-score {:.5f}".format(f1/n_target))
    print("accuracy {:.5f}".format(acc/n_target))
    print("recall   {:.5f}".format(recall/n_target))
    print("n_P      {:d}".format(n_P))
    print("max f1 {:.5f} at {}".format(max_f1,max_path))
    print("min f1 {:.5f} at {}".format(min_f1,min_path))
    print("--------------------------")
    for i in range(n_class):
        print("f1[{}] : {:.6f}".format(cat[i],_ratio(2*TP_cat[i],2*TP_cat[i]+FP_cat[i]+FN_cat[i])))
    print("--------------------------")
    for i in range(n_class):
        print("acc[{}] : {:.6f}".format(cat[i],_ratio(TP_cat[i]+TN_cat[i],TP_cat[i]+TN_cat[i]+FP_cat[i]+FN_cat[i])))
    print("--------------------------")
    for i in range(n_class):
        print("recall[{}] : {:.6f}".format(cat[i],_ratio(TP_cat[i],TP_cat[i]+FN_cat[i])))

In [293]:
def eval_thr(path_pred,path_target) : 
    """Threshold one prediction file and score it against its label CSV.

    Parameters
    ----------
    path_pred : str
        Path to a ``.npy`` array that is indexed as
        ``pred[frame, track, class]``; ``n_track`` tracks are read per frame.
    path_target : str
        Path to a header-less CSV whose first column is used as the index
        and whose next five columns hold class indices (blank when unused).
        Row *position* (not the index value) selects the label frame.

    Returns
    -------
    tuple
        ``(recall, f1, acc, TP_cat, FP_cat, FN_cat, TN_cat, n_P)`` where the
        first three are floats over all classes of this file, the ``*_cat``
        entries are float arrays of length ``n_class`` and ``n_P`` is the
        number of positive predictions (TP + FP). Ratios whose denominator
        is zero (no events and no detections) are returned as 0.0.
    """
    # Classes that need a stricter score than 0.5 even when they are the arg-max.
    strict_classes = (9, 12)
    strict_threshold = 0.95
    # (class, threshold) pairs tried in this order when the arg-max score
    # does not exceed 0.5; the first pair that fires wins for that track.
    weak_thresholds = ((0, 0.05), (11, 0.1), (5, 0.3), (4, 0.4), (8, 0.45))

    pred = np.load(path_pred)
    # NOTE(review): predictions are pooled into bins of 10 frames below, but
    # the evaluation grid keeps one row per *prediction* frame, so only the
    # first tenth of the rows can ever be non-zero and the rest count as
    # empty frames in TN_cat. Kept as-is to leave reported numbers
    # unchanged - confirm whether n_frame should be ceil(pred.shape[0]/10).
    n_frame = pred.shape[0]

    # pred -> sub eval
    eval_sub = np.zeros((n_frame,n_class))

    for it in range(n_frame) :
        idx = it // 10
        for it_track in range(n_track) :
            score = pred[it,it_track,:]
            cls_max = int(np.argmax(score))

            if score[cls_max] > 0.5 :
                # stronger threshold
                if cls_max in strict_classes and score[cls_max] <= strict_threshold :
                    continue
                # Only "!= 0" is tested later, so any non-zero flag works.
                eval_sub[idx,cls_max] = 1.0
            # weaker threshold
            else :
                for cls_weak, thr in weak_thresholds :
                    if score[cls_weak] > thr :
                        eval_sub[idx,cls_weak] = 1.0
                        break

    # Label
    csv_label = pd.read_csv(
        path_target,
        names=["idx","1","2","3","4","5"],
        index_col="idx",
        keep_default_na=False
    )

    # eval csv -> eval array
    eval_label = np.zeros((n_frame,n_class))

    # Iterate the raw values: integer keys on a string-labelled row Series
    # rely on a deprecated positional fallback.
    for it, row in enumerate(csv_label.to_numpy()) :
        for value in row[:5] :
            if isinstance(value, str) :
                # Blank or whitespace-only cell means "no event in this slot".
                value = value.strip()
                if value == "" :
                    continue
            elif pd.isna(value) :
                continue
            eval_label[it,int(value)] = 1

    # evaluate
    pred_pos = eval_sub != 0
    label_pos = eval_label != 0

    tp_mask = pred_pos & label_pos
    fp_mask = pred_pos & ~label_pos
    fn_mask = ~pred_pos & label_pos

    TP_cat = tp_mask.sum(axis=0).astype(float)
    FP_cat = fp_mask.sum(axis=0).astype(float)
    FN_cat = fn_mask.sum(axis=0).astype(float)

    TP = int(tp_mask.sum())
    FP = int(fp_mask.sum())
    FN = int(fn_mask.sum())
    # NOTE(review): true negatives are never counted for the overall score,
    # so "acc" is effectively TP/(TP+FP+FN). Kept to preserve the metric.
    TN = 0

    # NOTE(review): a frame adds one true negative to *every* class only when
    # nothing was predicted or labelled in it at all; kept as in the original.
    n_empty = int((~(pred_pos | label_pos).any(axis=1)).sum())
    TN_cat = np.full(n_class, float(n_empty))

    den_recall = TP+FN
    den_f1 = 2*TP+FP+FN
    den_acc = TP+TN+FP+FN

    recall = TP/den_recall if den_recall else 0.0
    f1 = (2*TP)/den_f1 if den_f1 else 0.0
    acc = (TP+TN)/den_acc if den_acc else 0.0
    n_P = TP+FP

    return recall,f1,acc, TP_cat,FP_cat,FN_cat,TN_cat, n_P

In [296]:
eval_dir("/home/data/kbh/DCASE_eval/ER616_Real/")

100%|███████████████████████████████████████████████| 52/52 [00:06<00:00,  8.32it/s]

/home/data/kbh/DCASE_eval/ER616_Real/
n_target : 52
f1-score 0.69171
accuracy 0.55161
recall   0.73281
n_P      9661
max f1 1.00000 at /home/data/kbh/DCASE_eval/ER616_Real/mix036_pred_sed.npy
min f1 0.24615 at /home/data/kbh/DCASE_eval/ER616_Real/mix041_pred_sed.npy
--------------------------
f1[Female speech, woman speaking] : 0.698212
f1[Male speech, man speaking] : 0.779515
f1[Clapping] : 0.455285
f1[Telephone] : 0.400000
f1[Laughter] : 0.466165
f1[Domestic sounds] : 0.700669
f1[Walk, footsteps] : 0.202166
f1[Door, open or close] : 0.343750
f1[Music] : 0.689968
f1[Musical instrument] : 0.352720
f1[Water tap, faucet] : 0.000000
f1[Bell] : 0.591837
f1[Knock] : 0.144928
--------------------------
acc[Female speech, woman speaking] : 0.983685
acc[Male speech, man speaking] : 0.979115
acc[Clapping] : 0.998969
acc[Telephone] : 0.998846
acc[Laughter] : 0.996732
acc[Domestic sounds] : 0.989221
acc[Walk, footsteps] : 0.996606
acc[Door, open or close] : 0.999353
acc[Music] : 0.982741
acc[Musi


