In [1]:
import pkg_resources 
from tdev2.readers.gold_reader import *
from tdev2.readers.disc_reader import *

In [2]:
from tdev2.measures.ned import *
from tdev2.measures.boundary import *
from tdev2.measures.grouping import *
from tdev2.measures.coverage import *
from tdev2.measures.token_type import *

In [3]:
def prf2dict(dct, measurename, obj):
    """Copy precision, recall and f-score of a measure object into ``dct``.

    The values are read from ``obj.precision``, ``obj.recall`` and
    ``obj.fscore`` and stored under ``measurename`` suffixed with ``_P``,
    ``_R`` and ``_F`` respectively.

    Args:
        dct: dictionary to fill; it is modified in place.
        measurename: prefix used to build the three keys.
        obj: object exposing ``precision``, ``recall`` and ``fscore``.

    Returns:
        The same ``dct`` object that was passed in.
    """
    suffix_to_attr = (('_P', 'precision'), ('_R', 'recall'), ('_F', 'fscore'))
    for suffix, attr in suffix_to_attr:
        dct[measurename + suffix] = getattr(obj, attr)

    return dct


def compute_scores(gold, disc, measures=None):
    """Run the selected evaluation measures and collect their scores.

    Args:
        gold: gold annotation object; handed to ``Boundary``, ``TokenType``
            and ``Coverage``.
        disc: discovered-classes object; handed to every measure.
        measures: container of measure names to run. The names checked are
            ``"boundary"``, ``"grouping"``, ``"token/type"``, ``"coverage"``
            and ``"ned"``; any other entry is ignored. ``None`` (the
            default) or an empty container runs all five.

    Returns:
        A new dict. Depending on the measures that ran it holds
        ``boundary_P/R/F``, ``grouping_P/R/F``, ``token_P/R/F``,
        ``type_P/R/F``, ``coverage`` and ``ned``.
    """
    # A None sentinel replaces the former mutable default list; an empty
    # container keeps its original meaning of "run everything".
    run_all = measures is None or len(measures) == 0

    def _selected(name):
        # Membership is tested against the caller's container unchanged, so
        # lists, tuples, sets and strings behave exactly as they did before.
        return run_all or name in measures

    scores = dict()

    # Launch evaluation of each metric
    if _selected("boundary"):
        print('Computing Boundary...')
        boundary = Boundary(gold, disc)
        boundary.compute_boundary()
        scores = prf2dict(scores, 'boundary', boundary)

    if _selected("grouping"):
        print('Computing Grouping...')
        grouping = Grouping(disc)
        grouping.compute_grouping()
        scores = prf2dict(scores, 'grouping', grouping)

    if _selected("token/type"):
        print('Computing Token and Type...')
        token_type = TokenType(gold, disc)
        token_type.compute_token_type()
        # precision/recall/fscore are indexable: position 0 is stored under
        # the "token" keys, position 1 under the "type" keys.
        for position, prefix in enumerate(('token', 'type')):
            scores[prefix + '_P'] = token_type.precision[position]
            scores[prefix + '_R'] = token_type.recall[position]
            scores[prefix + '_F'] = token_type.fscore[position]

    if _selected("coverage"):
        print('Computing Coverage...')
        coverage = Coverage(gold, disc)
        coverage.compute_coverage()
        scores['coverage'] = coverage.coverage

    if _selected("ned"):
        print('Computing NED...')
        ned = Ned(disc)
        ned.compute_ned()
        scores['ned'] = ned.ned

    return scores

In [4]:
# Make the directory holding the local helper package `utils` importable so
# that zrexp2tde can be loaded from utils.tde_utils.
# NOTE(review): absolute, machine-specific path; each re-run of this cell
# appends the same entry to sys.path again.
import sys
sys.path.append('/home/korhan/Dropbox/tez_scripts/')
from utils.tde_utils import zrexp2tde

# Word-level (.wrd) and phone-level (.phn) annotation files. The paths are
# relative, so they resolve against the kernel's current working directory.
wrd_path = 'tdev2/share/phoenix.wrd'
phn_path = 'tdev2/share/phoenix.phn'

# Experiment directory handed to zrexp2tde.
# NOTE(review): absolute path on the original author's machine; must be
# edited before this cell can run anywhere else.
exp_path = '/home/korhan/Desktop/zerospeech2017/track2/src/ZRTools/exp/Signer03_c3_right_PCA50_10_3_02_7_05_06_13_06_04.lsh64/'


# zrexp2tde's return value is used as the class-file argument of Disc; in the
# recorded run it is a path to results/master_graph.class inside exp_path.
# Presumably the helper converts the experiment output into that class file;
# verify against utils.tde_utils.
disc_clsfile = zrexp2tde(exp_path)

print(disc_clsfile)

/home/korhan/Desktop/zerospeech2017/track2/src/ZRTools/exp/Signer03_c3_right_PCA50_10_3_02_7_05_06_13_06_04.lsh64/results/master_graph.class


In [6]:
# NOTE(review): `scores` is assigned in the cell labelled In [5], which sits
# below this one, so on a fresh kernel this cell fails with NameError unless
# that cell is run first.
scores

{'boundary_P': 0.5781818181818181,
 'boundary_R': 0.0023952276219457082,
 'boundary_F': 0.0047706917503037944,
 'grouping_P': 0.35897435897435903,
 'grouping_R': 0.7466666666666665,
 'grouping_F': 0.4848484848484848,
 'token_P': 0.13043478260869565,
 'token_R': 0.0003953155111923704,
 'token_F': 0.0007882420559980294,
 'type_P': 0.1782178217821782,
 'type_R': 0.01462225832656377,
 'type_F': 0.027027027027027025,
 'coverage': 0.004449515116942385,
 'ned': 0.5314495254529766,
 'n_clus': 60,
 'n_node': 184}

In [5]:
# Build the gold reference from the annotation files, then read the
# discovered class file against it.
gold = Gold(wrd_path=wrd_path, phn_path=phn_path)
disc = Disc(disc_clsfile, gold)

# Evaluate every measure and add the cluster and interval counts.
scores = compute_scores(gold, disc)
scores.update({'n_clus': len(disc.clusters), 'n_node': len(disc.intervals)})
scores

Discovered Class file read

184 unique intervals found
Computing Boundary...
Computing Grouping...
Computing Token and Type...
Computing Coverage...
Computing NED...


{'boundary_P': 0.5781818181818181,
 'boundary_R': 0.0023952276219457082,
 'boundary_F': 0.0047706917503037944,
 'grouping_P': 0.35897435897435903,
 'grouping_R': 0.7466666666666665,
 'grouping_F': 0.4848484848484848,
 'token_P': 0.13043478260869565,
 'token_R': 0.0003953155111923704,
 'token_F': 0.0007882420559980294,
 'type_P': 0.1782178217821782,
 'type_R': 0.01462225832656377,
 'type_F': 0.027027027027027025,
 'coverage': 0.004449515116942385,
 'ned': 0.5314495254529766,
 'n_clus': 60,
 'n_node': 184}