In [1]:
import torch
import pathlib
from tgt import read_textgrid

In [3]:
def _epoch_intervals(vuv_tier, epoch_tier, sr, offset):
    """Build one ``(left, center, right)`` sample window per reference epoch.

    For every interval of ``vuv_tier`` the epoch marks lying between its start
    and end time are converted to sample indices. Each epoch owns the span
    reaching halfway to its neighbours; the first/last epoch of an interval is
    bounded by the interval's own start/end instead. ``offset`` (samples) is
    added to all three values.
    """
    intervals = []
    for voiced in vuv_tier:
        points = epoch_tier.get_annotations_between_timepoints(voiced.start_time, voiced.end_time)
        epoch = [int(point.time * sr) for point in points]
        last = len(epoch) - 1
        for i, center in enumerate(epoch):
            left = voiced.start_time * sr if i == 0 else (epoch[i - 1] + center) / 2
            right = voiced.end_time * sr if i == last else (center + epoch[i + 1]) / 2
            intervals.append((left + offset, center + offset, right + offset))
    return intervals


def evaluate(cmp_dir, sr=32000, offset=50, list_path="../data/CMU_ARCTICS_SJB30.txt",
             data_root="../data", verbose=True):
    """Score estimated epoch positions against TextGrid reference epochs and print the metrics.

    For each non-blank entry of ``list_path`` (a wav path relative to
    ``data_root``) the function reads the sibling ``.TextGrid`` file (tiers
    ``"vuv"`` and ``"epoch"``) and the whitespace-separated integer sample
    positions stored in ``cmp_dir/<parent dir name>/<wav stem>.wav.txt``.
    Each reference epoch gets a window (see ``_epoch_intervals``) and the
    estimates falling in ``[left, right)`` are counted.

    Printed metrics:
        CNT   number of reference epochs
        IDR   % of reference epochs with exactly one estimate in their window
        MR    % with no estimate
        FAR   % with more than one estimate
        FAT   extra estimates (beyond the first) per reference epoch, in %
        Bias  mean signed error (estimate - reference) of identified epochs, in ms
        IDA   standard deviation of that error, in ms

    Rates print as ``nan`` when there are no reference epochs, and Bias/IDA
    print as ``nan`` when nothing was identified (instead of raising
    ``ZeroDivisionError``).

    Args:
        cmp_dir: directory holding the estimated epochs, one sub-directory per
            parent directory name of the listed wav files.
        sr: sample rate used to convert TextGrid times to sample indices.
        offset: shift in samples applied to every reference boundary and epoch.
            NOTE(review): the reason for the default of 50 is not visible here — confirm.
        list_path: text file with one wav path per line.
        data_root: directory the listed paths are relative to.
        verbose: when True, print one "False Alarm" line per window that
            contains several estimates.

    Returns:
        None; results are printed.
    """
    cmp_dir = pathlib.Path(cmp_dir)
    data_root = pathlib.Path(data_root)
    # Context manager so the list file handle is closed deterministically.
    with open(list_path, "r") as list_file:
        entries = [line.strip() for line in list_file]

    cnt = 0
    cnt_id = 0
    cnt_miss = 0
    cnt_fa = 0
    cnt_fat = 0
    sum_err = 0
    sum_ser = 0

    for entry in entries:
        if not entry:
            # A blank line would otherwise resolve to the data root itself.
            continue
        wav_path = data_root / entry
        tgt_path = wav_path.with_suffix(".TextGrid")
        p1_path = cmp_dir / wav_path.parent.stem / f"{wav_path.stem}.wav.txt"

        p1 = torch.tensor([int(c) for c in p1_path.read_text().split()], dtype=torch.long)
        tgt = read_textgrid(str(tgt_path))
        intervals = _epoch_intervals(
            tgt.get_tier_by_name("vuv"), tgt.get_tier_by_name("epoch"), sr, offset
        )

        cnt += len(intervals)
        for (left, center, right) in intervals:
            mask = (p1 >= left) & (p1 < right)
            # Convert to a Python int: accumulating the 0-dim tensor would turn
            # cnt_fat into a tensor and make the FAT ratio a float32 value.
            match = int(mask.sum())
            if match == 0:
                cnt_miss += 1
            elif match == 1:
                cnt_id += 1
                err = p1[mask].item() - center
                sum_err += err
                sum_ser += err ** 2
            else:
                if verbose:
                    print(f"False Alarm {wav_path}[{center}]=> {left, center, right} {p1[mask]}")
                cnt_fa += 1
                cnt_fat += match - 1

    nan = float("nan")

    def percent(count):
        # Undefined (nan) rather than a crash when there is nothing to score.
        return count / cnt * 100 if cnt else nan

    if cnt_id:
        mean_err = sum_err / cnt_id
        # Clamp guards against a tiny negative value from floating-point rounding.
        variance = max(sum_ser / cnt_id - mean_err ** 2, 0.0)
        bias_ms = mean_err / sr * 1000
        ida_ms = variance ** 0.5 / sr * 1000
    else:
        bias_ms = nan
        ida_ms = nan

    print(f"CNT: {cnt}")
    print(f"IDR: {percent(cnt_id)}")
    print(f"MR: {percent(cnt_miss)}")
    print(f"FAR: {percent(cnt_fa)}")
    print(f"FAT: {percent(cnt_fat)}")
    print(f"Bias: {bias_ms}")
    print(f"IDA: {ida_ms}")
        

In [4]:
# Score the epoch estimates stored under ../data/CMU_ARCTICS_SJB30_BP/ against
# the TextGrid reference epochs; evaluate() prints one line per false-alarm
# window followed by the summary metrics.
# NOTE(review): the commented-out call below is presumably the bpdp setting used
# to generate these estimates — confirm.
# p = bpdp(wav[0].view(-1), sr, wl_0=0.05, wl_1=0.002, f_lo=50.0, f_hi=400.0, beam_size=5, filt="bp1")
evaluate("../data/CMU_ARCTICS_SJB30_BP/")

False Alarm ../data/CMU_ARCTICS_SJB30/jmk/arctic_a0008.wav[7990]=> (7812.20552391531, 7990, 8121.0) tensor([7984, 8044])
False Alarm ../data/CMU_ARCTICS_SJB30/jmk/arctic_a0008.wav[8252]=> (8121.0, 8252, 8392.0) tensor([8242, 8299])
False Alarm ../data/CMU_ARCTICS_SJB30/jmk/arctic_a0008.wav[8532]=> (8392.0, 8532, 8673.0) tensor([8521, 8573, 8631])
False Alarm ../data/CMU_ARCTICS_SJB30/jmk/arctic_a0008.wav[8814]=> (8673.0, 8814, 8956.0) tensor([8802, 8851, 8910])
False Alarm ../data/CMU_ARCTICS_SJB30/jmk/arctic_a0008.wav[9098]=> (8956.0, 9098, 9242.5) tensor([9086, 9133, 9191])
False Alarm ../data/CMU_ARCTICS_SJB30/jmk/arctic_a0008.wav[9387]=> (9242.5, 9387, 9531.0) tensor([9374, 9419, 9476])
False Alarm ../data/CMU_ARCTICS_SJB30/jmk/arctic_a0008.wav[9675]=> (9531.0, 9675, 9820.0) tensor([9661, 9706, 9761])
False Alarm ../data/CMU_ARCTICS_SJB30/jmk/arctic_a0008.wav[9965]=> (9820.0, 9965, 10112.5) tensor([ 9952,  9995, 10050])
False Alarm ../data/CMU_ARCTICS_SJB30/jmk/arctic_a0008.wav[102

In [5]:
# Score the epoch estimates stored under ../data/CMU_ARCTICS_SJB30_BPDP5/
# against the TextGrid reference epochs.
# NOTE(review): the commented-out call below is presumably the bpdp setting used
# to generate these estimates — confirm.
# p = bpdp(wav[0].view(-1), sr, wl_0=0.05, wl_1=0.002, f_lo=50.0, f_hi=400.0, beam_size=5, filt="bp1")
evaluate("../data/CMU_ARCTICS_SJB30_BPDP5/")

False Alarm ../data/CMU_ARCTICS_SJB30/jmk/arctic_a0009.wav[76403]=> (76156.43464390145, 76403, 76551.0) tensor([76159, 76400])
False Alarm ../data/CMU_ARCTICS_SJB30/jmk/arctic_a0009.wav[91826]=> (91674.90594481053, 91826, 91959.5) tensor([91712, 91870])
False Alarm ../data/CMU_ARCTICS_SJB30/jmk/arctic_a0009.wav[95858]=> (95712.08255749999, 95858, 96131.0) tensor([95855, 96101])
False Alarm ../data/CMU_ARCTICS_SJB30/jmk/arctic_a0009.wav[101864]=> (101691.5, 101864, 102049.0) tensor([101868, 101978])
False Alarm ../data/CMU_ARCTICS_SJB30/jmk/arctic_a0007.wav[21501]=> (21263.15937180434, 21501, 21636.0) tensor([21348, 21556])
False Alarm ../data/CMU_ARCTICS_SJB30/jmk/arctic_a0007.wav[75716]=> (75436.34952360063, 75716, 75846.0) tensor([75475, 75721])
False Alarm ../data/CMU_ARCTICS_SJB30/jmk/arctic_a0006.wav[54181]=> (53866.78164915942, 54181, 54339.0) tensor([53953, 54172])
False Alarm ../data/CMU_ARCTICS_SJB30/jmk/arctic_a0006.wav[99670]=> (99154.66688783678, 99670, 99824.0) tensor([993

In [80]:
# Score the epoch estimates stored under ../data/CMU_ARCTICS_SJB30_HPZFF/
# against the TextGrid reference epochs (rates in percent, Bias/IDA in ms).
evaluate("../data/CMU_ARCTICS_SJB30_HPZFF/")

CNT: 7481
IDR: 96.81860713808315
MR: 0.5614222697500334
FAR: 2.6199705921668226
FAT: 2.914048910140991
Bias: 0.383391723042938
IDA: 0.45533251279761366


In [81]:
# Score the epoch estimates stored under ../data/CMU_ARCTICS_SJB30_YAGA/
# against the TextGrid reference epochs (rates in percent, Bias/IDA in ms).
evaluate("../data/CMU_ARCTICS_SJB30_YAGA/")

CNT: 7481
IDR: 95.96310653655928
MR: 0.44111749766074054
FAR: 3.5957759657799757
FAT: 4.531479835510254
Bias: -0.3738943446162418
IDA: 0.536002928404601


In [372]:
# Score the estimates in ../data/CMU_ARCTICS_SJB30_BPDP5/.
# NOTE(review): the commented-out call is presumably the bpdp setting that
# produced the directory contents for this run — confirm.
# NOTE(review): the output recorded for this cell has no "CNT:" line and shows
# IDR/MR/FAR as fractions, so it does not match the evaluate() defined in this
# notebook (which prints percentages); re-run before comparing.
#bpdp(wav[0].view(-1), sr, fl=1000, f_lo=50.0, f_hi=300.0, beam_size=5)
evaluate("../data/CMU_ARCTICS_SJB30_BPDP5/")

IDR: 0.89640422403422
MR: 0.03836385509958562
FAR: 0.06523192086619437
FAT: 6.436038017272949
Bias: 1.5628029003877124
IDA: 0.7018104644952373


In [157]:
# Score the estimates in ../data/CMU_ARCTICS_SJB30_BPDP5/.
# NOTE(review): the commented-out call is presumably the bpdp setting that
# produced the directory contents for this run — confirm.
# NOTE(review): the output recorded for this cell has no "CNT:" line and shows
# IDR/MR/FAR as fractions, so it does not match the evaluate() defined in this
# notebook (which prints percentages); re-run before comparing.
#bpdp(wav[0].view(-1), sr, fl=500, f_lo=50.0, f_hi=300.0, beam_size=5)
evaluate("../data/CMU_ARCTICS_SJB30_BPDP5/")

IDR: 0.8930624248095175
MR: 0.039967918727442854
FAR: 0.0669696564630397
FAT: 6.734794616699219
Bias: 1.533672915731178
IDA: 0.7261082713212447


In [180]:
# Score the estimates in ../data/CMU_ARCTICS_SJB30_BPDP5/.
# NOTE(review): the commented-out call is presumably the bpdp setting that
# produced the directory contents for this run — confirm.
# NOTE(review): the output recorded for this cell has no "CNT:" line and shows
# IDR/MR/FAR as fractions, so it does not match the evaluate() defined in this
# notebook (which prints percentages); re-run before comparing.
#p = bpdp(wav[0].view(-1), sr, fl=1200, f_lo=50.0, f_hi=300.0, beam_size=5)
evaluate("../data/CMU_ARCTICS_SJB30_BPDP5/")

IDR: 0.9116428284988638
MR: 0.04210667023125251
FAR: 0.046250501269883705
FAT: 5.989172458648682
Bias: 1.6423478739002932
IDA: 0.6145327524229001


In [219]:
# Score the estimates in ../data/CMU_ARCTICS_SJB30_BPDP5/.
# NOTE(review): the commented-out call is presumably the bpdp setting that
# produced the directory contents for this run — confirm.
# NOTE(review): the output recorded for this cell has no "CNT:" line and shows
# IDR/MR/FAR as fractions, so it does not match the evaluate() defined in this
# notebook (which prints percentages); re-run before comparing.
# p = bpdp(wav[0].view(-1), sr, fl=1300, f_lo=50.0, f_hi=300.0, beam_size=5)
evaluate("../data/CMU_ARCTICS_SJB30_BPDP5/")

IDR: 0.9095040769950541
MR: 0.04504745354899078
FAR: 0.045448469455955084
FAT: 5.955353736877441
Bias: 1.6594053130511464
IDA: 0.5996148721331224


In [225]:
# Score the estimates in ../data/CMU_ARCTICS_SJB30_BPDP5/.
# NOTE(review): the commented-out call is presumably the bpdp setting that
# produced the directory contents for this run — confirm.
# NOTE(review): the output recorded for this cell has no "CNT:" line and shows
# IDR/MR/FAR as fractions, so it does not match the evaluate() defined in this
# notebook (which prints percentages); re-run before comparing.
# bp2, p = bpdp(wav[0].view(-1), sr, fl=1000, f_lo=50.0, f_hi=400.0, beam_size=5)
evaluate("../data/CMU_ARCTICS_SJB30_BPDP5/")

IDR: 0.9033551664216014
MR: 0.04852292474268146
FAR: 0.04812190883571715
FAT: 6.045715808868408
Bias: 1.7036521530038473
IDA: 0.5636381332122895


In [267]:
# Score the estimates in ../data/CMU_ARCTICS_SJB30_BPDP5/.
# NOTE(review): the commented-out call is presumably the bpdp setting that
# produced the directory contents for this run — confirm.
# NOTE(review): the output recorded for this cell has no "CNT:" line and shows
# IDR/MR/FAR as fractions, so it does not match the evaluate() defined in this
# notebook (which prints percentages); re-run before comparing.
# bp2, p = bpdp(wav[0].view(-1), sr, fl=1000, f_lo=50.0, f_hi=550.0, beam_size=5)
evaluate("../data/CMU_ARCTICS_SJB30_BPDP5/")

IDR: 0.9026868065766609
MR: 0.04865659671166956
FAR: 0.04865659671166956
FAT: 6.119235515594482
Bias: 1.706796979120391
IDA: 0.5666547434514959


In [312]:
# Score the estimates in ../data/CMU_ARCTICS_SJB30_BPDP5/.
# NOTE(review): the commented-out call is presumably the bpdp setting that
# produced the directory contents for this run — confirm.
# NOTE(review): the output recorded for this cell has no "CNT:" line and shows
# IDR/MR/FAR as fractions, so it does not match the evaluate() defined in this
# notebook (which prints percentages); re-run before comparing.
# bp2, p = bpdp(wav[0].view(-1), sr, fl=1000, f_lo=10.0, f_hi=550.0, beam_size=5)
evaluate("../data/CMU_ARCTICS_SJB30_BPDP5/")

IDR: 0.9024194626386847
MR: 0.04865659671166956
FAR: 0.04892394064964577
FAT: 6.064697265625
Bias: 1.7064462672196712
IDA: 0.5667887601080251


In [247]:
# Score the estimates in ../data/CMU_ARCTICS_SJB30_BPDP5/.
# NOTE(review): the commented-out call is presumably the bpdp setting that
# produced the directory contents for this run — confirm.
# NOTE(review): the output recorded for this cell has no "CNT:" line and shows
# IDR/MR/FAR as fractions, so it does not match the evaluate() defined in this
# notebook (which prints percentages); re-run before comparing.
# bp2, p = bpdp(wav[0].view(-1), sr, fl=1200, f_lo=50.0, f_hi=500.0, beam_size=5)
evaluate("../data/CMU_ARCTICS_SJB30_BPDP5/")

IDR: 0.9000133671968988
MR: 0.0541371474401818
FAR: 0.0458494853629194
FAT: 5.727977752685547
Bias: 1.767363173919501
IDA: 0.5414794608974669


In [346]:
# Score the estimates in ../data/CMU_ARCTICS_SJB30_BPDP5/.
# NOTE(review): the commented-out call is presumably the bpdp setting that
# produced the directory contents for this run — confirm.
# NOTE(review): the output recorded for this cell has no "CNT:" line and shows
# IDR/MR/FAR as fractions, so it does not match the evaluate() defined in this
# notebook (which prints percentages); re-run before comparing.
# NOTE(review): the recorded result is an outlier among the BPDP5 runs in this
# notebook (IDR about 0.37, MR about 0.59, IDA about 9.6) — worth a sentence of
# explanation or removal if it was a failed experiment.
# bp3, p = bpdp(wav[0].view(-1), sr, fl=1000, f_lo=50.0, f_hi=550.0, beam_size=5)
evaluate("../data/CMU_ARCTICS_SJB30_BPDP5/")

IDR: 0.3697366662210934
MR: 0.5949739339660474
FAR: 0.035289399812859244
FAT: 1.6021921634674072
Bias: 1.3066815798987708
IDA: 9.638432877229336
