In [4]:
import torch
import pathlib
from tgt import read_textgrid

In [5]:
def _epoch_intervals(vuv_tier, epoch_tier, sr):
    """Build one ``(left, center, right)`` window, in samples, per reference epoch.

    For every annotation in ``vuv_tier`` the epoch marks returned by
    ``epoch_tier.get_annotations_between_timepoints`` are converted to sample
    indices. Each window is bounded by the midpoints to the neighbouring epochs;
    the first and last windows of a region extend to the region's start and end.
    """
    # NOTE(review): every vuv annotation is treated as a voiced region —
    # confirm the tier holds voiced spans only.
    intervals = []
    for voiced in vuv_tier:
        marks = epoch_tier.get_annotations_between_timepoints(voiced.start_time, voiced.end_time)
        epoch = [int(point.time * sr) for point in marks]
        last = len(epoch) - 1
        for i, center in enumerate(epoch):
            left = voiced.start_time * sr if i == 0 else (epoch[i - 1] + center) / 2
            right = voiced.end_time * sr if i == last else (center + epoch[i + 1]) / 2
            intervals.append((left, center, right))
    return intervals


def _ratio(numerator, denominator):
    """Return ``numerator / denominator``, or NaN when the denominator is zero."""
    return numerator / denominator if denominator else float("nan")


def evaluate(cmp_dir, sr=32000, list_path="../data/CMU_ARCTICS_SJB30.txt", data_root="../data"):
    """Score estimated epoch positions against TextGrid reference epochs.

    For each wav path listed in ``list_path`` the reference is read from the
    sibling ``.TextGrid`` file (tiers ``"vuv"`` and ``"epoch"``) and the
    estimates from ``cmp_dir/<parent dir name>/<wav stem>.wav.txt``, a
    whitespace-separated list of integer sample indices.

    Printed summary (nothing is returned):
        CNT  -- number of reference epochs
        IDR  -- % of reference epochs with exactly one estimate in their window
        MR   -- % of reference epochs with no estimate in their window
        FAR  -- % of reference epochs with more than one estimate in their window
        FAT  -- surplus estimates (beyond the first) per reference epoch, in %
        Bias -- mean signed error of the unique matches, in milliseconds
        IDA  -- standard deviation of that error, in milliseconds
    Ratios whose denominator is zero are reported as ``nan``.

    Args:
        cmp_dir: directory holding the estimate files.
        sr: sampling rate used to convert TextGrid times to sample indices.
        list_path: text file with one wav path per line, relative to ``data_root``.
        data_root: directory the listed wav paths are relative to.
    """
    cmp_dir = pathlib.Path(cmp_dir)
    data_root = pathlib.Path(data_root)
    # Context manager so the list file is closed deterministically.
    with open(list_path, "r") as list_file:
        lines = list_file.readlines()

    cnt = 0
    cnt_id = 0
    cnt_miss = 0
    cnt_fa = 0
    cnt_fat = 0
    sum_err = 0
    sum_ser = 0

    for line in lines:
        rel_path = line.strip()
        if not rel_path:
            # Blank lines (e.g. a trailing newline) do not name an utterance.
            continue
        wav_path = data_root / rel_path
        tgt_path = wav_path.with_suffix(".TextGrid")
        p1_path = cmp_dir / wav_path.parent.stem / f"{wav_path.stem}.wav.txt"

        p1 = torch.tensor([int(c) for c in p1_path.read_text().split()])
        tgt = read_textgrid(str(tgt_path))
        intervals = _epoch_intervals(
            tgt.get_tier_by_name("vuv"), tgt.get_tier_by_name("epoch"), sr
        )

        cnt += len(intervals)
        for (left, center, right) in intervals:
            mask = ((p1 >= left) & (p1 < right))
            # Plain int keeps the counters as Python ints; accumulating the
            # 0-dim tensor would make the FAT ratio a float32 tensor.
            match = int(mask.sum())
            if match == 0:
                print(f"missing {wav_path}[{center}]")
                cnt_miss += 1
            elif match == 1:
                cnt_id += 1
                err = p1[mask].item() - center
                sum_err += err
                sum_ser += err ** 2
            else:
                print(f"False Alarm {wav_path}[{center}]=> {left, center, right} {p1[mask]}")
                cnt_fa += 1
                cnt_fat += match - 1

    if cnt_id:
        mean_err = sum_err / cnt_id
        # Clamp: rounding can push the variance just below zero, and a
        # negative float ** 0.5 would produce a complex number.
        variance = max(sum_ser / cnt_id - mean_err ** 2, 0.0)
        bias_ms = mean_err / sr * 1000
        ida_ms = variance ** 0.5 / sr * 1000
    else:
        bias_ms = ida_ms = float("nan")

    print(f"CNT: {cnt}")
    print(f"IDR: {_ratio(cnt_id, cnt) * 100}")
    print(f"MR: {_ratio(cnt_miss, cnt) * 100}")
    print(f"FAR: {_ratio(cnt_fa, cnt) * 100}")
    print(f"FAT: {_ratio(cnt_fat, cnt) * 100}")
    print(f"Bias: {bias_ms}")
    print(f"IDA: {ida_ms}")
        

In [8]:
# Score the epoch estimates stored under the BPDP5 comparison directory.
# NOTE(review): the commented-out bpdp call presumably records the settings used
# to generate those estimates — confirm.
# p = bpdp(wav[0].view(-1), sr, wl_0=0.05, wl_1=0.002, f_lo=50.0, f_hi=400.0, beam_size=5, filt="bp1")
evaluate("../data/CMU_ARCTICS_SJB30_BPDP5/")

False Alarm ../data/CMU_ARCTICS_SJB30/jmk/arctic_a0008.wav[7940]=> (7762.20552391531, 7940, 8071.0) tensor([7774, 7984])
missing ../data/CMU_ARCTICS_SJB30/jmk/arctic_a0009.wav[53997]
False Alarm ../data/CMU_ARCTICS_SJB30/jmk/arctic_a0009.wav[59246]=> (59083.63123247693, 59246, 59353.0) tensor([59127, 59292])
False Alarm ../data/CMU_ARCTICS_SJB30/jmk/arctic_a0009.wav[76353]=> (76106.43464390145, 76353, 76501.0) tensor([76159, 76400])
False Alarm ../data/CMU_ARCTICS_SJB30/jmk/arctic_a0009.wav[84080]=> (83850.08401451686, 84080, 84228.5) tensor([83889, 84120])
False Alarm ../data/CMU_ARCTICS_SJB30/jmk/arctic_a0009.wav[91776]=> (91624.90594481053, 91776, 91909.5) tensor([91712, 91870])
False Alarm ../data/CMU_ARCTICS_SJB30/jmk/arctic_a0009.wav[96354]=> (96081.0, 96354, 96504.0) tensor([96101, 96397])
missing ../data/CMU_ARCTICS_SJB30/jmk/arctic_a0009.wav[100388]
missing ../data/CMU_ARCTICS_SJB30/jmk/arctic_a0009.wav[101469]
False Alarm ../data/CMU_ARCTICS_SJB30/jmk/arctic_a0009.wav[101814]

In [7]:
# Score the epoch estimates stored under the HPZFF comparison directory.
evaluate("../data/CMU_ARCTICS_SJB30_HPZFF/")

False Alarm ../data/CMU_ARCTICS_SJB30/jmk/arctic_a0009.wav[76353]=> (76106.43464390145, 76353, 76501.0) tensor([76115, 76397])
False Alarm ../data/CMU_ARCTICS_SJB30/jmk/arctic_a0009.wav[84080]=> (83850.08401451686, 84080, 84228.5) tensor([83890, 84123])
False Alarm ../data/CMU_ARCTICS_SJB30/jmk/arctic_a0009.wav[96354]=> (96081.0, 96354, 96504.0) tensor([96105, 96405])
missing ../data/CMU_ARCTICS_SJB30/jmk/arctic_a0009.wav[101148]
missing ../data/CMU_ARCTICS_SJB30/jmk/arctic_a0007.wav[30845]
False Alarm ../data/CMU_ARCTICS_SJB30/jmk/arctic_a0007.wav[31066]=> (30955.5, 31066, 31181.0) tensor([30958, 31166])
False Alarm ../data/CMU_ARCTICS_SJB30/jmk/arctic_a0007.wav[49079]=> (48840.893501884646, 49079, 49218.5) tensor([48883, 49126])
False Alarm ../data/CMU_ARCTICS_SJB30/jmk/arctic_a0007.wav[66148]=> (65976.20419583799, 66148, 66291.0) tensor([65994, 66244])
False Alarm ../data/CMU_ARCTICS_SJB30/jmk/arctic_a0007.wav[75666]=> (75386.34952360063, 75666, 75796.0) tensor([75469, 75712])
missi

In [9]:
# Score the epoch estimates stored under the YAGA comparison directory.
evaluate("../data/CMU_ARCTICS_SJB30_YAGA/")

False Alarm ../data/CMU_ARCTICS_SJB30/jmk/arctic_a0008.wav[56606]=> (56521.0, 56606, 56817.23726963381) tensor([56644, 56733])
False Alarm ../data/CMU_ARCTICS_SJB30/jmk/arctic_a0009.wav[59246]=> (59083.63123247693, 59246, 59353.0) tensor([59101, 59287])
False Alarm ../data/CMU_ARCTICS_SJB30/jmk/arctic_a0009.wav[67736]=> (67554.42541011804, 67736, 67850.5) tensor([67596, 67782])
False Alarm ../data/CMU_ARCTICS_SJB30/jmk/arctic_a0009.wav[76353]=> (76106.43464390145, 76353, 76501.0) tensor([76153, 76391])
False Alarm ../data/CMU_ARCTICS_SJB30/jmk/arctic_a0009.wav[79897]=> (79717.5, 79897, 80091.73247057904) tensor([79827, 79910, 79976, 80050])
False Alarm ../data/CMU_ARCTICS_SJB30/jmk/arctic_a0009.wav[84080]=> (83850.08401451686, 84080, 84228.5) tensor([83882, 84117])
False Alarm ../data/CMU_ARCTICS_SJB30/jmk/arctic_a0009.wav[91776]=> (91624.90594481053, 91776, 91909.5) tensor([91632, 91865])
False Alarm ../data/CMU_ARCTICS_SJB30/jmk/arctic_a0009.wav[96354]=> (96081.0, 96354, 96504.0) ten

In [372]:
# NOTE(review): the commented-out bpdp call presumably records the settings used
# to generate the estimates scored here — confirm.
# The recorded output has no "CNT:" line and un-scaled rates, so it predates the
# current evaluate(); re-run before comparing numbers.
#bpdp(wav[0].view(-1), sr, fl=1000, f_lo=50.0, f_hi=300.0, beam_size=5)
evaluate("../data/CMU_ARCTICS_SJB30_BPDP5/")

IDR: 0.89640422403422
MR: 0.03836385509958562
FAR: 0.06523192086619437
FAT: 6.436038017272949
Bias: 1.5628029003877124
IDA: 0.7018104644952373


In [157]:
# NOTE(review): the commented-out bpdp call presumably records the settings used
# to generate the estimates scored here — confirm.
# The recorded output has no "CNT:" line and un-scaled rates, so it predates the
# current evaluate(); re-run before comparing numbers.
#bpdp(wav[0].view(-1), sr, fl=500, f_lo=50.0, f_hi=300.0, beam_size=5)
evaluate("../data/CMU_ARCTICS_SJB30_BPDP5/")

IDR: 0.8930624248095175
MR: 0.039967918727442854
FAR: 0.0669696564630397
FAT: 6.734794616699219
Bias: 1.533672915731178
IDA: 0.7261082713212447


In [180]:
# NOTE(review): the commented-out bpdp call presumably records the settings used
# to generate the estimates scored here — confirm.
# The recorded output has no "CNT:" line and un-scaled rates, so it predates the
# current evaluate(); re-run before comparing numbers.
#p = bpdp(wav[0].view(-1), sr, fl=1200, f_lo=50.0, f_hi=300.0, beam_size=5)
evaluate("../data/CMU_ARCTICS_SJB30_BPDP5/")

IDR: 0.9116428284988638
MR: 0.04210667023125251
FAR: 0.046250501269883705
FAT: 5.989172458648682
Bias: 1.6423478739002932
IDA: 0.6145327524229001


In [219]:
# NOTE(review): the commented-out bpdp call presumably records the settings used
# to generate the estimates scored here — confirm.
# The recorded output has no "CNT:" line and un-scaled rates, so it predates the
# current evaluate(); re-run before comparing numbers.
# p = bpdp(wav[0].view(-1), sr, fl=1300, f_lo=50.0, f_hi=300.0, beam_size=5)
evaluate("../data/CMU_ARCTICS_SJB30_BPDP5/")

IDR: 0.9095040769950541
MR: 0.04504745354899078
FAR: 0.045448469455955084
FAT: 5.955353736877441
Bias: 1.6594053130511464
IDA: 0.5996148721331224


In [225]:
# NOTE(review): the commented-out bpdp call presumably records the settings used
# to generate the estimates scored here — confirm.
# The recorded output has no "CNT:" line and un-scaled rates, so it predates the
# current evaluate(); re-run before comparing numbers.
# bp2, p = bpdp(wav[0].view(-1), sr, fl=1000, f_lo=50.0, f_hi=400.0, beam_size=5)
evaluate("../data/CMU_ARCTICS_SJB30_BPDP5/")

IDR: 0.9033551664216014
MR: 0.04852292474268146
FAR: 0.04812190883571715
FAT: 6.045715808868408
Bias: 1.7036521530038473
IDA: 0.5636381332122895


In [267]:
# NOTE(review): the commented-out bpdp call presumably records the settings used
# to generate the estimates scored here — confirm.
# The recorded output has no "CNT:" line and un-scaled rates, so it predates the
# current evaluate(); re-run before comparing numbers.
# bp2, p = bpdp(wav[0].view(-1), sr, fl=1000, f_lo=50.0, f_hi=550.0, beam_size=5)
evaluate("../data/CMU_ARCTICS_SJB30_BPDP5/")

IDR: 0.9026868065766609
MR: 0.04865659671166956
FAR: 0.04865659671166956
FAT: 6.119235515594482
Bias: 1.706796979120391
IDA: 0.5666547434514959


In [312]:
# NOTE(review): the commented-out bpdp call presumably records the settings used
# to generate the estimates scored here — confirm.
# The recorded output has no "CNT:" line and un-scaled rates, so it predates the
# current evaluate(); re-run before comparing numbers.
# bp2, p = bpdp(wav[0].view(-1), sr, fl=1000, f_lo=10.0, f_hi=550.0, beam_size=5)
evaluate("../data/CMU_ARCTICS_SJB30_BPDP5/")

IDR: 0.9024194626386847
MR: 0.04865659671166956
FAR: 0.04892394064964577
FAT: 6.064697265625
Bias: 1.7064462672196712
IDA: 0.5667887601080251


In [247]:
# NOTE(review): the commented-out bpdp call presumably records the settings used
# to generate the estimates scored here — confirm.
# The recorded output has no "CNT:" line and un-scaled rates, so it predates the
# current evaluate(); re-run before comparing numbers.
# bp2, p = bpdp(wav[0].view(-1), sr, fl=1200, f_lo=50.0, f_hi=500.0, beam_size=5)
evaluate("../data/CMU_ARCTICS_SJB30_BPDP5/")

IDR: 0.9000133671968988
MR: 0.0541371474401818
FAR: 0.0458494853629194
FAT: 5.727977752685547
Bias: 1.767363173919501
IDA: 0.5414794608974669


In [346]:
# NOTE(review): the commented-out bpdp call presumably records the settings used
# to generate the estimates scored here — confirm.
# The recorded output has no "CNT:" line and un-scaled rates, so it predates the
# current evaluate(); re-run before comparing numbers.
# bp3, p = bpdp(wav[0].view(-1), sr, fl=1000, f_lo=50.0, f_hi=550.0, beam_size=5)
evaluate("../data/CMU_ARCTICS_SJB30_BPDP5/")

IDR: 0.3697366662210934
MR: 0.5949739339660474
FAR: 0.035289399812859244
FAT: 1.6021921634674072
Bias: 1.3066815798987708
IDA: 9.638432877229336
