## sed_jams_notebook

This notebook shows how to compute the segment-based F1 score for sound event detection from two JAMS files containing sound event annotations (one reference, one estimate).

In [1]:
# import all the things we need
import jams
import numpy as np

In [2]:
# Paths of the reference and estimate JAMS files
ref_file, est_file = 'data/sed_ref.jams', 'data/sed_est.jams'

# Load both files into JAMS objects (reference first, then estimate)
ref_jam, est_jam = jams.load(ref_file), jams.load(est_file)

In [3]:
# Let's examine the reference labels (the event table of the file's first annotation):
ref_jam.annotations[0].data

Unnamed: 0,time,duration,value,confidence
0,00:00:00,00:00:08.719542,vehicle: engine,1.0
1,00:00:04.087710,00:00:04.631832,church bell: ring,1.0
2,00:00:06.665484,00:00:01.108647,human: voice,1.0


In [4]:
# Let's examine the estimate labels (the event table of the file's first annotation):
est_jam.annotations[0].data

Unnamed: 0,time,duration,value,confidence
0,00:00:02.500000,00:00:05.500000,vehicle: engine,1.0
1,00:00:04,00:00:03,church bell: ring,1.0
2,00:00:06.665484,00:00:01.108647,human: voice,1.0
3,00:00:08,00:00:00.500000,alien,1.0


In [5]:
# Both files correspond to a clip of roughly 9 seconds.
# The durations are read from each file's metadata and are reused below to size the grid.
ref_dur = ref_jam.file_metadata.duration
est_dur = est_jam.file_metadata.duration
print(ref_dur)
print(est_dur)

8.719542
8.719542


In [6]:
# Create a grid based on a chosen segment length.
# The grid spans the longer of the two durations; rounding up with np.ceil gives a
# trailing partial segment its own grid cell.
segment_length = 1 # 1 second
grid_size = int(np.ceil(max(ref_dur, est_dur) / float(segment_length)))
print(grid_size)

9


In [7]:
# Now let's create label grids for the reference and the estimate. Entry k of a label grid
# is a list containing all the labels that overlap with the k-th time segment.
ref_grid = [[] for _ in range(grid_size)]
est_grid = [[] for _ in range(grid_size)]

# The reference and the estimate are sliced in exactly the same way, so one loop handles
# both. This is not necessarily the most efficient way to do this, but at least it's correct...
for events, grid in ((ref_jam.annotations[0].data, ref_grid),
                     (est_jam.annotations[0].data, est_grid)):

    for _, event in events.iterrows():

        # event onset/offset times in seconds
        onset = event['time'].total_seconds()
        offset = event['time'].total_seconds() + event['duration'].total_seconds()

        # first and last segments that overlap with the event
        # (floor division maps a time to the index of the segment containing it)
        first_seg = int(onset // segment_length)
        last_seg = int(offset // segment_length)

        # corner case: an event ending exactly on a grid boundary
        # does not reach into the segment that starts there
        if offset % segment_length == 0:
            last_seg -= 1

        # add the label to all overlapping segments
        for seg in range(first_seg, last_seg + 1):
            grid[seg].append(event['value'])

# Ensure each label list is unique (np.unique also returns the labels sorted)
ref_grid = [np.unique(labels).tolist() for labels in ref_grid]
est_grid = [np.unique(labels).tolist() for labels in est_grid]

In [8]:
# Let's have a look at the reference label grid (one list of labels per segment):
ref_grid

[[u'vehicle: engine'],
 [u'vehicle: engine'],
 [u'vehicle: engine'],
 [u'vehicle: engine'],
 [u'church bell: ring', u'vehicle: engine'],
 [u'church bell: ring', u'vehicle: engine'],
 [u'church bell: ring', u'human: voice', u'vehicle: engine'],
 [u'church bell: ring', u'human: voice', u'vehicle: engine'],
 [u'church bell: ring', u'vehicle: engine']]

In [9]:
# Let's have a look at the estimate label grid (one list of labels per segment):
est_grid

[[],
 [],
 [u'vehicle: engine'],
 [u'vehicle: engine'],
 [u'church bell: ring', u'vehicle: engine'],
 [u'church bell: ring', u'vehicle: engine'],
 [u'church bell: ring', u'human: voice', u'vehicle: engine'],
 [u'human: voice', u'vehicle: engine'],
 [u'alien']]

In [10]:
# With both grids in place, the per-segment counts are simple set arithmetic.

# Running totals over all segments
TP_tot = FP_tot = FN_tot = 0

for ref_labels, est_labels in zip(ref_grid, est_grid):

    # True positives: labels present in both the reference and the estimate
    n_hits = len(set(ref_labels) & set(est_labels))

    # False positives: estimated labels that are not in the reference
    FP_tot += len(est_labels) - n_hits
    # False negatives: reference labels that were not estimated
    FN_tot += len(ref_labels) - n_hits
    TP_tot += n_hits

In [11]:
# Now we can compute the global F1 measure from the totals accumulated over all segments
# (float() forces true division regardless of the Python version):
F1 = 2*TP_tot / float(2*TP_tot + FP_tot + FN_tot)
print(F1)

0.785714285714


## All combined into a single function
Now we're going to package all this into a single function that takes 2 JAMS filepaths and the desired segment duration and computes everything!

In [12]:
def _event_segment_spans(annotation, segment_length):
    # Return one (first_segment, last_segment, label) tuple per event in the annotation.
    spans = []
    for _, event in annotation.data.iterrows():

        # get event start/end times (in seconds)
        event_start = event['time'].total_seconds()
        event_end = event_start + event['duration'].total_seconds()

        # find first and last segments that overlap with the event
        # (floor division maps a time to the index of the segment containing it)
        first_seg_ind = int(event_start // segment_length)
        last_seg_ind = int(event_end // segment_length)

        # Handle corner case where the event ends exactly on a grid boundary:
        # it does not reach into the segment that starts there
        if event_end % segment_length == 0:
            last_seg_ind -= 1

        spans.append((first_seg_ind, last_seg_ind, event['value']))

    return spans


def _label_grid(spans, grid_size):
    # Build one set of labels per segment, holding every label whose span covers that segment.
    grid = [set() for _ in range(grid_size)]
    for first_seg_ind, last_seg_ind, label in spans:
        for seg in range(first_seg_ind, last_seg_ind + 1):
            grid[seg].add(label)
    return grid


def segment_f1(ref_jams_path, est_jams_path, segment_length=1):
    """Compute the segment-based F1 score between two JAMS sound event annotations.

    The timeline is cut into consecutive segments of `segment_length` seconds. Each
    event label is assigned to every segment its event overlaps, and true positives,
    false positives and false negatives are counted per segment and summed over all
    segments before computing F1 = 2*TP / (2*TP + FP + FN).

    Only the first annotation (`annotations[0]`) of each file is evaluated. Event
    `time` and `duration` values must expose `total_seconds()`.

    Parameters
    ----------
    ref_jams_path : str
        Path to the JAMS file holding the reference annotation.
    est_jams_path : str
        Path to the JAMS file holding the estimated annotation.
    segment_length : int or float
        Segment length in seconds (must be positive). Default is 1.

    Returns
    -------
    float
        The global segment-based F1 score in [0, 1]. Returns 0.0 when neither
        annotation contains any event (the score is undefined in that case).

    Raises
    ------
    ValueError
        If `segment_length` is not positive.
    """
    if segment_length <= 0:
        raise ValueError('segment_length must be positive')

    # Load the files the caller asked for
    ref_jam = jams.load(ref_jams_path)
    est_jam = jams.load(est_jams_path)

    # Slice the reference and estimate events into segment index spans
    ref_spans = _event_segment_spans(ref_jam.annotations[0], segment_length)
    est_spans = _event_segment_spans(est_jam.annotations[0], segment_length)

    # Size the grid from the durations stored in the loaded files (not from notebook
    # globals). A duration may be unset (None), in which case it is ignored.
    durations = [jam.file_metadata.duration for jam in (ref_jam, est_jam)]
    durations = [dur for dur in durations if dur is not None]
    grid_size = 0
    if durations:
        grid_size = int(np.ceil(max(durations) / float(segment_length)))

    # Make sure the grid also covers events that run past the declared duration,
    # instead of failing with an IndexError
    last_inds = [last_seg_ind for _, last_seg_ind, _ in ref_spans + est_spans]
    if last_inds:
        grid_size = max(grid_size, max(last_inds) + 1)

    # Label grids: one set of active labels per segment
    ref_grid = _label_grid(ref_spans, grid_size)
    est_grid = _label_grid(est_spans, grid_size)

    # Accumulate the per-segment counts into total scores
    TP_tot = FP_tot = FN_tot = 0
    for seg_ref_labels, seg_est_labels in zip(ref_grid, est_grid):

        # True positives are labels that appear in both sets
        TP = len(seg_ref_labels & seg_est_labels)

        TP_tot += TP
        # False positives are labels that appear in est but not in ref
        FP_tot += len(seg_est_labels) - TP
        # False negatives are labels that appear in ref but not in est
        FN_tot += len(seg_ref_labels) - TP

    # Global F1 measure; guard against the case where there is nothing to score
    denominator = 2*TP_tot + FP_tot + FN_tot
    if denominator == 0:
        return 0.0

    return 2*TP_tot / float(denominator)

### Fun time!
Now we can easily evaluate different references against different estimates, as well as try different segment lengths and see how they influence the results. Remember that the segment length basically defines how lenient we are about matching event onsets/offsets - the default of 1s means they can be at most 1s from each other.

In [13]:
# Evaluate using the default segment length of 1 s
segment_f1(ref_file, est_file, segment_length=1)

0.7857142857142857

In [14]:
# If we use a larger segment (3 s), we become more lenient and the score will go up
segment_f1(ref_file, est_file, segment_length=3)

0.9230769230769231

In [15]:
# If we use a smaller segment (0.5 s), we become more strict and the score will go down
segment_f1(ref_file, est_file, segment_length=0.5)

0.7692307692307693

In [16]:
# If we use a really small segment (10 ms), the evaluation is basically frame-based:
segment_f1(ref_file, est_file, segment_length=0.01)

0.7754770604953309

That's all folks!