In [1]:
# %load metric_helpers
import json
import os
import sys
import re
import glob

import pandas as pd
import numpy as np
import jiwer

from difflib import SequenceMatcher
from sklearn.metrics import f1_score
from sklearn.metrics import confusion_matrix

from collections import Counter

%matplotlib inline
import matplotlib.pyplot as plt
plt.rcParams['figure.dpi'] = 400
import seaborn as sn

In [2]:
def cm_analysis(y_true, y_pred, filename, labels, ymap=None, figsize=(10,10)):
    """
    Draw a confusion matrix as an annotated seaborn heatmap.

    Every cell is annotated with its raw count. Off-diagonal cells whose
    count is zero are left blank; diagonal cells always show their count.

    NOTE: the figure is only drawn, not written to disk. The `plt.savefig`
    call is disabled, so `filename` is accepted for backward compatibility
    but is currently unused.

    args:
      y_true:    true label of the data, with shape (nsamples,)
      y_pred:    prediction of the data, with shape (nsamples,)
      filename:  filename of figure file (unused while saving is disabled)
      labels:    string array, name the order of class labels in the confusion matrix.
                 use `clf.classes_` if using scikit-learn models.
                 with shape (nclass,).
      ymap:      dict: any -> string, length == nclass.
                 if not None, map the labels & ys to more understandable strings.
                 Caution: original y_true, y_pred and labels must align.
      figsize:   the size of the figure plotted.
    """
    if ymap is not None:
        y_pred = [ymap[yi] for yi in y_pred]
        y_true = [ymap[yi] for yi in y_true]
        labels = [ymap[yi] for yi in labels]
    cm = confusion_matrix(y_true, y_pred, labels=labels)
    # The previous row-percentage computation was never displayed and divided
    # by zero for labels without any true sample, so it has been removed.
    annot = np.empty_like(cm).astype(str)
    nrows, ncols = cm.shape
    for i in range(nrows):
        for j in range(ncols):
            count = cm[i, j]
            # Blank out empty off-diagonal cells so real confusions stand out.
            annot[i, j] = '' if (i != j and count == 0) else '%d' % count
    cm = pd.DataFrame(cm, index=labels, columns=labels)
    cm.index.name = 'Actual'
    cm.columns.name = 'Predicted'
    fig, ax = plt.subplots(figsize=figsize)
    sn.heatmap(cm, annot=annot, fmt='', ax=ax)
    #plt.savefig(filename)

In [3]:
def print_sseq(labels, hyps, tags=False):
    """
    Print a tab-separated, line-per-token alignment of a reference and a hypothesis.

    The two token sequences are aligned with difflib.SequenceMatcher. Each
    output line pairs one reference token with one hypothesis token. When an
    aligned span has more hypothesis tokens than reference tokens, the extra
    hypothesis tokens are paired with 'INS'; when it has more reference
    tokens, the extra reference tokens are paired with 'DEL'.

    args:
      labels: reference sequence. With tags=False a sequence of tokens; with
              tags=True a sequence of (token, tag) pairs.
      hyps:   hypothesis sequence, same layout as `labels`.
      tags:   if True, two further columns (reference tag, hypothesis tag)
              are printed, using the same 'INS'/'DEL' placeholders.
    """
    if tags:
        ref_toks = [pair[0] for pair in labels]
        pred_toks = [pair[0] for pair in hyps]
        ref_tags = [pair[1] for pair in labels]
        pred_tags = [pair[1] for pair in hyps]
        template = "{}\t{}\t{}\t{}"
    else:
        ref_toks = labels[:]
        pred_toks = hyps[:]
        template = "{}\t{}"

    matcher = SequenceMatcher(None, ref_toks, pred_toks)
    for _, i1, i2, j1, j2 in matcher.get_opcodes():
        n_ref = i2 - i1
        n_pred = j2 - j1
        # Walk both spans in lock-step; whichever side runs out first is
        # padded with its placeholder. This covers equal / insert / delete /
        # replace opcodes uniformly.
        for k in range(max(n_ref, n_pred)):
            has_ref = k < n_ref
            has_pred = k < n_pred
            fields = [ref_toks[i1 + k] if has_ref else 'INS',
                      pred_toks[j1 + k] if has_pred else 'DEL']
            if tags:
                fields.append(ref_tags[i1 + k] if has_ref else 'INS')
                fields.append(pred_tags[j1 + k] if has_pred else 'DEL')
            print(template.format(*fields))

                            

In [4]:
def align_sseq(labels, hyps):
    """
    Align reference and hypothesis (token, tag) sequences into a DataFrame.

    Tokens are aligned with difflib.SequenceMatcher. Each output row holds a
    reference token, a hypothesis token, and their tags. Surplus hypothesis
    tokens in an aligned span get 'INS' in the reference columns; surplus
    reference tokens get 'DEL' in the hypothesis columns.

    args:
      labels: sequence of (token, tag) pairs for the reference.
      hyps:   sequence of (token, tag) pairs for the hypothesis.
    returns:
      pandas DataFrame with columns
      ['toks_label', 'toks_pred', 'tags_label', 'tags_pred'].
    """
    columns = ['toks_label', 'toks_pred', 'tags_label', 'tags_pred']
    ref_toks = [pair[0] for pair in labels]
    pred_toks = [pair[0] for pair in hyps]
    ref_tags = [pair[1] for pair in labels]
    pred_tags = [pair[1] for pair in hyps]

    rows = []
    opcodes = SequenceMatcher(None, ref_toks, pred_toks).get_opcodes()
    for _, i1, i2, j1, j2 in opcodes:
        n_ref = i2 - i1
        n_pred = j2 - j1
        # Pair positions one-to-one; the shorter side is padded with its
        # placeholder, which handles every opcode kind the same way.
        for k in range(max(n_ref, n_pred)):
            if k < n_ref:
                ref_tok, ref_tag = ref_toks[i1 + k], ref_tags[i1 + k]
            else:
                ref_tok, ref_tag = 'INS', 'INS'
            if k < n_pred:
                pred_tok, pred_tag = pred_toks[j1 + k], pred_tags[j1 + k]
            else:
                pred_tok, pred_tag = 'DEL', 'DEL'
            rows.append((ref_tok, pred_tok, ref_tag, pred_tag))

    return pd.DataFrame(rows, columns=columns)
                            

In [5]:
def plot_confusion_matrix(cm, classes, normalize=False,
        title='Confusion matrix', cmap=plt.cm.Blues, va="center"):
    """
    Print a confusion matrix and draw it as an annotated image.

    args:
      cm:        2-D array of counts, indexed [label, predicted].
      classes:   tick labels, used for both axes.
      normalize: if True, each row is divided by its row sum before printing
                 and plotting, and cells are formatted with two decimals.
      title:     figure title.
      cmap:      matplotlib colormap passed to imshow.
      va:        vertical alignment of the per-cell text.
    """
    if normalize:
        cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
    print("Confusion matrix, normalized" if normalize else 'Confusion matrix, raw counts')
    print(cm)

    #fig = plt.figure(figsize=(12,8), dpi= 100, facecolor='w', edgecolor='k')
    plt.figure()
    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()

    positions = np.arange(len(classes))
    plt.xticks(positions, classes, rotation=45)
    plt.yticks(positions, classes)

    cell_fmt = '.2f' if normalize else 'd'
    # Cells darker than half the maximum get white text so they stay legible.
    half_max = cm.max() / 2.
    n_rows, n_cols = cm.shape[0], cm.shape[1]
    for row in range(n_rows):
        for col in range(n_cols):
            value = cm[row, col]
            text_color = "white" if value > half_max else "black"
            plt.text(col, row, format(value, cell_fmt),
                     horizontalalignment="center", verticalalignment=va,
                     color=text_color)

    plt.xlabel('predicted')
    plt.ylabel('label')
    plt.tight_layout()

In [6]:
def levenshtein(s1, s2):
    """
    Return the Levenshtein (edit) distance between two sequences.

    Uses the two-row dynamic-programming formulation. The rows are indexed
    over the shorter sequence, so only O(min(len(s1), len(s2))) cells are
    kept at a time.
    """
    # Make `longer` the outer-loop sequence; the distance is symmetric.
    if len(s1) < len(s2):
        longer, shorter = s2, s1
    else:
        longer, shorter = s1, s2

    if len(shorter) == 0:
        return len(longer)

    prev_row = range(len(shorter) + 1)
    for row_no, item_long in enumerate(longer, start=1):
        cur_row = [row_no]
        for col, item_short in enumerate(shorter):
            # Rows are one entry longer than `shorter`, hence col + 1 for
            # the cell directly above.
            from_above = prev_row[col + 1] + 1
            from_left = cur_row[col] + 1
            from_diag = prev_row[col] + (item_long != item_short)
            cur_row.append(min(from_above, from_left, from_diag))
        prev_row = cur_row
    return prev_row[-1]


def instance_metrics(ref_labels, hyp_labels):
    """
    Compute segment-level error rates for one sequence of joint labels.

    Labels are strings; a reference label starting with "E" closes a segment.
    Reference and hypothesis are compared position by position (zip), and
    tokens after the last reference "E..." label are not counted.

    returns a dict with:
      "DSER":  fraction of segments containing at least one position whose
               first character differs between hypothesis and reference.
      "strict segmentation error": token-weighted version of DSER.
      "DER":   fraction of segments containing at least one position whose
               full label differs.
      "strict joint error": token-weighted version of DER.
      "LWER":  jiwer.wer over the labels that are not "I".
    """
    n_segments = 0
    n_tokens = 0
    n_wrong_seg_segments = 0
    n_wrong_seg_tokens = 0
    n_wrong_joint_segments = 0
    n_wrong_joint_tokens = 0

    # Running counters for the segment currently being read.
    cur_len = 0
    cur_seg_errs = 0
    cur_joint_errs = 0
    for ref, hyp in zip(ref_labels, hyp_labels):
        cur_len += 1
        if hyp[0] != ref[0]:
            cur_seg_errs += 1
        if hyp != ref:
            cur_joint_errs += 1
        if not ref.startswith("E"):
            continue
        # Segment closed: fold its counters into the totals and reset.
        n_segments += 1
        n_tokens += cur_len
        if cur_seg_errs > 0:
            n_wrong_seg_segments += 1
            n_wrong_seg_tokens += cur_len
        if cur_joint_errs > 0:
            n_wrong_joint_segments += 1
            n_wrong_joint_tokens += cur_len
        cur_len = cur_seg_errs = cur_joint_errs = 0

    metrics = {
        "DSER": n_wrong_seg_segments / n_segments,
        "strict segmentation error": n_wrong_seg_tokens / n_tokens,
        "DER": n_wrong_joint_segments / n_segments,
        "strict joint error": n_wrong_joint_tokens / n_tokens,
    }

    ref_short = [lab for lab in ref_labels if lab != "I"]
    hyp_short = [lab for lab in hyp_labels if lab != "I"]
    metrics["LWER"] = jiwer.wer(ref_short, hyp_short)
    return metrics

def batch_metrics(refs, hyps):
    """
    Aggregate transcript-aligned metrics over a batch of label sequences.

    args:
      refs: list of reference label sequences.
      hyps: list of hypothesis label sequences, paired with `refs` by position.
    returns a dict with:
      - "DSER", "strict segmentation error", "DER", "strict joint error" and
        "Macro LWER": means of the per-sequence instance_metrics values;
      - "Macro F1" / "Micro F1": sklearn f1_score over all labels concatenated;
      - "Micro LWER": jiwer.wer over all concatenated labels that are not "I".
    """
    per_instance = [instance_metrics(ref_seq, hyp_seq)
                    for ref_seq, hyp_seq in zip(refs, hyps)]

    def macro(key):
        # Unweighted mean of one metric across the batch.
        return np.mean([scores[key] for scores in per_instance])

    all_refs = [lab for seq in refs for lab in seq]
    all_hyps = [lab for seq in hyps for lab in seq]
    macro_f1 = f1_score(all_refs, all_hyps, average="macro")
    micro_f1 = f1_score(all_refs, all_hyps, average="micro")

    micro_lwer = jiwer.wer([lab for lab in all_refs if lab != "I"],
                           [lab for lab in all_hyps if lab != "I"])

    return {
        "DSER": macro("DSER"),
        "strict segmentation error": macro("strict segmentation error"),
        "DER": macro("DER"),
        "strict joint error": macro("strict joint error"),
        "Macro F1": macro_f1,
        "Micro F1": micro_f1,
        "Macro LWER": macro("LWER"),
        "Micro LWER": micro_lwer,
    }

def instance_metrics_asr(ref_labels, hyp_labels):
    """
    Compute length-agnostic label metrics for one reference/hypothesis pair.

    Unlike instance_metrics, the two sequences are not compared position by
    position, so they may differ in length.

    returns a dict with:
      "LWER": jiwer.wer over the labels that are not "I".
      "LER":  jiwer.wer over all labels.
      "SER":  sum of nearest-boundary index distances in both directions
              (reference->hypothesis and hypothesis->reference), halved and
              divided by the reference length. If the hypothesis has no
              boundary, each term is replaced by the number of reference
              boundaries.
      "NSER": absolute difference in the number of non-"I" labels, divided
              by the reference count.
      "DAER": jiwer.wer after every position up to and including each
              boundary is filled with that boundary's label.
    A boundary is any label containing "E".
    """
    def fill_segments(seq, boundary_ids):
        # Repeat each boundary label over the span it closes.
        filled, start = [], 0
        for idx in boundary_ids:
            filled.extend([seq[idx]] * (idx - start + 1))
            start = idx + 1
        return filled

    ref_short = [lab for lab in ref_labels if lab != "I"]
    hyp_short = [lab for lab in hyp_labels if lab != "I"]
    lwer = jiwer.wer(ref_short, hyp_short)
    ler = jiwer.wer(ref_labels, hyp_labels)

    t_ids = [pos for pos, lab in enumerate(ref_labels) if "E" in lab]
    r_ids = [pos for pos, lab in enumerate(hyp_labels) if "E" in lab]

    total = 0
    for t in t_ids:
        total += min(abs(r - t) for r in r_ids) if r_ids else len(t_ids)
    if r_ids:
        total += sum(min(abs(r - t) for t in t_ids) for r in r_ids)
    else:
        total += len(t_ids)

    ser = total / 2 / len(ref_labels)
    nser = abs(len(ref_short) - len(hyp_short)) / len(ref_short)

    daer = jiwer.wer(fill_segments(ref_labels, t_ids),
                     fill_segments(hyp_labels, r_ids))
    return {"LWER": lwer,
            "LER": ler,
            "SER": ser,
            "NSER": nser,
            "DAER": daer}


def compute_aser(refs, hyps):
    """
    Segmentation error computed on the token-aligned reference and hypothesis.

    `refs` and `hyps` are sequences of (token, tag) pairs. They are aligned
    with align_sseq, and boundary positions (tags containing "E_") are
    compared in the aligned index space. The summed nearest-boundary
    distances in both directions are halved and divided by len(refs).
    If the hypothesis has no boundary, each term is replaced by the number
    of reference boundaries.
    """
    aligned = align_sseq(refs, hyps)
    ref_tags = aligned.tags_label.tolist()
    hyp_tags = aligned.tags_pred.tolist()
    # "E_" (not just "E") so that the 'DEL' placeholder is not mistaken for a boundary.
    t_ids = [pos for pos, tag in enumerate(ref_tags) if "E_" in tag]
    r_ids = [pos for pos, tag in enumerate(hyp_tags) if "E_" in tag]

    total = 0
    for t in t_ids:
        total += min(abs(r - t) for r in r_ids) if r_ids else len(t_ids)
    if r_ids:
        total += sum(min(abs(r - t) for t in t_ids) for r in r_ids)
    else:
        total += len(t_ids)

    return total / 2 / len(refs)
    
    
def batch_metrics_asr(refs, hyps):
    """
    Aggregate the length-agnostic metrics over a batch of label sequences.

    For each of LWER, LER, SER, NSER and DAER two values are returned:
      "Macro <name>": mean of the per-sequence instance_metrics_asr values;
      "Micro <name>": the same quantity computed once on all sequences
                      concatenated.
    A boundary is any label containing "E".
    """
    def fill_segments(seq, boundary_ids):
        # Repeat each boundary label over the span it closes.
        filled, start = [], 0
        for idx in boundary_ids:
            filled.extend([seq[idx]] * (idx - start + 1))
            start = idx + 1
        return filled

    per_instance = [instance_metrics_asr(ref_seq, hyp_seq)
                    for ref_seq, hyp_seq in zip(refs, hyps)]

    all_refs = [lab for seq in refs for lab in seq]
    all_hyps = [lab for seq in hyps for lab in seq]
    micro_lwer = jiwer.wer([lab for lab in all_refs if lab != "I"],
                           [lab for lab in all_hyps if lab != "I"])
    micro_ler = jiwer.wer(all_refs, all_hyps)

    t_ids = [pos for pos, lab in enumerate(all_refs) if "E" in lab]
    r_ids = [pos for pos, lab in enumerate(all_hyps) if "E" in lab]
    total = sum(min(abs(r - t) for r in r_ids) for t in t_ids)
    total += sum(min(abs(r - t) for t in t_ids) for r in r_ids)
    micro_ser = total / 2 / len(all_refs)

    micro_nser = abs(len(t_ids) - len(r_ids)) / len(t_ids)

    micro_daer = jiwer.wer(fill_segments(all_refs, t_ids),
                           fill_segments(all_hyps, r_ids))

    micro = {"LWER": micro_lwer, "LER": micro_ler, "SER": micro_ser,
             "NSER": micro_nser, "DAER": micro_daer}
    summary = {}
    for name in ("LWER", "LER", "SER", "NSER", "DAER"):
        summary["Macro " + name] = np.mean([scores[name] for scores in per_instance])
        summary["Micro " + name] = micro[name]
    return summary

def calc_time_ser(labels, hyps, start_times_orig, end_times_orig, start_times_asr, end_times_asr):
    """
    Time-based segmentation error between reference and hypothesis boundaries.

    Boundary tokens are those whose label contains "E". For the start times
    and for the end times of the boundary tokens, the distance from every
    reference boundary to its nearest hypothesis boundary and from every
    hypothesis boundary to its nearest reference boundary is summed. The
    total is divided by 4 and by the number of reference boundaries.

    args:
      labels / hyps: reference / hypothesis label sequences.
      start_times_orig, end_times_orig: per-token times indexed like `labels`.
      start_times_asr, end_times_asr:   per-token times indexed like `hyps`.
    """
    t_ids = [pos for pos, lab in enumerate(labels) if "E" in lab]
    r_ids = [pos for pos, lab in enumerate(hyps) if "E" in lab]

    stime_label = [start_times_orig[pos] for pos in t_ids]
    etime_label = [end_times_orig[pos] for pos in t_ids]
    stime_asr = [start_times_asr[pos] for pos in r_ids]
    etime_asr = [end_times_asr[pos] for pos in r_ids]

    # (points, candidates) pairs, accumulated in this fixed order.
    directions = (
        (stime_label, stime_asr),
        (stime_asr, stime_label),
        (etime_label, etime_asr),
        (etime_asr, etime_label),
    )
    total = 0
    for points, candidates in directions:
        for point in points:
            total += min(abs(cand - point) for cand in candidates)

    return total / 4 / len(t_ids)


def convert_to_list(this_str, turn_float=False):
    """
    Parse the string form of a flat list back into a Python list.

    Square brackets, single quotes and commas are deleted, and the remainder
    is split on whitespace. Items therefore cannot contain any of those
    characters or whitespace.

    args:
      this_str:   string such as "['a', 'b']" or "[1.0, 2.5]".
      turn_float: if True, every item is converted with float().
    returns:
      list of str, or list of float when turn_float is True.
    """
    # Deleting these four characters leaves only whitespace-separated items.
    strip_table = str.maketrans('', '', "[]',")
    items = this_str.translate(strip_table).split()
    if not turn_float:
        return items
    return [float(item) for item in items]

In [7]:
def get_results_df(model_name, split_name, merged_df):
    """
    Load one model's predictions and attach per-turn metrics.

    Reads the tab-separated file `<SPLIT>_<model_name>.res` from both the
    module-level `ref_dir` (transcript predictions) and `asr_dir` (ASR
    predictions), joins them by row index, merges the result with
    `merged_df` on `main_id`, and adds one column per metric.

    args:
      model_name: model identifier used in the `.res` file name.
      split_name: data split; upper-cased to build the file name.
      merged_df:  frame with at least `main_id`, `da_turn_orig` and
                  `da_turn_asr` columns.
    returns:
      DataFrame with `labels`, `hyps_trans`, `hyps_asr`, the columns of
      `merged_df`, and the metric columns DSER, DER, LWER/LER/SER/NSER/DAER
      with `_trans` and `_asr` suffixes, ASER_asr and ASER_trans.
    """
    res_name = split_name.upper() + '_' +  model_name + '.res'

    trans_df = pd.read_csv(os.path.join(ref_dir, res_name), sep="\t")
    asr_df = pd.read_csv(os.path.join(asr_dir, res_name), sep="\t")
    asr_df = asr_df.rename(columns={'PREDS': 'PREDS_ASR'})
    asr_df['PREDS_ASR'] = asr_df.PREDS_ASR.apply(lambda x: x.replace(" </t>", ""))

    preds_df = trans_df.join(asr_df)
    # Turn the space-separated label strings into lists of labels.
    for list_col, raw_col in (('labels', 'LABELS'),
                              ('hyps_trans', 'PREDS'),
                              ('hyps_asr', 'PREDS_ASR')):
        preds_df[list_col] = preds_df[raw_col].apply(lambda x: x.split())
    preds_df = preds_df.rename(columns={'TURN_ID': 'main_id'})
    preds_df = preds_df.drop(columns=['LABELS', 'PREDS', 'PREDS_ASR'])
    res_df = pd.merge(preds_df, merged_df, on='main_id')

    joint_scores = res_df.apply(
        lambda row: instance_metrics(row.labels, row.hyps_trans), axis=1).tolist()
    asr_scores = res_df.apply(
        lambda row: instance_metrics_asr(row.labels, row.hyps_asr), axis=1).tolist()
    trans_scores = res_df.apply(
        lambda row: instance_metrics_asr(row.labels, row.hyps_trans), axis=1).tolist()

    for col, key in (('DSER', 'DSER'), ('DER', 'DER'), ('LWER_trans', 'LWER')):
        res_df[col] = [scores[key] for scores in joint_scores]
    for key in ('LER', 'SER', 'NSER', 'DAER'):
        res_df[key + '_trans'] = [scores[key] for scores in trans_scores]
    for key in ('LWER', 'LER', 'SER', 'NSER', 'DAER'):
        res_df[key + '_asr'] = [scores[key] for scores in asr_scores]

    def aser_for(token_col, hyp_col):
        # Pair tokens with labels on both sides and score the alignment.
        return res_df.apply(
            lambda row: compute_aser(list(zip(row.da_turn_orig, row.labels)),
                                     list(zip(row[token_col], row[hyp_col]))),
            axis=1)

    res_df['ASER_asr'] = aser_for('da_turn_asr', 'hyps_asr')
    res_df['ASER_trans'] = aser_for('da_turn_orig', 'hyps_trans')

    return res_df

# Small examples

In [None]:
# Toy joint-label sequences: reference, transcript-based prediction, and an
# ASR-based prediction that is one token longer than the reference.
labels = ["E_b", "E_sv", "I", "I", "E_sd"]
preds = ["E_aa", "I", "I", "E_sv", "E_sd"]
asr = ["E_ny", "I", "I", "E_sv", "E_sd", "E_ny"]

# Tokens that go with `labels` and `asr` respectively.
toks2 = ["right", "yes", "he", "loves", "cats"]
astoks2 = ["yes", "yes", "he", "loves", "cats", "yes"]
#instance_metrics(labels, preds)
#batch_metrics([labels], [preds])
#instance_metrics_asr(labels, asr)
# Printed explicitly: only the last expression of a cell is displayed, so a
# bare call here would be computed and then thrown away.
print("ASER:", compute_aser(list(zip(toks2, labels)), list(zip(astoks2, asr))))

#f1_score(labels, preds, average="micro")
# jiwer.wer on sequences of different length (5 vs. 6 labels).
l = ['b', 'sv', 'sd', 'sd', 'sd']
h = ['ny', 'sv', 'sv', 'sv', 'sd', 'ny']
jiwer.wer(l, h)

# Load data

In [8]:
# Directories holding the `.res` prediction files read by get_results_df.
# They default to the original locations and can be overridden with the
# DA_REF_DIR / DA_ASR_DIR environment variables, so the notebook can run on
# another machine without editing this cell.
ref_dir = os.environ.get(
    "DA_REF_DIR",
    "/homes/ttmt001/transitory/dialog-act-prediction/data/joint/ref_out")
asr_dir = os.environ.get(
    "DA_ASR_DIR",
    "/homes/ttmt001/transitory/dialog-act-prediction/data/joint/asr_out")

# Dialog-act tags; each one becomes an "E_<tag>" joint label.
dialog_acts = ['sd', 'b', 'sv', '%', 'aa', 'ba', 'qy', 'ny', 'fc',
                'qw', 'nn', 'bk', 'fo_o_fw_"_by_bc', 'h', 'qy^d', 'bh', '^q',
                'bf', 'na', 'ad', '^2', 'b^m', 'qo', 'qh', '^h', 'ar', 'ng',
                'br', 'no', 'fp', 'qrr', 'arp_nd', 't3', 'oo_co_cc', 't1', 'bd',
                'aap_am', '^g', 'qw^d', 'fa', 'ft']
# Label inventory: "I" first, then one "E_<tag>" label per dialog act.
joint_labels = ["I"] + ["E_"+da for da in dialog_acts]
# Integer id -> joint label, in `joint_labels` order.
label_map = dict(enumerate(joint_labels))
#label_map


In [9]:
# Load the merged per-turn table for the chosen split from the working directory.
split_name = 'dev'
filename = split_name + "_merged.tsv"
merged_df = pd.read_csv(filename, sep="\t")
# List-valued columns are stored as strings in the TSV; parse them back into lists.
for column in ['joint_labels', 'da_turn_orig', 'da_turn_asr']:
    merged_df[column] = merged_df[column].apply(convert_to_list)
# Same for the timing columns, additionally converting every item to float.
for column in ['start_times_orig', 'end_times_orig', 'start_times_asr', 'end_times_asr']:
    merged_df[column] = merged_df[column].apply(convert_to_list, turn_float=True)

# "sp*" frames are reported below as the prosody/speech model.
sp10004_df = get_results_df("sp10004", split_name, merged_df)
sp30004_df = get_results_df("sp30004", split_name, merged_df)

# "tt*" frames are reported below as the text model.
tt1000_df = get_results_df("tt1000", split_name, merged_df)
tt3000_df = get_results_df("tt3000", split_name, merged_df)


In [None]:
# Diagnostics

# WER of the ASR token stream against the reference tokens, over all turns.
# Printed explicitly: as a bare mid-cell expression the value was discarded.
tokens = sp10004_df.da_turn_orig.tolist()
tokens = [label for ref in tokens for label in ref]
asr = sp10004_df.da_turn_asr.tolist()
asr = [label for ref in asr for label in ref]
print("WER:", jiwer.wer(tokens, asr))

# Macro ASER = mean of the per-turn values; micro ASER = computed once on all
# turns concatenated. Reported per model for transcript and ASR hypotheses.
for model_df, (trans_header, asr_header) in (
        (sp10004_df, ("Prosody mode, trans:", "Prosody mode, asr:")),
        (tt1000_df, ("Text model, trans:", "Text mode, asr:"))):
    refs = model_df.labels.tolist()
    hyps = model_df.hyps_asr.tolist()
    trans = model_df.hyps_trans.tolist()
    flattened_refs = [label for ref in refs for label in ref]
    flattened_hyps = [label for hyp in hyps for label in hyp]
    flattened_trans = [label for hyp in trans for label in hyp]

    refs = model_df.da_turn_orig.tolist()
    hyps = model_df.da_turn_asr.tolist()
    flattened_refs_toks = [label for ref in refs for label in ref]
    flattened_hyps_toks = [label for hyp in hyps for label in hyp]

    # (token, label) pairs: reference, ASR hypothesis, transcript hypothesis.
    aa = list(zip(flattened_refs_toks, flattened_refs))
    bb = list(zip(flattened_hyps_toks, flattened_hyps))
    cc = list(zip(flattened_refs_toks, flattened_trans))

    print(trans_header)
    print("Macro ASER:", model_df.ASER_trans.mean())
    print("Micro ASER:", compute_aser(aa, cc))

    print(asr_header)
    print("Macro ASER:", model_df.ASER_asr.mean())
    print("Micro ASER:", compute_aser(aa, bb))

In [None]:
len(sp10004_df.iloc[1].da_turn_asr)

In [None]:
batch_metrics(sp10004_df.labels.tolist(), sp10004_df.hyps_trans.tolist())

In [None]:
batch_metrics(tt1000_df.labels.tolist(), tt1000_df.hyps_trans.tolist())

In [None]:
batch_metrics_asr(sp10004_df.labels.tolist(), sp10004_df.hyps_trans.tolist())

In [None]:
batch_metrics_asr(tt1000_df.labels.tolist(), tt1000_df.hyps_trans.tolist())

In [None]:
batch_metrics_asr(sp10004_df.labels.tolist(), sp10004_df.hyps_asr.tolist())

In [None]:
batch_metrics_asr(tt1000_df.labels.tolist(), tt1000_df.hyps_asr.tolist())

In [None]:
#sp10004_df.head(3)
#tt1000_df.head(3)

In [None]:
sp5001_df = get_results_df("sp5001", split_name, merged_df)
tt5001_df = get_results_df("tt5001", split_name, merged_df)

# Print every result: a notebook cell only displays its last expression, so
# bare calls would silently drop the first three metric dictionaries.
for _title, _model_df, _hyp_col in (
        ("tt5001, trans", tt5001_df, "hyps_trans"),
        ("sp5001, trans", sp5001_df, "hyps_trans"),
        ("tt5001, asr", tt5001_df, "hyps_asr"),
        ("sp5001, asr", sp5001_df, "hyps_asr")):
    print(_title)
    print(batch_metrics_asr(_model_df.labels.tolist(), _model_df[_hyp_col].tolist()))

## On test set:

In [None]:
# Load the merged per-turn table for the test split from the working directory.
filename = "test_merged.tsv"
mt_df = pd.read_csv(filename, sep="\t")
# List-valued columns are stored as strings in the TSV; parse them back into lists.
for column in ['joint_labels', 'da_turn_orig', 'da_turn_asr']:
    mt_df[column] = mt_df[column].apply(convert_to_list)
# Same for the timing columns, additionally converting every item to float.
for column in ['start_times_orig', 'end_times_orig', 'start_times_asr', 'end_times_asr']:
    mt_df[column] = mt_df[column].apply(convert_to_list, turn_float=True)

test_sp10004_df = get_results_df("sp10004", "test", mt_df)
test_tt1000_df = get_results_df("tt1000", "test", mt_df)
#test_sp30004_df = get_results_df("sp30004", "test", mt_df)
#test_tt3000_df = get_results_df("tt3000", "test", mt_df)

# Flatten reference and ASR tokens over all turns for a single WER figure.
tokens = test_sp10004_df.da_turn_orig.tolist()
tokens = [label for ref in tokens for label in ref]
asr = test_sp10004_df.da_turn_asr.tolist()
asr = [label for ref in asr for label in ref]

# Last expression of the cell, so the WER is displayed as the cell output.
jiwer.wer(tokens,asr)

In [None]:
#print("Macro SER:", sp10004_df.SER_asr.mean())

# Macro ASER = mean of the per-turn values; micro ASER = computed once on all
# turns concatenated. Reported per model for transcript and ASR hypotheses.
for model_df, (trans_header, asr_header) in (
        (test_sp10004_df, ("Prosody mode, trans:", "Prosody mode, asr:")),
        (test_tt1000_df, ("Text model, trans:", "Text model, asr:"))):
    refs = model_df.labels.tolist()
    hyps = model_df.hyps_asr.tolist()
    trans = model_df.hyps_trans.tolist()
    flattened_refs = [label for ref in refs for label in ref]
    flattened_hyps = [label for hyp in hyps for label in hyp]
    flattened_trans = [label for hyp in trans for label in hyp]

    refs = model_df.da_turn_orig.tolist()
    hyps = model_df.da_turn_asr.tolist()
    flattened_refs_toks = [label for ref in refs for label in ref]
    flattened_hyps_toks = [label for hyp in hyps for label in hyp]

    # (token, label) pairs: reference, ASR hypothesis, transcript hypothesis.
    aa = list(zip(flattened_refs_toks, flattened_refs))
    bb = list(zip(flattened_hyps_toks, flattened_hyps))
    cc = list(zip(flattened_refs_toks, flattened_trans))

    print(trans_header)
    print("Macro ASER:", model_df.ASER_trans.mean())
    print("Micro ASER:", compute_aser(aa, cc))

    print(asr_header)
    print("Macro ASER:", model_df.ASER_asr.mean())
    print("Micro ASER:", compute_aser(aa, bb))

In [None]:
batch_metrics_asr(test_tt1000_df.labels.tolist(), test_tt1000_df.hyps_trans.tolist())

In [None]:
batch_metrics_asr(test_sp10004_df.labels.tolist(), test_sp10004_df.hyps_trans.tolist())

In [None]:
batch_metrics_asr(test_tt1000_df.labels.tolist(), test_tt1000_df.hyps_asr.tolist())

In [None]:
batch_metrics_asr(test_sp10004_df.labels.tolist(), test_sp10004_df.hyps_asr.tolist())

In [None]:
batch_metrics(test_tt1000_df.labels.tolist(), test_tt1000_df.hyps_trans.tolist())

In [None]:
batch_metrics(test_sp10004_df.labels.tolist(), test_sp10004_df.hyps_trans.tolist())

## Questions

1. Where does the speech model help, given a perfect transcript?
2. Which tags get confused most by the transcript-only model vs. the speech model?
3. Where does the speech model help on ASR output?
4. Which tags get confused most on ASR output when using only the ASR transcript vs. the ASR transcript + speech?

In [11]:
# Columns that differ per model; they get a "_sp" / "_tt" suffix before merging.
rename_cols = ["hyps_trans", "hyps_asr", "DSER", "DER", "LWER_trans", "LER_trans",
              "SER_trans", "NSER_trans", "DAER_trans", "ASER_trans", "ASER_asr",
              "LWER_asr", "LER_asr", "SER_asr", "NSER_asr", "DAER_asr"]
# Columns that are dropped from the tt frame before merging, so only the sp copy remains.
dup_cols = ["labels", "joint_labels", "da_turn_orig", "da_turn_asr",
           "start_times_orig", "start_times_asr", "end_times_orig", "end_times_asr"]

new_sp = [x+'_sp' for x in rename_cols]
new_tt = [x+'_tt' for x in rename_cols]
temp_sp = sp10004_df.rename(columns = dict(zip(rename_cols, new_sp)))
temp_tt = tt1000_df.rename(columns = dict(zip(rename_cols, new_tt))).drop(columns = dup_cols)

new_df = pd.merge(temp_sp, temp_tt, on="main_id")

# Number of segment-end labels (labels containing "E") per turn.
for count_col, label_col in (("num_seg_label", "labels"),
                             ("num_seg_trans_sp", "hyps_trans_sp"),
                             ("num_seg_asr_sp", "hyps_asr_sp"),
                             ("num_seg_trans_tt", "hyps_trans_tt"),
                             ("num_seg_asr_tt", "hyps_asr_tt")):
    new_df[count_col] = new_df[label_col].apply(lambda x: sum(1 for y in x if "E" in y))

# Predicted minus reference segment count, per condition.
for condition in ("trans_sp", "trans_tt", "asr_sp", "asr_tt"):
    new_df['diff_seg_' + condition] = new_df['num_seg_' + condition] - new_df['num_seg_label']

## History length = 3

In [None]:
# Same merge as for new_df, for the sp30004 / tt3000 result frames.
temp_sp = sp30004_df.rename(columns = dict(zip(rename_cols, new_sp)))
temp_tt = tt3000_df.rename(columns = dict(zip(rename_cols, new_tt))).drop(columns = dup_cols)

new3_df = pd.merge(temp_sp, temp_tt, on="main_id")

# Number of segment-end labels (labels containing "E") per turn.
for count_col, label_col in (("num_seg_label", "labels"),
                             ("num_seg_trans_sp", "hyps_trans_sp"),
                             ("num_seg_asr_sp", "hyps_asr_sp"),
                             ("num_seg_trans_tt", "hyps_trans_tt"),
                             ("num_seg_asr_tt", "hyps_asr_tt")):
    new3_df[count_col] = new3_df[label_col].apply(lambda x: sum(1 for y in x if "E" in y))

# Predicted minus reference segment count, per condition.
for condition in ("trans_sp", "trans_tt", "asr_sp", "asr_tt"):
    new3_df['diff_seg_' + condition] = new3_df['num_seg_' + condition] - new3_df['num_seg_label']

## More analysis

In [12]:
# measures1 exist once per model; measures2 exist per model for both the
# transcript ("trans") and the ASR ("asr") hypotheses.
measures1 = ["DER", "DSER"]
measures2 = ["SER", "NSER", "LER", "LWER", "DAER", "ASER"]
measure_cols_trans = []
measure_cols_asr = []

# Each difference is tt minus sp, so a positive value means the sp model has
# the lower error on that turn.
for m in measures1:
    diff_col = 'sptt_diff_' + m
    new_df[diff_col] = new_df[m + "_tt"] - new_df[m + "_sp"]
    measure_cols_trans.append(diff_col)

for suffix, collected in (('trans', measure_cols_trans), ('asr', measure_cols_asr)):
    for m in measures2:
        diff_col = 'sptt_diff_' + suffix + '_' + m
        base = m + "_" + suffix
        new_df[diff_col] = new_df[base + "_tt"] - new_df[base + "_sp"]
        collected.append(diff_col)

measure_cols = measure_cols_trans + measure_cols_asr

# Raw (non-differenced) metric columns, in display order.
metric_cols = [
    "DER_tt", "DSER_tt", "DER_sp", "DSER_sp",
    "SER_trans_tt", "NSER_trans_tt", "LER_trans_tt", "LWER_trans_tt", "DAER_trans_tt",
    "SER_trans_sp", "NSER_trans_sp", "LER_trans_sp", "LWER_trans_sp", "DAER_trans_sp",
    "SER_asr_tt", "NSER_asr_tt", "LER_asr_tt", "LWER_asr_tt", "DAER_asr_tt",
    "SER_asr_sp", "NSER_asr_sp", "LER_asr_sp", "LWER_asr_sp", "DAER_asr_sp",
    "ASER_trans_tt", "ASER_trans_sp", "ASER_asr_tt", "ASER_asr_sp",
]

In [None]:
# Per-turn lists, kept unflattened so the turn lengths are still available.
turn_tokens = new_df.da_turn_orig.tolist()
temp = new_df.labels.tolist()
sp = new_df.hyps_trans_sp.tolist()
tt = new_df.hyps_trans_tt.tolist()

# Flatten everything to one entry per token.
tokens = [tok for turn in turn_tokens for tok in turn]
labels = [label for ref in temp for label in ref]
hyps_sp = [label for ref in sp for label in ref]
hyps_tt = [label for ref in tt for label in ref]

# Turn index of every token. The lengths must come from the per-turn lists:
# taking len() of the already flattened tokens gives the number of
# characters in each word, which mislabels the turn index.
idxlen = [len(turn) for turn in turn_tokens]
idxs = []
for i, ilen in enumerate(idxlen):
    idxs += [i] * ilen

# tups: (turn index, token, reference label, speech hyp, text hyp)
tups = list(zip(idxs, tokens, labels, hyps_sp, hyps_tt))
# Missed boundary: model predicts "I" where the reference has a different label.
missed_segments_sp = [x for x in tups if x[-2]=='I' and x[2]!=x[-2]]
missed_segments_tt = [x for x in tups if x[-1]=='I' and x[2]!=x[-1]]

# Inserted boundary: reference is "I" but the model predicts something else.
inserted_segments_sp = [x for x in tups if x[2]=='I' and x[2]!=x[-2]]
inserted_segments_tt = [x for x in tups if x[2]=='I' and x[2]!=x[-1]]

total_segments = len([x for x in labels if x != "I"])
print("Total number of segments", total_segments)
print("Number of missed segments, speech model:", len(missed_segments_sp))
print("Number of missed segments, text model:", len(missed_segments_tt))
print("Number of inserted segments, speech model:", len(inserted_segments_sp))
print("Number of inserted segments, text model:", len(inserted_segments_tt))
print()
print("Most common tokens associated with missed E_, speech model")
print(Counter([x[1] for x in missed_segments_sp]).most_common(15))
print("Most common tokens associated with inserted E_, speech model")
print(Counter([x[1] for x in inserted_segments_sp]).most_common(15))
print()
print("Most common tokens associated with missed E_, text model")
print(Counter([x[1] for x in missed_segments_tt]).most_common(15))
print("Most common tokens associated with inserted E_, text model")
print(Counter([x[1] for x in inserted_segments_tt]).most_common(15))



In [None]:
missed_segments_sp

In [None]:
# tups: idxs, tokens, labels, hyps_sp, hyps_tt
# How often each model predicts E_sv / E_sd on the transcript.
# Position -2 is the speech-model hypothesis, -1 the text-model hypothesis.
for hyp_pos in (-2, -1):
    for tag in ("E_sv", "E_sd"):
        print(sum(1 for x in tups if x[hyp_pos] == tag))


In [14]:
# NOTE(review): hard-coded counts with no visible source -- presumably copied
# from printed output of another cell; confirm and replace with named values.
500/(500 + 1272)

0.28216704288939054

In [15]:
# NOTE(review): hard-coded counts with no visible source -- presumably copied
# from printed output of another cell; confirm and replace with named values.
393/(393 + 1384)

0.22115925717501406

In [13]:
# Display the names of the `sptt_diff_*` columns collected in `measure_cols`.
measure_cols

['sptt_diff_DER',
 'sptt_diff_DSER',
 'sptt_diff_trans_SER',
 'sptt_diff_trans_NSER',
 'sptt_diff_trans_LER',
 'sptt_diff_trans_LWER',
 'sptt_diff_trans_DAER',
 'sptt_diff_trans_ASER',
 'sptt_diff_asr_SER',
 'sptt_diff_asr_NSER',
 'sptt_diff_asr_LER',
 'sptt_diff_asr_LWER',
 'sptt_diff_asr_DAER',
 'sptt_diff_asr_ASER']

# Find specific examples

In [None]:
# Sort the turns in ascending order of the chosen difference column and show
# the slice [-50:-20] of the sorted frame, i.e. it stops 20 rows before the end.
col = "sptt_diff_asr_LWER"
new_df.sort_values(col)[['main_id', 'wer_x','num_seg_label', 'da_turn_orig']][-50:-20]

In [None]:
# Pick one turn by position and show its id, its time span and its metrics.
#loc = 387
loc = 1634
selected_turn = new_df.iloc[loc]

print("TIMES", "loc =", loc, selected_turn.main_id)
# First start time and last end time of the turn: original transcript first,
# then the ASR output (per the column names).
orig_span = (selected_turn.start_times_orig[0], selected_turn.end_times_orig[-1])
print(*orig_span)
asr_span = (selected_turn.start_times_asr[0], selected_turn.end_times_asr[-1])
print(*asr_span)

selected_turn[metric_cols]

In [None]:
# Values of the `measure_cols` columns for the turn at position `loc`.
new_df.iloc[loc][measure_cols]

In [None]:
selected_turn = new_df.iloc[loc]

# (token, tag) pairs: reference, then the three hypothesis variants.
a = list(zip(selected_turn.da_turn_orig, selected_turn.labels))
b1 = list(zip(selected_turn.da_turn_orig, selected_turn.hyps_trans_sp))
b2 = list(zip(selected_turn.da_turn_asr, selected_turn.hyps_asr_sp))
b3 = list(zip(selected_turn.da_turn_asr, selected_turn.hyps_asr_tt))

# PRINT LABEL & TRANSCRIPT SEQUENCE
# One "token, reference label, hypothesis" line per token; the text-model
# hypotheses are listed first, then the speech-model hypotheses.
for hyp_tags in (selected_turn.hyps_trans_tt, selected_turn.hyps_trans_sp):
    for ref_tag, token, hyp_tag in zip(selected_turn.labels, selected_turn.da_turn_orig, hyp_tags):
        print(token, "\t\t", ref_tag, "\t", hyp_tag)
    print()


In [None]:
# PRINT LABEL & ASR SEQUENCE

# Compare the reference pairs `a` with the ASR-side hypotheses:
# first `b3` (built from hyps_asr_tt), then `b2` (built from hyps_asr_sp).
print_sseq(a, b3, tags=True)
print()
print_sseq(a, b2, tags=True)


## Confusion matrices

In [None]:
# Per-turn sequences; kept under these names because later cells filter them.
temp = new_df.labels.tolist()
sp = new_df.hyps_trans_sp.tolist()
tt = new_df.hyps_trans_tt.tolist()

# Corpus-level (flattened) versions of the tokens, labels and hypotheses.
tokens, labels, hyps_sp, hyps_tt = [], [], [], []
for flat, per_turn in (
    (tokens, new_df.da_turn_orig.tolist()),
    (labels, temp),
    (hyps_sp, sp),
    (hyps_tt, tt),
):
    for turn_items in per_turn:
        flat.extend(turn_items)


classes = [*label_map.values()]
# Tag subsets used as confusion-matrix axes.
#subclasses = ["I", "E_sd", "E_sv", "E_%", "E_b", "E_aa", "E_ba", "E_bh", "E_qy", "E_qy^d"]
subclasses = ["E_sd", "E_sv", "E_%", "E_b", "E_aa", "E_ba", "E_bh", "E_qy", "E_qy^d"]

subclasses1 = ["E_sd", "E_sv", "E_qy", "E_qy^d"]
subclasses2 = ["E_b", "E_aa", "E_ba", "E_bh"]
subclasses3 = ["E_b", "E_ba"]

# Token-level confusion matrices, speech model first and text model second.
# Plotting is disabled, so only the text-model matrix remains in `cm`.
for model_hyps in (hyps_sp, hyps_tt):
    cm = confusion_matrix(labels, model_hyps, labels=subclasses)
    #plot_confusion_matrix(cm, subclasses, title="Speech Model" / "Text Model")

#print(cm)
#sum(cm[0][1:])

In [None]:
# Keep only the tags containing "E_" from the flattened sequences.
labels_short = list(filter(lambda tag: "E_" in tag, labels))
hyps_sp_short = list(filter(lambda tag: "E_" in tag, hyps_sp))
hyps_tt_short = list(filter(lambda tag: "E_" in tag, hyps_tt))

# Align reference and hypothesis tag sequences, then plot the confusion matrix
# of the aligned tags for the speech model and for the text model.
# align_sseq receives (tag, tag) pairs here; other cells pass it (token, tag)
# pairs, so the tag presumably stands in for the token -- confirm.
for model_title, hyp_tags in (("Speech Model", hyps_sp_short), ("Text Model", hyps_tt_short)):
    df = align_sseq(
        [(tag, tag) for tag in labels_short],
        [(tag, tag) for tag in hyp_tags],
    )
    ref_labels = df.tags_label.tolist()
    hyp_labels = df.tags_pred.tolist()
    cm = confusion_matrix(ref_labels, hyp_labels, labels=subclasses)
    plot_confusion_matrix(cm, subclasses, title=model_title)

#print(cm)
#sum(cm[0][1:])


In [None]:
# SLER cms
# Per-turn tag sequences restricted to the tags containing "E_".
labels_short = [list(filter(lambda tag: "E_" in tag, turn)) for turn in temp]
hyps_sp_short = [list(filter(lambda tag: "E_" in tag, turn)) for turn in sp]
hyps_tt_short = [list(filter(lambda tag: "E_" in tag, turn)) for turn in tt]

# Accumulate one confusion matrix per turn (speech model only) into sp_table.
n_classes = len(subclasses)
sp_table = np.zeros((n_classes, n_classes))
for ref, hyp in zip(labels_short, hyps_sp_short):
    df = align_sseq([(tag, tag) for tag in ref], [(tag, tag) for tag in hyp])
    ref_labels = df.tags_label.tolist()
    hyp_labels = df.tags_pred.tolist()
    print(ref_labels, hyp_labels)
    cm = confusion_matrix(ref_labels, hyp_labels, labels=subclasses)
    sp_table = np.add(sp_table, cm)

sp_table

In [None]:
# Speech model: (reference, hypothesis) pairs for every mislabeled token.
err_sp = [(ref, hyp) for ref, hyp in zip(labels, hyps_sp) if ref != hyp]
ys_true = [ref for ref, _ in err_sp]
ys_pred = [hyp for _, hyp in err_sp]

csl = Counter(ys_true) # most frequently missed labels
csh = Counter(ys_pred) # most frequently falsely recognized labels

# Ten most frequent tags on each side of the speech-model errors.
slabel_err = [tag for tag, _ in csl.most_common(10)]
spred_err = [tag for tag, _ in csh.most_common(10)]
sclasses = set(spred_err) | set(slabel_err)

# Same bookkeeping for the text model.
err_tt = [(ref, hyp) for ref, hyp in zip(labels, hyps_tt) if ref != hyp]
yt_true = [ref for ref, _ in err_tt]
yt_pred = [hyp for _, hyp in err_tt]

ctl = Counter(yt_true)
cth = Counter(yt_pred)

tlabel_err = [tag for tag, _ in ctl.most_common(10)]
tpred_err = [tag for tag, _ in cth.most_common(10)]
tclasses = set(tpred_err) | set(tlabel_err)

# Tags that are frequent error classes for both models.
common_classes = tclasses & sclasses
common_classes

#[x for x in err_sp if x[0] == "I" and x[1] == "E_%"]
# csl.most_common()
# csh.most_common()

In [None]:
# Confusions among the shared frequent error classes, using only the
# speech-model errors (ys_true / ys_pred).
shared_error_classes = sorted(common_classes)
cm = confusion_matrix(ys_true, ys_pred, labels=shared_error_classes)
plot_confusion_matrix(
    cm,
    shared_error_classes,
    title="Most Common Errors -- Prosody Model",
)

In [None]:
# Confusions among the shared frequent error classes, using only the
# text-model errors (yt_true / yt_pred).
shared_error_classes = sorted(common_classes)
cm = confusion_matrix(yt_true, yt_pred, labels=shared_error_classes)
plot_confusion_matrix(
    cm,
    shared_error_classes,
    title="Most Common Errors -- Text Model",
)

In [None]:
# Frequent error classes of the text model that are not in the speech model's set.
tclasses.difference(sclasses)

In [None]:
# Frequent error classes of the speech model that are not in the text model's set.
sclasses.difference(tclasses)

## Misc 

In [None]:
# Row specific (debug)
# NOTE(review): `row` and `res_df` are not assigned in this cell or in the
# nearby cells -- presumably leftover kernel state; confirm where they come
# from before relying on this cell after a kernel restart.
# Matcher between the ASR tokens and the original tokens of `row`; it is not
# used again within this cell.
sseq = SequenceMatcher(None, row.da_turn_asr, row.da_turn_orig)

# Reference side, one tuple per position: (position, label, start time, end time, token).
ref_side = list(zip(range(len(row.labels)),row.labels, row.start_times_orig, row.end_times_orig, row.da_turn_orig))
# Keep the entries whose label contains "E" (other cells test for "E_").
ref_segments = [x for x in ref_side if "E" in x[1]]
# Hypothesis (ASR) side, same tuple layout.
hyp_side = list(zip(range(len(row.hyps_asr)), row.hyps_asr, row.start_times_asr, row.end_times_asr, row.da_turn_asr))
hyp_segments = [x for x in hyp_side if "E" in x[1]]


# Per-turn label / hypothesis lists of `res_df`, passed to the batch metric helpers.
ref_list = res_df.labels.tolist()
trans_list = res_df.hyps_trans.tolist()
asr_list = res_df.hyps_asr.tolist()

batch_metrics(ref_list, trans_list)

batch_metrics_asr(ref_list, asr_list)


In [None]:
# Toy example: the edit opcodes SequenceMatcher reports for two short tag
# sequences of different lengths that share only their first element.
a = ["aa"] + ["sv"] * 4
b = ["aa"] + ["sd"] * 3 + ["ny"] * 2

sseq = SequenceMatcher(isjunk=None, a=a, b=b)
sseq.get_opcodes()

#print(levenshtein(a, b) / len(a))
#print(jiwer.wer(a, b))

In [None]:

loc = 1600
selected_turn = new_df.iloc[loc]

# (token, tag) pairs: reference, then the three hypothesis variants.
a = list(zip(selected_turn.da_turn_orig, selected_turn.labels))
b1 = list(zip(selected_turn.da_turn_orig, selected_turn.hyps_trans_sp))
b2 = list(zip(selected_turn.da_turn_asr, selected_turn.hyps_asr_sp))
b3 = list(zip(selected_turn.da_turn_asr, selected_turn.hyps_asr_tt))

# The displayed alignment uses b3 (hyps_asr_tt), while both printed metrics
# use the hyps_asr_sp hypotheses.
adf = align_sseq(a, b3)
asr_metrics = instance_metrics_asr(selected_turn.labels, selected_turn.hyps_asr_sp)
print(asr_metrics)
print("aser:", compute_aser(a, b2))

adf

In [None]:
# Columns passed to calc_time_ser, in argument order.
time_ser_inputs = [
    "labels",
    "hyps_asr",
    "start_times_orig",
    "end_times_orig",
    "start_times_asr",
    "end_times_asr",
]

# Per-turn value on a copy of tt1000_df, then its mean over turns.
df = tt1000_df.copy()
df['time_ser'] = df.apply(
    lambda row: calc_time_ser(*[row[name] for name in time_ser_inputs]),
    axis=1,
)

print(df['time_ser'].mean())
print()

# Same computation on the whole frame at once: every column is flattened
# across turns before a single calc_time_ser call.
l, h, sorig, eorig, sasr, easr = (
    [item for sublist in df[name].tolist() for item in sublist]
    for name in time_ser_inputs
)

calc_time_ser(l, h, sorig, eorig, sasr, easr)

In [None]:
# Columns passed to calc_time_ser, in argument order.
time_ser_inputs = [
    "labels",
    "hyps_asr",
    "start_times_orig",
    "end_times_orig",
    "start_times_asr",
    "end_times_asr",
]

# Per-turn value on a copy of sp10004_df, then its mean over turns.
df = sp10004_df.copy()
df['time_ser'] = df.apply(
    lambda row: calc_time_ser(*[row[name] for name in time_ser_inputs]),
    axis=1,
)

print(df['time_ser'].mean())
print()

# Same computation on the whole frame at once: every column is flattened
# across turns before a single calc_time_ser call.
l, h, sorig, eorig, sasr, easr = (
    [item for sublist in df[name].tolist() for item in sublist]
    for name in time_ser_inputs
)

calc_time_ser(l, h, sorig, eorig, sasr, easr)

In [None]:
# Values of the `metric_cols` columns for the row at position `loc` of new3_df.
new3_df.iloc[loc][metric_cols]

In [None]:
# Show the inspected position together with that row's main_id in new3_df.
loc, new3_df.iloc[loc].main_id

In [None]:
selected_turn = new3_df.iloc[loc]

# (token, tag) pairs: reference, then the three hypothesis variants.
a = list(zip(selected_turn.da_turn_orig, selected_turn.labels))
b1 = list(zip(selected_turn.da_turn_orig, selected_turn.hyps_trans_sp))
b2 = list(zip(selected_turn.da_turn_asr, selected_turn.hyps_asr_sp))
b3 = list(zip(selected_turn.da_turn_asr, selected_turn.hyps_asr_tt))

# PRINT LABEL & TRANSCRIPT SEQUENCE
# One "token, reference label, hypothesis" line per token; the text-model
# hypotheses are listed first, then the speech-model hypotheses.
for hyp_tags in (selected_turn.hyps_trans_tt, selected_turn.hyps_trans_sp):
    for ref_tag, token, hyp_tag in zip(selected_turn.labels, selected_turn.da_turn_orig, hyp_tags):
        print(token, "\t\t", ref_tag, "\t", hyp_tag)
    print()

In [None]:
# Compare the reference pairs `a` with the ASR-side hypotheses:
# first `b3` (built from hyps_asr_tt), then `b2` (built from hyps_asr_sp).
print_sseq(a, b3, tags=True)
print()
print_sseq(a, b2, tags=True)

In [None]:


# Join the two result frames on main_id. With pandas' defaults this is an inner
# join, and columns present in both frames get the suffixes _x (new_df) and
# _y (new3_df).
temp_df = pd.merge(new_df, new3_df, on='main_id')

In [None]:
# List every column name of the merged frame.
print(temp_df.columns.tolist())

In [None]:
# Rows where DSER_sp is lower in the _x frame than in the _y frame, shown with
# the DSER / DER values of both frames and the _x segment count.
lower_dser_in_x = temp_df.DSER_sp_x < temp_df.DSER_sp_y
comparison_cols = ["DSER_sp_x", "DSER_sp_y", "DER_sp_x", "DER_sp_y", "num_seg_label_x"]
temp_df.loc[lower_dser_in_x, comparison_cols]

In [None]:
# Print the (start, end) time pairs of one turn. The start and end times must
# come from the same row; this cell previously zipped the start times of row
# 1425 with the end times of row 1382, which pairs times of unrelated turns.
# NOTE(review): row 1425 is assumed to be the intended turn -- confirm.
turn_pos = 1425
time_pairs = list(zip(new_df.iloc[turn_pos].start_times_orig, new_df.iloc[turn_pos].end_times_orig))
print(time_pairs)