# Week 4: Keyword search using dynamic time warping
This week we're going to apply KWS to some actual speech data. Even though last week we learned about WFSTs and how they can be applied to KWS, we're not going to talk about WFSTs or Markov models this week. Instead, we're going to introduce *Dynamic Time Warping* (DTW), a method for comparing two sequences of data that can be used for speech-to-speech KWS.

"Speech-to-speech" means that instead of looking for a keyword by its *string*, we take the *audio* of an example keyword and compare the keyword audio to a sentence to determine if that sentence contains it. Since we're making comparisons between two different audio sequences, we need a metric that will describe the similarity of two sequences. That's where DTW comes into play.

Before starting this week's coding exercise, I recommend watching [all four videos from Herman Kamper on DTW](https://www.youtube.com/playlist?list=PLmZlBIcArwhMJoGk5zpiRlkaHUqy5dLzL). Once you're done with that, we can dive into applying DTW to some audio!

In [None]:
import os
from glob import glob
from typing import List, Literal, Optional, Tuple, Union
import pandas as pd
import librosa
from librikws import *
import numpy as np
from tslearn.metrics import dtw_path, dtw, dtw_path_from_metric
from scipy.spatial.distance import cdist, cosine
from praatio import textgrid
import matplotlib.pyplot as plt
import sklearn
from sklearn.metrics import roc_curve, roc_auc_score
import seaborn as sns
from tqdm import tqdm
tqdm.pandas()

## Section 1: Comparing sequences with Dynamic Time Warping

Hopefully by now you've watched the YouTube videos, and you're familiar with DTW. Still, it's useful to have a demo to play with in Python. To do that, let's create two simple sequences of 5 points, where the first sequence has a spike at the fourth point and the second sequence has a spike at the second point.

In [None]:
# Draw both toy sequences on one set of axes so their spikes can be compared.
fig, ax = plt.subplots()
seq_a = np.array([0,0,0,1,0])  # spike at the fourth point
seq_b = np.array([0,1,0,0,0])  # spike at the second point
ax.plot(seq_a, label='seq_a')
ax.plot(seq_b, label='seq_b')
ax.legend()
plt.show()

I've included a simple implementation of the DTW algorithm so we can visualize what the matrix looks like for this sequence.

In [None]:
def get_dtw_matrix(seq_a, seq_b, distance_funct = 'absolute_diff'):
    """
    Arguments:
        seq_a:          Sequence (array or list) whose elements index the
                        rows of the result
        seq_b:          Sequence (array or list) whose elements index the
                        columns of the result
        distance_funct: Either the string 'absolute_diff' (default) or a
                        callable `f(a_i, b_j)` returning a scalar distance
                        between one element of each sequence

    Returns:
        Array of shape (len(seq_a), len(seq_b)) where entry [i, j] is the
        distance between seq_a[i] and seq_b[j] plus the smallest of the
        three neighbouring accumulated costs ([i, j-1], [i-1, j], [i-1, j-1]).

    Raises:
        ValueError: if `distance_funct` is a string other than 'absolute_diff'.
    """
    if isinstance(distance_funct, str):
        if distance_funct != 'absolute_diff':
            raise ValueError(
                f"Unknown distance_funct {distance_funct!r}; "
                "use 'absolute_diff' or pass a callable"
            )

        def distance_funct(a, b):
            # summed absolute difference: equals |a-b| for scalars and
            # also yields a scalar when the elements are vectors
            return np.abs(a - b).sum()

    # accept plain lists as well as arrays
    seq_a = np.asarray(seq_a)
    seq_b = np.asarray(seq_b)

    # pad DTW matrix with one extra leading row AND column of inf so that
    # every real cell has left/bottom/diagonal neighbours; only the corner
    # is 0, which forces every path to start at the first pair of elements
    padded_len = seq_a.shape[0]+1
    padded_width = seq_b.shape[0]+1
    dtw_matrix = np.full((padded_len,padded_width), np.inf)
    dtw_matrix[0,0]=0

    for i, a_i in enumerate(seq_a, start=1):
        for j, b_j in enumerate(seq_b, start=1):
            current_distance = distance_funct(a_i, b_j)

            left = dtw_matrix[i,j-1]
            bottom = dtw_matrix[i-1,j]
            diag = dtw_matrix[i-1,j-1]

            prev_distance = min(left, bottom, diag)

            dtw_matrix[i,j] = current_distance + prev_distance
    # trim the padding row and column
    return dtw_matrix[1:,1:]

def plot_distance_matrix(distance_matrix, ax):
    """
    Show `1 - distance_matrix` as an image on `ax` (origin in the lower-left
    corner, 'Blues' colormap, automatic aspect) and hide the axis decorations.
    """
    image_options = dict(origin='lower', cmap='Blues', aspect='auto')
    inverted = 1 - distance_matrix
    ax.imshow(inverted, **image_options)
    ax.axis("off")

# Build the DTW matrix for the two toy sequences and display it as a heatmap.
distance_matrix = get_dtw_matrix(seq_a, seq_b)
fig, ax = plt.subplots()
plot_distance_matrix(distance_matrix, ax)
plt.show()

Let's plot the sequences alongside the matrix for best visibility and visualize the path through the DTW matrix (code adapted from [tslearn documentation](https://tslearn.readthedocs.io/en/stable/auto_examples/metrics/plot_dtw.html#sphx-glr-auto-examples-metrics-plot-dtw-py)).

In [None]:
def get_dtw_axes() -> Tuple[plt.Axes, plt.Axes, plt.Axes]:
    """
    Opens a new 5x5 figure and lays out three axes on it: a narrow strip on
    the left, a large square in the center, and a short strip above the
    center square.

    Returns:
                Tuple[left_ax, center_ax, top_ax] -- in that order.
    """
    plt.figure(figsize=(5, 5))

    # all values are fractions of the figure size
    x_origin, y_origin = 0.01, 0.1
    strip_size = 0.2
    main_size = 0.65
    x_main = x_origin + strip_size + 0.02
    y_strip = y_origin + main_size + 0.02

    # each rectangle is (left, bottom, width, height);
    # axes are created in the order top, center, left
    top_ax = plt.axes([x_main, y_strip, main_size, strip_size])
    center_ax = plt.axes([x_main, y_origin, main_size, main_size])
    left_ax = plt.axes([x_origin, y_origin, strip_size, main_size])

    return left_ax, center_ax, top_ax

def plot_dtw_kws(seq_a, seq_b, distance_matrix=None, path=None):
    """
    Arguments:
        seq_a:              First sequence to compare (rows of the matrix)
        seq_b:              Second sequence to compare (columns of the matrix)
        distance_matrix:    Optional: pre-computed matrix to display (default
                            behavior is to call `get_dtw_matrix(seq_a, seq_b)`)
        path:               Optional: pre-computed path (default behavior is to
                            take the path from `dtw_path(seq_a, seq_b)`)

    Draws the matrix with the path on top of it in the center axes, seq_b
    along the strip above it and seq_a along the strip to its left, then
    shows the figure. The title reports `dtw(seq_a, seq_b)`.
    """
    if distance_matrix is None:
        distance_matrix = get_dtw_matrix(seq_a, seq_b)
    if path is None:
        path = dtw_path(seq_a, seq_b)[0]

    # get_dtw_axes() returns (left, center, top)
    side_ax, grid_ax, upper_ax = get_dtw_axes()
    plot_distance_matrix(distance_matrix, grid_ax)
    plot_path(path, grid_ax)
    # NOTE: despite their names, plot_left_seq draws index-vs-value (a
    # horizontal trace) and plot_top_seq draws value-vs-index (a vertical
    # trace), so the former goes on the upper strip and the latter on the side.
    plot_left_seq(seq_b, upper_ax)
    plot_top_seq(seq_a, side_ax)

    upper_ax.set_title(f"DTW score = {dtw(seq_a, seq_b)}")

    plt.show()

def plot_path(path, ax):
    """
    Draw `path` (an iterable of (i, j) pairs) on `ax` as a thick red line,
    using j as the x coordinate and i as the y coordinate.
    """
    x_coords = [col for (_, col) in path]
    y_coords = [row for (row, _) in path]
    ax.plot(x_coords, y_coords, "r-", linewidth=3.)

def plot_left_seq(left_seq, left_ax):
    """
    Plot `left_seq` on `left_ax` as a thick blue line with the element index
    on x and the value on y, hide the axis decorations, and clamp x to the
    index range.
    """
    n_points = left_seq.shape[0]
    indices = np.arange(n_points)
    left_ax.plot(indices, left_seq, "b-", linewidth=3.)
    left_ax.axis("off")
    left_ax.set_xlim((0, n_points - 1))

def plot_top_seq(top_seq, top_ax):
    """
    Plot `top_seq` on `top_ax` as a thick green line with the negated value
    on x and the element index on y, hide the axis decorations, and clamp y
    to the index range.
    """
    n_points = top_seq.shape[0]
    indices = np.arange(n_points)
    top_ax.plot(-top_seq, indices, "g-", linewidth=3.)
    top_ax.axis("off")
    top_ax.set_ylim((0, n_points - 1))

In [None]:
# Matrix, path and both sequences for the toy pair, using the default matrix and path.
plot_dtw_kws(seq_a, seq_b)

Since every point on this sequence pair can be mapped to a point that has exactly the same value, we get a DTW score of 0, indicating perfect alignment.

Let's do the same with a sine and cosine wave.

In [None]:
# Sample sine and cosine on the same grid (0 up to 10 in steps of 0.1) and compare them with DTW.
sin_seq = np.sin(np.arange(0, 10, step=0.1))
cos_seq = np.cos(np.arange(0, 10, step=0.1))
plot_dtw_kws(sin_seq, cos_seq)

As we can see, since cosine and sine waves are basically the same function shifted by $\pi/2$, there's a vertical section near the beginning and a horizontal section near the end that accounts for the misalignment, but for the rest of the DTW graph the path line is perfectly diagonal. In this case, we cannot perfectly align each point of one sequence to a point with 0 distance from it in the other sequence, so we get a nonzero DTW score.

**EXERCISE 1:** Modify the code below to produce two pairs of sequences, the first pair where the DTW score is low (the sequences are very similar) and the second where the DTW score is high (the sequences are very different).

For the first pair, try to make the sequences have a similar *shape* but different *alignment*, like `seq_a, seq_b` from above. Feel free to change the length of the arrays, to set values manually, or to use some function (e.g. `np.sin`, `np.tan`, `np.exp`, `scipy.special.factorial`, ...)

In [None]:
# YOUR CODE HERE:
# Placeholder pair (all zeros); replace with two sequences that have a
# similar shape but a different alignment.
seq_a_lowscore = np.array([0,0,0,0,0,0])
seq_b_lowscore = np.array([0,0,0,0,0,0])


plot_dtw_kws(seq_a_lowscore, seq_b_lowscore)

In [None]:
# YOUR CODE HERE:
# Placeholder pair (all zeros vs. all ones); replace with two sequences
# that are very different from each other.

seq_a_highscore = np.array([0,0,0,0,0,0])
seq_b_highscore = np.array([1,1,1,1,1,1])


plot_dtw_kws(seq_a_highscore, seq_b_highscore)

Let's do a quick demo of DTW on sequences of MFCCs using the toy dataset from last week. Below I've written code that will load in the relevant data.

In [None]:
# load in the ordered array of words
words = np.load('data/words.npy')
# bare expression so the notebook displays the array
words

In [None]:
# load in the matrix of MFCCs for the audio
mfcc_matrix = np.load('data/mfcc_matrix.npy')
# bare expression so the notebook displays the shape
mfcc_matrix.shape

In [None]:
# load in the word id for each MFCC frame
mfcc_word_ids = np.load('data/mfcc_word_ids.npy')
# bare expression so the notebook displays the shape
mfcc_word_ids.shape

**EXERCISE 2**: Write a function to get a matrix of all MFCCs for a given word (`np.argwhere(mfcc_word_ids==word_index)` will be helpful here)

In [None]:
def get_mfccs_for_word(
        word: str,
        mfcc_word_ids: np.ndarray,
        mfcc_matrix: np.ndarray
    ) -> np.ndarray:
    """
    Exercise template: intended to return the MFCC frames that belong to `word`.

    As written this is a placeholder -- `word_index` is left as `...` and the
    function returns all of `mfcc_matrix` regardless of `word`.
    """
    word_index = ...
    # YOUR CODE HERE
    return mfcc_matrix[:]

# Try the function on the word 'lawn' and display the shape of the result.
lawn_mfccs = get_mfccs_for_word('lawn', mfcc_word_ids, mfcc_matrix)
lawn_mfccs.shape

**EXERCISE 3:** Use the matrices computed above to:
1. For each of the five words 'lawn, lean, knee, kneel, gnaw' predict what word will be *closest* and what word will be *furthest* from it.
    - lawn:
    - lean:
    - knee:
    - kneel:
    - gnaw:
2. Get the DTW score for each pair of words. You can plot it with `plot_dtw_kws` if you wish. To just get the DTW score, call `dtw(seq_a, seq_b)`.

In [None]:
# Visit every unordered pair of words once: word_b only ranges over the
# words that come after word_a in `words`.
for i, word_a in enumerate(words):
    for word_b in words[i+1:]:
        # YOUR CODE HERE: calculate the DTW score for word_a and word_b!
        dtw_score = ...
        print(f"DTW score between {word_a} and {word_b} is {dtw_score}")

Now that we've got some practice with MFCCs and in using them in DTW, we're going to start looking at some real KWS data. We'll be working with the [LibriSpeech](https://ieeexplore.ieee.org/document/7178964) and [LibriPhrase](https://arxiv.org/abs/2206.15400) datasets. LibriSpeech is an ASR dataset made from public domain audiobooks, and LibriPhrase is a KWS dataset where keywords were sampled from sentences in LibriSpeech. I've saved these datasets onto Witchking and provided some helper functions defined in `librikws.py` to load data from them.

In [None]:
# Fetch the keyword list via the librikws helper, report its size and show the first ten.
keywords = get_unique_keywords()
print(f"{len(keywords)} keywords in dataset")
keywords[:10]

In [None]:
# Pick a keyword/sentence pair for the keyword at index 42.
# NOTE(review): presumably the sentence is sampled at random on each call -- confirm in librikws.
keyword, sentence = get_random_keyword_sentence_pair(keywords[42])
keyword

In [None]:
# Display the sentence that was paired with the keyword above.
sentence

Let's write a function to get MFCCs + the first and second derivatives for a given audio. We'll use this as feature extraction for doing KWS.

In [None]:
def mfcc_w_deltas(audio: Union[str, np.ndarray], samplerate: Optional[int]=None) -> np.ndarray:
    """
    Compute 13 MFCCs plus their first- and second-order deltas.

    Arguments:
        audio:      Either a path to an audio file (str or os.PathLike), which
                    is loaded with `librosa.load`, or an already-loaded waveform
        samplerate: Sample rate of `audio` when a waveform is passed. Ignored
                    when a path is passed: it is replaced by the rate that
                    `librosa.load` returns.

    Returns:
        The MFCCs, first deltas and second deltas stacked along axis 0
        (presumably shape (39, n_frames) -- librosa puts frames on the last axis).
    """
    if isinstance(audio, (str, os.PathLike)):
        audio, samplerate = librosa.load(audio)
    mfcc = librosa.feature.mfcc(y=audio, sr=samplerate, n_mfcc=13)
    d1 = librosa.feature.delta(mfcc, order=1)
    d2 = librosa.feature.delta(mfcc, order=2)

    # np.concatenate rather than np.concat: the latter only exists in NumPy >= 2.0
    feature = np.concatenate([mfcc, d1, d2], axis=0)
    return feature

# Show the feature matrix of the selected sentence's audio as an image.
_, ax = plt.subplots()
ax.imshow(mfcc_w_deltas(get_librispeech_path(sentence['file'])))
plt.show()

In order to visualize DTW between a keyword and a sentence, let's write another function to plot a sentence and keyword similar to the style used in `plot_dtw_kws` above.

In [None]:
def plot_sentence(
        row: pd.Series,
        tier: str='words',
        stop_tokens: Optional[List[str]] = None,
        ax: Optional[plt.Axes] = None,
        audio_type: Literal['wav', 'mfcc']='wav'
    ):
    """
    Plot a sentence's waveform or features with its TextGrid labels on top.

    Arguments:
        row:            Series with a 'file' entry identifying the sentence
        tier:           Only TextGrid rows whose 'tier' equals this are labelled
        stop_tokens:    TextGrid values that are not labelled
                        (default: ['sp', 'sil'])
        ax:             Axes to draw on (default: a new figure is created)
        audio_type:     'mfcc' shows the feature matrix as an image; anything
                        else plots the waveform with y limited to (-1, 1)

    Returns:
        The transposed output of `mfcc_w_deltas` for the sentence audio
        (computed regardless of `audio_type`).
    """
    # avoid a shared mutable default argument
    if stop_tokens is None:
        stop_tokens = ['sp', 'sil']
    if ax is None:
        _, ax = plt.subplots()
    audio_path = get_librispeech_path(row['file'])
    wav, samplerate = librosa.load(audio_path)
    textgrid_df = get_librispeech_textgrid(row['file'])

    mfcc = mfcc_w_deltas(wav, samplerate)
    if audio_type == 'mfcc':
        text_y = 30
        ax.imshow(mfcc)
        max_X = mfcc.shape[-1]
    else:
        text_y = -0.5
        ax.plot(wav)
        ax.set_ylim((-1, 1))
        max_X = wav.shape[-1]
    ax.set_xlim(0, max_X)

    # Map each label's temporal midpoint onto the x axis by scaling with the
    # largest 'end' time in the TextGrid. Computed on a filtered selection so
    # the frame returned by the helper is not modified.
    max_time = textgrid_df['end'].max()
    is_tier = textgrid_df['tier'] == tier
    is_stop = textgrid_df['value'].isin(stop_tokens)
    shown = textgrid_df[is_tier & ~is_stop]
    midpoint_X = (shown['start'] + shown['end']) / 2 / max_time * max_X

    for label, x in zip(shown['value'], midpoint_X):
        ax.text(
            x=x,
            y=text_y,
            s=label,
            rotation=90,
            fontsize=8,
        )

    return mfcc.T

# Default audio_type ('wav'): waveform of the sentence with its word labels.
plot_sentence(sentence)
plt.show()

The `audio_type` arg lets us choose between visualizing the waveform or MFCCs.

In [None]:
# Same sentence, shown as the feature matrix instead of the waveform.
plot_sentence(sentence, audio_type='mfcc')
plt.show()

In [None]:
def plot_keyword(
        row: pd.Series,
        ax: Optional[plt.Axes] = None,
        audio_type: Literal['wav', 'mfcc']='wav',
):
    """
    Plot a keyword's waveform or features vertically, labelled with the keyword.

    Arguments:
        row:            Series with 'file' and 'keyword' entries
        ax:             Axes to draw on (default: a new figure is created)
        audio_type:     'mfcc' shows the transposed feature matrix as an image;
                        anything else plots the waveform with amplitude on x
                        (limited to (-1, 1)) and sample index on y

    Returns:
        The transposed output of `mfcc_w_deltas` for the keyword audio.
    """
    audio_path = get_libriphrase_audio_path(row['file'])

    if ax is None:
        _, ax = plt.subplots()

    # decode the file once and reuse the waveform for both the features and
    # the waveform plot
    wav, samplerate = librosa.load(audio_path)
    mfcc = mfcc_w_deltas(wav, samplerate)

    if audio_type == 'mfcc':
        ax.imshow(mfcc.T)
        text_x = 30
        max_y = mfcc.shape[-1]
    else:
        time = np.arange(wav.shape[-1])
        ax.plot(wav, time)
        ax.set_xlim((-1, 1))
        text_x = -0.5
        max_y = wav.shape[-1]

    # keyword label centred halfway up the plotted range
    ax.text(
        y=max_y/2,
        x=text_x,
        s=row['keyword'],
        rotation=90,
        fontsize=20,
        va='center',
        ha='center'
    )

    ax.set_ylim(0, max_y)

    return mfcc.T
# Default audio_type ('wav'): vertical waveform of the keyword.
plot_keyword(keyword)
plt.show()

In [None]:
# Same keyword, shown as the feature matrix instead of the waveform.
plot_keyword(keyword, audio_type='mfcc')
plt.show()

Let's use these methods to visualize DTW between the MFCCs of the keyword and the sentence.

In [None]:
def plot_dtw_mfccs(keyword, sentence):
    """
    Arguments:
        keyword:            pandas.Series containing data for keyword
        sentence:           pandas.Series containing data for sentence

    Plots the keyword (left) and sentence (top) alongside the pairwise
    frame-to-frame distance matrix from `cdist` (center) with the DTW path
    drawn over it. The title shows the DTW score divided by the number of
    sentence frames.
    """
    left, center, top = get_dtw_axes()

    keyword_mfcc = plot_keyword(keyword, ax=left)
    sentence_mfcc = plot_sentence(sentence, ax=top)

    distance_matrix = cdist(keyword_mfcc, sentence_mfcc)
    # dtw_path returns both the path and the DTW score, so there is no need
    # to run DTW a second time just to get the score
    path, dtw_score = dtw_path(keyword_mfcc, sentence_mfcc)
    plot_distance_matrix(distance_matrix, center)
    plot_path(path, center)

    dtw_distance = dtw_score / sentence_mfcc.shape[0]
    top.set_title(f"DTW score = {dtw_distance}")

    plt.show()

# DTW between the whole sentence and the keyword selected earlier.
plot_dtw_mfccs(keyword, sentence)

**EXERCISE 4:** Where the DTW path is more vertical, we are "traversing" along the keyword, as opposed to horizontal lines where we are traversing along the sentence but not along the keyword. Since the keyword is contained within the sentence, we should expect most of the vertical movement to occur at the keyword's location in the audio. Is this actually the case? In the markdown cell below, note the words in the sentence where you see vertical movement along the DTW path, and whether this is in line with what you expected.

Note: The keyword here is set to 'I asked', but the sentence was randomly generated. If the sentence is too long and the words on top are too crowded, try running the cells above again until a shorter sentence is chosen.

YOUR ANSWER HERE:

Now, it's not really ideal to compare an *entire* sentence of MFCCs to the keyphrase using DTW. A better strategy is to divide the sentence into a series of windows, then compare each window to the keyword. Hopefully, the window with the minimum score will match the keyword (I've indicated the minimum DTW score with a vertical red line).

In [None]:
def plot_sliding_dtw(keyword, sentence):
    """
    Arguments:
        keyword:            pandas.Series containing data for keyword
        sentence:           pandas.Series containing data for sentence

    Plots the keyword (left) and sentence (top) alongside the per-window DTW
    scores from `get_windowed_dtw` (center). A red vertical line marks the
    window with the lowest score, which is also reported in the title.
    """
    side_ax, grid_ax, upper_ax = get_dtw_axes()

    keyword_mfcc = plot_keyword(keyword, ax=side_ax)
    sentence_mfcc = plot_sentence(sentence, ax=upper_ax)

    window_scores = get_windowed_dtw(keyword_mfcc, sentence_mfcc)
    best_window = np.argmin(window_scores)

    plot_distance_matrix(window_scores, grid_ax)
    grid_ax.vlines(best_window, -0.5, 0.5, 'red')

    upper_ax.set_title(f"Min DTW score = {window_scores.min()}")

    plt.show()

def get_windowed_dtw(keyword_mfcc, sentence_mfcc):
    """
    Slide a window over `sentence_mfcc` and score each window against the
    keyword with DTW.

    Arguments:
        keyword_mfcc:   2-D array; axis 0 is treated as the sequence axis
        sentence_mfcc:  2-D array laid out the same way as `keyword_mfcc`

    Returns:
        Array of shape (1, n_windows) holding `dtw(keyword_mfcc, window)`
        divided by `keyword_mfcc.shape[0]` for every window position.

    The window spans 1.5x the keyword's length along axis 0 (capped at the
    sentence's length) and the keyword's full width along axis 1.
    """
    window_len = min(int(keyword_mfcc.shape[0]*1.5), sentence_mfcc.shape[0])
    window_shape = (window_len, keyword_mfcc.shape[1])
    # 4-D view: two leading window-position axes, then one window per position
    sentence_windows = np.lib.stride_tricks.sliding_window_view(
        sentence_mfcc,
        window_shape,
    )
    # Walk the two position axes explicitly instead of calling .squeeze():
    # a bare squeeze also drops the window axis when only one window fits
    # (or when window_len is 1), which made the loop iterate over individual
    # frames instead of windows.
    dtw_scores = np.array([
        dtw(keyword_mfcc, window)
        for position in sentence_windows
        for window in position
    ])
    dtw_scores = dtw_scores[np.newaxis,:]
    dtw_scores/=keyword_mfcc.shape[0]
    return dtw_scores

# Windowed DTW scores for the keyword/sentence pair selected earlier.
plot_sliding_dtw(keyword, sentence)

Let's put this to the test with a small dataset. I've saved a csv file with a list of 10 keywords where each keyword is paired with 10 positive sentences (that contain the keyword) and 20 negative sentences (that don't contain the keyword). The imbalance here is intentional: overall, we should expect to come across more instances of negative sentences than positive for any given keyword when applying KWS to novel data.

In [None]:
# Load the keyword/sentence evaluation table via the librikws helper and preview it.
kws_df = get_samespeaker_kws_df()
kws_df.head()

Let's add a column containing the DTW score for each row, and then z-score it.

In [None]:
def get_dtw_prob(row):
    """
    Return the lowest windowed DTW score between a row's keyword audio and
    sentence audio (lower means more similar; despite the name this is a
    distance-like score, not a probability).

    Arguments:
        row:    Series with 'keyword_file' and 'sentence_file' entries
    """
    # mfcc_w_deltas stacks its features along axis 0, whereas get_windowed_dtw
    # treats axis 0 as the sequence (time) axis -- transpose so DTW warps over
    # frames rather than over coefficients, matching what plot_sliding_dtw
    # passes in.
    keyword_mfcc = mfcc_w_deltas(get_libriphrase_audio_path(row['keyword_file'])).T
    sentence_mfcc = mfcc_w_deltas(get_librispeech_path(row['sentence_file'])).T

    return get_windowed_dtw(keyword_mfcc, sentence_mfcc).min()

# Score every keyword/sentence row (with a progress bar) and preview the result.
kws_df['dtw_score']=kws_df.progress_apply(get_dtw_prob, axis=1)
kws_df.head()

In [None]:
# z-score dtw scores
kws_df['dtw_score_norm'] = (kws_df['dtw_score']-kws_df['dtw_score'].mean())\
    / kws_df['dtw_score'].std()
# apply sigmoid so scores are valued between 0 and 1
# (the sigmoid is increasing, so a larger raw DTW score still gives a larger normalized score)
kws_df['dtw_score_norm']= kws_df['dtw_score_norm'].apply(lambda x: 1/(1+np.exp(-x)))
kws_df['dtw_score_norm'].plot()

Hopefully, a keyword-positive sentence pair will have a low DTW score and a keyword-negative sentence pair will have a high DTW score. Let's look at how the normalized scores are distributed for each label.

In [None]:
# NOTE(review): `mask` is computed but not used below -- the plot uses every
# row (`kws_df[:]`). Presumably `kws_df[mask]` can be swapped in to show only
# the two listed keywords; confirm the intent.
mask = kws_df['keyword'].isin(["commander", "japanese"])
sns.scatterplot(kws_df[:], y='label', x='dtw_score_norm', hue='keyword')
plt.show()

Here's an [article](https://www.geeksforgeeks.org/machine-learning/auc-roc-curve/) and a [video with cringy intro song](https://www.youtube.com/watch?v=4jRBRDbJemM) on ROC/AUC.

In [None]:

# ROC curve and AUC over all rows of kws_df.
Y = kws_df['label']
# Y_hat is NEGATIVE dtw_score_norm
# because greater DTW score = more distance
# i.e. LESS similarity
Y_hat = -kws_df['dtw_score_norm']

fpr, tpr, thresholds = roc_curve(Y, Y_hat)
auc = roc_auc_score(Y, Y_hat)
plt.plot(fpr, tpr, label=f"ROC Curve (AUC = {auc:.2f})")
plt.plot([0, 1], [0, 1], 'k--', label='Random Classifier') # Diagonal line for random classifier
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Receiver Operating Characteristic (ROC) Curve')
plt.legend()
plt.grid(True)
plt.show()

**EXERCISE 5:** Inspect the ROC curve and AUC value for each keyword individually and note which words have particularly low or high values. Remember that a low AUC means the word is harder to classify and a higher AUC means the word is easier to classify. Do you notice any trends?

In [None]:
# Distinct keywords present in the evaluation table.
kws_targets = kws_df['keyword'].unique()
kws_targets

In [None]:
# ROC curve and AUC restricted to one keyword; change the index to inspect another.
target_keyword = kws_targets[0]
mask = kws_df['keyword']==target_keyword

Y = kws_df[mask]['label']
# negated because a larger DTW score means less similarity
Y_hat = -kws_df[mask]['dtw_score_norm']

fpr, tpr, thresholds = roc_curve(Y, Y_hat)
auc = roc_auc_score(Y, Y_hat)
plt.plot(fpr, tpr, label=f"ROC Curve (AUC = {auc:.2f})")
plt.plot([0, 1], [0, 1], 'k--', label='Random Classifier') # Diagonal line for random classifier
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title(f'ROC Curve for keyword "{target_keyword}"')
plt.legend()
plt.grid(True)
plt.show()

**YOUR ANSWER HERE**:

**EXERCISE 6**: Let's try to improve our AUC score from 0.65! We can do this by tinkering with the features and with the way DTW is scored. Let's start with the features. So we've been using `MFCCs` with 13 coefficients. Let's try modifying this. Here are a few ideas:
1. Change the number of coefficients `n_mfcc`. What happens to AUC when we add more coefficients? When we use fewer?
2. Add *energy* to our feature vector. This can be done using `librosa.feature.rms`. You can concatenate energy to the MFCCs like this:

```python
energy = librosa.feature.rms(y=audio)
feature = np.concatenate([mfcc, d1, d2, energy])
```
3. Add `d1_energy` and `d2_energy` (the first and second derivatives of energy) to our feature. Just repeat the steps for getting `d1` and `d2` of the MFCCs and concatenate to the feature.
4. Why stop there? There are multiple audio features to play around with among the [Librosa features](https://librosa.org/doc/0.11.0/feature.html). I'd focus on spectral features, e.g. `librosa.feature.melspectrogram` or `librosa.feature.spectral_bandwidth`.

In [None]:
def mfcc_alt(audio: Union[str, np.ndarray], samplerate: Optional[int]=None) -> np.ndarray:
    """
    Exercise copy of the feature extractor: 13 MFCCs plus their first- and
    second-order deltas, stacked along axis 0. Modify this function to try
    other features.

    Arguments:
        audio:      Either a path to an audio file (str or os.PathLike), which
                    is loaded with `librosa.load`, or an already-loaded waveform
        samplerate: Sample rate of `audio` when a waveform is passed. Ignored
                    when a path is passed: it is replaced by the rate that
                    `librosa.load` returns.
    """
    if isinstance(audio, (str, os.PathLike)):
        audio, samplerate = librosa.load(audio)
    mfcc = librosa.feature.mfcc(y=audio, sr=samplerate, n_mfcc=13)
    d1 = librosa.feature.delta(mfcc, order=1)
    d2 = librosa.feature.delta(mfcc, order=2)

    feature = np.concatenate([mfcc, d1, d2], axis=0)
    return feature

# Sanity check: shape of the alternative features for the first row's keyword audio.
mfcc_alt(get_libriphrase_audio_path(kws_df.iloc[0]['keyword_file'])).shape

As hinted above, we can modify how DTW is calculated as well. We can do this by changing the distance function used to compute distance between feature vectors. See the [scipy documentation](https://docs.scipy.org/doc/scipy/reference/generated/scipy.spatial.distance.pdist.html) for more information on different distance metrics. You can change what distance metric `tslearn` uses by simply changing the `dtw_metric` variable in the cell below.

In [None]:
def get_windowed_dtw_alt(keyword_mfcc, sentence_mfcc):
    """
    Exercise copy of the windowed DTW scorer with a configurable frame metric.

    Arguments:
        keyword_mfcc:   2-D array; axis 0 is treated as the sequence axis
        sentence_mfcc:  2-D array laid out the same way as `keyword_mfcc`

    Returns:
        Array of shape (1, n_windows) holding the DTW score of each window
        (second element of `dtw_path_from_metric`'s result) divided by
        `keyword_mfcc.shape[0]`.

    The window spans 1.5x the keyword's length along axis 0 (capped at the
    sentence's length) and the keyword's full width along axis 1.
    """
    window_len = min(int(keyword_mfcc.shape[0]*1.5), sentence_mfcc.shape[0])
    window_shape = (window_len, keyword_mfcc.shape[1])
    # 4-D view: two leading window-position axes, then one window per position
    sentence_windows = np.lib.stride_tricks.sliding_window_view(
        sentence_mfcc,
        window_shape,
    )

    dtw_metric = 'euclidean' # MODIFY THIS LINE TO CHANGE THE METRIC

    # Walk the two position axes explicitly instead of calling .squeeze():
    # a bare squeeze also drops the window axis when only one window fits
    # (or when window_len is 1), which made the loop iterate over individual
    # frames instead of windows.
    dtw_scores = np.array([
        dtw_path_from_metric(keyword_mfcc, window, metric=dtw_metric)[1]
        for position in sentence_windows
        for window in position
    ])
    dtw_scores = dtw_scores[np.newaxis,:]
    dtw_scores/=keyword_mfcc.shape[0]
    return dtw_scores

In [None]:

def get_dtw_prob_alt(row):
    """
    Return the lowest windowed DTW score between a row's keyword audio and
    sentence audio, using the exercise feature extractor and scorer
    (`mfcc_alt` and `get_windowed_dtw_alt`).

    Arguments:
        row:    Series with 'keyword_file' and 'sentence_file' entries
    """
    # mfcc_alt stacks its features along axis 0, whereas get_windowed_dtw_alt
    # treats axis 0 as the sequence (time) axis -- transpose so DTW warps over
    # frames rather than over coefficients.
    keyword_mfcc = mfcc_alt(get_libriphrase_audio_path(row['keyword_file'])).T
    sentence_mfcc = mfcc_alt(get_librispeech_path(row['sentence_file'])).T

    return get_windowed_dtw_alt(keyword_mfcc, sentence_mfcc).min()

# Score every row with the alternative features/metric (with a progress bar).
kws_df['dtw_score_alt']=kws_df.progress_apply(get_dtw_prob_alt, axis=1)

# z-score dtw scores
# (this overwrites the raw scores in 'dtw_score_alt' with the normalized ones)
kws_df['dtw_score_alt'] = (kws_df['dtw_score_alt']-kws_df['dtw_score_alt'].mean())\
    / kws_df['dtw_score_alt'].std()
# apply sigmoid so scores are valued between 0 and 1
kws_df['dtw_score_alt']= kws_df['dtw_score_alt'].apply(lambda x: 1/(1+np.exp(-x)))

Y = kws_df['label']
# negated because a larger DTW score means less similarity
Y_hat = -kws_df['dtw_score_alt']

fpr, tpr, thresholds = roc_curve(Y, Y_hat)
auc = roc_auc_score(Y, Y_hat)
plt.plot(fpr, tpr, label=f"ROC Curve (AUC = {auc:.2f})")
plt.plot([0, 1], [0, 1], 'k--', label='Random Classifier') # Diagonal line for random classifier
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Receiver Operating Characteristic (ROC) Curve')
plt.legend()
plt.grid(True)
plt.show()

**YOUR ANSWER HERE:** In this cell, write down what changes you made to the feature generation and scoring metric and how that affected the AUC score.

