In [None]:
from pathlib import Path

import csv
import traceback
import time
import IPython.display as ipd
import librosa
import librosa.display
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy
from model.config import (
    CHUNK_SIZE,
    DTW_WINDOW_SIZE,
    FILENAME_PREFIX,
    FRAME_RATE,
    HOP_LENGTH,
    NOTE_ANN_DIR,
    SAMPLE_RATE,
    AUDIO_DIR,
    TOLERANCES,
    WP_ANN_DIR,
    NORM,
    TOLERANCES,
)
from model.utils import get_stats, run_evaluation
from model.oltw import OLTW
from model.stream_processor import StreamProcessor

# --- Display options ---------------------------------------------------------
# Let pandas render up to 1000 rows before truncating a displayed frame.
pd.set_option("display.max_rows", 1000)

# --- Alignment / feature constants -------------------------------------------
FEATURE_RATE = FRAME_RATE  # alias of the frame rate imported from model.config
STEP_WEIGHTS = np.array([1.5, 1.5, 2.0])  # presumably DTW step weights -- TODO confirm
THRESHOLD_REC = 10**6
GAMMA = 10.0

# --- Plotting and filesystem locations ---------------------------------------
FIG_SIZE = (9, 3)
CURRENT_DIR = Path("./")
FIGURE_DIR = "figs"

# --- Dataset selection -------------------------------------------------------
# Zero-padded two-digit song identifiers: "01", "02", ..., "24".
ALL_SONGS = [str(number).zfill(2) for number in range(1, 25)]
singer1 = "HU33"
singer2 = "SC06"

In [None]:
# Run Evaluation for one song
#
# For every ID in SONG_IDS this cell:
#   1. runs the OLTW aligner on the reference audio against the mocked
#      target audio stream,
#   2. plots the recorded alignment path over the pairwise feature-distance
#      matrix together with the annotated note onsets,
#   3. plots the reference and target feature matrices for a scale comparison,
#   4. writes the alignment path to CSV and reads it back as the warping path,
#   5. displays the per-tolerance misalignment table returned by get_stats.
# After the loop, run_evaluation is called on all collected warping paths.
SONG_IDS = ["03"]
MAX_RUN_COUNT = 3
metric = "cosine"
FEATURES = ["chroma", "phoneme"]
wp_dict = dict()
for song_id in SONG_IDS:
    ref_audio_path = AUDIO_DIR / f"audio_{FILENAME_PREFIX}{song_id}_ref.wav"
    target_audio_path = AUDIO_DIR / f"audio_{FILENAME_PREFIX}{song_id}_target.wav"
    duration = int(librosa.get_duration(path=target_audio_path.as_posix()))
    print(f"[song_id: {song_id}] duration: {duration} sec")

    sp = StreamProcessor(
        sample_rate=SAMPLE_RATE,
        chunk_size=CHUNK_SIZE,
        hop_length=HOP_LENGTH,
        features=FEATURES,
    )

    oltw = OLTW(
        sp,
        ref_audio_path.as_posix(),
        window_size=DTW_WINDOW_SIZE,
        sample_rate=SAMPLE_RATE,
        hop_length=HOP_LENGTH,
        max_run_count=MAX_RUN_COUNT,
        metric=metric,
        features=FEATURES,
    )
    try:
        oltw.run(mock=True, mock_audio_path=target_audio_path.as_posix())
    except Exception as e:
        # Best effort: report the failure, stop the aligner and carry on with
        # whatever part of the path was recorded before the error.
        print(f"error! : {e}, {type(e)}")
        traceback.print_tb(e.__traceback__)
        oltw.stop()

    print("=====================oltw run ended=====================")

    # Nothing can be plotted or evaluated without at least one path point
    # (unpacking zip(*[]) below would raise ValueError).
    if len(oltw.candi_history) == 0:
        print(f"[song_id: {song_id}] no alignment path was produced; skipping")
        continue

    # Ground-truth note onsets: the "start" column of each annotation CSV.
    ref_ann_file = NOTE_ANN_DIR / f"ann_{FILENAME_PREFIX}{song_id}_ref.csv"
    target_ann_file = NOTE_ANN_DIR / f"ann_{FILENAME_PREFIX}{song_id}_target.csv"
    note_ann_ref = pd.read_csv(
        filepath_or_buffer=ref_ann_file.as_posix(), delimiter=","
    )["start"]
    note_ann_target = pd.read_csv(
        filepath_or_buffer=target_ann_file.as_posix(), delimiter=","
    )["start"]

    # --- Figure 1: alignment path over the pairwise distance matrix ---------
    # cdist compares every reference frame with every target frame received
    # so far; the result has one row per reference frame.
    dist = scipy.spatial.distance.cdist(
        oltw.ref_features.T,
        oltw.target_features[:, : oltw.target_pointer].T,
        metric=metric,
    )
    fig_path, ax_path = plt.subplots(figsize=(10, 10))
    # Transposed so that reference frames run along x and target frames along y.
    ax_path.imshow(dist.T, aspect="auto", origin="lower", interpolation="nearest")

    # One vectorised scatter call instead of one call (and one artist) per point.
    x, y = zip(*oltw.candi_history)
    ax_path.scatter(x, y, color="blue", alpha=0.5, s=50)

    # Ground truth: onset times scaled by FRAME_RATE to frame indices. zip()
    # pairs the annotations row by row and stops at the shorter file.
    gt_frames = (
        np.array(list(zip(note_ann_ref, note_ann_target)), dtype=float).reshape(-1, 2)
        * FRAME_RATE
    )
    ax_path.scatter(gt_frames[:, 0], gt_frames[:, 1], color="r", s=100)

    ax_path.set_xlabel("Reference frame")
    ax_path.set_ylabel("Target frame")
    ax_path.set_title("DTW path with ground truth labels")

    # --- Figure 2: compare the scale of the two feature matrices ------------
    max_step = np.max((oltw.ref_pointer, oltw.target_pointer))
    fig_feat, (ax_ref, ax_target) = plt.subplots(2, 1, figsize=(15, 5))
    feature_panels = (
        (ax_ref, oltw.ref_features[:, : oltw.ref_pointer], "Reference features"),
        (ax_target, oltw.target_features[:, : oltw.target_pointer], "Target features"),
    )
    for panel_ax, panel_features, panel_title in feature_panels:
        image = panel_ax.imshow(
            panel_features,
            aspect="auto",
            interpolation="nearest",
            origin="lower",
        )
        # Shared x-range so both panels are directly comparable.
        panel_ax.set_xlim(0, max_step)
        panel_ax.set_title(panel_title)
        fig_feat.colorbar(image, ax=panel_ax)
    fig_feat.tight_layout()

    # Render this song's figures now so they appear next to its log output
    # and do not pile up across iterations.
    plt.show()

    # save online DTW result
    online_wp_path = WP_ANN_DIR / f"wp_{FILENAME_PREFIX}{song_id}_online.csv"
    # newline="" is required by the csv module; without it extra blank rows
    # are written on platforms that translate "\n" to "\r\n".
    with open(online_wp_path.as_posix(), "w", newline="") as csvfile:
        writer = csv.writer(csvfile)
        writer.writerows(oltw.candi_history)

    # online alignment evaluation: reload the saved path, one row per coordinate
    wp = np.genfromtxt(online_wp_path.as_posix(), delimiter=",").T
    # NOTE(review): the key says "chroma_dlnco" although FEATURES is
    # chroma/phoneme; kept unchanged because run_evaluation presumably looks
    # this key up -- confirm.
    wp_dict[song_id] = {
        "wp_chroma_dlnco": wp,
    }

    # run single evaluation against the same annotation files loaded above
    (
        online_with_gt_mean,
        online_with_gt_std,
        online_with_gt_misalignments,
        online_with_gt_abs_err,
        online_with_gt_err,
    ) = get_stats(
        wp=wp,
        note_ann_filepath_1=ref_ann_file.as_posix(),
        note_ann_filepath_2=target_ann_file.as_posix(),
        tolerances=TOLERANCES,
    )

    # One-row table: misalignment rate in percent for each tolerance.
    misalignment_pct = np.asarray(online_with_gt_misalignments, dtype=float) * 100
    rows = pd.MultiIndex.from_product([[song_id]], names=["Song ID"])
    columns = pd.MultiIndex.from_product(
        [["Chroma & DLNCO"], TOLERANCES], names=["Feature Type", "$\u03C4$ (ms)"]
    )
    df = pd.DataFrame(misalignment_pct.reshape(1, -1), index=rows, columns=columns)
    with pd.option_context("display.float_format", "{:0.2f}".format):
        ipd.display(df)

stats_dict = run_evaluation(wp_dict, norm=NORM, metric=metric, features=FEATURES)
print(f"parameter for test: norm_type={NORM}, metric={metric}, features={FEATURES}")

In [None]:
# Run On-Line Evaluation for all songs
#
# Runs the OLTW aligner on every song in ALL_SONGS, saves each alignment path
# to CSV, collects the reloaded paths in wp_dict and passes them to
# run_evaluation. The total elapsed time is reported at the end.
start_time = time.perf_counter()  # monotonic clock, suited to measuring intervals
MAX_RUN_COUNT = 3
metric = "cosine"  # cosine, euclidean
FEATURES = ["chroma", "phoneme"]  # chroma, mel, mfcc, phoneme
wp_dict = dict()
for song_id in ALL_SONGS:
    ref_audio_path = AUDIO_DIR / f"audio_{FILENAME_PREFIX}{song_id}_ref.wav"
    target_audio_path = AUDIO_DIR / f"audio_{FILENAME_PREFIX}{song_id}_target.wav"
    duration = int(librosa.get_duration(path=target_audio_path.as_posix()))
    # Progress line per song, in the same format as the single-song cell.
    print(f"[song_id: {song_id}] duration: {duration} sec")

    sp = StreamProcessor(
        sample_rate=SAMPLE_RATE,
        chunk_size=CHUNK_SIZE,
        hop_length=HOP_LENGTH,
        features=FEATURES,
    )

    oltw = OLTW(
        sp,
        ref_audio_path.as_posix(),
        window_size=DTW_WINDOW_SIZE,
        sample_rate=SAMPLE_RATE,
        hop_length=HOP_LENGTH,
        max_run_count=MAX_RUN_COUNT,
        metric=metric,
        features=FEATURES,
    )
    try:
        oltw.run(mock=True, mock_audio_path=target_audio_path.as_posix())
    except Exception as e:
        # Best effort: report the failure, stop the aligner and continue with
        # whatever part of the path was recorded before the error.
        print(f"error! : {e}, {type(e)}")
        traceback.print_tb(e.__traceback__)
        oltw.stop()

    # An empty path would be written as an empty CSV and read back as an
    # empty array; skip the song instead of passing that on to the evaluation.
    if len(oltw.candi_history) == 0:
        print(f"[song_id: {song_id}] no alignment path was produced; skipping")
        continue

    # save online DTW result
    online_wp_path = WP_ANN_DIR / f"wp_{FILENAME_PREFIX}{song_id}_online.csv"
    # newline="" is required by the csv module; without it extra blank rows
    # are written on platforms that translate "\n" to "\r\n".
    with open(online_wp_path.as_posix(), "w", newline="") as csvfile:
        writer = csv.writer(csvfile)
        writer.writerows(oltw.candi_history)

    # online alignment evaluation: reload the saved path, one row per coordinate
    wp = np.genfromtxt(online_wp_path.as_posix(), delimiter=",").T
    wp_dict[song_id] = {
        "wp_chroma_dlnco": wp,
    }

stats_dict = run_evaluation(wp_dict, norm=NORM, metric=metric, features=FEATURES)
print(f"parameter for test: norm_type={NORM}, metric={metric}, features={FEATURES}")
print(f"{time.perf_counter() - start_time} sec elapsed")