In [1]:
import glob
import os
from collections import defaultdict

import pandas as pd
from pymcd.mcd import Calculate_MCD

# Single calculator instance shared by every comparison in this notebook.
# NOTE(review): "dtw_sl" is presumably pymcd's DTW-aligned mode with a
# speech-length penalty -- confirm against the pymcd documentation.
mcd_toolbox = Calculate_MCD(MCD_mode="dtw_sl")

In [3]:
def get_audio_pairs(model_name: str) -> dict:
    """Pair each target recording with the converted recordings matching it.

    Target files are read from ``target/MGL1/*.wav`` and converted files from
    ``{model_name}/wav/*.wav``, both relative to the current working
    directory. A converted file is paired with a target when the target's
    file stem occurs anywhere inside the converted file's stem.

    Args:
        model_name: Name of the directory whose ``wav`` sub-directory holds
            the converted audio.

    Returns:
        A ``defaultdict(list)`` mapping each target stem to the list of
        matching converted stems, ordered by sorted file path. Targets with
        no matching converted file do not appear as keys.
    """

    def _stem(path: str) -> str:
        # basename/splitext work with the platform's path separator and strip
        # only the trailing extension, so f"{stem}.wav" always names the
        # original file again (str.replace would also eat ".wav" mid-name).
        return os.path.splitext(os.path.basename(path))[0]

    # glob returns paths in arbitrary order; sort so that the pairing (and
    # anything derived from it) is reproducible across runs and machines.
    target_names = [_stem(p) for p in sorted(glob.glob("target/MGL1/*.wav"))]
    converted_names = [
        _stem(p) for p in sorted(glob.glob(f"{model_name}/wav/*.wav"))
    ]

    pairs = defaultdict(list)
    for target_name in target_names:
        for converted_name in converted_names:
            # NOTE(review): substring matching means a target stem that is a
            # prefix of another (e.g. "utt1" vs "utt10") pairs with both;
            # confirm the file naming convention rules this out.
            if target_name in converted_name:
                pairs[target_name].append(converted_name)
    return pairs

In [6]:
# Build one {model_name: {converted_audio: score}} entry per evaluated model,
# where score is whatever mcd_toolbox.calculate_mcd returns for the pair.
result = []
for model_name in (
    "vq_wav2vec_taco2",
    "pretrained_vq_wav2vec_taco2",
    "wav2vec2_taco2",
):
    audio_pairs = get_audio_pairs(model_name=model_name)

    model_result = {model_name: {}}
    for target_audio, converted_audios in audio_pairs.items():
        # The reference file is the same for every conversion of this target.
        target_path = f"target/MGL1/{target_audio}.wav"
        for converted_audio in converted_audios:
            converted_path = f"{model_name}/wav/{converted_audio}.wav"
            model_result[model_name][converted_audio] = (
                mcd_toolbox.calculate_mcd(target_path, converted_path)
            )

    result.append(model_result)

In [14]:
# Each entry of `result` is a single-item dict {model_name: scores}; split
# those into parallel lists of model names and per-file score dicts.
keys = [list(entry)[0] for entry in result]
values = [list(entry.values())[0] for entry in result]

# One row per model, rounded to two decimals, then flipped so that models
# become the columns before the table is written to disk.
df = pd.DataFrame(values, index=keys).round(2).T
df.to_csv("mcd.csv")