In [1]:
pip install pyworld

Collecting pyworld
  Downloading pyworld-0.3.4.tar.gz (251 kB)
  Installing build dependencies ... [?25ldone
[?25h  Getting requirements to build wheel ... [?25ldone
[?25h  Preparing metadata (pyproject.toml) ... [?25ldone
Collecting cython>=0.24 (from pyworld)
  Using cached Cython-3.0.11-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.2 kB)
Using cached Cython-3.0.11-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.6 MB)
Building wheels for collected packages: pyworld
  Building wheel for pyworld (pyproject.toml) ... [?25ldone
[?25h  Created wheel for pyworld: filename=pyworld-0.3.4-cp38-cp38-linux_x86_64.whl size=1055233 sha256=db2bcd11c62fdd31a110de1a9c3aafbac95f3ce10cbbbef49cd12eb58e7bf649
  Stored in directory: /home/sun/.cache/pip/wheels/22/c2/50/1c11318f09454d3319f4baa8110369bbbcfd3ba4b92da79a92
Successfully built pyworld
Installing collected packages: cython, pyworld
Successfully installed cython-3.0.11 pyworld-0.3.4
Note: you may need to

In [2]:
pip install pysptk

Collecting pysptk
  Downloading pysptk-1.0.1.tar.gz (461 kB)
  Installing build dependencies ... [?25ldone
[?25h  Getting requirements to build wheel ... [?25ldone
[?25h  Preparing metadata (pyproject.toml) ... [?25ldone
Building wheels for collected packages: pysptk
  Building wheel for pysptk (pyproject.toml) ... [?25ldone
[?25h  Created wheel for pysptk: filename=pysptk-1.0.1-cp38-cp38-linux_x86_64.whl size=1502820 sha256=02365eac06e8f144199f97a0bf86bc3e3d772dd4955008243e10abcbdaa3e599
  Stored in directory: /home/sun/.cache/pip/wheels/09/a9/55/6c92f0b2dc62a120aab09b01eca76a3faa381a25034c714475
Successfully built pysptk
Installing collected packages: pysptk
Successfully installed pysptk-1.0.1
Note: you may need to restart the kernel to use updated packages.


In [1]:

import os
import math
import glob
import librosa
import pyworld
import pysptk
import numpy as np
import matplotlib.pyplot as plot

In [22]:
def load_wav(wavfile, sr=16000):
    """
    加载音频文件并返回音频信号
    """
    wav, _ = librosa.load(wavfile, sr=sr, mono=True)
    return wav

In [23]:
def extract_mcep(wavfile, mcep_target_directory, alpha=0.65, fft_size=512, mcep_size=24, frame_period=5.0):
    """
    提取音频的梅尔倒谱系数(MCEP)，并保存为 .npy 文件
    """
    if not os.path.exists(mcep_target_directory):
        os.makedirs(mcep_target_directory)
    
    loaded_wav_file = load_wav(wavfile)
    
    # 提取频谱包络
    _, spectral_envelop, _ = pyworld.wav2world(loaded_wav_file.astype(np.float64), fs=22050, frame_period=frame_period, fft_size=fft_size)
    
    # 提取MCEP
    try:
        mcep = pysptk.sptk.mcep(spectral_envelop, order=mcep_size, alpha=alpha)
    except RuntimeError as e:
        print(f"Error processing {wavfile}: {e}")
        return
    
    # 保存MCEP为 .npy 文件
    fname = os.path.basename(wavfile).split('.')[0]
    np.save(os.path.join(mcep_target_directory, fname + '.npy'), mcep, allow_pickle=False)
    print(f'Saved MCEP for {wavfile} to {mcep_target_directory}')

In [24]:
# 解析映射文件
def parse_mapping_file(mapping_file_path):
    """
    解析映射文件，返回生成语音文件名、内容语音和说话者特征语音的配对列表
    """
    mapping = []
    with open(mapping_file_path, 'r') as file:
        for line in file:
            parts = line.strip().split('|')
            if len(parts) == 3:
                generated_wav, content_wav, speaker_wav = parts
                mapping.append((generated_wav, content_wav, speaker_wav))
    return mapping

In [25]:
# 计算 MCD 的函数
def calculate_mcd(mapping, generated_base_path, content_base_path, speaker_base_path, cost_function):
    """
    基于映射文件计算 MCD
    """
    min_cost_tot = 0.0
    total_frames = 0
    
    for generated_wav, content_wav, speaker_wav in mapping:
        # 构建MCEP文件路径
        generated_mcep_path = os.path.join(mcep_target_dir, os.path.basename(generated_wav).replace('.wav', '.npy'))
        speaker_mcep_path = os.path.join(mcep_target_dir, os.path.basename(speaker_wav).replace('.wav', '.npy'))
        
        # 检查文件是否存在
        if not os.path.exists(generated_mcep_path) or not os.path.exists(speaker_mcep_path):
            print(f"文件 {generated_mcep_path} 或 {speaker_mcep_path} 不存在，跳过此对。")
            continue
        
        # 加载 MCEP 特征
        generated_mcep = np.load(generated_mcep_path)
        speaker_mcep = np.load(speaker_mcep_path)
        
        frame_no = len(generated_mcep)
        
        # 计算 DTW 和 MCD
        min_cost, _ = librosa.sequence.dtw(generated_mcep[:, 1:].T, speaker_mcep[:, 1:].T, metric=cost_function)
        
        # 累加代价
        min_cost_tot += np.mean(min_cost)
        total_frames += frame_no
    
    # 计算平均 MCD
    if total_frames > 0:
        mcd = min_cost_tot / total_frames
    else:
        mcd = float('inf')  # 如果没有帧被处理，返回无穷大
    
    return mcd, total_frames

In [26]:
def MCD(x, y):
    log_spec_dB_const = 10.0 / math.log(10.0) * math.sqrt(2.0)
    diff = x - y
    
    return log_spec_dB_const * math.sqrt(np.inner(diff, diff))

In [27]:
# 主流程函数
def process_conversion(mapping_file_path, generated_base_path, content_base_path, speaker_base_path, mcep_target_dir, cost_function=MCD):
    # 解析映射文件
    mapping = parse_mapping_file(mapping_file_path)

    # 提取内容和说话者的MCEP特征
    for generated_wav, content_wav, speaker_wav in mapping:
        extract_mcep(os.path.join(generated_base_path, generated_wav), mcep_target_dir)
        extract_mcep(os.path.join(content_base_path, content_wav), mcep_target_dir)
        extract_mcep(os.path.join(speaker_base_path, speaker_wav), mcep_target_dir)

    # 计算 MCD
    mcd, frames_used = calculate_mcd(mapping, generated_base_path, content_base_path, speaker_base_path, cost_function)

    print(f'MCD = {mcd} dB, total frames used = {frames_used}')

In [28]:
# 示例用法 freevc
mapping_file_path = '/home/sun/FreeVC/convert_F.txt'
generated_base_path = '/home/sun/FreeVC/outputs/freevc_test'
content_base_path = '/home/sun/FreeVC/outputs/wav_o'
speaker_base_path = '/home/sun/FreeVC/outputs/wav_o'
mcep_target_dir = '/home/sun/FreeVC/outputs/mceps_F'# 存储 MCEP 的目录
#autovc
# mapping_file_path = '/home/sun/FreeVC/convert_A.txt'
# generated_base_path = '/home/sun/FreeVC/outputs/autovc_test'
# content_base_path = '/home/sun/FreeVC/outputs/wav_o'
# speaker_base_path = '/home/sun/FreeVC/outputs/wav_o'
# mcep_target_dir = '/home/sun/FreeVC/outputs/mceps_A'
# #starganvc
# mapping_file_path = '/home/sun/FreeVC/convert_S.txt'
# generated_base_path = '/home/sun/FreeVC/outputs/starganvc_test'
# content_base_path = '/home/sun/FreeVC/outputs/wav_o'
# speaker_base_path = '/home/sun/FreeVC/outputs/wav_o'
# mcep_target_dir = '/home/sun/FreeVC/outputs/mceps_S'


In [30]:
process_conversion(mapping_file_path, generated_base_path, content_base_path, speaker_base_path, mcep_target_dir)

Saved MCEP for /home/sun/FreeVC/outputs/freevc_test/WTM1.wav to /home/sun/FreeVC/outputs/mceps_F
Saved MCEP for /home/sun/FreeVC/outputs/wav_o/p225_001.wav to /home/sun/FreeVC/outputs/mceps_F
Saved MCEP for /home/sun/FreeVC/outputs/wav_o/p226_002.wav to /home/sun/FreeVC/outputs/mceps_F
Saved MCEP for /home/sun/FreeVC/outputs/freevc_test/WTM2.wav to /home/sun/FreeVC/outputs/mceps_F
Saved MCEP for /home/sun/FreeVC/outputs/wav_o/p225_002.wav to /home/sun/FreeVC/outputs/mceps_F
Saved MCEP for /home/sun/FreeVC/outputs/wav_o/p227_003.wav to /home/sun/FreeVC/outputs/mceps_F
Saved MCEP for /home/sun/FreeVC/outputs/freevc_test/WTM3.wav to /home/sun/FreeVC/outputs/mceps_F
Saved MCEP for /home/sun/FreeVC/outputs/wav_o/p225_003.wav to /home/sun/FreeVC/outputs/mceps_F
Saved MCEP for /home/sun/FreeVC/outputs/wav_o/p232_004.wav to /home/sun/FreeVC/outputs/mceps_F
Saved MCEP for /home/sun/FreeVC/outputs/freevc_test/WTM4.wav to /home/sun/FreeVC/outputs/mceps_F
Saved MCEP for /home/sun/FreeVC/outputs/wa

theq() : determinant of the normal matrix is too small!
mcep : Error in theq() at 9th iteration !


Saved MCEP for /home/sun/FreeVC/outputs/wav_o/p229_003.wav to /home/sun/FreeVC/outputs/mceps_F
Saved MCEP for /home/sun/FreeVC/outputs/wav_o/p231_004.wav to /home/sun/FreeVC/outputs/mceps_F
Saved MCEP for /home/sun/FreeVC/outputs/freevc_test/WTW10.wav to /home/sun/FreeVC/outputs/mceps_F
Saved MCEP for /home/sun/FreeVC/outputs/wav_o/p231_001.wav to /home/sun/FreeVC/outputs/mceps_F
Saved MCEP for /home/sun/FreeVC/outputs/wav_o/p225_002.wav to /home/sun/FreeVC/outputs/mceps_F
Saved MCEP for /home/sun/FreeVC/outputs/freevc_test/WTW11.wav to /home/sun/FreeVC/outputs/mceps_F
Saved MCEP for /home/sun/FreeVC/outputs/wav_o/p231_002.wav to /home/sun/FreeVC/outputs/mceps_F
Saved MCEP for /home/sun/FreeVC/outputs/wav_o/p228_003.wav to /home/sun/FreeVC/outputs/mceps_F
Saved MCEP for /home/sun/FreeVC/outputs/freevc_test/WTW12.wav to /home/sun/FreeVC/outputs/mceps_F
Saved MCEP for /home/sun/FreeVC/outputs/wav_o/p231_003.wav to /home/sun/FreeVC/outputs/mceps_F
Saved MCEP for /home/sun/FreeVC/outputs/w