In [1]:
#@title 定义基本工具函数
import functools
import time

import tqdm
def do_repeat(count: int, progress_bar=None, progress_title=None):
    """Build a decorator that immediately runs a function ``count`` times.

    The returned decorator calls the function right away, so the decorated
    name ends up bound to the list of results, not to a callable.

    Args:
        count: Number of invocations to perform.
        progress_bar: Optional callable invoked as
            ``progress_bar(iterable, title=progress_title)`` to wrap the step
            range. When ``None``, ``tqdm.tqdm`` is used instead.
        progress_title: Label handed to the progress wrapper (``desc`` for
            tqdm, ``title`` for a custom ``progress_bar``).

    Returns:
        A decorator that calls ``func(step=i, count=count)`` for every ``i`` in
        ``range(count)`` and returns the collected return values as a list.
    """
    def run_all(func):
        steps = range(count)
        if progress_bar is not None:
            tracked_steps = progress_bar(steps, title=progress_title)
        else:
            tracked_steps = tqdm.tqdm(steps, desc=progress_title)
        # One call per step; the order of results follows the step order.
        return [func(step=index, count=count) for index in tracked_steps]
    return run_all

def measure_time(fn):
    """Decorate ``fn`` so each call also reports how long it took.

    Args:
        fn: The callable to time.

    Returns:
        A wrapper that forwards all positional and keyword arguments to ``fn``
        and returns a ``(result, elapsed_seconds)`` tuple, where the elapsed
        time is measured with ``time.perf_counter``.
    """
    # Preserve fn's __name__/__doc__ so the timed function stays identifiable.
    @functools.wraps(fn)
    def wrapper(*args, **kwargs):
        start_t = time.perf_counter()
        ret = fn(*args, **kwargs)
        end_t = time.perf_counter()
        return ret, (end_t - start_t)
    return wrapper

def get_avg_value(list, start_index=0):
    """Average the second element of each pair in ``list``.

    Args:
        list: Sequence of 2-item pairs; only the second item of each pair is
            used (e.g. the ``(result, elapsed)`` tuples from ``measure_time``).
        start_index: Pairs before this index are skipped.

    Returns:
        The arithmetic mean of the selected second elements, or ``0`` when no
        pairs remain after slicing.
    """
    values = []
    for _unused, value in list[start_index:]:
        values.append(value)
    if not values:
        return 0
    return sum(values) / len(values)

In [2]:
#@title 全局配置定义
# Global verbosity flag; passed as `verbose=` when the MeloTTS_ONNX instance is created.
verbose = False

In [3]:
#@title 测试TTS
# Extra parameters handed to MeloTTS_ONNX through its `onnx_params` argument.
# NOTE(review): 'device_type' is 'GPU' here while the instance below is created
# with execution_provider="CPU" — confirm this combination is intended.
onnx_params = {
    'provider_options': [{
        'device_type': 'GPU'
    }]
}

# Disabled leftover; `ort` is not imported anywhere in this cell.
#session_opts = ort.SessionOptions()

print("Import MeloTTS_ONNX...")
from melo_onnx import MeloTTS_ONNX

print("Create instance of MeloTTS_ONNX...")
tts = MeloTTS_ONNX("./models/zh_mix_en", verbose=verbose, execution_provider="CPU", onnx_params=onnx_params)

print("Speak some sentences and save to test.wav...")
from datetime import datetime
now = datetime.now()
# Format the current time as a string (month, day, hour, minute, second with Chinese unit suffixes).
now_text = now.strftime("%m月%d日%H点%M分%S秒")
# Mixed Chinese/English test sentence that embeds the formatted time.
text = f"现在是{now_text}。今天的天气真nice。咱们一起去郊游吧。然后，咱们到hotel、garden里面去开个party……好不好啊？"
audio = tts.speak(text, 'ZH')
# Prints whatever speak() returned, unmodified.
print(audio)

import soundfile
# Save the synthesized audio using the sample rate reported by the TTS instance.
soundfile.write("./test.wav", audio, samplerate=tts.sample_rate)

print("Measure time...")

# Benchmark: synthesize a short sentence 10 times, timing each call.
# do_repeat runs the function immediately, so the name `tts_time` ends up bound
# to a list of (result, elapsed_seconds) pairs rather than to a function.
@do_repeat(10)
@measure_time
def tts_time(**kwargs):
    # **kwargs absorbs the step=/count= keywords supplied by do_repeat.
    return tts.speak('今天天气真nice', 'ZH')

# Average the elapsed times over all 10 runs (start_index defaults to 0).
print(f"Average time is {get_avg_value(tts_time)}s")

Import MeloTTS_ONNX...
Create instance of MeloTTS_ONNX...


Building prefix dict from the default dictionary ...
Loading model from cache C:\Temp\jieba.cache


Speak some sentences and save to test.wav...
['现在是10月24日10点07分45秒.', '今天的天气真nice.', '咱们一起去郊游吧.', '然后,', '咱们到hotel、garden里面去开个party.', '.', '好不好啊.']
['现在是10月24日10点07分45秒.', '今天的天气真nice.', '咱们一起去郊游吧.', '然后, 咱们到hotel、garden里面去开个party.', '. 好不好啊.']


Loading model cost 0.741 seconds.
Prefix dict has been built successfully.


[-1.4915683e-05  8.9676296e-06  5.4966172e-06 ...  0.0000000e+00
  0.0000000e+00  0.0000000e+00]
Measure time...


NameError: name 'do_repeat' is not defined

In [1]:
#@title 句子分割测试调试
from melo_onnx.text.split_utils import split_sentence
# Debug check: run the sentence splitter on a mixed Chinese/English sample
# and print the pieces it returns.
s = split_sentence("今天的天气真nice。咱们一起去郊游吧。然后，咱们到hotel、garden里面去开个party……好不好啊？", language_str="ZH_MIX_EN")
print(s)

['今天的天气真nice.', '咱们一起去郊游吧.', '然后, 咱们到hotel、garden里面去开个party.', '. 好不好啊.']


In [3]:
#@title 测试音色克隆
from melo_onnx import OpenVoiceToneClone_ONNX
# Create the tone-clone helper from the local model directory.
tc = OpenVoiceToneClone_ONNX("./models/tone_clone")
import soundfile
# NOTE(review): both input paths below are machine-specific (the second is an
# absolute path under a user profile) — consider making them configurable.
# soundfile.read returns a (data, samplerate) pair, hence the a[0] / a[1] indexing.
a = soundfile.read("..\\OpenVoice\\audio_segments\\shenmiao_2309021235\\wavs\\shenmiao_2309021235_seg0.wav", dtype='float32')
aa = tc.resample(a[0], a[1])
# Tone color extracted from the first (resampled) recording.
b = tc.extract_tone_color(aa)
c = soundfile.read("C:\\Users\\Season\\Desktop\\mysound\\myself\\001.wav", dtype='float32')
cc = tc.resample(c[0], c[1])
# Presumably converts the second recording to the tone color extracted above — confirm against the melo_onnx API.
d = tc.tone_clone(cc, b)
print(d.shape)
print(tc.sample_rate)
# Save the converted audio at the tone-clone helper's sample rate.
soundfile.write("./tc_test.wav", d, tc.sample_rate)

# Benchmark: run tone-color extraction 10 times on the resampled audio `aa`.
# do_repeat runs the function immediately, so `extract_tone_color_time` is bound
# to a list of (result, elapsed_seconds) pairs rather than to a function.
@do_repeat(10)
@measure_time
def extract_tone_color_time(**kwargs):
    # **kwargs absorbs the step=/count= keywords supplied by do_repeat.
    return tc.extract_tone_color(aa)

print("average time of extracting the tone color is ", get_avg_value(extract_tone_color_time))

# Benchmark: run the tone-clone step 10 times with the inputs `cc` and `b`.
# do_repeat runs the function immediately, so `tone_clone_time` is bound to a
# list of (result, elapsed_seconds) pairs rather than to a function.
@do_repeat(10)
@measure_time
def tone_clone_time(**kwargs):
    # **kwargs absorbs the step=/count= keywords supplied by do_repeat.
    return tc.tone_clone(cc, b)

print("average time of cloning the tone color is ", get_avg_value(tone_clone_time))

(135680,)
22050


100%|██████████| 10/10 [00:00<00:00, 16.09it/s]


average time of extracting the tone color is  0.06019485997967422


  0%|          | 0/10 [00:02<?, ?it/s]


KeyboardInterrupt: 