# 下載程式碼
使用git clone 指令從github下載程式碼。

In [0]:
# Clone the workshop repository into the current working directory.
!git clone https://github.com/BogiHsu/2019-AI-Summer-School-Voice-Conversion.git

# 安裝必要套件
用pip安裝requirements.txt中所列出的必要套件。

In [0]:
import os
os.chdir('2019-AI-Summer-School-Voice-Conversion')
!pip3 install -r requirements.txt

# 下載訓練好的模型

下載訓練好的模型model.pkl，並放到pkl資料夾中。

In [0]:
import os
os.chdir('2019-AI-Summer-School-Voice-Conversion')
!wget http://speech.ee.ntu.edu.tw/~jjery2243542/model.pkl
!mkdir pkl
!mv model.pkl pkl/model.pkl

# 範例音檔
在samples中有225.wav~229.wav等五個不同speaker的聲音範例。

In [0]:
# Render an inline audio player for one of the bundled sample files.
import IPython.display as ipd
sample_wav = 'samples/225.wav'
ipd.Audio(sample_wav)

# 聲音轉換
convert.py能一次轉換多個speaker及多個句子。

如:python3 convert.py 3 5，會選定3個speaker，兩兩互相轉換(共6種轉換方式)，每種轉換方式有5個句子。

最多可選擇5個speaker和5個句子。

轉換結果存在results資料夾中。

In [0]:
# Arguments: <number of speakers> <number of sentences>; output is written to results/.
!python3 convert.py 2 2

### 播放轉換結果

In [0]:
# Render an inline audio player for one of the files produced by convert.py.
import IPython.display as ipd
converted_sample_wav = 'results/p225_p226/225_226_358.wav'
ipd.Audio(converted_sample_wav)

# 使用個人聲音做轉換


### 錄音
recorder.py中的get_audio()函數已寫好如何在colab中錄音，當圖示跳出即開始錄音。

錄音完成存檔檔名為record.wav。

In [0]:
from scipy.io import wavfile
from recorder import get_audio

# Record through recorder.get_audio() and save the samples as a WAV file
# written with a 16000 Hz sample rate. `filename` is reused by the
# conversion cells further down.
filename = 'record.wav'
audio = get_audio()
wavfile.write(filename, rate=16000, data=audio)

### 轉換


In [0]:
# make conversion
import torch
import numpy as np
from convert import get_model
from scipy.io.wavfile import write
from torch.autograd import Variable
from preprocess.tacotron.norm_utils import spectrogram2wav, get_spectrograms

讀取模型

In [0]:
# Load the trained model through convert.get_model, pointing it at the
# hps JSON file and the checkpoint stored under pkl/.
solver = get_model(
    hps_path='./hps/vctk.json',
    model_path='./pkl/model.pkl',
)

製作speaker-id對照表

In [0]:
# Build a speaker-name -> integer-id lookup table. The id of a speaker is
# its zero-based line position in en_speaker_used.txt.
with open('./hps/en_speaker_used.txt') as speaker_file:
    speakers = [entry.strip() for entry in speaker_file.readlines()]
speaker2id = dict(zip(speakers, range(len(speakers))))

準備輸入資料

In [0]:
# Speaker whose id is handed to the model; it must be a key of speaker2id.
# NOTE(review): presumably this is the conversion target - confirm.
speaker = '225'

# Only the second value returned by get_spectrograms() is used.
_, spec = get_spectrograms(filename)

# Prepend a length-1 axis and convert to a float32 torch tensor.
spec_expand = np.expand_dims(spec, axis=0)
spec_tensor = torch.from_numpy(spec_expand).float()

# One-element tensor holding the speaker id, moved to the GPU.
speaker_index = np.array([speaker2id[speaker]])
c = Variable(torch.from_numpy(speaker_index)).cuda()

使用模型做轉換

In [0]:
# Run the solver on the recorded spectrogram together with the speaker-id tensor `c`.
# NOTE(review): the meaning of gen=True is defined by solver.test_step - confirm there.
result = solver.test_step(spec_tensor, c, gen = True)

頻譜轉音訊並存檔

In [0]:
# Drop the leading length-1 axis and swap the two remaining axes before
# passing the spectrogram to spectrogram2wav().
# NOTE: this cell reassigns `result`; re-run the model cell before
# re-running this one.
result = result.squeeze(axis=0)
result = result.transpose((1, 0))
wav_data = spectrogram2wav(result)

# Save the synthesized waveform with a 16000 Hz sample rate.
write('result.wav', 16000, wav_data)

### 播放轉換結果

In [0]:
# Render an inline audio player for the converted recording.
import IPython.display as ipd
converted_wav = 'result.wav'
ipd.Audio(converted_wav)

# 結果可視化
將轉換前及轉換後的聲音檔分別繪成頻譜圖。

### 轉換前(錄音檔)

In [0]:
%matplotlib inline
from scipy.io import wavfile
from matplotlib import pyplot as plt
rate, x = wavfile.read('record.wav')
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 4))
ax1.plot(x)
ax1.set_title('Raw audio signal')
ax2.specgram(x, Fs = 2)
ax2.set_title('Spectrogram')

### 轉換後(模型輸出)

In [0]:
%matplotlib inline
from scipy.io import wavfile
from matplotlib import pyplot as plt
rate, x = wavfile.read('result.wav')
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 4))
ax1.plot(x)
ax1.set_title('Raw audio signal')
ax2.specgram(x, Fs = 2)
ax2.set_title('Spectrogram')