<a href="https://colab.research.google.com/github/torkralle/colab-overtone/blob/master/vocoder_time_invariant.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# モジュールと音声の読み込み

In [1]:
import numpy as np
import sys
sys.path.append('/content/drive/MyDrive/Laboratory/Colab/overtone/modules')
import wave_file as wav
import window_function as wf
import average_function as af
import furie_function as ff
import filter
import helper

In [2]:
plain_sound_path = '/content/drive/MyDrive/Laboratory/Colab/overtone/sounds/plain.wav'
fs, plain = wav.wave_read_16bit_mono(plain_sound_path)

overtone_path = '/content/drive/MyDrive/Laboratory/Colab/overtone/sounds/overtone.wav'
fs, overtone = wav.wave_read_16bit_mono(overtone_path)

# 定数の定義

In [3]:
length_of_plain = len(plain)
length_of_overtone = len(overtone)

N = 4096

band_width = 8
number_of_band = int(N / 2 / band_width)
number_of_frame = int((length_of_plain - N / 2) / (N / 2))

master_volume = 0.5

new_sound_name = "1.wav"

# 配列の準備

In [4]:
overtone = filter.highpass_filter(overtone)

x = np.zeros(N)
b = np.zeros(N)
Y = np.zeros(N, dtype = np.complex)
new_sound = np.zeros(length_of_plain)

# 本処理

In [5]:
offset = 48000
b = wf.symply_multiple_hanning_window(offset, b, overtone)
B = np.abs(np.fft.fft(b, N))

for band in range(number_of_band):
    offset = band_width * band
    for k in range(band_width):
        B[offset + k] = af.get_average(band_width, offset, B)

# 折返し処理
B = ff.symmetric_copy(B)

for frame in range(number_of_frame):
    offset = int(N / 2) * frame

    x = wf.symply_multiple_hanning_window(offset, x, plain)
    X = np.fft.fft(x, N)
    Y = ff.convolution(X, B)
    y = np.real(np.fft.ifft(Y, N))

    for n in range(N):
        new_sound[offset + n] += y[n]

new_sound = helper.normalize_sound(new_sound, master_volume)
wav.wave_write_16bit_mono(fs, new_sound, new_sound_name)

In [6]:
fs, new_sound_data = wav.wave_read_16bit_mono(new_sound_name)

In [9]:
distance_from_plain, path_from_plain = helper.get_euclidean_dtw(overtone, plain)
distance_from_new, path_from_new = helper.get_euclidean_dtw(overtone, new_sound_data)
print("元の音声とホーミーの距離: ", distance_from_plain)
print("作った音声とホーミーの距離: ", distance_from_new)

元の音声とホーミーの距離:  3083.092301506377
作った音声とホーミーの距離:  5826.240450069382
