# 타코트론2 TTS 시스템

## Reference: https://joungheekim.github.io/2021/04/01/code-review/

### Step2.2 공백 자르기 & Sampling Rate 변경



In [8]:
## 라이브러리 Import
import numpy as np
import os
from tqdm.notebook import tqdm
import librosa
from pathlib import Path
import matplotlib.pyplot as plt
import IPython.display as ipd
import glob
import soundfile as sf

## 함수 설정

## 파일 읽어오기(지정한 Sampling Rate로)
def load_audio(file_path, sr=22050):
    """Load an audio file into a NumPy waveform.

    The reader is chosen from the file extension, matched case-insensitively.

    Args:
        file_path: Location of the audio file.
        sr: Sampling rate to read the audio at. For ``.wav``/``.flac`` it is
            forwarded to ``librosa.load``. ``.pcm`` data is read as-is and
            ``sr`` is simply returned unchanged (no resampling is done here).
            For ``.raw`` it is forwarded to ``soundfile.read`` as the rate of
            the headerless data.

    Returns:
        A ``(wav, sr)`` tuple: the samples and the sampling rate.

    Raises:
        ValueError: If the file extension is not one of the supported ones.
    """
    ## Extract the extension; compare in lower case so '.WAV', '.PCM', ... work too
    suffix = Path(file_path).suffix
    ext = suffix.lower()

    ## Read the file
    if ext in ('.wav', '.flac'):
        wav, sr = librosa.load(file_path, sr=sr)
    elif ext == '.pcm':
        # Headerless native-endian 16-bit samples, scaled to roughly [-1.0, 1.0]
        wav = np.memmap(file_path, dtype='h', mode='r').astype('float32') / 32767
    elif ext == '.raw':
        # Headerless data: soundfile needs channels, samplerate and subtype spelled out
        wav, sr = sf.read(file_path, channels=1, samplerate=sr, format='RAW', subtype='PCM_16')
    else:
        raise ValueError("Unsupported preprocess method : {0}".format(suffix))

    return wav, sr

## 공백 자르기(패딩 추가)
def trim_audio(wav, top_db=10, pad_len=4000):
    """Cut leading/trailing silence from a waveform and pad both ends with zeros.

    Only the outer silence is removed: everything between the start of the
    first non-silent interval and the end of the last one is kept, including
    any quiet stretches in between.

    Args:
        wav: Waveform samples as a NumPy array.
        top_db: Threshold forwarded to ``librosa.effects.split`` as ``top_db``.
        pad_len: Number of zero samples added before and after the trimmed audio.

    Returns:
        The trimmed waveform with ``pad_len`` zeros on each side.
    """
    ## Locate the non-silent intervals for the given dB threshold
    non_silence_indices = librosa.effects.split(wav, top_db=top_db)

    if len(non_silence_indices) == 0:
        # Nothing counted as non-silent: keep no samples instead of failing
        # with an IndexError on the lookups below.
        trimmed = wav[:0]
    else:
        start = non_silence_indices[0][0]
        end = non_silence_indices[-1][1]
        ## Cut the audio
        trimmed = wav[start:end]

    ## Add padding on both sides
    padding = np.zeros(pad_len)
    return np.hstack([padding, trimmed, padding])

## WAV 그려보기
def plot_wav(wav, sr):
    """Draw the waveform and its spectrogram as two stacked panels and show them.

    Args:
        wav: Waveform samples to plot.
        sr: Sampling rate, passed to the spectrogram as ``Fs``.
    """
    plt.figure(1)

    # Top panel: sample values against sample index
    waveform_ax = plt.subplot(211)
    waveform_ax.plot(wav)
    waveform_ax.set_xlabel('sample rate * time')
    waveform_ax.set_ylabel('energy')

    # Bottom panel: spectrogram of the same signal
    spectrogram_ax = plt.subplot(212)
    spectrogram_ax.specgram(wav, NFFT=1024, Fs=sr, noverlap=900)
    for set_label, text in (
        (spectrogram_ax.set_xlabel, 'Time'),
        (spectrogram_ax.set_ylabel, 'Frequency'),
    ):
        set_label(text)

    plt.show()


## Start: trim silence from every clip and save the result

## Tacotron2 works at a 22050 Hz sampling rate by default
sampling_rate = 22050
## Threshold passed to trim_audio as top_db; tune it per dataset to drop audio quieter than this
decibel=10

## Collect the WAV files to process; pcm or other extensions can be used as well.
## Directory holding the noise-reduced files
root_path = '/content/drive/MyDrive/새싹_인공지능SW교육/프로젝트/새싹_최종프로젝트/Kss_TTS/data/audio_data/2_noise_reduction_data'
file_list = glob.glob(os.path.join(root_path, "*.wav"))
#file_list = glob.glob(os.path.join(root_path, "*.pcm"))

## Output directory (created if it does not exist yet)
save_path = '/content/drive/MyDrive/새싹_인공지능SW교육/프로젝트/새싹_최종프로젝트/Kss_TTS/data/audio_data/3_blank_remove_data'
os.makedirs(save_path, exist_ok=True)

for file_path in tqdm(file_list):
    
    ## Load the file at the Tacotron2 sampling rate
    wav, sr = load_audio(file_path, sr=sampling_rate)
    
    ## Trim the silence (zero padding is added on both ends)
    trimed_wav= trim_audio(wav, top_db=decibel)
    
    filename=Path(file_path).name
    temp_save_path = os.path.join(save_path, filename)
    
    ## Save under the same file name in the output directory
    sf.write(temp_save_path, trimed_wav, sampling_rate)

  0%|          | 0/101 [00:00<?, ?it/s]

In [None]:
# List every .wav file directly under root_path and display the list.
# NOTE(review): root_path is the noise-reduced INPUT directory, while the variable
# name suggests the blank-removed output directory (save_path) was intended — confirm.
blank_remove_file_list = glob.glob(os.path.join(root_path, "*.wav"))
blank_remove_file_list

In [16]:
import pandas as pd
# Load the KSS transcript table into a DataFrame; the CSV is decoded as cp949.
kss_script_csv = pd.read_csv("/content/drive/MyDrive/새싹_인공지능SW교육/프로젝트/새싹_최종프로젝트/Kss_TTS/data/script_data/kss_transcript.v.1.4_2.csv",encoding='cp949',)

In [17]:
kss_script_csv.head()

Unnamed: 0,file_name,script1,script2,tokenize,legnth,eng
0,1/1_0000.wav,그는 괜찮은 척하려고 애쓰는 것 같았다.,그는 괜찮은 척하려고 애쓰는 것 같았다.,????? ????????? ????????? ??????? ??? ????????.,3.5,He seemed to be pretending to be okay.
1,1/1_0001.wav,그녀의 사랑을 얻기 위해 애썼지만 헛수고였다.,그녀의 사랑을 얻기 위해 애썼지만 헛수고였다.,?????? ???????? ????? ???? ?????????? ????????...,4.0,I tried in vain to win her love.
2,1/1_0002.wav,용돈을 아껴 써라.,용돈을 아껴 써라.,????????? ???? ????.,1.8,Save your pocket money.
3,1/1_0003.wav,그는 아내를 많이 아낀다.,그는 아내를 많이 아낀다.,????? ??????? ????? ???????.,2.3,He cherishes his wife.
4,1/1_0004.wav,그 애 전화번호 알아?,그 애 전화번호 알아?,?? ?? ?????????? ??????,1.3,Do you know his number?


In [24]:
# Pull the 'script1' column out as a plain Python list, then peek at the
# first entry and the total number of transcripts.
script_list = kss_script_csv['script1'].tolist()
script_list[0]
len(script_list)

12854

In [20]:
# Directory holding the silence-trimmed clips (same location the preprocessing loop saves to).
blank_remove_data_path = "/content/drive/MyDrive/새싹_인공지능SW교육/프로젝트/새싹_최종프로젝트/Kss_TTS/data/audio_data/3_blank_remove_data"
# Collect every .wav in that directory. glob.glob does not guarantee any
# particular order, so sort the result if the order matters.
blank_remove_data_file_list = glob.glob(os.path.join(blank_remove_data_path, "*.wav"))
blank_remove_data_file_list[0]

'/content/drive/MyDrive/새싹_인공지능SW교육/프로젝트/새싹_최종프로젝트/Kss_TTS/data/audio_data/3_blank_remove_data/1_0008.wav'

In [None]:
# Write the Tacotron2 training filelist: one "<wav path>|<transcript>" line per clip.
#
# glob.glob returns files in arbitrary order, so a clip must NOT be paired with
# a transcript by list position. Each clip is matched to its transcript through
# the base file name found in the CSV's 'file_name' column (e.g. "1/1_0000.wav").
script_by_name = {
    Path(name).name: script
    for name, script in zip(kss_script_csv['file_name'], kss_script_csv['script1'])
}

missing = []
# The with-block guarantees the file is flushed and closed; the encoding is
# pinned so the Korean text does not depend on the platform default.
with open(r'/content/drive/MyDrive/새싹_인공지능SW교육/프로젝트/새싹_최종프로젝트/Kss_TTS/tacotron2/filelists/jm_train.txt', 'w', encoding='utf-8') as f:
    for wav_path in sorted(blank_remove_data_file_list):
        script = script_by_name.get(Path(wav_path).name)
        if script is None:
            # No transcript row for this clip: report it below instead of writing a wrong pair
            missing.append(wav_path)
            continue
        f.write(f"{wav_path}|{script}\n")

if missing:
    print(f"Skipped {len(missing)} file(s) without a transcript: {missing}")

In [26]:
%cd /content/drive/MyDrive/새싹_인공지능SW교육/프로젝트/새싹_최종프로젝트/Kss_TTS/tacotron2

/content/drive/MyDrive/새싹_인공지능SW교육/프로젝트/새싹_최종프로젝트/Kss_TTS/tacotron2


In [27]:
pwd

'/content/drive/MyDrive/새싹_인공지능SW교육/프로젝트/새싹_최종프로젝트/Kss_TTS/tacotron2'

In [None]:
# !pip install jamo
# !pip install Unidecode

output = '/content/drive/MyDrive/새싹_인공지능SW교육/프로젝트/새싹_최종프로젝트/Kss_TTS/models'
log = '/content/drive/MyDrive/새싹_인공지능SW교육/프로젝트/새싹_최종프로젝트/Kss_TTS/logs'

!python train.py --output_directory=output --log_directory=log --n_gpus=1 --training_files=filelists/jm_train.txt --validation_files=filelists/jm_dev.txt --epochs=500

FP16 Run: False
Dynamic Loss Scaling: True
Distributed Run: False
cuDNN Enabled: True
cuDNN Benchmark: False
2023-02-19 13:48:21.198708: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-02-19 13:48:22.093759: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/lib64-nvidia
2023-02-19 13:48:22.093901: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /us

In [32]:
!pip install Unidecode

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting Unidecode
  Downloading Unidecode-1.3.6-py3-none-any.whl (235 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m235.9/235.9 KB[0m [31m5.2 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: Unidecode
Successfully installed Unidecode-1.3.6


In [45]:
output

'/content/drive/MyDrive/새싹_인공지능SW교육/프로젝트/새싹_최종프로젝트/Kss_TTS/models'