# GPT-SoVITS version2の学習用サンプル利用コード
## 利用方法
・基本的には全てのセルを実行することで利用できます。

・GPUを利用しましょう。（利用制限があるのでこまめにインスタンスは削除してください）

（変更する際は，「ランタイム」→「ランタイムのタイプを変更」からGPUを選択してください）

## 注意点
・本ファイルの相対的な場所は変更しないでください。

・本ファイルの名前を変更しないでください。

変更する場合は，[1]セル目のファイル名も一緒に変更してください。

・[2]セル目の実行時にセッションの再起動を求められることがあります。

その場合はポップアップに従って再起動したあと，全てのセルを再度実行してください。

In [1]:
#Google Driveのフォルダをマウント（認証入る）
from google.colab import drive
drive.mount('/content/drive')

# カレントディレクトリを本ファイルが存在するディレクトリに変更する。
import glob
import os
main_dir = os.path.dirname(glob.glob('/content/drive/MyDrive/TTS-test/colab_GPT-SoVITS_train.ipynb', recursive=True)[0])
print(main_dir)

%cd $main_dir
!main_dir

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
/content/drive/MyDrive/TTS-test
/content/drive/MyDrive/TTS-test
/bin/bash: line 1: main_dir: command not found


## 必要パッケージのインストール


In [2]:
# Install the packages listed in requirements.txt (resolved relative to the current directory).
!pip install -r requirements.txt
# Install ffmpeg-python in addition to the requirements file.
!pip install ffmpeg-python

Ignoring onnxruntime: markers 'sys_platform == "darwin"' don't match your environment
Ignoring opencc: markers 'sys_platform != "linux"' don't match your environment


## 事前準備


In [3]:
import sys
# Append the GPT_SoVITS and GPT_SoVITS/text directories (paths relative to the
# current working directory) to the module search path.
sys.path.append("GPT_SoVITS")
sys.path.append("GPT_SoVITS/text")

#参考
#https://colab.research.google.com/drive/1rb0ZsoC1diyCzmFO2b49jIu_oDsEz77T#scrollTo=7GzgZE4hZGjf

# Functions
import os
import sys
import site
import warnings
import torch
import yaml
import json
import psutil

from tools import my_utils
from tools.asr.config import asr_dict
from config import python_exec,is_half,exp_root

## Environment setup

# Put the current working directory at the front of the module search path.
now_dir = os.getcwd()
sys.path.insert(0, now_dir)

# Create a TEMP directory under main_dir and export it through the TEMP variable.
tmp = os.path.join(main_dir, "TEMP")
os.makedirs(tmp, exist_ok=True)
os.environ["TEMP"] = tmp

# Candidate site-packages directories; fall back to a runtime path under the
# working directory when none of the reported paths contains "packages".
site_packages_roots = [
    candidate for candidate in site.getsitepackages() if "packages" in candidate
]
if not site_packages_roots:
    site_packages_roots = ["%s/runtime/Lib/site-packages" % now_dir]

# Content of users.pth: the working directory plus selected sub-directories,
# one per line (no trailing newline).
pth_entries = "\n".join(
    [now_dir]
    + [
        "%s/%s" % (now_dir, sub_dir)
        for sub_dir in ("tools", "tools/damo_asr", "GPT_SoVITS", "tools/uvr5")
    ]
)

# Write users.pth into the first existing candidate that is writable;
# candidates that raise PermissionError are skipped.
for site_packages_root in site_packages_roots:
    if not os.path.exists(site_packages_root):
        continue
    pth_path = "%s/users.pth" % site_packages_root
    try:
        with open(pth_path, "w") as f:
            f.write(pth_entries)
            print(pth_path)
    except PermissionError:
        continue
    break

# GPU detection and related settings
ngpu = torch.cuda.device_count()
gpu_infos = []  # one "<index>\t<device name>" entry per accepted device
mem = []        # per accepted GPU: total_memory / 1024**3, plus 0.4, truncated to int
if_gpu_ok = False

# Substrings of the upper-cased device name that mark a GPU as accepted.
supported_gpu_keywords = ["10","16","20","30","40","A2","A3","A4","P4","A50","500","A60","70","80","90","M4","T4","TITAN","L4","4060"]

if torch.cuda.is_available() or ngpu != 0:
    for gpu_index in range(ngpu):
        gpu_name = torch.cuda.get_device_name(gpu_index)
        upper_name = gpu_name.upper()
        if any(keyword in upper_name for keyword in supported_gpu_keywords):
            if_gpu_ok = True
            gpu_infos.append("%s\t%s" % (gpu_index, gpu_name))
            mem.append(int(torch.cuda.get_device_properties(gpu_index).total_memory/ 1024/ 1024/ 1024+ 0.4))

if if_gpu_ok and len(gpu_infos) > 0:
    gpu_info = "\n".join(gpu_infos)
    # Half of the smallest accepted GPU's memory figure.
    default_batch_size = min(mem) // 2
else:
    # No accepted GPU: register a single pseudo device "0\tCPU".
    gpu_info = ("%s\t%s" % ("0", "CPU"))
    gpu_infos.append("%s\t%s" % ("0", "CPU"))
    # Keep the batch size an integer, like the GPU branch (a float batch size
    # is not usable as a count).
    default_batch_size = int(psutil.virtual_memory().total/ 1024 / 1024 / 1024 / 2)

# Device indices joined by "-". Take the whole index field before the tab, not
# just the first character, so indices >= 10 are not truncated.
gpus = "-".join([info.split("\t", 1)[0] for info in gpu_infos])




/usr/local/lib/python3.10/dist-packages/users.pth


In [6]:
## 学習データの前処理と学習用の関数を定義

def get_text(inp_text,inp_wav_dir,exp_name,gpu_numbers,bert_pretrained_dir):
    inp_text = my_utils.clean_path(inp_text)
    inp_wav_dir = my_utils.clean_path(inp_wav_dir)

    opt_dir="%s/%s"%(exp_root,exp_name)
    config={
        "inp_text":inp_text,
        "inp_wav_dir":inp_wav_dir,
        "exp_name":exp_name,
        "opt_dir":opt_dir,
        "bert_pretrained_dir":bert_pretrained_dir,
    }
    gpu_names=gpu_numbers.split("-")
    all_parts=len(gpu_names)
    for i_part in range(all_parts):
        config.update(
            {
                "i_part": str(i_part),
                "all_parts": str(all_parts),
                "_CUDA_VISIBLE_DEVICES": gpu_names[i_part],
                "is_half": str(is_half)
            }
        )
        os.environ.update(config)
        cmd = 'PYTHONPATH="' + main_dir + '/GPT_SoVITS/text" ' + 'python GPT_SoVITS/prepare_datasets/1-get-text.py'
        print(cmd)
        !$cmd
    opt = []
    for i_part in range(all_parts):
        txt_path = "%s/2-name2text-%s.txt" % (opt_dir, i_part)
        with open(txt_path, "r", encoding="utf8") as f:
            opt += f.read().strip("\n").split("\n")
        os.remove(txt_path)
    path_text = "%s/2-name2text.txt" % opt_dir
    with open(path_text, "w", encoding="utf8") as f:
        f.write("\n".join(opt) + "\n")


def get_hubert(inp_text,inp_wav_dir,exp_name,gpu_numbers,ssl_pretrained_dir):
    inp_text = my_utils.clean_path(inp_text)
    if inp_wav_dir is not None:
      inp_wav_dir = my_utils.clean_path(inp_wav_dir)

      config={
          "inp_text":inp_text,
          "inp_wav_dir":inp_wav_dir,
          "exp_name":exp_name,
          "opt_dir":"%s/%s"%(exp_root,exp_name),
          "cnhubert_base_dir":ssl_pretrained_dir,
          "is_half": str(is_half)
      }
    else:
      config={
          "inp_text":inp_text,
          "inp_wav_dir":"",
          "exp_name":exp_name,
          "opt_dir":"%s/%s"%(exp_root,exp_name),
          "cnhubert_base_dir":ssl_pretrained_dir,
          "is_half": str(is_half)
      }
    gpu_names=gpu_numbers.split("-")
    all_parts=len(gpu_names)
    for i_part in range(all_parts):
        config.update(
            {
                "i_part": str(i_part),
                "all_parts": str(all_parts),
                "_CUDA_VISIBLE_DEVICES": gpu_names[i_part],
            }
        )
        os.environ.update(config)
        cmd = '%s GPT_SoVITS/prepare_datasets/2-get-hubert-wav32k.py'%python_exec
        print(cmd)
        !$cmd

def get_semantic(inp_text,exp_name,gpu_numbers,pretrained_s2G_path):
    inp_text = my_utils.clean_path(inp_text)

    opt_dir="%s/%s"%(exp_root,exp_name)
    config={
        "inp_text":inp_text,
        "exp_name":exp_name,
        "opt_dir":opt_dir,
        "pretrained_s2G":pretrained_s2G_path,
        "s2config_path":"GPT_SoVITS/configs/s2.json",
        "is_half": str(is_half)
    }
    gpu_names=gpu_numbers.split("-")
    all_parts=len(gpu_names)
    for i_part in range(all_parts):
        config.update(
            {
                "i_part": str(i_part),
                "all_parts": str(all_parts),
                "_CUDA_VISIBLE_DEVICES": gpu_names[i_part],
            }
        )
        os.environ.update(config)
        cmd = '%s GPT_SoVITS/prepare_datasets/3-get-semantic.py'%python_exec
        print(cmd)
        !$cmd

    opt = ["item_name\tsemantic_audio"]
    path_semantic = "%s/6-name2semantic.tsv" % opt_dir
    for i_part in range(all_parts):
        semantic_path = "%s/6-name2semantic-%s.tsv" % (opt_dir, i_part)
        with open(semantic_path, "r", encoding="utf8") as f:
            opt += f.read().strip("\n").split("\n")
        os.remove(semantic_path)
    with open(path_semantic, "w", encoding="utf8") as f:
        f.write("\n".join(opt) + "\n")

def train_sovits(batch_size,total_epoch,exp_name,text_low_lr_rate,if_save_latest,if_save_every_weights,save_every_epoch,gpu_numbers1Ba,pretrained_s2G,pretrained_s2D, **kwargs):
    SoVITS_weight_root = kwargs.get("SoVITS_weight_root", "SoVITS_weights")
    os.makedirs(SoVITS_weight_root, exist_ok=True)

    with open("GPT_SoVITS/configs/s2.json")as f:
        data=f.read()
        data=json.loads(data)
    s2_dir="%s/%s"%(exp_root,exp_name)
    os.makedirs("%s/logs_s2"%(s2_dir),exist_ok=True)
    if(is_half==False):
        data["train"]["fp16_run"]=False
        batch_size=max(1,batch_size//2)
    data["train"]["batch_size"]=batch_size
    data["train"]["epochs"]=total_epoch
    data["train"]["text_low_lr_rate"]=text_low_lr_rate
    data["train"]["pretrained_s2G"]=pretrained_s2G
    data["train"]["pretrained_s2D"]=pretrained_s2D
    data["train"]["if_save_latest"]=if_save_latest
    data["train"]["if_save_every_weights"]=if_save_every_weights
    data["train"]["save_every_epoch"]=save_every_epoch
    data["train"]["gpu_numbers"]=gpu_numbers1Ba
    data["data"]["exp_dir"]=data["s2_ckpt_dir"]=s2_dir
    data["save_weight_dir"]=SoVITS_weight_root
    data["name"]=exp_name
    tmp_config_path="%s/tmp_s2.json"%tmp
    with open(tmp_config_path,"w")as f:f.write(json.dumps(data))

    cmd = '%s GPT_SoVITS/s2_train_simple.py --config "%s"'%(python_exec,tmp_config_path)

    print(cmd)
    !$cmd

def train_gpt(batch_size,total_epoch,exp_name,if_dpo,if_save_latest,if_save_every_weights,save_every_epoch,gpu_numbers,pretrained_s1, **kwargs):
    GPT_weight_root = kwargs.get("GPT_weight_root", "GPT_weights")
    os.makedirs(GPT_weight_root, exist_ok=True)

    with open("GPT_SoVITS/configs/s1longer-v2.yaml")as f:
        data=f.read()
        data=yaml.load(data, Loader=yaml.FullLoader)
    s1_dir="%s/%s"%(exp_root,exp_name)
    os.makedirs("%s/logs_s1"%(s1_dir),exist_ok=True)
    if(is_half==False):
        data["train"]["precision"]="32"
        batch_size = max(1, batch_size // 2)
    data["train"]["batch_size"]=batch_size
    data["train"]["epochs"]=total_epoch
    data["pretrained_s1"]=pretrained_s1
    data["train"]["save_every_n_epoch"]=save_every_epoch
    data["train"]["if_save_every_weights"]=if_save_every_weights
    data["train"]["if_save_latest"]=if_save_latest
    data["train"]["if_dpo"]=if_dpo
    data["train"]["half_weights_save_dir"]=GPT_weight_root
    data["train"]["exp_name"]=exp_name
    data["train_semantic_path"]="%s/6-name2semantic.tsv"%s1_dir
    data["train_phoneme_path"]="%s/2-name2text.txt"%s1_dir
    data["output_dir"]="%s/logs_s1"%s1_dir

    os.environ["_CUDA_VISIBLE_DEVICES"]=gpu_numbers.replace("-",",")
    os.environ["hz"]="25hz"
    tmp_config_path="%s/tmp_s1.yaml"%tmp
    with open(tmp_config_path, "w") as f:f.write(yaml.dump(data, default_flow_style=False))

    cmd = '%s GPT_SoVITS/s1_train_simple.py --config_file "%s" '%(python_exec,tmp_config_path)

    print(cmd)
    !$cmd

## 前提条件の設定の記載


In [7]:
## Prerequisite settings
## Store audio data sliced into clips of roughly 2-13 seconds in "./{exp_root}/{model_name}/raw"
## Store the transcription of the audio data in "./{exp_root}/{model_name}/{text_list}".
    ## Paths in it are written relative to "./{exp_root}/{model_name}/raw"
## Store the transcription of the audio data in "./{exp_root}/{model_name}/{abs_text_list}"
    ## Paths in it are written as absolute paths
    ## A later cell converts {text_list} into {abs_text_list}; it does not need to be run when {abs_text_list} has already been prepared
    ## NOTE(review): the original note refers to that cell as cell [6] - the cell numbering may be stale

# NOTE: this rebinds the exp_root name that was imported from config earlier in the notebook.
exp_root = "Data" ## folder that holds the organized training data
model_name = "amitaro_live" ## name of the model to train
text_list = "esd.list" ## transcription file of the audio data (relative paths)
abs_text_list = "esd_absolute.list" ## transcription file of the audio data (absolute paths)


## Pretrained models
pretrained_s1 = "./GPT_SoVITS/pretrained_models/gsv-v2final-pretrained/s1bert25hz-5kh-longer-epoch=12-step=369668.ckpt"
pretrained_s2G = "./GPT_SoVITS/pretrained_models/gsv-v2final-pretrained/s2G2333k.pth"
pretrained_s2D = "./GPT_SoVITS/pretrained_models/gsv-v2final-pretrained/s2D2333k.pth"

roberta_path = "GPT_SoVITS/pretrained_models/chinese-roberta-wwm-ext-large"
hubert_path = "GPT_SoVITS/pretrained_models/chinese-hubert-base"

## Training parameters for the SoVITS model
# NOTE(review): batch_size, total_epoch, save_frequency, save_latest,
# save_every_weights and seve_weight_in_drive are assigned again in the GPT
# section below, so the values set here (e.g. batch_size = 4) are overwritten
# before any training cell runs. Only text_learning_rate is SoVITS-specific.
batch_size = 4
total_epoch = 20
text_learning_rate = 0.4
save_frequency = 5
save_latest = True
save_every_weights = True
seve_weight_in_drive = True

## Training parameters for the GPT model
# These assignments reuse the names above; their values are the ones in
# effect for every later cell.
batch_size = 2
total_epoch = 20
dpo_training = False
save_frequency = 5
save_latest = True
save_every_weights = True
seve_weight_in_drive = True


## 音声データの書き起こしの絶対パス化

{text_list}の書き起こしファイルを{abs_text_list}に変換するセル

その際に、相対パスから絶対パスに書き換えを行なっています。

絶対パスで記載されている{abs_text_list}がすでに用意されている場合は、下記のセルは実行する必要はないです。



In [8]:
# Directory that holds the sliced audio; relative entries are resolved against it.
change_abs_path = f"{main_dir}/{exp_root}/{model_name}/raw"
model_dirs = f"{exp_root}/{model_name}"

# Path of the transcription list whose first field is a relative audio path.
file_path = f"{main_dir}/{exp_root}/{model_name}/{text_list}"

# Read the list; each line is "|"-separated with the audio path as first field.
with open(file_path, 'r', encoding='utf-8') as file:
    lines = file.readlines()

# Rewrite the path field of every entry as an absolute path.
updated_lines = []
for line in lines:
    stripped_line = line.strip()
    if not stripped_line:
        # Skip blank lines: ''.split('|') yields [''], which would otherwise
        # turn into a bogus "<raw dir>/" entry in the output list.
        continue
    parts = stripped_line.split('|')
    if not os.path.isabs(parts[0]):
        # Only prefix relative paths; an already-absolute path stays as is.
        parts[0] = change_abs_path + '/' + parts[0]
    updated_lines.append('|'.join(parts))

# Save the converted list next to the source list (relative to the current directory).
with open(model_dirs + '/' + abs_text_list, 'w', encoding='utf-8') as file:
    file.writelines(updated_line + '\n' for updated_line in updated_lines)

print('絶対パスへの変換が完了しました。')

絶対パスへの変換が完了しました。


## 学習データの前処理の実行


In [9]:
# Step 1 of preprocessing: run get_text on the absolute-path transcription
# list and the raw audio directory of this model.
model_data_dir = f"{exp_root}/{model_name}"
text_path = f"{model_data_dir}/{abs_text_list}"
dataset_path = f"{model_data_dir}/raw"

# The same device list is passed twice, joined by "-".
get_text(
    inp_text=text_path,
    inp_wav_dir=dataset_path,
    exp_name=model_name,
    gpu_numbers="-".join((gpus, gpus)),
    bert_pretrained_dir=roberta_path,
)

PYTHONPATH="/content/drive/MyDrive/TTS-test/GPT_SoVITS/text" python GPT_SoVITS/prepare_datasets/1-get-text.py
2024-05-19_Q-0.wav
Downloading: "https://github.com/r9y9/open_jtalk/releases/download/v1.11.1/open_jtalk_dic_utf_8-1.11.tar.gz"
100% 22.6M/22.6M [00:00<00:00, 63.7MB/s]
Extracting tar file
2024-05-19_Q-10.wav
2024-05-19_Q-101.wav
2024-05-19_Q-103.wav
2024-05-19_Q-105.wav
2024-05-19_Q-107.wav
2024-05-19_Q-109.wav
2024-05-19_Q-110.wav
2024-05-19_Q-112.wav
2024-05-19_Q-114.wav
2024-05-19_Q-116.wav
2024-05-19_Q-118.wav
2024-05-19_Q-12.wav
2024-05-19_Q-121.wav
2024-05-19_Q-123.wav
2024-05-19_Q-125.wav
2024-05-19_Q-127.wav
2024-05-19_Q-129.wav
2024-05-19_Q-130.wav
2024-05-19_Q-132.wav
2024-05-19_Q-134.wav
2024-05-19_Q-136.wav
2024-05-19_Q-138.wav
2024-05-19_Q-14.wav
2024-05-19_Q-141.wav
2024-05-19_Q-143.wav
2024-05-19_Q-145.wav
2024-05-19_Q-147.wav
2024-05-19_Q-149.wav
2024-05-19_Q-150.wav
2024-05-19_Q-152.wav
2024-05-19_Q-154.wav
2024-05-19_Q-156.wav
2024-05-19_Q-158.wav
2024-05-19_

In [10]:

# Step 2 of preprocessing: run get_hubert without an explicit wav directory.
# The same device list is passed twice, joined by "-".
get_hubert(
    inp_text=text_path,
    inp_wav_dir=None,
    exp_name=model_name,
    gpu_numbers="-".join((gpus, gpus)),
    ssl_pretrained_dir=hubert_path,
)



/usr/bin/python3 GPT_SoVITS/prepare_datasets/2-get-hubert-wav32k.py
/usr/bin/python3 GPT_SoVITS/prepare_datasets/2-get-hubert-wav32k.py


In [11]:
# Step 3 of preprocessing: run get_semantic with the pretrained s2G weights.
# The same device list is passed twice, joined by "-".
get_semantic(
    inp_text=text_path,
    exp_name=model_name,
    gpu_numbers="-".join((gpus, gpus)),
    pretrained_s2G_path=pretrained_s2G,
)

/usr/bin/python3 GPT_SoVITS/prepare_datasets/3-get-semantic.py
<All keys matched successfully>
/usr/bin/python3 GPT_SoVITS/prepare_datasets/3-get-semantic.py
<All keys matched successfully>


## soVITS modelの学習コード


In [None]:
"""
s2_train.py line:116
    train_loader = DataLoader(
        train_dataset,
        num_workers=0, ##6
        shuffle=False,
        pin_memory=True,
        collate_fn=collate_fn,
        batch_sampler=train_sampler,
        persistent_workers=False, ##True,
        prefetch_factor=None, ##4,
に修正
"""
#warnings.filterwarnings("ignore")



# Choose where the SoVITS weights are saved: inside the model's data folder
# (relative to the current directory) or under /content.
SoVITS_weight_root = (
    f"./{exp_root}/{model_name}/soVITS_model"
    if seve_weight_in_drive
    else "/content/soVITS_model"
)
os.makedirs(SoVITS_weight_root, exist_ok=True)

# Launch SoVITS training with the parameters defined earlier in the notebook.
train_sovits(
    batch_size=batch_size,
    total_epoch=total_epoch,
    exp_name=model_name,
    text_low_lr_rate=text_learning_rate,
    if_save_latest=save_latest,
    if_save_every_weights=save_every_weights,
    save_every_epoch=save_frequency,
    gpu_numbers1Ba=gpus,
    pretrained_s2G=pretrained_s2G,
    pretrained_s2D=pretrained_s2D,
    SoVITS_weight_root=SoVITS_weight_root,
)

/usr/bin/python3 GPT_SoVITS/s2_train_simple.py --config "/content/drive/MyDrive/TTS-test/TEMP/tmp_s2.json"
2024-08-10 05:06:07.835782: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-08-10 05:06:08.117910: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-08-10 05:06:08.204259: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-08-10 05:06:08.694890: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, re

## gpt modelの学習コード


In [None]:

"""
s1_train.py line:130
strategy = "auto",
に修正


File "/content/drive/MyDrive/TTS-test/GPT_SoVITS/AR/data/bucket_sampler.py", line 45, in __init__
    num_replicas = dist.get_world_size() if torch.cuda.is_available() else 1
ここを変更
num_replicas = 1

同じファイルでline50
rank =  0
に変更

上記をなくしてbucket_sampler.pyは今入っているものに上書き
https://chatgpt.com/c/745d2740-e0d7-4b04-a600-b954754e4a82

data_module.py line:48
        return DataLoader(
            self._train_dataset,
            batch_size=batch_size,
            sampler=sampler,
            collate_fn=self._train_dataset.collate,
            num_workers= 0, ##self.num_workers,
            persistent_workers=False, ##True,
            prefetch_factor=None, ##16,
        )

line:58
    def val_dataloader(self):
        return DataLoader(
            self._dev_dataset,
            batch_size=1,
            shuffle=False,
            collate_fn=self._train_dataset.collate,
            num_workers=0, ##max(self.num_workers, 12),
            persistent_workers=False, ##True,
            prefetch_factor=None, ##16,
        )
上記の通り変更


"""




# Choose where the GPT weights are saved: inside the model's data folder
# (relative to the current directory) or under /content.
GPT_weight_root = (
    f"./{exp_root}/{model_name}/GPT_model"
    if seve_weight_in_drive
    else "/content/GPT_model"
)
os.makedirs(GPT_weight_root, exist_ok=True)

# Launch GPT training with the parameters defined earlier in the notebook.
train_gpt(
    batch_size=batch_size,
    total_epoch=total_epoch,
    exp_name=model_name,
    if_dpo=dpo_training,
    if_save_latest=save_latest,
    if_save_every_weights=save_every_weights,
    save_every_epoch=save_frequency,
    gpu_numbers=gpus,
    pretrained_s1=pretrained_s1,
    GPT_weight_root=GPT_weight_root,
)

In [None]:
# Final cell: call google.colab's runtime.unassign() after the cells above have run.
# NOTE(review): this is expected to disconnect and release the Colab VM for
# this session (matching the usage note about deleting instances promptly) -
# confirm against the Colab documentation.
from google.colab import runtime
runtime.unassign()