# [Retrieval-based-Voice-Conversion-WebUI](https://github.com/RVC-Project/Retrieval-based-Voice-Conversion-WebUI) Training notebook

[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/toddlack/Retrieval-based-Voice-Conversion-WebUI/blob/main/Retrieval_based_Voice_Conversion_WebUI_v2.ipynb)

In [None]:
# @title #Check out the graphics card
# Print the NVIDIA GPU/driver status for this runtime. If the command fails,
# the runtime presumably has no GPU attached — TODO confirm the runtime type.
!nvidia-smi

In [None]:
# @title Attach Google Cloud Drive
# Mount the user's Google Drive at /content/drive. Later cells read the dataset
# from, and back trained models up to, paths under /content/drive/MyDrive.

from google.colab import drive

drive.mount("/content/drive")

In [None]:
# @title Install dependencies
!apt-get -y install build-essential python3-dev ffmpeg
!apt -y install -qq aria2
%pip install --upgrade setuptools wheel
%pip install --upgrade pip
%pip install python-dotenv av torchcrepe
%pip install faiss-cpu==1.7.2 fairseq-fixed gradio ffmpeg ffmpeg-python praat-parselmouth pyworld numpy numba librosa


In [None]:
# @title Clone repo for local use
#Clone a repo and cd into the new directory
content_root='/content'
git_repo='https://github.com/toddlack/Retrieval-based-Voice-Conversion-WebUI.git'
app_root=git_repo.split('/')[-1].replace('.git', '')
assets_dir=f'{content_root}/{app_root}/assets'
%cd {content_root}
!git clone {git_repo}
%cd {app_root}


# !mkdir Retrieval-based-Voice-Conversion-WebUI
# %cd /content/Retrieval-based-Voice-Conversion-WebUI
# !git init
# !git remote add origin https://github.com/toddlack/Retrieval-based-Voice-Conversion-WebUI.git
# !git fetch origin cfd984812804ddc9247d65b14c82cd32e56c1133 --depth=1
# !git reset --hard FETCH_HEAD

In [None]:
# @title downlad models into the application's 'asset' directory. The web UI will use these.
import os
os.environ['ASSETS_DIR'] = f'{assets_dir}'
# v1
!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/D32k.pth -d ${ASSETS_DIR}/pretrained -o D32k.pth
!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/D40k.pth -d ${ASSETS_DIR}/pretrained -o D40k.pth
!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/D48k.pth -d ${ASSETS_DIR}/pretrained -o D48k.pth
!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/G32k.pth -d ${ASSETS_DIR}/pretrained -o G32k.pth
!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/G40k.pth -d ${ASSETS_DIR}/pretrained -o G40k.pth
!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/G48k.pth -d ${ASSETS_DIR}/pretrained -o G48k.pth
!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/f0D32k.pth -d ${ASSETS_DIR}/pretrained -o f0D32k.pth
!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/f0D40k.pth -d ${ASSETS_DIR}/pretrained -o f0D40k.pth
!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/f0D48k.pth -d ${ASSETS_DIR}/pretrained -o f0D48k.pth
!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/f0G32k.pth -d ${ASSETS_DIR}/pretrained -o f0G32k.pth
!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/f0G40k.pth -d ${ASSETS_DIR}/pretrained -o f0G40k.pth
!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/f0G48k.pth -d ${ASSETS_DIR}/pretrained -o f0G48k.pth

# v2
# !aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained_v2/D32k.pth -d ${ASSETS_DIR}/pretrained_v2 -o D32k.pth
!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained_v2/D40k.pth -d ${ASSETS_DIR}/pretrained_v2 -o D40k.pth
# !aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained_v2/D48k.pth -d ${ASSETS_DIR}/pretrained_v2 -o D48k.pth
# !aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained_v2/G32k.pth -d ${ASSETS_DIR}/pretrained_v2 -o G32k.pth
!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained_v2/G40k.pth -d ${ASSETS_DIR}/pretrained_v2 -o G40k.pth
# !aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained_v2/G48k.pth -d ${ASSETS_DIR}/pretrained_v2 -o G48k.pth
# !aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained_v2/f0D32k.pth -d ${ASSETS_DIR}/pretrained_v2 -o f0D32k.pth
!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained_v2/f0D40k.pth -d ${ASSETS_DIR}/pretrained_v2 -o f0D40k.pth
# !aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained_v2/f0D48k.pth -d ${ASSETS_DIR}/pretrained_v2 -o f0D48k.pth
# !aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained_v2/f0G32k.pth -d ${ASSETS_DIR}/pretrained_v2 -o f0G32k.pth
!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained_v2/f0G40k.pth -d ${ASSETS_DIR}/pretrained_v2 -o f0G40k.pth
# !aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained_v2/f0G48k.pth -d ${ASSETS_DIR}/pretrained_v2 -o f0G48k.pth

In [None]:
# @title #Download the vocal separation model
# All downloads below go under ${ASSETS_DIR}. That environment variable must
# already be set (the pretrained-model download cell sets it) and is expanded
# by the shell, not by Python.

# Two weight files from the uvr5_weights folder of the model repository.
!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/uvr5_weights/HP2-人声vocals+非人声instrumentals.pth -d ${ASSETS_DIR}/uvr5_weights -o HP2-人声vocals+非人声instrumentals.pth
!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/uvr5_weights/HP5-主旋律人声vocals+其他instrumentals.pth -d ${ASSETS_DIR}/uvr5_weights -o HP5-主旋律人声vocals+其他instrumentals.pth

# Download hubert_base into assets/hubert.
!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/hubert_base.pt -d ${ASSETS_DIR}/hubert -o hubert_base.pt

# Download the RMVPE model into assets/rmvpe.
!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/rmvpe.pt -d ${ASSETS_DIR}/rmvpe -o rmvpe.pt

In [None]:
#Load the packaged dataset from Google Cloud Drive to /content/dataset

# @markdown Dataset location - WAV files to train on. Unzip to dataset directory
DATASET = "/content/drive/MyDrive/dataset/tammy20min.zip"  # @param {type:"string"}


!mkdir -p /content/dataset
!unzip -d /content/dataset -B {DATASET}

In [None]:
# @title #Rename the duplicate file in the dataset
# List the dataset directory, then rename every file matching `*.*~*`
# (presumably the backup copies left by `unzip -B` — TODO confirm).
!ls -a /content/dataset/
# The substitution moves the text after `~` in front of the extension:
# `<name>.<ext>~<digits>` becomes `<name>_<digits>.<ext>`,
# e.g. `a.wav~1` -> `a_1.wav` and `a.wav~` -> `a_.wav`.
!rename 's/(\w+)\.(\w+)~(\d*)/$1_$3.$2/' /content/dataset/*.*~*

In [None]:
# @title #Launch Webui

# Run from the repository root so that infer-web.py is found in the current directory.
%cd /content/Retrieval-based-Voice-Conversion-WebUI
# Optional (disabled): show TensorBoard for the training logs inside the notebook.
#%load_ext tensorboard
#%tensorboard --logdir /content/Retrieval-based-Voice-Conversion-WebUI/logs
# NOTE(review): --colab and --pycmd are interpreted by infer-web.py; presumably
# they enable a public share link and select the interpreter for sub-processes — confirm.
!python3 infer-web.py --colab --pycmd python3

In [None]:
# @title #Manually back up the trained model file to Google Cloud Drive
# @markdown #You need to check the file name of the model in the logs folder and manually change the file name at the end of the following command

# @markdown #Model Name
MODELNAME = "lulu"  # @param {type:"string"}
# @markdown #Model Epoch
MODELEPOCH = 9600  # @param {type:"integer"}
# @markdown destination models directory. New model directory created here.
DRIVE_DIR = "ai_voice"  #@param {type:"string"}
!mkdir -p /content/drive/MyDrive/ai_voice/{MODELNAME}
!cp /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/G_{MODELEPOCH}.pth /content/drive/MyDrive/{DRIVE_DIR}/{MODELNAME}/{MODELNAME}_D_{MODELEPOCH}.pth
!cp /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/D_{MODELEPOCH}.pth /content/drive/MyDrive/{DRIVE_DIR}/{MODELNAME}/{MODELNAME}_G_{MODELEPOCH}.pth
!cp /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/added_*.index /content/drive/MyDrive/{DRIVE_DIR}/{MODELNAME}
!cp /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/total_*.npy /content/drive/MyDrive/{DRIVE_DIR}/{MODELNAME}

!cp /content/Retrieval-based-Voice-Conversion-WebUI/assets/weights/{MODELNAME}.pth /content/drive/MyDrive/{DRIVE_DIR}/{MODELNAME}/{MODELNAME}{MODELEPOCH}.pth

In [None]:
# @title Recover from Google Cloud Drive pth
# @markdown You need to check the file name of the model in the logs folder and manually change the file name at the end of the following command

# @markdown Model name
MODELNAME = "lulu"  # @param {type:"string"}
# @markdown Model epoch
MODELEPOCH = 7500  # @param {type:"integer"}

!mkdir -p /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}

!cp /content/drive/MyDrive/{MODELNAME}_D_{MODELEPOCH}.pth /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/G_{MODELEPOCH}.pth
!cp /content/drive/MyDrive/{MODELNAME}_G_{MODELEPOCH}.pth /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/D_{MODELEPOCH}.pth
!cp /content/drive/MyDrive/*.index /content/
!cp /content/drive/MyDrive/*.npy /content/
!cp /content/drive/MyDrive/{MODELNAME}{MODELEPOCH}.pth /content/Retrieval-based-Voice-Conversion-WebUI/weights/{MODELNAME}.pth

In [None]:
# @title Manual pre-processing (not recommended)
# Runs the preprocessing script directly instead of through the web UI.
# The script path and `logs/{MODELNAME}` are relative, so the current directory
# must be the repository root.
# @markdown Model name
MODELNAME = "lulu"  # @param {type:"string"}
# @markdown Sample rate in Hz (the variable is named BITRATE, but it is a sample rate)
BITRATE = 48000  # @param {type:"integer"}
# @markdown Number of processes used
THREADCOUNT = 8  # @param {type:"integer"}
# @markdown Location of the audio files to train on.
DATA_DIR='/content/dataset' # @param {type:"string"}
# Positional arguments: input dir, sample rate, process count, experiment dir, flag.
# NOTE(review): the meaning of the trailing `True` is defined by the script — confirm.
!python3 trainset_preprocess_pipeline_print.py {DATA_DIR} {BITRATE} {THREADCOUNT} logs/{MODELNAME} True

In [None]:
# @title Manual feature extraction (not recommended)
# Runs the two extraction scripts directly instead of through the web UI.
# Paths are relative, so the current directory must be the repository root.
# @markdown Model name
MODELNAME = "lulu"  # @param {type:"string"}
# @markdown Number of threads
THREADCOUNT = 8  # @param {type:"integer"}
# @markdown Algorithm passed to extract_f0_print.py (TODO: list the accepted values here).
ALGO = "harvest"  # @param {type:"string"}

# Step 1: f0 extraction for the experiment directory, using THREADCOUNT and ALGO.
!python3 extract_f0_print.py logs/{MODELNAME} {THREADCOUNT} {ALGO}

# Step 2: feature extraction on the same experiment directory.
# NOTE(review): `cpu 1 0 0 ... True` are positional arguments defined by the
# script (presumably device, part count, part index, GPU index, flag) — confirm.
!python3 extract_feature_print.py cpu 1 0 0 logs/{MODELNAME} True

In [None]:
# @title Manual training (not recommended)
# @markdown Model name
MODELNAME = "lulu"  # @param {type:"string"}
# @markdown Used GPU
USEGPU = "0"  # @param {type:"string"}
# @markdown Batch size
BATCHSIZE = 32  # @param {type:"integer"}
# @markdown Stopped epochs
MODELEPOCH = 3200  # @param {type:"integer"}
# @markdown Nuber of epochs to train
EPOCHSAVE = 100  # @param {type:"integer"}
# @markdown Sample Rate
MODELSAMPLE = "48k"  # @param {type:"string"}
# @markdown Whether to cache the training set
CACHEDATA = 1  # @param {type:"integer"}
# @markdown 是否仅保存最新的ckpt文件
ONLYLATEST = 0  # @param {type:"integer"}

!python3 train_nsf_sim_cache_sid_load_pretrain.py -e lulu -sr {MODELSAMPLE} -f0 1 -bs {BATCHSIZE} -g {USEGPU} -te {MODELEPOCH} -se {EPOCHSAVE} -pg pretrained/f0G{MODELSAMPLE}.pth -pd pretrained/f0D{MODELSAMPLE}.pth -l {ONLYLATEST} -c {CACHEDATA}

In [None]:
# @title 删除其它pth，只留选中的（慎点，仔细看代码）
# @markdown 模型名
MODELNAME = "lulu"  # @param {type:"string"}
# @markdown 选中模型epoch
MODELEPOCH = 9600  # @param {type:"integer"}

!echo "备份选中的模型。。。"
!cp /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/G_{MODELEPOCH}.pth /content/{MODELNAME}_D_{MODELEPOCH}.pth
!cp /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/D_{MODELEPOCH}.pth /content/{MODELNAME}_G_{MODELEPOCH}.pth

!echo "正在删除。。。"
!ls /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}
!rm /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/*.pth

!echo "恢复选中的模型。。。"
!mv /content/{MODELNAME}_D_{MODELEPOCH}.pth /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/G_{MODELEPOCH}.pth
!mv /content/{MODELNAME}_G_{MODELEPOCH}.pth /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/D_{MODELEPOCH}.pth

!echo "删除完成"
!ls /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}

In [None]:
# @title 清除项目下所有文件，只留选中的模型（慎点，仔细看代码）
# @markdown 模型名
MODELNAME = "lulu"  # @param {type:"string"}
# @markdown 选中模型epoch
MODELEPOCH = 9600  # @param {type:"integer"}

!echo "备份选中的模型。。。"
!cp /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/G_{MODELEPOCH}.pth /content/{MODELNAME}_D_{MODELEPOCH}.pth
!cp /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/D_{MODELEPOCH}.pth /content/{MODELNAME}_G_{MODELEPOCH}.pth

!echo "正在删除。。。"
!ls /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}
!rm -rf /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/*

!echo "恢复选中的模型。。。"
!mv /content/{MODELNAME}_D_{MODELEPOCH}.pth /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/G_{MODELEPOCH}.pth
!mv /content/{MODELNAME}_G_{MODELEPOCH}.pth /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/D_{MODELEPOCH}.pth

!echo "删除完成"
!ls /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}