<a href="https://colab.research.google.com/github/Archivoice/Diff-SVC-notebooks/blob/main/Diff_SVC_inference_notebook_(colab_ver_).ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Inference notebook for [Diff-SVC](https://github.com/prophesier/diff-svc), written by [Nekro](https://twitter.com/NekroTheCorpse) of [Archivoice](https://github.com/archivoice).


# Check Setup

In [None]:
#@title #Check GPU type
#@markdown At this stage it's not really necessary, the best it does is let you guess how fast it can render
#@markdown ####¯\\_(ツ)_/¯
# Purely informational: both commands only print to the cell output and
# nothing they produce is read by later cells.
# `-L` prints a one-line summary per GPU.
!nvidia-smi -L
# Full status table for the attached GPU(s).
!nvidia-smi

In [None]:
#@title #Mount Google Drive

#@markdown Makes your life easier when uploading and saving stuff.

# Mounts Google Drive at /content/drive. The install cell reads
# /content/drive/MyDrive/checkpoints.zip, so this cell has to run before it.
from google.colab import drive
# Drop any existing mount and delete the mount point before mounting again
# (presumably so the cell can be re-run without a stale mount in the way).
# Keep this order: the directory is only removed after the unmount call.
drive.flush_and_unmount()
!rm -rf /content/drive
drive.mount('/content/drive')
print('Done!')

In [None]:
#@title #Install Diff-SVC
#@markdown The stuff you'll need for every other thing afterwards.

from IPython.display import clear_output 
from google.colab import files 
import os
print('Upgrading pip & installing 7zip')
!rm -rf /content/sample_data
!python -m pip install --upgrade pip
!python -m pip install --upgrade wheel
!apt-get install unzip
!pip install gdown

print('Installing torch')
%pip install torch==1.12.1+cu113 torchvision==0.13.1+cu113 torchaudio==0.12.1+cu113 -f https://download.pytorch.org/whl/cu113/torch_stable.html
!pip install --pre torchtext==0.6.0 --no-deps

print('Installing Diff-SVC')
!git clone https://github.com/prophesier/diff-svc &> /dev/null
%cd 

print('Installing requirements')
%cd "/content/diff-svc/"
!pip install -r requirements_short.txt
!pip install tensorboard<2.9,>=2.8
%reload_ext tensorboard

%cd "/content/diff-svc/training/"
!rm config.yaml
!gdown 'https://drive.google.com/uc?id=1FeYxQZI-n-_GLPktq1aEVzXH0IM7_i3F' -O config.yaml
%cd "/content/"
# !gdown 'https://drive.google.com/uc?id=1MqxItZvE7Xf-ae5QeW9nsfK7qzyOx5KH' -O checkpoints.zip
%mkdir -p /content/diff-svc/checkpoints/
!unzip /content/drive/MyDrive/checkpoints.zip -d /content/diff-svc/

print('Done!')

In [None]:
#@title #Load singer model

#@markdown ---

#@markdown Load in the full path of your model and config.  

#@markdown `project_name` is the name of your singer, `model_path`, as the name states, is the path directory to your model (full path), same goes for `config_path`.

#@markdown Ex:

#@markdown project_name = test

#@markdown model_path = /content/drive/MyDrive/Diff-SVC/checkpoints/test/model_ckpt_steps_50000.ckpt

#@markdown config_path = /content/drive/MyDrive/Diff-SVC/checkpoints/test/config.yaml

#@markdown The model below is a default model, change the settings to use your own model.

#@markdown ---

%cd "/content/diff-svc/"

os.environ['PYTHONPATH']='.'

!CUDA_VISIBLE_DEVICES=0
from utils.hparams import hparams
from preprocessing.data_gen_utils import get_pitch_parselmouth,get_pitch_crepe
import numpy as np
import matplotlib.pyplot as plt
import IPython.display as ipd
import utils
import librosa
import torchcrepe
from infer import *
import logging
from infer_tools.infer_tool import *

logging.getLogger('numba').setLevel(logging.WARNING)

project_name = "nyaru" #@param {type: "string"}
model_path = "./checkpoints/nyaru/model_ckpt_steps_112000.ckpt" #@param {type: "string"}
config_path="./checkpoints/nyaru/config.yaml" #@param {type: "string"}
hubert_gpu=True
svc_model = Svc(project_name,config_path,hubert_gpu, model_path)
print('model loaded')

# Rendering
Finally, the fun part.

In [None]:
#@markdown ## Upload audio here

%cd "/content/diff-svc/raw/"

print("\n\033[34m\033[1mupload your audio")
listfn, length = files.upload().popitem()

%cd "/content/diff-svc/"
print("\n\033[32m\033[1mdone")

In [None]:
#@markdown # Input audio and adjust parameters
#@markdown Additional parameters can be adjusted by double-clicking this cell.
#@markdown ___
#@markdown *Note: Read the document on the Github page for more detailed info on adjusting the values*
#@markdown ___

wav_fn='raw/test_input.wav' #@param {type: "string"}
#@markdown (input file name.)
key = 0#@param {type: "integer"}
#@markdown (key shifts the reference audio up or down by semitones; positive and negative values are ok.)
pndm_speedup = 20 #@param {type: "integer"}
#@markdown (pndm_speedup speeds up rendering at the cost of audio quality; the default works fine unless you're in a hurry.)
wav_gen='test_output.wav' #@param {type: "string"}
#@markdown (output file name.)

# Input audio and its sample rate as returned by librosa; kept only so the
# "Display results" cell can play the source next to the conversion.
demoaudio, sr = librosa.load(wav_fn)

# Run the conversion. The three results are reused by the display cells:
# `audio` for playback, `f0_tst` / `f0_pred` for the pitch plot.
f0_tst, f0_pred, audio = run_clip(
    svc_model,
    file_path=wav_fn,
    key=key,
    acc=pndm_speedup,
    use_crepe=True,
    use_pe=True,
    thre=0.05,
    use_gt_mel=False,
    add_noise_step=500,
    project_name=project_name,
    out_path=wav_gen,
)

In [None]:
#@markdown #Display results
# Two players, in this order: the input as loaded by librosa, then the
# converted audio returned by run_clip.
ipd.display(
    ipd.Audio(data=demoaudio, rate=sr)
)
# The output is played at the model's configured sample rate, with IPython's
# automatic normalisation switched off.
ipd.display(
    ipd.Audio(
        data=audio,
        rate=hparams['audio_sample_rate'],
        normalize=False,
    )
)

In [None]:
#@markdown #Display graph

#f0_gen,_=get_pitch_crepe(*vocoder.wav2spec(wav_gen),hparams,threshold=0.05)
%matplotlib inline
f0_gen,_=get_pitch_parselmouth(*svc_model.vocoder.wav2spec(wav_gen),hparams)
f0_tst[f0_tst==0]=np.nan#ground truth f0
f0_pred[f0_pred==0]=np.nan#f0 pe predicted
f0_gen[f0_gen==0]=np.nan#f0 generated
fig=plt.figure(figsize=[15,5])
plt.plot(np.arange(0,len(f0_tst)),f0_tst,color='black')
plt.plot(np.arange(0,len(f0_pred)),f0_pred,color='orange')
plt.plot(np.arange(0,len(f0_gen)),f0_gen,color='red')
plt.axhline(librosa.note_to_hz('C4'),ls=":",c="blue")
plt.axhline(librosa.note_to_hz('G4'),ls=":",c="green")
plt.axhline(librosa.note_to_hz('C5'),ls=":",c="orange")
plt.axhline(librosa.note_to_hz('F#5'),ls=":",c="red")
#plt.axhline(librosa.note_to_hz('A#5'),ls=":",c="black")
plt.show()