**Maintained by [justinjohn03](https://github.com/justinjohn0306)**
___
Special thanks to: [MLo7](https://github.com/MLo7Ghinsan)

## Before training

This notebook keeps the 3 most recent generations of models on Google Drive. Since each generation is larger than 1GB, you should have at least 3GB of free space in Google Drive. If you do not have that much free space, it is recommended to create another Google Account.

Training requires more than 10GB of VRAM (a T4 should be enough). Inference needs far less VRAM.

## **Installation**

In [None]:
#@title Check GPU
# List the GPUs visible to this runtime, so you can confirm a GPU is
# attached before starting the (GPU-heavy) training cells below.
!nvidia-smi -L

In [None]:
#@title Install so-vits-svc-fork

#@markdown # Install so-vits-svc-fork, mount google drive and select which directories to sync with google drive

#@markdown

%cd /content
!mkdir -p "dataset_raw"
!mkdir -p "logs/44k"
!mkdir -p "configs"
!mkdir -p "raw"

import os
from IPython.display import clear_output
from google.colab import drive
from IPython.display import Audio, display, HTML

if not os.path.exists("/content/play_sound"):
    os.makedirs("/content/play_sound")
%cd /content/play_sound
!wget -O setup_complete.wav https://github.com/MLo7Ghinsan/MLo7_Diff-SVC_models/releases/download/audio/setup_complete.wav

drive.mount("/content/drive", force_remount=True)

clear_output()

%cd /content

!python -m pip install -U pip setuptools wheel
%pip install -U ipython 
#@markdown Branch (for development)
BRANCH = "none" #@param {"type": "string"}
if BRANCH == "none":
    %pip install -U so-vits-svc-fork
else:
    %pip install -U git+https://github.com/34j/so-vits-svc-fork.git@{BRANCH}

clear_output()

#@markdown Directory to store **necessary files**, dont miss the slash at the end👇.
sovits_data_dir = "/content/drive/MyDrive/so-vits-svc-fork/"  #@param {type:"string"}
#@markdown By default it will create a `so-vits-svc-fork/` folder in your google drive.
RAW_DIR = sovits_data_dir + "raw/"
RESULTS_DIR = sovits_data_dir + "results/"
FILELISTS_DIR = sovits_data_dir + "filelists/"
CONFIGS_DIR = sovits_data_dir + "configs/"
LOGS_DIR = sovits_data_dir + "logs/44k/"

#@markdown

#@markdown ### These folders will be synced with your google drive

#@markdown　### **Strongly recommend to check all.**

#@markdown Sync **input audios** and **output audios**
sync_raw_and_results = True  #@param {type:"boolean"}
if sync_raw_and_results:
  !mkdir -p {RAW_DIR}
  !mkdir -p {RESULTS_DIR}
  !rm -rf /content/raw
  !rm -rf /content/results
  !ln -s {RAW_DIR} /content/raw
  !ln -s {RESULTS_DIR} /content/results

#@markdown Sync **config** and **models**
sync_configs_and_logs = True  #@param {type:"boolean"}
if sync_configs_and_logs:
  !mkdir -p {FILELISTS_DIR}
  !mkdir -p {CONFIGS_DIR}
  !mkdir -p {LOGS_DIR}
  !rm -rf /content/filelists
  !rm -rf /content/configs
  !rm -rf /content/logs/44k
  !ln -s {FILELISTS_DIR} /content/filelists
  !ln -s {CONFIGS_DIR} /content/configs
  !ln -s {LOGS_DIR} /content/logs/44k


  clear_output()

print("setup complete!")
print("|")
print("|")
print("|")

chika_dance = '<img src="https://cdn.discordapp.com/attachments/816517150175920138/1090112497446563950/icegif-2013.gif"/>'
display(HTML(chika_dance))

with open("/content/play_sound/setup_complete.wav", "rb") as f:
    setup_complete_sound = f.read()
Audio(data=setup_complete_sound, autoplay=True)

## **Preparation**

For a multi-speaker dataset, make sure the file structure inside your zip file looks like this:

```
YourZIPforMultipleSpeakers.zip
├───speaker0
│   ├───xxx1-xxx1.wav
│   ├───...
│   └───Lxx-0xx8.wav
└───speaker1
    ├───xx2-0xxx2.wav
    ├───...
    └───xxx7-xxx007.wav
```

In [None]:
import zipfile
from tqdm import tqdm
#@title #2.0 | Extract data | Resume training from checkpoint
%cd /content
clear_output()
#@markdown ___
#@markdown ###Train from scratch section
#@markdown +=========================+
#@markdown ####Directory of the zip file that contain all of your recordings that you want to use to train a model
train_from_scratch = False #@param {type:"boolean"}
raw_data_zip_path = "/content/drive/MyDrive/so-vits-svc-fork/test.zip"  #@param {type:"string"}
model_name = "test" #@param {type:"string"}

#@markdown ___
#@markdown ###Resume training section
#@markdown +=======================+
resume_training = False #@param {type:"boolean"}
#@markdown Directory of the zip file that THIS NOTEBOOK saved, or any zip that is in the same structure
preprocessed_data_zip_path = "/content/drive/MyDrive/so-vits-svc-fork/dataset.zip" #@param {type:"string"}
#@markdown ___

class DeezNutz(Exception):
    pass
if train_from_scratch and resume_training:
# sussy
# amogus
    raise DeezNutz("You can't select both of the options")
# If you are seeing this that means you select both options, you can't do that!
else:
  pass

if not train_from_scratch and not resume_training:
# sussy
# amogus
    raise DeezNutz("You need to select an option to proceed")
# If you are seeing this that means you didn't select anything!
else:
  pass

if train_from_scratch:
  if not os.path.exists(f"/content/dataset_raw/{model_name}"):
    os.makedirs(f"/content/dataset_raw/{model_name}")
  with zipfile.ZipFile(raw_data_zip_path, "r") as zip_ref:
    wav_files = [f for f in zip_ref.namelist() if f.endswith('.wav')]
    for file in tqdm(iterable=wav_files, total=len(wav_files), desc="Extracting files", unit="files"):
      zip_ref.extract(member=file, path=f"/content/dataset_raw/{model_name}")
  print("Training option: train a model from scratch")
else:
  pass

if resume_training:
  with zipfile.ZipFile(preprocessed_data_zip_path, "r") as zip_ref:
    for file in tqdm(iterable=zip_ref.namelist(), total=len(zip_ref.namelist()), desc="Extracting files", unit="files"):
      zip_ref.extract(member=file)
  print(" Training option: resume training from preprocessed data")
else:
  pass

print("|")
print("|")
print("|")
print("Done!")

## (Optional) **Load your multispeaker dataset from drive**

In [None]:
#@title Get raw dataset from google drive

#@markdown # Get raw dataset from google drive

#@markdown

#@markdown Directory where **your zip file** located in, dont miss the slash at the end👇.
sovits_data_dir = "/content/drive/MyDrive/so-vits-svc-fork/"  #@param {type:"string"}
#@markdown Filename of **your zip file**, do NOT be "dataset.zip"
zip_filename = "YourDataset.zip"  #@param {type:"string"}
ZIP_PATH = sovits_data_dir + zip_filename

!unzip -od /content/dataset_raw {ZIP_PATH}

# **Preprocessing**

In [None]:
#@markdown ## Resample audio
# First preprocessing step of the so-vits-svc-fork CLI. It is run without
# arguments, so it uses the tool's default locations relative to the current
# directory (presumably the dataset_raw folder filled by the cells above —
# confirm against `svc pre-resample --help`).
!svc pre-resample

In [None]:
#@markdown ## Generate config and filelists
# Second preprocessing step. `-t` selects the config type; here the
# "so-vits-svc-4.0v1-legacy" variant is requested (see `svc pre-config --help`
# for the other accepted values).
!svc pre-config -t so-vits-svc-4.0v1-legacy

In [None]:
#@markdown ## Extract F0

# Pitch (F0) estimation method, chosen from the form dropdown and passed to
# the CLI through its -fm option.
F0_METHOD = "dio" #@param ["crepe", "crepe-tiny", "parselmouth", "dio", "harvest"]
# Third preprocessing step of the so-vits-svc-fork CLI.
!svc pre-hubert -fm {F0_METHOD}

# **Optional**

In [None]:
#@title Backup the preprocessed dataset to google drive

#@markdown # Backup the preprocessed dataset to google drive

#@markdown

#@markdown You can save the dataset and the preprocessed files to your google drive for the next training

#@markdown **Directory for saving**, dont miss the slash at the end👇.
sovits_data_dir = "/content/drive/MyDrive/so-vits-svc-fork/" #@param {type:"string"}

#@markdown There will be a `dataset.zip` contained `dataset/` in your google drive, which is preprocessed data.

!mkdir -p {sovits_data_dir}
!zip -r dataset.zip dataset
!cp -vr dataset.zip "{sovits_data_dir}"
!rm dataset.zip

# **Training**

In [None]:
#@markdown ## Start training
%load_ext tensorboard
%tensorboard --logdir drive/MyDrive/so-vits-svc-fork/logs/44k
!svc train --model-path drive/MyDrive/so-vits-svc-fork/logs/44k

## **Training Cluster model**

(Optional but still recommended) 

In [None]:
#@markdown ## Start training cluster model

!svc train-cluster --output-path drive/MyDrive/so-vits-svc-fork/logs/44k/kmeans.pt

# **Inference**
__________

# **Inference for Basic Users**

In [None]:
#@title #2.0 Start inference

#@markdown Parameters see [README.MD#inference](https://github.com/voicepaw/so-vits-svc-fork#usage)

#@markdown

input_wav_path = "/content/play_sound/setup_complete.wav"  #@param {type:"string"}
wav_base_name = os.path.splitext(os.path.basename(input_wav_path))[0]
model_path = "/content/drive/MyDrive/so-vits-svc-fork/logs/44k/G_12000.pth"  #@param {type:"string"}
model_name = "test"  #@param {type:"string"}
config_path = "/content/drive/MyDrive/so-vits-svc-fork/logs/44k/config.json"  #@param {type:"string"}
f0_method = "dio" #@param ["crepe", "crepe-tiny", "parselmouth", "dio", "harvest"]
pitch = 0 #@param {type:"slider", min:-36, max:36, step:1}

output_path = "/content/play_sound" #@param {type:"string"}
output_filetype = ".flac" #@param [".wav", ".flac"]
output_audio = output_path + "/" + wav_base_name + "_" + model_name + "_" + f"pitch={pitch}" + "_" + f0_method + output_filetype
#@markdown

#@markdown Advanced parameters (keep default if you don't know what they do):


!svc infer {input_wav_path} -m {model_path} -s {model_name} -c {config_path} -fm {f0_method} -t {pitch} -na -o {output_audio}


In [None]:
#@title #2.0 Display inferred audio

from IPython.display import display, Audio, HTML
import base64

print("Input Audio")
display(Audio(f"{input_wav_path}", autoplay=False))

# Read the result once, closing the file afterwards. A missing file (inference
# not run, or failed) raises FileNotFoundError right here.
with open(output_audio, "rb") as f:
    audio_data = f.read()

print("Synthesized Audio")
display(Audio(audio_data, autoplay=True))
print("|")
print("|")
print("|")

# Build a download link with the file embedded as a base64 data URI. The MIME
# type follows the actual extension (the output may be .flac or .wav).
mime_by_ext = {".flac": "audio/flac", ".wav": "audio/wav"}
mime_type = mime_by_ext.get(os.path.splitext(output_audio)[1].lower(), "audio/wav")
b64 = base64.b64encode(audio_data).decode("utf-8")
href = f'<a href="data:{mime_type};base64,{b64}" download="{os.path.basename(output_audio)}" style="font-size: 40px;">Download Audio</a>'
display(HTML(href))


# **Inference for Advanced Users**

Click this bar to unlock the cells 👇

In [None]:
#@title #2.1 Start inference (For Advanced Users)

#@markdown Parameters see [README.MD#inference](https://github.com/voicepaw/so-vits-svc-fork#usage)

#@markdown

input_wav_path = "/content/play_sound/setup_complete.wav"  #@param {type:"string"}
wav_base_name = os.path.splitext(os.path.basename(input_wav_path))[0]
model_path = "/content/drive/MyDrive/so-vits-svc-fork/logs/44k/G_12000.pth"  #@param {type:"string"}
model_name = "test"  #@param {type:"string"}
config_path = "/content/drive/MyDrive/so-vits-svc-fork/logs/44k/config.json"  #@param {type:"string"}
cluster_model_path = "/content/drive/MyDrive/so-vits-svc-fork/logs/44k/kmeans.pt" #@param {type:"string"}
f0_method = "dio" #@param ["crepe", "crepe-tiny", "parselmouth", "dio", "harvest"]
pitch = 0 #@param {type:"slider", min:-36, max:36, step:1}

output_path = "/content/play_sound" #@param {type:"string"}
output_filetype = ".flac" #@param [".wav", ".flac"]
output_audio = output_path + "/" + wav_base_name + "_" + model_name + "_" + f"pitch={pitch}" + "_" + f0_method + output_filetype
#@markdown

#@markdown Advanced parameters (keep default if you don't know what they do):

threshold_db = -40 #@param {type:"slider", min:-60, max:0, step:5}
cluster_ratio = 0 #@param {type:"slider", min:0, max:1, step:0.01}
noise_scale = 0.4 #@param {type:"slider", min:0, max:1, step:0.05}
pad_seconds = 0.5 #@param {type:"slider", min:0, max:1, step:0.01}
chunk_seconds = 0.5 #@param {type:"slider", min:0, max:3, step:0.05}


!svc infer {input_wav_path} -m {model_path} -s {model_name} -c {config_path} -fm {f0_method} -t {pitch} -db {threshold_db} -k {cluster_model_path} -r {cluster_ratio} -n {noise_scale} -p {pad_seconds} -ch {chunk_seconds} -na -o {output_audio}


In [None]:
#@title #2.1 Display inferred audio

from IPython.display import display, Audio, HTML
import base64

print("Input Audio")
display(Audio(f"{input_wav_path}", autoplay=False))

# Read the result once, closing the file afterwards. A missing file (inference
# not run, or failed) raises FileNotFoundError right here.
with open(output_audio, "rb") as f:
    audio_data = f.read()

print("Synthesized Audio")
display(Audio(audio_data, autoplay=True))
print("|")
print("|")
print("|")

# Build a download link with the file embedded as a base64 data URI. The MIME
# type follows the actual extension (the output may be .flac or .wav).
mime_by_ext = {".flac": "audio/flac", ".wav": "audio/wav"}
mime_type = mime_by_ext.get(os.path.splitext(output_audio)[1].lower(), "audio/wav")
b64 = base64.b64encode(audio_data).decode("utf-8")
href = f'<a href="data:{mime_type};base64,{b64}" download="{os.path.basename(output_audio)}" style="font-size: 40px;">Download Audio</a>'
display(HTML(href))
