<a href="https://colab.research.google.com/github/thatneos/rvc-colab/blob/main/RVC_Infer.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
#@title ## Install RVC
from google.colab import drive
import os
import time
from ipywidgets import Button

start_total_t = time.time()

# Report whether a GPU runtime is attached.
# os.environ.get() is used because indexing os.environ raises KeyError when the
# variable is absent. An empty value or "0" is treated as "no GPU".
# NOTE(review): confirm which values Colab actually assigns to COLAB_GPU.
if os.environ.get("COLAB_GPU", "") not in ("", "0"):
  print("a GPU is connected. Things should be working normally.\nIf there are any errors dm @eempostor on discord. Enjoy!")
else:
  print("GPU isn't connected. Inference would take a long time and might crash.\nI recommend connecting to a GPU. You have been warned.")

main_dir = "/content/rvc"
#@markdown (OPTIONAL) Mount your google drive to the runtime.
mount_drive = False #@param {type:"boolean"}

# Mount only when requested, and only if /content/drive is not there yet.
if mount_drive:
  if not os.path.exists("/content/drive"):
    mount_started = time.time()
    print("- Mounting drive.")
    drive.mount("/content/drive")
    mount_elapsed = round(time.time() - mount_started, 2)
    print(f"- Mounting completed! ({mount_elapsed}s)")
if not os.path.exists(main_dir):
  start_download_t = time.time()
  print("- Downloading files for inference.")
  !git clone -q https://huggingface.co/Thatneos/rvc
  os.chdir(main_dir)
  !wget -q -O assets/fcpe/fcpe.pt https://huggingface.co/datasets/NeoPy/rvc-base/resolve/main/fcpe.pt
  !wget -q -O assets/hubert/hubert_base.pt https://huggingface.co/NeoPy/rvc-base/resolve/main/hubert_base.pt
  !wget -q -O assets/rmvpe/rmvpe.pt https://huggingface.co/NeoPy/rvc-base/resolve/main/rmvpe.pt
  end_download_t = time.time()
  print(f"- Downloading completed! ({round(end_download_t-start_download_t, 2)}s)")
else:
  print("- Files are already downloaded.")
start_install_t = time.time()
print("- Installing dependencies.")
!pip install pip==24.0
!pip install -q av uv
!uv pip install -q ffmpeg-python>=0.2.0
!uv pip install -q faiss_cpu==1.7.3
!uv pip install -q praat-parselmouth==0.4.2
!uv pip install -q pyworld==0.3.4
!uv pip install -q resampy==0.4.2
!uv pip install -q fairseq==0.12.2
!uv pip install -q pydub==0.25.1
!uv pip install -q einops
!uv pip install -q local_attention
!uv pip install -q torchcrepe==0.0.23
!uv pip install -q torchfcpe
!uv pip install -q audio-separator[gpu]==0.30.1
!uv pip install -q  git+https://github.com/One-sixth/fairseq.git
end_install_t = time.time()



Button(description="\u2714 Success", button_style="success")

end_total_t = time.time()
print(f"- Installation completed! ({round(end_install_t-start_install_t, 2)}s)\nTotal time: {round((end_total_t-start_total_t)/60, 2)} minutes")

In [None]:
#@title ## Download models using urls

import os
import zipfile
import shutil
import urllib.request
import gdown
import subprocess

# Voice models live in "models"; the path is relative, so it resolves against
# the working directory selected below.
models_dir = "models"

# Work from inside the RVC checkout.
main_dir = "/content/rvc"
os.chdir(main_dir)

def extract_zip(extraction_folder, zip_name):
    """Unpack a voice-model archive into a new folder and flatten its contents.

    The archive is extracted into ``extraction_folder`` and then deleted. The
    extracted tree is searched for a ``.pth`` file larger than 40 MiB and a
    ``.index`` file larger than 100 KiB; if several files qualify, the last one
    visited by ``os.walk`` is used. Both are moved to the top level of
    ``extraction_folder`` and every sub-directory is removed afterwards.

    Args:
        extraction_folder: Destination directory. It is created here and must
            not exist yet.
        zip_name: Path of the zip archive to unpack.

    Raises:
        FileExistsError: If ``extraction_folder`` already exists.
        zipfile.BadZipFile, OSError: If the archive cannot be read or
            extracted. The folder created by this call is removed again so the
            same model name can be retried.
        Exception: If no sufficiently large ``.pth`` file was extracted. The
            folder is kept in this case so its contents can be inspected.
    """
    min_index_bytes = 1024 * 100
    min_model_bytes = 1024 * 1024 * 40

    os.makedirs(extraction_folder)
    try:
        with zipfile.ZipFile(zip_name, 'r') as zip_ref:
            zip_ref.extractall(extraction_folder)
    except BaseException:
        # Do not leave a half-created model folder behind: callers refuse to
        # reuse a model name whose folder already exists.
        shutil.rmtree(extraction_folder, ignore_errors=True)
        raise
    os.remove(zip_name)

    index_filepath, model_filepath = None, None
    for root, _dirs, files in os.walk(extraction_folder):
        for name in files:
            filepath = os.path.join(root, name)
            # The size thresholds skip small files that merely share the extension.
            if name.endswith('.index') and os.path.getsize(filepath) > min_index_bytes:
                index_filepath = filepath

            if name.endswith('.pth') and os.path.getsize(filepath) > min_model_bytes:
                model_filepath = filepath

    if not model_filepath:
        raise Exception(f'No .pth model file was found in the extracted zip. Please check {extraction_folder}.')

    # Move the model and index file to the top level of the extraction folder.
    os.replace(model_filepath, os.path.join(extraction_folder, os.path.basename(model_filepath)))
    if index_filepath:
        os.replace(index_filepath, os.path.join(extraction_folder, os.path.basename(index_filepath)))

    # Remove any nested folders that are left over.
    for entry in os.listdir(extraction_folder):
        entry_path = os.path.join(extraction_folder, entry)
        if os.path.isdir(entry_path):
            shutil.rmtree(entry_path)

def download_online_model(url, dir_name):
    """Download a zipped voice model and unpack it into ``models_dir/dir_name``.

    Pixeldrain links are rewritten to the Pixeldrain file API, Google Drive
    links are fetched with ``gdown``, and every other URL is fetched with
    ``urllib.request.urlretrieve``. The archive is then handed to
    ``extract_zip``.

    Args:
        url: Download link of the zip archive.
        dir_name: Name of the folder to create under ``models_dir``.

    Raises:
        Exception: If ``url`` or ``dir_name`` is empty, the target folder
            already exists, the download produced no file, or extraction
            fails. The original error is attached as ``__cause__``.
    """
    try:
        url = (url or '').strip()
        dir_name = (dir_name or '').strip()
        # Both form fields default to "", which would otherwise surface as an
        # unrelated "already exists" or "unknown url type" error.
        if not url:
            raise Exception('No download URL was given. Please enter the URL of the voice model zip.')
        if not dir_name:
            raise Exception('No model name was given. Please enter a name for your voice model.')

        print(f'[~] Downloading voice model with name {dir_name}...')
        extraction_folder = os.path.join(models_dir, dir_name)
        if os.path.exists(extraction_folder):
            raise Exception(f'Voice model directory {dir_name} already exists! Choose a different name for your voice model.')

        # Last path segment of the URL without query string, fragment or
        # trailing slash; used as the Pixeldrain file id and as the local name.
        last_segment = url.split('#', 1)[0].split('?', 1)[0].rstrip('/').split('/')[-1]

        if 'pixeldrain.com' in url:
            url = f'https://pixeldrain.com/api/file/{last_segment}'
        if 'drive.google.com' in url:
            zip_name = dir_name + ".zip"
            gdown.download(url, output=zip_name, use_cookies=True, quiet=True, fuzzy=True)
        else:
            zip_name = last_segment or dir_name + ".zip"
            urllib.request.urlretrieve(url, zip_name)

        # Fail before extract_zip creates the model folder if nothing was downloaded.
        if not os.path.isfile(zip_name):
            raise Exception(f'Download failed: {zip_name} was not created. Please check the URL.')

        print('[~] Extracting zip file...')
        extract_zip(extraction_folder, zip_name)
        print(f'[+] {dir_name} Model successfully downloaded!')

    except Exception as e:
        raise Exception(str(e)) from e

# Colab form fields; the values entered here are passed to download_online_model below.
#@markdown Enter the model download URL. It can be a Google Drive, HuggingFace, or Pixeldrain URL.
url = "" # @param {type:"string"}
#@markdown Enter your desired model name.
dir_name = "" # @param {type:"string"}

# Download the archive and unpack it into a folder named dir_name under models_dir.
download_online_model(url, dir_name)

In [None]:
#@title ## Inference
import os
# The install cell clones the project into /content/rvc and the download cell
# works from the same directory; nothing in this notebook creates
# /content/Harmonify, so changing into it fails on a fresh runtime.
main_dir = "/content/rvc"
os.chdir(main_dir)
# Imported after the chdir — presumably lib.infer is resolved relative to the
# checkout; TODO confirm.
from lib.infer import infer_audio
from google.colab import files
from pydub import AudioSegment
import shutil

# Colab form fields; every value below is passed positionally to infer_audio further down.
# NOTE(review): MIN_PITCH and MAX_PITCH are strings while the other numeric
# settings are numbers — confirm this is what infer_audio expects.
#@markdown ### | MAIN SETTINGS |
#@markdown Enter your model name. It will automatically search for a folder containing the pth file and index file.
MODEL_NAME = "" #@param {type:"string"}
#@markdown Enter your audio path. Leave blank to upload audio from the cell.
SOUND_PATH = "" #@param {type:"string"}
#@markdown Change pitches in semitones.
F0_CHANGE = 0 #@param {type:"integer"}
#@markdown Select an f0 method. You can use a custom hybrid method by typing "hybrid[method1+method2+...]".
F0_METHOD = "fcpe" #@param ["crepe", "harvest", "mangio-crepe", "rmvpe", "rmvpe+", "fcpe", "fcpe_legacy", "hybrid[mangio-crepe+rmvpe]", "hybrid[mangio-crepe+fcpe]", "hybrid[rmvpe+fcpe]", "hybrid[mangio-crepe+rmvpe+fcpe]"] {allow-input:true}
#@markdown ### | OTHER SETTINGS |
MIN_PITCH = "50" #@param {type:"string"}
MAX_PITCH = "1100" #@param {type:"string"}
CREPE_HOP_LENGTH = 120 #@param {type:"integer"}
INDEX_RATE = 0.75 #@param {type:"number"}
FILTER_RADIUS = 3 #@param {type:"integer"}
RMS_MIX_RATE = 0.25 #@param {type:"number"}
PROTECT = 0.33 #@param {type:"number"}
#@markdown ### | ADVANCED SETTINGS |
#@markdown Split input audio into smaller chunks by detecting silence, inferring them, and then combining them with the purpose of getting cleaner results. Turning this on will lengthen inference time.
SPLIT_INFER = False #@param {type:"boolean"}
#@markdown [SPLIT INFER SETTINGS] The minimum length for any silence section. Measured in milliseconds.
MIN_SILENCE = 500 #@param {type:"number"}
#@markdown [SPLIT INFER SETTINGS] The upper bound for how quiet is silence in dBFS.
SILENCE_THRESHOLD = -50 #@param {type:"number"}
#@markdown [SPLIT INFER SETTINGS] Step size for iterating over the audio in ms.
SEEK_STEP = 1 #@param {type:"slider", min:1, max:10, step:1}
#@markdown [SPLIT INFER SETTINGS] Leave some silence at the beginning and end of the chunks to keep the audio from sounding like it is abruptly cut off. Measured in milliseconds.
KEEP_SILENCE = 200 #@param {type:"number"}
#@markdown Turn on for a better male to female and vice versa conversion. Turning this on will **significantly** lengthen inference time. (VERY EXPERIMENTAL).
FORMANT_SHIFT = False #@param {type:"boolean"}
#@markdown [FORMANT SHIFT SETTINGS] Controls the rate of change of the frequencies in the audio. Increasing this rate can make the frequencies change more rapidly, which can make the sound higher in pitch.
QUEFRENCY = 0 #@param {type:"number"}
#@markdown [FORMANT SHIFT SETTINGS] Controls "sharpness" of the audio. Too high of timbre will result in a voice that sounds unnatural, overly processed, or harsh.
TIMBRE = 1 #@param {type:"number"}
#@markdown Finds the closest note in terms of frequency from a predefined set of musical notes, simulating a basic autotune mechanism.
F0_AUTOTUNE = False #@param {type:"boolean"}
#@markdown ### | OUTPUT SETTINGS |
#@markdown Specify the desired output format.
OUTPUT_FORMAT = "wav" #@param ["wav", "flac", "mp3"]

# No path given: ask for an upload and use the uploaded file as input.
if not SOUND_PATH:
    upload_dir = os.path.join(main_dir, "audio_input")
    # Create the folder if the checkout does not ship it, so the chdir cannot fail.
    os.makedirs(upload_dir, exist_ok=True)
    # The path below is built from the cwd, i.e. the upload is expected to be
    # saved into the current working directory.
    os.chdir(upload_dir)
    try:
        uploaded_audio = files.upload()
        assert len(uploaded_audio) == 1, "Please only input audio one at a time"
        SOUND_PATH = os.path.join(os.getcwd(), list(uploaded_audio.keys())[0])
    finally:
        # Always return to the project root, even if the upload was rejected.
        os.chdir(main_dir)
    # The form field is called SOUND_PATH; the old message named a field that does not exist.
    print(f"To use this audio again without reuploading. Please copy this **{SOUND_PATH}** and paste it in SOUND_PATH")

# Mark ./stftpitchshift (relative to the current directory) as executable.
os.system("chmod +x stftpitchshift")

# infer_audio takes the form values positionally, in exactly this order.
inference_settings = (
    MODEL_NAME, SOUND_PATH,
    F0_CHANGE, F0_METHOD,
    MIN_PITCH, MAX_PITCH,
    CREPE_HOP_LENGTH, INDEX_RATE, FILTER_RADIUS, RMS_MIX_RATE, PROTECT,
    SPLIT_INFER, MIN_SILENCE, SILENCE_THRESHOLD, SEEK_STEP, KEEP_SILENCE,
    FORMANT_SHIFT, QUEFRENCY, TIMBRE,
    F0_AUTOTUNE,
    OUTPUT_FORMAT,
)
inferred_audio = infer_audio(*inference_settings)

# Go back to the project root after inference.
os.chdir(main_dir)

print(f"Showing {inferred_audio}.")
# Kept as the last expression of the cell so the notebook renders the result.
AudioSegment.from_file(inferred_audio)