<a href="https://colab.research.google.com/github/kiurobox/python-rvc-cli/blob/main/colab/python_rvc_cli.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# 🚀 Python RVC Inference

In [None]:

#@title Initialization

from IPython.display import clear_output
import torch
import tensorflow as tf

# Report the library versions and GPU status of the current runtime.
print("\n\033[1m=== Environment Verification ===\033[0m")
print(f"PyTorch Version: {torch.__version__}")
print(f"TensorFlow Version: {tf.__version__}")

# Query CUDA availability once and reuse the answer below.
cuda_available = torch.cuda.is_available()
print(f"CUDA Available: {cuda_available}")
print(f"CUDA Version: {torch.version.cuda}")

if cuda_available:
    print(f"GPU Device: {torch.cuda.get_device_name(0)}")
else:
    # torch.cuda.get_device_name(0) raises when no CUDA device is present,
    # which would abort this cell before the completion message is printed.
    print("GPU Device: none detected (in Colab: Runtime > Change runtime type > GPU)")

print("\n\033[1m=== Initialization Complete ===\033[0m")
print("You can now proceed with subsequent cells!")

## Installation

In [None]:
# @title Clone





url = "https://github.com/kiurobox/python-rvc-cli.git"


!git clone $url
clear_output()

%cd /content/python-rvc-cli

In [None]:
# @title Install


!pip install -r requirements.txt
!pip uninstall torch torchvision torchaudio -y
!pip install torch==2.3.1 torchvision==0.18.1 torchaudio==2.3.1 --upgrade --index-url https://download.pytorch.org/whl/cu121
clear_output()
print("Finished installing requirements!")

In [None]:
# @title Download models

from rvc.lib.tools.prerequisites_download import prerequisites_download_pipeline

# Options forwarded to the prerequisites downloader: both the `models` and
# the `exe` flags are switched on.
download_flags = {"models": True, "exe": True}

# Only trigger the download when this code runs as the main module.
if __name__ == "__main__":
    prerequisites_download_pipeline(**download_flags)

## Infer

In [None]:
# @title Download model
# @markdown Hugging Face or Google Drive
# Link to the voice-model archive; editable through the Colab form field.
model_link = "https://huggingface.co/Bredvige/Sonic2/resolve/main/Sonic.zip"  # @param {type:"string"}

# Run the `download` subcommand of rvc_cli.py with the link above.
# rvc_cli.py is referenced by relative path, so the current working directory
# must be the repository checkout. IPython substitutes {model_link} with the
# Python variable's value before the shell sees the command.
!python rvc_cli.py download --model_link "{model_link}"

In [None]:
# @title Run Inference

import os

current_dir = os.getcwd()

# Name of the model folder to look for under <cwd>/logs/.
model_name = "Sonic"  # @param {type:"string"}
model_folder = os.path.join(current_dir, f"logs/{model_name}")

# isdir (not exists): a regular file at this path would make listdir fail
# with a less helpful error.
if not os.path.isdir(model_folder):
    raise FileNotFoundError(f"Model directory not found: {model_folder}")

# os.listdir() returns entries in arbitrary order; sort them so the same
# files are picked on every run when the folder holds several candidates.
files_in_folder = sorted(os.listdir(model_folder))
pth_path = next((f for f in files_in_folder if f.endswith(".pth")), None)
index_file = next((f for f in files_in_folder if f.endswith(".index")), None)

# Both files are required; report exactly which one is missing and where.
missing_kinds = [
    ext for ext, found in ((".pth", pth_path), (".index", index_file)) if found is None
]
if missing_kinds:
    raise FileNotFoundError(
        f"No {' / '.join(missing_kinds)} file found in {model_folder}"
    )

# Absolute paths handed to the inference command.
pth_file = os.path.join(model_folder, pth_path)
index_file = os.path.join(model_folder, index_file)

# Form parameters for the conversion. Each value is interpolated into the
# `rvc_cli.py infer` command at the end of this cell.
# Integer sliders use step:1; a step of 0 is not a valid slider increment.
input_path = "/content/python-rvc-cli/assets/separated_audios/Infinite_(Vocals)_model_mel_band_roformer_ep_3005_sdr_11.wav"  # @param {type:"string"}
output_path = "/content/output.wav" # @param {type:"string"}
export_format = "WAV"  # @param ['WAV', 'MP3', 'FLAC', 'OGG', 'M4A'] {allow-input: false}
f0_method = "hybrid[rmvpe+fcpe]"  # @param ["crepe", "crepe-tiny", "rmvpe", "fcpe", "hybrid[rmvpe+fcpe]"] {allow-input: false}
f0_up_key = 0  # @param {type:"slider", min:-24, max:24, step:1}
filter_radius = 3  # @param {type:"slider", min:0, max:10, step:1}
rms_mix_rate = 0.8  # @param {type:"slider", min:0.0, max:1.0, step:0.1}
protect = 0.5  # @param {type:"slider", min:0.0, max:0.5, step:0.1}
index_rate = 0.6  # @param {type:"slider", min:0.0, max:1.0, step:0.1}
hop_length = 128  # @param {type:"slider", min:1, max:512, step:1}
clean_strength = 0.7  # @param {type:"slider", min:0.0, max:1.0, step:0.1}
split_audio = False  # @param{type:"boolean"}
clean_audio = False  # @param{type:"boolean"}
f0_autotune = False  # @param{type:"boolean"}
formant_shift = False # @param{type:"boolean"}
formant_qfrency = 1.0 # @param {type:"slider", min:1.0, max:16.0, step:0.1}
formant_timbre = 1.0 # @param {type:"slider", min:1.0, max:16.0, step:0.1}
embedder_model = "contentvec" # @param ["contentvec", "chinese-hubert-base", "japanese-hubert-base", "korean-hubert-base", "custom"] {allow-input: false}
embedder_model_custom = "" # @param {type:"string"}


# Post-processing effects
# Fallback values for every post-processing setting. A name that already
# exists in the notebook's global namespace is left untouched; only the
# missing ones receive the default listed here.
_POST_PROCESS_DEFAULTS = {
    # Effect on/off switches
    "post_process": False,
    "reverb": False,
    "pitch_shift": False,
    "limiter": False,
    "gain": False,
    "distortion": False,
    "chorus": False,
    "bitcrush": False,
    "clipping": False,
    "compressor": False,
    "delay": False,
    # Reverb
    "reverb_room_size": 0.5,
    "reverb_damping": 0.5,
    "reverb_wet_gain": 0.0,
    "reverb_dry_gain": 0.0,
    "reverb_width": 1.0,
    "reverb_freeze_mode": 0.0,
    # Pitch shift
    "pitch_shift_semitones": 0.0,
    # Limiter
    "limiter_threshold": -1.0,
    "limiter_release_time": 0.05,
    # Gain
    "gain_db": 0.0,
    # Distortion
    "distortion_gain": 0.0,
    # Chorus
    "chorus_rate": 1.5,
    "chorus_depth": 0.1,
    "chorus_center_delay": 15.0,
    "chorus_feedback": 0.25,
    "chorus_mix": 0.5,
    # Bitcrush
    "bitcrush_bit_depth": 4,
    # Clipping
    "clipping_threshold": 0.5,
    # Compressor
    "compressor_threshold": -20.0,
    "compressor_ratio": 4.0,
    "compressor_attack": 0.001,
    "compressor_release": 0.1,
    # Delay
    "delay_seconds": 0.1,
    "delay_feedback": 0.5,
    "delay_mix": 0.5,
}

for _setting_name, _setting_default in _POST_PROCESS_DEFAULTS.items():
    # setdefault assigns only when the name is not defined yet.
    globals().setdefault(_setting_name, _setting_default)

# Launch the `infer` subcommand of rvc_cli.py through IPython's shell escape.
# Every {name} placeholder is replaced with the current value of the Python
# variable of that name, so booleans reach the CLI as the text "True"/"False".
# rvc_cli.py is referenced by relative path, so the working directory must be
# the repository checkout. The command must stay on a single line.
!python rvc_cli.py infer --pitch "{f0_up_key}" --filter_radius "{filter_radius}" --volume_envelope "{rms_mix_rate}" --index_rate "{index_rate}" --hop_length "{hop_length}" --protect "{protect}" --f0_autotune "{f0_autotune}" --f0_method "{f0_method}" --input_path "{input_path}" --output_path "{output_path}" --pth_path "{pth_file}" --index_path "{index_file}" --split_audio "{split_audio}" --clean_audio "{clean_audio}" --clean_strength "{clean_strength}" --export_format "{export_format}" --embedder_model "{embedder_model}" --embedder_model_custom "{embedder_model_custom}" --formant_shifting "{formant_shift}" --formant_qfrency "{formant_qfrency}" --formant_timbre "{formant_timbre}" --post_process "{post_process}" --reverb "{reverb}" --pitch_shift "{pitch_shift}" --limiter "{limiter}" --gain "{gain}" --distortion "{distortion}" --chorus "{chorus}" --bitcrush "{bitcrush}" --clipping "{clipping}" --compressor "{compressor}" --delay "{delay}" --reverb_room_size "{reverb_room_size}" --reverb_damping "{reverb_damping}" --reverb_wet_gain "{reverb_wet_gain}" --reverb_dry_gain "{reverb_dry_gain}" --reverb_width "{reverb_width}" --reverb_freeze_mode "{reverb_freeze_mode}" --pitch_shift_semitones "{pitch_shift_semitones}" --limiter_threshold "{limiter_threshold}" --limiter_release_time "{limiter_release_time}" --gain_db "{gain_db}" --distortion_gain "{distortion_gain}" --chorus_rate "{chorus_rate}" --chorus_depth "{chorus_depth}" --chorus_center_delay "{chorus_center_delay}" --chorus_feedback "{chorus_feedback}" --chorus_mix "{chorus_mix}" --bitcrush_bit_depth "{bitcrush_bit_depth}" --clipping_threshold "{clipping_threshold}" --compressor_threshold "{compressor_threshold}" --compressor_ratio "{compressor_ratio}" --compressor_attack "{compressor_attack}" --compressor_release "{compressor_release}" --delay_seconds "{delay_seconds}" --delay_feedback "{delay_feedback}" --delay_mix "{delay_mix}"

In [None]:
#@title Play audio output
# Shows an inline audio player for the file at `output_path`.

import os

from IPython.display import Audio, display, clear_output

# `output_path` is defined by the "Run Inference" cell, so run that cell first.
# Check for the file explicitly: when the path does not exist, Audio() treats
# the string as raw audio data and fails with an unrelated error.
if not os.path.isfile(output_path):
    raise FileNotFoundError(
        f"Output audio not found: {output_path}. Run the 'Run Inference' cell first."
    )

display(Audio(filename=output_path, autoplay=True))

### **Advanced Settings**

In [None]:
# @title # Post-processing effects
# @markdown Run this cell **before** "Run Inference" for these values to take effect; otherwise the inference cell falls back to its built-in defaults.

# Effect on/off switches.
post_process = False # @param{type:"boolean"}
reverb = False # @param{type:"boolean"}
pitch_shift = False # @param{type:"boolean"}
limiter = False # @param{type:"boolean"}
gain = False # @param{type:"boolean"}
distortion = False # @param{type:"boolean"}
chorus = False # @param{type:"boolean"}
bitcrush = False # @param{type:"boolean"}
clipping = False # @param{type:"boolean"}
compressor = False # @param{type:"boolean"}
delay = False # @param{type:"boolean"}

reverb_room_size = 0.5 # @param {type:"slider", min:0.0, max:1.0, step:0.1}
reverb_damping = 0.5 # @param {type:"slider", min:0.0, max:1.0, step:0.1}
reverb_wet_gain = 0.0 # @param {type:"slider", min:-20.0, max:20.0, step:0.1}
reverb_dry_gain = 0.0 # @param {type:"slider", min:-20.0, max:20.0, step:0.1}
reverb_width = 1.0 # @param {type:"slider", min:0.0, max:1.0, step:0.1}
reverb_freeze_mode = 0.0 # @param {type:"slider", min:0.0, max:1.0, step:0.1}

pitch_shift_semitones = 0.0 # @param {type:"slider", min:-12.0, max:12.0, step:0.1}

limiter_threshold = -1.0 # @param {type:"slider", min:-20.0, max:0.0, step:0.1}
limiter_release_time = 0.05 # @param {type:"slider", min:0.0, max:1.0, step:0.01}

gain_db = 0.0 # @param {type:"slider", min:-20.0, max:20.0, step:0.1}

distortion_gain = 0.0 # @param {type:"slider", min:0.0, max:1.0, step:0.1}

chorus_rate = 1.5 # @param {type:"slider", min:0.1, max:10.0, step:0.1}
chorus_depth = 0.1 # @param {type:"slider", min:0.0, max:1.0, step:0.1}
chorus_center_delay = 15.0 # @param {type:"slider", min:0.0, max:50.0, step:0.1}
# step is 0.05 so that the 0.25 default lies on the slider's grid.
chorus_feedback = 0.25 # @param {type:"slider", min:0.0, max:1.0, step:0.05}
chorus_mix = 0.5 # @param {type:"slider", min:0.0, max:1.0, step:0.1}

bitcrush_bit_depth = 4 # @param {type:"slider", min:1, max:16, step:1}

clipping_threshold = 0.5 # @param {type:"slider", min:0.0, max:1.0, step:0.1}

compressor_threshold = -20.0 # @param {type:"slider", min:-60.0, max:0.0, step:0.1}
compressor_ratio = 4.0 # @param {type:"slider", min:1.0, max:20.0, step:0.1}
compressor_attack = 0.001 # @param {type:"slider", min:0.0, max:0.1, step:0.001}
compressor_release = 0.1 # @param {type:"slider", min:0.0, max:1.0, step:0.01}

delay_seconds = 0.1 # @param {type:"slider", min:0.0, max:1.0, step:0.01}
delay_feedback = 0.5 # @param {type:"slider", min:0.0, max:1.0, step:0.1}
delay_mix = 0.5 # @param {type:"slider", min:0.0, max:1.0, step:0.1}

## UVR (audio separation)

In [None]:
#@title wav downloader for separator





url_input = "https://youtu.be/Ki_Nn95hj48?si=VDi2DcF5xkad9Nse"  #@param {type:"string"}
#@markdown **NOTE:** output will saved on `/content/python-rvc-cli/assets/yt-dlp/`



output_dir = "/content/python-rvc-cli/assets/yt-dlp"


!python download_audio.py $url_input --output $output_dir

In [None]:
#@title audio separator


audio_file = "/content/python-rvc-cli/assets/yt-dlp/Infinite.wav"  #@param {type:"string"}


#@markdown **NOTE:** output will saved on `/content/python-rvc-cli/assets/separated_audios/`
output_dir = "/content/python-rvc-cli/assets/separated_audios/"

!python uvr_cli.py --audio_file $audio_file --output_dir $output_dir