<a href="https://colab.research.google.com/github/szymonrucinski/generate-voiceover/blob/main/DEEPFAKE_AUDIO.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# DEEPFAKE-AUDIO

In [2]:
# @markdown STEPS

#@markdown 1 Clone The Project

#@markdown 2 Download Pretrained Models

#@markdown 3 Initialize The Voice Cloning Models

import os
from os.path import exists, join, basename, splitext

git_repo_url = 'https://github.com/CorentinJ/Real-Time-Voice-Cloning.git'
project_name = splitext(basename(git_repo_url))[0]
if not exists(project_name):
  # clone and install
  !git clone -q --recursive {git_repo_url}
  # install dependencies
  !cd {project_name} && pip install -q -r requirements.txt
  !pip install -q gdown
  !apt-get install -qq libportaudio2
  !pip install -q https://github.com/tugstugi/dl-colab-notebooks/archive/colab_utils.zip

  # download pretrained model
!cd {project_name} && wget https://filedn.eu/lJe8HQehDK0jkgvBcE4bDl8/pretrained.zip && unzip -o pretrained.zip

import sys
sys.path.append(project_name)

from IPython.display import display, Audio, clear_output
from IPython.utils import io
import ipywidgets as widgets
import numpy as np
from dl_colab_notebooks.audio import record_audio, upload_audio

from synthesizer.inference import Synthesizer
from encoder import inference as encoder
from vocoder import inference as vocoder
from pathlib import Path

# Load the three pretrained checkpoints from inside the cloned project directory.
encoder.load_model(Path(project_name, "encoder/saved_models/pretrained.pt"))
synthesizer = Synthesizer(
    Path(project_name, "synthesizer/saved_models/pretrained/pretrained.pt")
)
vocoder.load_model(Path(project_name, "vocoder/saved_models/pretrained/pretrained.pt"))

[K     |████████████████████████████████| 11.3 MB 7.8 MB/s 
[K     |████████████████████████████████| 15.4 MB 77.1 MB/s 
[K     |████████████████████████████████| 3.1 MB 71.6 MB/s 
[K     |████████████████████████████████| 8.3 MB 10 kB/s 
[K     |████████████████████████████████| 76 kB 5.2 MB/s 
[K     |████████████████████████████████| 86 kB 5.6 MB/s 
[K     |████████████████████████████████| 235 kB 70.4 MB/s 
[K     |████████████████████████████████| 138 kB 74.3 MB/s 
[K     |████████████████████████████████| 676 kB 86.2 MB/s 
[K     |████████████████████████████████| 66 kB 4.7 MB/s 
[K     |████████████████████████████████| 965 kB 66.0 MB/s 
[K     |████████████████████████████████| 361 kB 70.3 MB/s 
[K     |████████████████████████████████| 59.9 MB 1.1 MB/s 
[K     |████████████████████████████████| 1.1 MB 45.8 MB/s 
[K     |████████████████████████████████| 55 kB 3.4 MB/s 
[K     |████████████████████████████████| 62 kB 1.5 MB/s 
[?25h  Building wheel for umap-lea

In [5]:
#@title RECORD OR UPLOAD
#@markdown * Either Record Audio from Microphone or Upload Audio from File (.mp3 or .wav) 

# Sample rate passed to record_audio / upload_audio, to the encoder preprocessing,
# and to the playback widget.
SAMPLE_RATE = 22050
# Form selection: "Record" shows a record button; any other value starts the upload flow.
record_or_upload = "Upload (.mp3 or .wav)" #@param ["Record", "Upload (.mp3 or .wav)"]
# Recording length handed to record_audio (the form limits it to 1-10).
record_seconds =   10#@param {type:"number", min:1, max:10, step:1}

# Speaker embedding written by _compute_embedding; None until a clip has been processed.
embedding = None
def _compute_embedding(audio):
  """Play back `audio` and store its speaker embedding in the global `embedding`.

  The global is reset to None before the encoder runs, so if preprocessing or
  embedding raises, no embedding from an earlier clip is left behind.
  """
  global embedding
  display(Audio(audio, rate=SAMPLE_RATE, autoplay=True))
  embedding = None
  preprocessed_wav = encoder.preprocess_wav(audio, SAMPLE_RATE)
  embedding = encoder.embed_utterance(preprocessed_wav)
def _record_audio(b):
  """Click handler: record `record_seconds` seconds of audio and embed the clip.

  `b` is whatever the click handler passes in; it is not used.
  """
  clear_output()
  _compute_embedding(record_audio(record_seconds, sample_rate=SAMPLE_RATE))
def _upload_audio(b):
  """Clear the cell output, ask for an uploaded audio file and embed it.

  `b` is accepted so the function can serve as a click handler; it is not used.
  """
  clear_output()
  uploaded_clip = upload_audio(sample_rate=SAMPLE_RATE)
  _compute_embedding(uploaded_clip)

# Depending on the form selection, either show a button that starts a
# recording or start the upload flow straight away.
if record_or_upload == "Record":
  button = widgets.Button(description="Record Your Voice")
  button.on_click(_record_audio)
  display(button)
else:
  # The upload flow is started directly, without a button. _upload_audio never
  # reads its argument, so pass None rather than a file name: a name here would
  # wrongly suggest that a file on disk is loaded instead of an uploaded one.
  _upload_audio(None)

MessageError: ignored

In [None]:
#@title SYNTHESIZE A TEXT { run: "auto" }
# Text to be spoken; passed to synthesize() together with the speaker embedding.
text = "A falling star or a shooting star has nothing at all to do with a star! These amazing streaks of light you can sometimes see in the night sky are caused by tiny bits of dust and rock called meteoroids falling into the Earth's atmosphere and burning up." #@param {type:"string"}
  
def synthesize(embed, text):
  """Generate speech for `text` conditioned on the embedding `embed` and play it.

  The text is turned into a spectrogram by the global `synthesizer`, the
  spectrogram into a waveform by `vocoder`, and the result is shown as an
  autoplaying audio widget after the cell output has been cleared.
  """
  print("Synthesizing new audio...")
  spectrograms = synthesizer.synthesize_spectrograms([text], [embed])
  waveform = vocoder.infer_waveform(spectrograms[0])
  # Append `sample_rate` constant-padded samples, i.e. one second at the
  # playback rate used below.
  sample_rate = synthesizer.sample_rate
  padded_waveform = np.pad(waveform, (0, sample_rate), mode="constant")
  clear_output()
  display(Audio(padded_waveform, rate=sample_rate, autoplay=True))

# Synthesize only once a speaker embedding exists; otherwise tell the user to
# provide a voice sample first.
if embedding is not None:
  synthesize(embedding, text)
else:
  print("first record a voice or upload a voice file!")