<a href="https://colab.research.google.com/github/mhuckvale/voice/blob/main/Embedding_PCA_Demonstration.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Demonstrate FreeVC with Principal Components of a Deep-Speaker embedding trained on the Globe sample dataset


# Configuration

In [None]:
# Start from /content so the clone below is created at /content/FreeVC.
%cd /content
# Install the Python packages listed here (PyTorch family and webrtcvad).
!pip3 install torch torchaudio torchvision torchtext torchdata webrtcvad
# Remove any earlier checkout so this cell can be re-run, then clone FreeVC.
!rm -rf FreeVC
!git clone https://github.com/OlaWod/FreeVC.git
# Make the FreeVC checkout the working directory and print it.
%cd FreeVC
!pwd

# 1. Set up Python environment

In [None]:
import math
import pandas as pd
import numpy as np

import ipywidgets as widgets
from ipywidgets import HBox, VBox, Layout
from IPython.display import Audio
%matplotlib inline


# 2. Load the FreeVC model and weights trained on Globe

In [None]:
# Fetch a fresh copy of the "voice" repository into the current directory.
# The copy commands below expect it at /content/FreeVC/voice.
!rm -rf voice
!git clone https://github.com/mhuckvale/voice.git

%cd /content/FreeVC
# Copy freevc.py and the checkpoint files matching *.pt* from the voice
# repository into the FreeVC checkout.
!cp /content/FreeVC/voice/FreeVC/freevc.py .
!cp /content/FreeVC/voice/FreeVC/checkpoints/*.pt* checkpoints

# download the voice conversion model
!wget -O checkpoints/G_30000.pth https://avatartherapy.co.uk/download/G_30000.pth

# download wavlm
!wget -O wavlm/WavLM-Large.pt https://avatartherapy.co.uk/download/WavLM-Large.pt

# Configure and load the FreeVC object used to apply a speaker embedding
# to an audio file.
import os
from types import SimpleNamespace
from freevc import FreeVC

# All paths are relative to the current working directory.
args = SimpleNamespace(
    hpfile="configs/freevc.json",
    ptfile="checkpoints/G_30000.pth",
    spfile='checkpoints/pretrained_bak_5805000.pt',
    outdir="output",
)
print(args)

# Create the output directory (if needed) before loading.
os.makedirs(args.outdir, exist_ok=True)
freevc = FreeVC()
freevc.load(args)

# 3. Load PCA loadings for the speaker embedding

In [None]:
!cp /content/FreeVC/voice/FreeVC/globe-params-pcacomp.txt .


# VQ parameter PCA
components=pd.read_csv("globe-embed256-pcacomp.txt",header=False)
print(components)



# 4. Calculate the speaker embedding from principal components

In [None]:
def calculate_embedding(pca):
    """Combine PCA loadings into a 256-value embedding.

    Output value j is the sum over i of ``pca[i] * components.iloc[i, j]``,
    where ``components`` is the module-level table of loadings. Entry i of
    ``pca`` therefore weights row i of that table.

    Args:
        pca: sequence of principal-component scores.

    Returns:
        A list of 256 values; every value is 0 when ``pca`` is empty.
    """
    embedding_size = 256
    embedding = []
    for column in range(embedding_size):
        # Accumulate the weighted loadings for this output position.
        total = 0
        for row, score in enumerate(pca):
            total = total + score * components.iloc[row, column]
        embedding.append(total)
    return embedding

# 5. Build sliders for first 8 principal components of embedding

In [None]:
# function to respond to change in value of slider
def value_change(change):
    """Show the most recent slider change in the status text box.

    Registered as the observer of each PCA slider's ``value`` trait.

    Args:
        change: change dictionary supplied by the widget; ``change['owner']``
            is the slider that moved and ``change['new']`` is its new value.
    """
    changed_slider = change['owner']
    pcatext.value = changed_slider.description + '=' + str(change['new'])
    # Nothing else to do here: runconversion() reads the slider values itself
    # when synthesis is requested. This handler used to pass the values to
    # update_params(), a function this notebook never defines, so every
    # slider move raised NameError.

# build sliders: eight of them, labelled PCA1..PCA8, each reporting
# changes of its value to value_change
pca_sliders=[]
for number in range(1,9):
    slider=widgets.FloatSlider(
        description='PCA'+str(number),
        value=0.,
        min=-5.0,
        max=5.0,
        step=0.5,
        readout_format='.2f',
    )
    slider.observe(value_change,names='value')
    pca_sliders.append(slider)

# status text box; the callbacks write their progress messages to it
pcatext=widgets.Text(description='Status:',value='',placeholder='',disabled=False)

# reset button
def on_reset(b):
    """Return every PCA slider to zero when the Reset button is clicked.

    Args:
        b: the clicked button (passed by ipywidgets; unused).
    """
    for pca_slider in pca_sliders:
        # Each slider is set to 0.1 and then to 0.
        # NOTE(review): presumably so that a value-change notification fires
        # even when the slider is already at 0 — confirm.
        pca_slider.value=0.1
        pca_slider.value=0

reset_button = widgets.Button(description='Reset')
reset_button.on_click(on_reset)

# PCA Synthesis button
pcago_button = widgets.Button(description='Go PCA')
def on_pcago(b):
    """Run voice conversion with the current slider settings.

    Args:
        b: the clicked button (passed by ipywidgets; unused).
    """
    # runconversion is defined without parameters, so it must be called with
    # no arguments; passing one raises TypeError on every click.
    runconversion()
pcago_button.on_click(on_pcago)


# 9. Run voice conversion from PCA values

In [None]:
!cp /content/FreeVC/voice/FreeVC/*.wav .

# select audio
import glob
wavlist=glob.glob("*.wav")
sndlist=[ s.replace(".wav","") for s in wavlist]
options=list(zip(sndlist,wavlist))
wavselect=widgets.Dropdown(options=options,value='whitelight.wav',description="Audio",disabled=False)

from IPython.display import Audio, clear_output
output4 = widgets.Output(layout={'border': '1px solid black'})

def runconversion():
    """Convert the selected audio using the embedding set by the PCA sliders.

    Reads the current slider values, turns them into a speaker embedding with
    calculate_embedding, passes the selected file and the embedding to
    freevc.convert with 'out.wav' as the output name, and then plays
    'output/out.wav'. Progress messages go to the status text box, and the
    whole sequence runs inside the ``output4`` widget.
    """
    with output4:
        # gather the current PCA slider settings
        pcavalues=[pca_slider.value for pca_slider in pca_sliders]
        # create the embedding from the PCA
        pcatext.value="calculate embedding"
        speaker_embedding=calculate_embedding(pcavalues)
        # perform conversion
        pcatext.value="voice conversion started"
        freevc.convert(wavselect.value,speaker_embedding,'out.wav')
        # clear the previous output and show a fresh auto-playing player
        pcatext.value="replaying"
        clear_output(wait=True)
        display(Audio('output/out.wav',autoplay=True))

# show the output area that runconversion displays its audio player in
output4

# 10. Create the user interface

In [None]:
# layout: a bordered, centred column holding the title, the sliders, the
# audio selector and the two buttons
box_layout = Layout(
    display='flex',
    flex_flow='column',
    align_items='center',
    border='solid',
    width='50%',
)
title=widgets.HTML("<h2>Principal Components</h2>")
button_row=HBox([reset_button,pcago_button])
control_panel=VBox([title,*pca_sliders,wavselect,button_row],layout=box_layout)

# the final expression of the cell is what gets displayed
HBox(children=[control_panel])
