In [None]:
# Mount Google Drive at /content/drive; later cells read archives and
# checkpoints from paths under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Clone the EmotionalConversionStarGAN repository into the current working
# directory (needed for its wav2spectrogram function).
!git clone https://github.com/eric-zhizu/EmotionalConversionStarGAN.git

In [None]:
# Unpack the 'Emotional Speech Dataset (ESD)' archive stored on Google Drive
# into the current working directory.
# NOTE(review): the path has no .zip suffix — confirm it resolves to the
# intended archive on Drive.
!unzip '/content/drive/MyDrive/Emotional Speech Dataset (ESD)'

In [None]:
# Unpack esd_ser_processed_data.zip from Google Drive into the current
# working directory.
!unzip '/content/drive/MyDrive/final_project/data/esd_ser_processed_data.zip'

In [None]:
!pip install pyworld
!pip install librosa
!pip install pysptk
!pip install speechbrain

In [None]:
import pyworld
import librosa
import math
import numpy as np
import pysptk
from IPython.display import clear_output 

# MCD (Mel-Cepstral Distortion) Calculation

In [None]:
SAMPLING_RATE = 16000  # Hz; every wav file is resampled to this rate when loaded
FRAME_PERIOD = 5       # frame_period passed to pyworld.wav2world
FFT_SIZE = 1024        # fft_size passed to pyworld.wav2world

def mcd(ref_wav_file, synth_wav_file):
    """Compute the mel-cepstral distortion (MCD) between two wav files.

    Each file is loaded as mono audio at ``SAMPLING_RATE``, analysed with the
    WORLD vocoder to obtain a spectral envelope, and converted to mel-cepstral
    coefficients with ``pysptk``. The two coefficient sequences (with the 0th
    coefficient dropped) are aligned by dynamic time warping, and the total
    cost of the optimal alignment is averaged over the number of aligned
    frame pairs.

    Args:
        ref_wav_file: Reference recording (anything ``librosa.load`` accepts).
        synth_wav_file: Synthesized / converted recording to compare with it.

    Returns:
        The mean per-frame distance along the optimal DTW path, where the
        frame distance is ``10 / ln(10) * sqrt(2) * ||ref - synth||``.
    """
    def load_wav(wav_file, sr):
        # Resample to `sr` and down-mix to one channel so both files are comparable.
        wav, _ = librosa.load(wav_file, sr=sr, mono=True)
        return wav

    def log_spec_dB_dist(x, y):
        # Euclidean distance between two cepstral frames, scaled by the
        # log-spectral dB constant.
        log_spec_dB_const = 10.0 / math.log(10.0) * math.sqrt(2.0)
        diff = x - y
        return log_spec_dB_const * math.sqrt(np.inner(diff, diff))

    def wav_file_to_mcep(wav_file):
        wav = load_wav(wav_file, sr=SAMPLING_RATE)

        # Use the WORLD vocoder to extract the spectral envelope; the other
        # two return values of wav2world are not needed here.
        _, sp, _ = pyworld.wav2world(wav.astype(np.double), fs=SAMPLING_RATE,
                                     frame_period=FRAME_PERIOD, fft_size=FFT_SIZE)

        # Extract MCEP features from the spectral envelope.
        # NOTE(review): pyworld documents its spectral envelope as squared
        # magnitude, while itype=3 declares the input to be an amplitude
        # spectrum — confirm whether itype=4 is what is intended.
        mcep_size = 34
        alpha = 0.35
        mgc = pysptk.sptk.mcep(sp, order=mcep_size, alpha=alpha, maxiter=0,
                               etype=1, eps=1.0E-8, min_det=0.0, itype=3)

        return mgc

    ref_mgc = wav_file_to_mcep(ref_wav_file)
    synth_mgc = wav_file_to_mcep(synth_wav_file)

    # librosa expects features along the first axis, hence the transpose.
    # Column 0 (the 0th cepstral coefficient, conventionally the energy term)
    # is excluded from the comparison.
    acc_cost, warp_path = librosa.sequence.dtw(ref_mgc[:, 1:].T, synth_mgc[:, 1:].T,
                                               metric=log_spec_dB_dist)

    # `acc_cost` is the accumulated cost matrix: only its last cell holds the
    # total cost of the optimal alignment. Averaging the whole matrix (as was
    # done before) mixes in the partial costs of every sub-alignment, so the
    # total is divided by the number of aligned frame pairs instead.
    dist = acc_cost[-1, -1] / len(warp_path)

    return dist

# Loop through processed_data

In [None]:
!git clone https://github.com/eric-zhizu/EmotionalConversionStarGAN.git

In [None]:
!git fetch
!git show-ref
!git checkout syeda-extension

In [None]:
# Make the cloned repository the working directory for the cells that follow
# (%cd persists across cells, unlike `!cd`).
%cd /content/EmotionalConversionStarGAN/

/content/EmotionalConversionStarGAN


In [None]:
# Copy the processed data into the repository checkout.
# NOTE(review): /content/efs/processed_data is presumably produced by one of
# the unzip cells above — verify the archive layout.
# NOTE(review): if the destination directory already exists, `cp -r` nests
# the copy inside it; confirm this cell is only run once per session.
%cp -r /content/efs/processed_data /content/EmotionalConversionStarGAN/processed_data

In [None]:
# Pull the latest commits of the syeda-extension branch from origin into the
# repository in the current working directory.
!git pull origin syeda-extension

In [None]:
# Install SpeechBrain from a source checkout in editable mode, together with
# its requirements and the transformers package.
# NOTE(review): speechbrain is also installed from PyPI by an earlier cell —
# confirm which of the two installs is intended.
# This cell leaves the working directory at /content/speechbrain.
%cd /content/
!git clone https://github.com/speechbrain/speechbrain.git
%cd speechbrain
%pip install -r requirements.txt
%pip install --editable .
%pip install transformers

In [None]:
# Checkpoint stored on Google Drive that is handed to mcd_evaluate.py via -c.
# NOTE(review): mcd_evaluate.py is resolved relative to the current working
# directory, which the preceding cell leaves at /content/speechbrain —
# confirm the script is reachable from there on a top-to-bottom run.
ckpt = '/content/drive/MyDrive/final_project/checkpoints/stargan_embed_5/400000.ckpt'
!python mcd_evaluate.py -c $ckpt

In [None]:
# Print the working directory first: the pickle below is opened with a
# relative path, so it must exist in this directory.
!pwd
import pickle
# SECURITY: pickle.load can execute arbitrary code while deserializing; only
# load file_emo_mappings.pkl when it comes from a trusted source.
with open('file_emo_mappings.pkl', 'rb') as f:
    emo_dict = pickle.load(f)

# Show the loaded object for inspection.
print(emo_dict)