# MGC Feature Extraction
This notebook demonstrates how to analyze speech and re-synthesize a speech waveform from speech parameters using [pysptk](https://github.com/r9y9/pysptk) (and other useful speech/audio/music analysis packages). Synthesized audio examples are provided so that you can compare synthesis filters in your browser.

## Requirements

- pysptk: https://github.com/r9y9/pysptk
- scipy
- librosa: https://github.com/bmcfee/librosa
- seaborn: https://github.com/mwaskom/seaborn

In [12]:
# Populates the notebook namespace from numpy and matplotlib; the bare names
# used in later cells (rcParams, plot, xlim, legend) come from this magic.
%pylab inline

Populating the interactive namespace from numpy and matplotlib


`%matplotlib` prevents importing * from pylab and numpy
  "\n`%matplotlib` prevents importing * from pylab and numpy"


In [13]:
import matplotlib
import seaborn
# Use seaborn's "dark" style for the plots in this notebook.
seaborn.set(style="dark")
# rcParams is available unqualified via `%pylab inline`; default to wide figures.
rcParams['figure.figsize'] = (16, 5)

In [14]:
from IPython.display import Audio
import IPython.display

In [15]:
import numpy as np
import sys
import librosa
import librosa.display
import pysptk
from scipy.io import wavfile

## Data Load

In [16]:
import copy
import fnmatch
import os
import random
import re

import pandas as pd
import json

def find_files(directory, pattern='*.wav'):
    '''Walk `directory` recursively and collect every file whose name matches `pattern`.

    directory: root folder handed to os.walk.
    pattern:   fnmatch-style glob applied to bare file names (not full paths).

    Returns a list of paths, each built by joining the folder reported by
    os.walk with the matching file name. The list is empty when nothing matches.
    '''
    return [
        os.path.join(folder, name)
        for folder, _subdirs, names in os.walk(directory)
        for name in fnmatch.filter(names, pattern)
    ]

## Source parameter extraction

### Framing and windowing

In [17]:
# Framing parameters, read as globals by windowing() and f0_estimation().
# Number of samples in each analysis frame.
frame_length = 1024
# Number of samples between the starts of consecutive frames
# (5 ms at the 16000 Hz rate passed to generate_mgc below).
hop_length = 80

def windowing(x):
    '''Cut a signal into overlapping frames and apply a Blackman window to each.

    Reads the module-level `frame_length` and `hop_length`.

    x: signal passed straight to librosa.util.frame (assumed 1-D - TODO confirm).

    Returns a np.float64 array whose second axis has length `frame_length`
    (asserted below), i.e. one windowed frame per row for 1-D input.
    '''
    # Note that almost all pysptk functions assume the input array is
    # C-contiguous with np.float64 elements, hence the explicit cast.
    framed = librosa.util.frame(x, frame_length=frame_length, hop_length=hop_length)
    frames = framed.astype(np.float64).T

    # Scale every frame by the window, in place.
    window = pysptk.blackman(frame_length)
    frames *= window
    assert frames.shape[1] == frame_length
    return frames

### F0 estimation

In [18]:
# F0 estimation
def f0_estimation(x):
    '''Estimate the F0 trajectory of `x` with SWIPE' and plot it.

    Reads the module-level `sr` (sampling rate) and `hop_length`.
    NOTE(review): no cell shown here assigns `sr`; it has to be defined
    before this function is called.

    x: signal array; it is cast to np.float64 before analysis.

    Returns the F0 array produced by pysptk.swipe (search range 60-240).
    Side effect: draws the trajectory on the current matplotlib axes using
    the pylab names `plot`, `xlim` and `legend`.
    '''
    signal = x.astype(np.float64)
    f0 = pysptk.swipe(signal, fs=sr, hopsize=hop_length, min=60, max=240, otype="f0")

    # Show the whole trajectory with a readable legend.
    plot(f0, linewidth=3, label="F0 trajectory estimated by SWIPE'")
    xlim(0, len(f0))
    legend(prop={'size': 16})
    return f0

## Mel-generalized cepstrum extraction

In [19]:
# Mel-generalized cepstral analysis settings, read as globals by mgcep().
# Order of mel-cepstrum
order = 25
# Second analysis parameter handed to pysptk.mgcep.
# NOTE(review): presumably the frequency-warping constant for 16 kHz audio - confirm.
alpha = 0.41
# gamma is derived from the stage count as -1/stage, i.e. -0.2 here.
stage = 5
gamma = -1.0 / stage

def mgcep(frames, sr, order=order):
    '''Run mel-generalized cepstral analysis on windowed frames.

    frames: windowed frames, as produced by windowing().
    sr:     sampling rate; unused by the analysis itself, only referenced by
            the optional visualisation snippet kept in the comment below.
    order:  analysis order; defaults to the module-level `order` as it was
            when this function was defined.

    Uses the module-level `alpha` and `gamma`, with min_det=0.
    Returns whatever pysptk.mgcep returns for `frames`.
    '''
    # Optional: inspect the spectral envelope implied by the coefficients.
    #   logH = pysptk.mgc2sp(mgc, alpha, gamma, frame_length).real
    #   librosa.display.specshow(logH.T, sr=sr, hop_length=hop_length, x_axis="time", y_axis="linear")
    #   colorbar()
    #   title("Spectral envelope estimate from mel-generalized cepstrum")
    #   sys.stdout.flush()
    return pysptk.mgcep(frames, order, alpha, gamma, min_det=0)

In [20]:
def generate_mgc(directory, sample_rate, lc_ext_name=".csv"):
    '''Extract MGC features for every .wav file under `directory` and save them as CSV.

    For each input file the output path is derived by
      * replacing a trailing ".wav" in the file name with `lc_ext_name`, and
      * replacing "wav" with "mgc" in the directory part of the path only,
        so a file name that happens to contain "wav" is left intact (this
        keeps the names in line with what generate_lc_map records).
    Files whose output already exists are skipped, which makes the function
    safe to re-run after an interruption.

    directory:   root folder searched recursively with find_files().
    sample_rate: rate passed to librosa.load and on to mgcep().
    lc_ext_name: extension (including the dot) given to the output files.

    Returns None. Side effects: writes one headerless, index-less CSV per
    input file, creates missing output directories, prints progress and
    clears the cell output after each file.
    '''
    files = find_files(directory, pattern="*.wav")
    for index, filename in enumerate(files):
        print(str(index)+"/"+str(len(files)))
        print(filename)

        # Build the output path; only the directory part gets wav -> mgc.
        wav_dir, basename = os.path.split(filename)
        if basename.endswith('.wav'):
            basename = basename[:-4] + lc_ext_name
        out_dir = wav_dir.replace("wav", "mgc")
        lc_filename = os.path.join(out_dir, basename)

        # Already extracted on a previous run: nothing to do.
        if os.path.isfile(lc_filename):
            IPython.display.clear_output(wait=True)
            continue

        audio, _ = librosa.load(filename, sr=sample_rate, mono=True)
        frames = windowing(audio)
        mgc = mgcep(frames, sample_rate)
        print(str(mgc.shape))

        # Create the output directory if needed. Another process may create it
        # between the check and makedirs, so only re-raise when the directory
        # is still missing afterwards (no errno lookup required).
        if out_dir and not os.path.isdir(out_dir):
            try:
                os.makedirs(out_dir)
            except OSError:
                if not os.path.isdir(out_dir):
                    raise

        print(lc_filename)
        pd.DataFrame(mgc).to_csv(lc_filename, sep=',', header=None, index=None)
        IPython.display.clear_output(wait=True)

In [29]:
def generate_lc_map(directory):
    '''Write `<directory>/mgc.json`, mapping each .wav file to its MGC CSV path.

    Keys are the .wav paths relative to `directory`; values are the same
    paths with "wav/" replaced by "mgc/" and a trailing ".wav" replaced by
    ".csv". Using os.path.relpath makes the keys identical whether or not
    `directory` is given with a trailing slash.

    directory: root folder searched recursively with find_files().

    Returns None. Side effects: prints the number of files found, writes
    mgc.json inside `directory`, and prints the resulting mapping.
    '''
    files = find_files(directory, pattern="*.wav")
    print("files length: {}".format(len(files)))

    lookup = {}
    for filename in files:
        p_filename = os.path.relpath(filename, directory)
        p_lc_filename = p_filename.replace("wav/", "mgc/")
        # Swap the extension only; leave any other ".wav" in the name alone.
        if p_lc_filename.endswith(".wav"):
            p_lc_filename = p_lc_filename[:-4] + ".csv"
        lookup[p_filename] = p_lc_filename

    with open(os.path.join(directory, "mgc.json"), "w") as output:
        json.dump(lookup, output)
    # Function-call form works on both Python 2 and Python 3.
    print(lookup)

In [22]:
# Extract MGC features for every .wav under the corpus directory, loading audio
# at 16000 Hz and writing one .csv per file; existing outputs are skipped.
generate_mgc("../../cmu_us_slt_arctic", 16000, lc_ext_name=".csv")

1131/1132
../../cmu_us_slt_arctic/wav/arctic_a0166.wav


In [30]:
# Write mgc.json (wav path -> feature path) into the corpus directory.
# NOTE(review): the saved output below lists ".mgc" targets, whereas
# generate_lc_map as written maps to ".csv" - re-run this cell to refresh it.
generate_lc_map("../../cmu_us_slt_arctic/")

files length: 1132
{'wav/arctic_a0566.wav': 'mgc/arctic_a0566.mgc', 'wav/arctic_b0212.wav': 'mgc/arctic_b0212.mgc', 'wav/arctic_a0063.wav': 'mgc/arctic_a0063.mgc', 'wav/arctic_b0374.wav': 'mgc/arctic_b0374.mgc', 'wav/arctic_b0111.wav': 'mgc/arctic_b0111.mgc', 'wav/arctic_b0152.wav': 'mgc/arctic_b0152.mgc', 'wav/arctic_b0221.wav': 'mgc/arctic_b0221.mgc', 'wav/arctic_a0104.wav': 'mgc/arctic_a0104.mgc', 'wav/arctic_b0538.wav': 'mgc/arctic_b0538.mgc', 'wav/arctic_b0009.wav': 'mgc/arctic_b0009.mgc', 'wav/arctic_b0396.wav': 'mgc/arctic_b0396.mgc', 'wav/arctic_b0503.wav': 'mgc/arctic_b0503.mgc', 'wav/arctic_a0397.wav': 'mgc/arctic_a0397.mgc', 'wav/arctic_a0163.wav': 'mgc/arctic_a0163.mgc', 'wav/arctic_a0504.wav': 'mgc/arctic_a0504.mgc', 'wav/arctic_b0164.wav': 'mgc/arctic_b0164.mgc', 'wav/arctic_a0166.wav': 'mgc/arctic_a0166.mgc', 'wav/arctic_a0523.wav': 'mgc/arctic_a0523.mgc', 'wav/arctic_a0248.wav': 'mgc/arctic_a0248.mgc', 'wav/arctic_a0184.wav': 'mgc/arctic_a0184.mgc', 'wav/arctic_a0527.wa