# Speech analysis and re-synthesis

This notebook demonstrates how to analyze speech and re-synthesize speech waveforms from speech parameters using [pysptk](https://github.com/r9y9/pysptk) (and other useful speech/audio/music analysis packages). Synthesized audio examples are provided so that you can compare synthesis filters in your browser.

## Requirements

- pysptk: https://github.com/r9y9/pysptk
- scipy
- librosa: https://github.com/bmcfee/librosa
- seaborn: https://github.com/mwaskom/seaborn

In [1]:
%pylab inline

Populating the interactive namespace from numpy and matplotlib


In [2]:
import matplotlib
import seaborn
# Apply seaborn's "dark" style to every figure drawn in this notebook.
seaborn.set(style="dark")
# Default figure size as (width, height). rcParams is reached through the
# explicitly imported matplotlib module instead of the bare `rcParams` name
# injected by `%pylab inline`, so this cell no longer depends on that magic.
matplotlib.rcParams['figure.figsize'] = (16, 5)

In [3]:
from IPython.display import Audio
import IPython.display

In [4]:
import numpy as np
import sys
import librosa
import librosa.display
import pysptk
from scipy.io import wavfile

## Data Load

In [5]:
import copy
import fnmatch
import os
import random
import re

import pandas as pd
import json

def find_files(directory, pattern='*.wav'):
    '''Collect the paths of all files below `directory` whose names match `pattern`.

    The tree is traversed with `os.walk`, and `pattern` is a shell-style glob
    applied to the bare file names (not to the full paths) via `fnmatch.filter`.

    Args:
        directory: Root of the directory tree to search.
        pattern: Glob the file name has to match; defaults to all ``.wav`` files.

    Returns:
        A list of paths, each the walked folder joined with the matching file
        name, in the order `os.walk` yields them. Empty when nothing matches.
    '''
    return [
        os.path.join(folder, name)
        for folder, _subdirs, names in os.walk(directory)
        for name in fnmatch.filter(names, pattern)
    ]

## Mel-frequency cepstrum extraction

In [6]:
def generate_mfcc(directory, sample_rate, lc_dir_name, n_mfcc, lc_ext_name=".csv", pattern="p225*.wav"):
    '''Extract MFCC features for the matching wav files and save each as a CSV file.

    The output path of every input file is derived from the input path by
    swapping a trailing ".wav" for `lc_ext_name` and replacing every occurrence
    of "wav48" with `lc_dir_name`. Inputs whose output file already exists are
    skipped, so an interrupted run can simply be started again.

    Args:
        directory: Root directory searched recursively with `find_files`.
        sample_rate: Sampling rate handed to `librosa.load` and
            `librosa.feature.mfcc`.
        lc_dir_name: Directory name substituted for "wav48" in the output path.
        n_mfcc: Number of MFCC coefficients computed per frame.
        lc_ext_name: Extension given to the output files.
        pattern: File-name glob selecting the inputs; the default keeps the
            previously hard-coded "p225*.wav" selection.

    Each CSV holds one row per frame and `n_mfcc` columns, written without a
    header or an index column. Progress is printed and the cell output is
    cleared after every file.
    '''
    files = find_files(directory, pattern=pattern)
    for index, filename in enumerate(files):
        print(str(index)+"/"+str(len(files)))
        print(filename)
        # Strings are immutable, so the output path is derived without copying.
        lc_filename = filename
        if lc_filename.endswith('.wav'):
            lc_filename = lc_filename[:-4] + lc_ext_name
        lc_filename = lc_filename.replace("wav48", lc_dir_name)
        if os.path.isfile(lc_filename):
            # Features for this file were written by an earlier run.
            IPython.display.clear_output(wait=True)
            continue
        audio, _ = librosa.load(filename, sr=sample_rate, mono=True)
        mfcc = librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=n_mfcc) # shape = (n_mfcc, t)
        # Transpose so that every CSV row is one frame.
        df = pd.DataFrame(mfcc.T)
        print(str(mfcc.T.shape))
        out_dir = os.path.dirname(lc_filename)
        if out_dir:
            # exist_ok avoids the check-then-create race without needing errno;
            # an empty dirname means the current directory, which needs no creation.
            os.makedirs(out_dir, exist_ok=True)
        print(lc_filename)
        df.to_csv(lc_filename, sep=',', header=None, index=None)
        IPython.display.clear_output(wait=True)

In [7]:
def generate_lc_map(directory):
    '''Write a JSON lookup from wav paths to their MFCC CSV paths.

    Every file below `directory` matching "p225*.wav" contributes one entry.
    The key is the part of the wav path that follows the last occurrence of
    `directory`; the value is that key with ".wav" replaced by ".csv" and the
    prefix "../../mfcc40/p225" put in front. The mapping is saved as
    "mfcc40_p225.json" inside `directory`.

    Args:
        directory: Root directory that is searched and that receives the JSON file.
    '''
    # NOTE(review): the value prefix is hard-coded, so the resulting CSV paths
    # depend on which directory is passed in -- verify against the caller.
    wav_paths = find_files(directory, pattern="p225*.wav")
    print("files length: {}".format(len(wav_paths)))

    mapping = {}
    for wav_path in wav_paths:
        relative_wav = wav_path.split(directory)[-1]
        print(wav_path + " " + relative_wav)
        csv_path = "../../mfcc40/p225" + relative_wav.replace(".wav", ".csv")
        mapping[relative_wav] = csv_path

    json_path = directory + "/mfcc40_p225.json"
    with open(json_path, "w") as output:
        json.dump(mapping, output)

In [9]:
# Compute 40 MFCCs per frame at a 16 kHz sampling rate for the wav files that
# generate_mfcc selects under the VCTK corpus. Each result is saved as a CSV
# whose path is the wav path with "wav48" replaced by "mfcc40".
generate_mfcc("../../VCTK-Corpus", 16000, "mfcc40", 40, lc_ext_name=".csv")

230/231
../../VCTK-Corpus/wav48/p225/p225_145.wav
(63, 40)
../../VCTK-Corpus/mfcc40/p225/p225_145.csv
