* `predict()`: saves `.npy` files containing the convnet features
* `get_mfcc()`: saves an `.npy` file containing the MFCC (+ delta, delta-delta) features

In [1]:
import os
import sys
import numpy as np
import keras
import models
from argparse import Namespace
import pdb
import pandas as pd
import librosa
import time
from multiprocessing import Pool
from joblib import Parallel, delayed
from keras import backend as K
from utils_featext import *

Couldn't import dot_parser, loading of dot files will not be possible.


Using TensorFlow backend.


# GLOBAL SETTINGS

In [6]:
# A base path for datasets
PATH_DATASETS = '/misc/kcgscratch1/ChoGroup/keunwoo/datasets/'
# Jamendo is here after trimming
PATH_PROCESSED = '/misc/kcgscratch1/ChoGroup/keunwoo/datasets_processed/'
PATH_URBANSOUND = '/misc/kcgscratch1/ChoGroup/keunwoo/UrbanSound8K/audio'
# A folder to store csv files
FOLDER_CSV = 'data_csv/'
# A folder to store extracted features
FOLDER_FEATS = 'data_feats/'
# Create the output folders on first run (makedirs also copes with nested paths).
for _folder in (FOLDER_CSV, FOLDER_FEATS):
    if not os.path.exists(_folder):
        os.makedirs(_folder)

# Some constants for my convnet
SR = 12000 # [Hz]
len_src = 29. # [second]
N_JOBS = 9 # number of worker processes handed to multiprocessing.Pool
# Number of samples per clip; derived from SR and len_src so it cannot drift
# out of sync when either of them is changed.
ref_n_src = int(SR * len_src)

In `PATH_DATASETS`:

```shell
keunwoo@weaver4[datasets]$ pwd
/misc/kcgscratch1/ChoGroup/keunwoo/datasets
keunwoo@weaver4[datasets]$ ls -l
drwx------. 16 keunwoo keunwoo       4096 Dec 29 16:42 ballroom_extended_2016
drwx------.  3 keunwoo keunwoo       4096 Dec 29 16:19 emoMusic45s
drwx------.  3 keunwoo keunwoo         27 Dec 29 16:19 gtzan_genre
drwx------.  3 keunwoo keunwoo         33 Dec 29 16:20 gtzan_music_speech
drwx------.  6 keunwoo keunwoo       4096 Dec 29 18:21 jamendo_voice_activity
drwx------.  4 keunwoo keunwoo       4096 Nov 29 02:46 UrbanSound8K
```

In `PATH_PROCESSED`:

```shell
keunwoo@weaver4[datasets_processed]$ pwd
/misc/kcgscratch1/ChoGroup/keunwoo/datasets_processed
keunwoo@weaver4[datasets_processed]$ ls -l
drwx------. 5 keunwoo keunwoo 57 Dec 29 19:20 jamendo_trimmed
```

## FUNCTIONS

In [9]:
def gen_filepaths(df, dataroot=None):
    """Yield the full path of every entry in the 'filepath' column of `df`.

    df: anything indexable by the key 'filepath' that yields relative paths.
    dataroot: root folder the relative paths are joined onto; falls back to
        PATH_DATASETS when None.
    """
    root = PATH_DATASETS if dataroot is None else dataroot
    for relative_path in df['filepath']:
        yield os.path.join(root, relative_path)

def gen_audiofiles(df, batch_size=256, dataroot=None):
    '''Yield batches of loaded audio signals for keras model.predict_generator.

    df: dataframe with a 'filepath' column.
    batch_size: integer, number of files loaded for each yielded batch.
    dataroot: root path for data (forwarded to `gen_filepaths`).

    Each yielded array has shape (n_files_in_batch, 1, ref_n_src). All full
    batches are yielded first, followed by one smaller batch if len(df) is
    not a multiple of batch_size.
    '''
    total_size = len(df)
    n_batch, n_leftover = divmod(total_size, batch_size)
    gen_f = gen_filepaths(df, dataroot=dataroot)
    print('n_batch: {}, n_leftover: {}, all: {}'.format(n_batch, n_leftover, total_size))

    pool = Pool(N_JOBS)

    def _multi_loading(paths):
        # Load the files in parallel, then insert a singleton axis at position 1.
        # If the loaded signals do not stack into a 2-D array the indexing raises,
        # which is preferable to silently dropping into a debugger.
        srcs = np.array(pool.map(_load_audio, paths))
        return srcs[:, np.newaxis, :]

    try:
        for batch_idx in range(n_batch):
            paths = [next(gen_f) for _ in range(batch_size)]
            print('..yielding {}/{} batch..'.format(batch_idx, n_batch))
            yield _multi_loading(paths)

        if n_leftover:
            paths = [next(gen_f) for _ in range(n_leftover)]
            print('..yielding final batch w {} data sample..'.format(len(paths)))
            yield _multi_loading(paths)
    finally:
        # Runs when the generator is exhausted, closed or garbage-collected,
        # so the worker processes are always released.
        pool.close()
        pool.join()

def _load_audio(path):
    """Read the audio file at `path` (sampling rate SR, at most len_src seconds)
    and return it as an array of exactly ref_n_src samples, zero-padded at the end."""
    duration_sec = len_src * SR / float(SR)
    samples, _ = librosa.load(path, sr=SR, duration=duration_sec)
    samples = samples[:ref_n_src]
    padded = np.zeros(ref_n_src)
    padded[:len(samples)] = samples
    return padded

### Function for convnet feature

In [11]:
def predict(filename, batch_size, model, dataroot=None, npy_suffix=''):
    """Extract convnet feature using given model and save it as a npy file.

    filename: dataset name; `<filename>.csv` is read from FOLDER_CSV and
        `<filename><npy_suffix>.npy` is written to FOLDER_FEATS.
    batch_size: integer, number of audio files per generated batch.
    model: object providing `predict_generator` (Keras 1.x signature).
    dataroot: root path of the audio files; PATH_DATASETS when None.
    npy_suffix: string appended to the name of the saved npy file.
    """
    if dataroot is None:
        dataroot = PATH_DATASETS
    start = time.time()
    csv_path = os.path.join(FOLDER_CSV, '{}.csv'.format(filename))
    npy_path = os.path.join(FOLDER_FEATS, '{}{}.npy'.format(filename, npy_suffix))
    # read_csv(index_col=0, parse_dates=True) is the documented replacement of
    # the deprecated (and later removed) DataFrame.from_csv.
    df = pd.read_csv(csv_path, index_col=0, parse_dates=True)
    print('{}: Dataframe with size:{}'.format(filename, len(df)))
    # .iloc picks the first row regardless of what the index labels are.
    example_path = os.path.join(dataroot, df['filepath'].iloc[0])
    print('An example path - does it exists? {}'.format(os.path.exists(example_path)))
    print(df.columns)
    gen_audio = gen_audiofiles(df, batch_size, dataroot)
    # val_samples / max_q_size are the Keras 1.x argument names, kept on purpose
    # because the model itself can only be loaded with Keras 1.x.
    feats = model.predict_generator(generator=gen_audio,
                                    val_samples=len(df),
                                    max_q_size=1)
    np.save(npy_path, feats)
    print('DONE! in {:6.4f} sec'.format(time.time() - start))

### functions for mfcc

In [12]:
# for mfcc
def get_mfcc(filename, dataroot=None):
    """Compute MFCC statistics for every file of a dataset and save them as npy.

    filename: dataset name; `<filename>.csv` is read from FOLDER_CSV and
        `<filename>_mfcc.npy` is written to FOLDER_FEATS.
    dataroot: root path of the audio files; PATH_DATASETS when None.

    The saved array holds one row per file, as returned by `_path_to_mfccs`.
    """
    if dataroot is None:
        # Resolve the default here: os.path.join(None, ...) below would raise.
        dataroot = PATH_DATASETS
    start = time.time()
    csv_path = os.path.join(FOLDER_CSV, '{}.csv'.format(filename))
    npy_path = os.path.join(FOLDER_FEATS, '{}_mfcc.npy'.format(filename))
    # read_csv(index_col=0, parse_dates=True) is the documented replacement of
    # the deprecated (and later removed) DataFrame.from_csv.
    df = pd.read_csv(csv_path, index_col=0, parse_dates=True)
    print('{}: Dataframe with size:{}'.format(filename, len(df)))
    # Sanity check: does the first audio file exist under dataroot?
    print(os.path.exists(os.path.join(dataroot, df['filepath'].iloc[0])))
    print(df.columns)
    paths = list(gen_filepaths(df, dataroot=dataroot))

    pool = Pool(N_JOBS)
    try:
        feats = np.array(pool.map(_path_to_mfccs, paths))
    finally:
        # Release the worker processes even if feature extraction fails.
        pool.close()
        pool.join()
    np.save(npy_path, feats)
    print('MFCC is done! in {:6.4f} sec'.format(time.time() - start))
    
def _path_to_mfccs(path):
    """Load the audio file at `path` and return its MFCC summary vector.

    Means and standard deviations over time of the 20 MFCCs, of their
    first-order differences and of their second-order differences are
    concatenated, giving a 1-D array of 6 * 20 = 120 values.
    """
    min_len = 1024 # min length to have 3-frame mfcc's
    src, sr = librosa.load(path, sr=SR, duration=29.) # max len: 29s, can be shorter.
    if len(src) < min_len:
        # Zero-pad signals that are too short; the buffer is only built when needed.
        padded = np.zeros(min_len)
        padded[:len(src)] = src
        src = padded

    # Keyword arguments: recent librosa releases reject positional audio/sr here.
    mfcc = librosa.feature.mfcc(y=src, sr=SR, n_mfcc=20)
    dmfcc = mfcc[:, 1:] - mfcc[:, :-1]
    ddmfcc = dmfcc[:, 1:] - dmfcc[:, :-1]

    stats = []
    for feat in (mfcc, dmfcc, ddmfcc):
        stats.append(np.mean(feat, axis=1))
        stats.append(np.std(feat, axis=1))
    return np.concatenate(stats, axis=0)

# Extract my convnet features
## Models for layer 1-5 (or, 0-4)

In [7]:
# Dataset names, split into two groups; `dataroots` mirrors the same layout
# and gives the audio root of each dataset (None -> default root).
tasks = [
    ['ballroom_extended', 'gtzan_speechmusic'],
    ['emoMusic', 'jamendo_vd', 'urbansound', 'gtzan_genre'],
]

dataroots = [
    [None, None],
    [None, PATH_PROCESSED, PATH_URBANSOUND, None],
]

In [4]:
# `tasks` and `dataroots` are lists of groups (lists of lists), so they are
# flattened into (dataset name, data root) pairs before calling predict();
# zipping the outer lists directly would pass a whole group as `filename`.
task_pairs = [(name, root)
              for task_group, root_group in zip(tasks, dataroots)
              for name, root in zip(task_group, root_group)]

# NOTE(review): `batch_size` is not defined in this notebook; presumably it is
# provided by `from utils_featext import *` -- confirm, or define it here.
for mid_idx in range(5):
    model = load_model_for_mid(mid_idx)
    npy_suffix = '_layer_{}'.format(mid_idx)
    for task_idx, (filename, dr) in enumerate(task_pairs):
        print('\nStart: mid_idx: {}, task_idx: {}'.format(mid_idx, task_idx))
        predict(filename, batch_size, model, dr, npy_suffix)
        print('Done: mid_idx: {}, task_idx: {}'.format(mid_idx, task_idx))


  '` call to the Keras 2 API: ' + signature)


TypeError: The `mode` argument of `BatchNormalization` no longer exists. `mode=1` and `mode=2` are no longer supported.

* Due to the Keras 2.0 API change (the `mode` argument of `BatchNormalization` was removed), my model can't be loaded with Keras 2. You can use Keras 1.1 to reproduce this step, or just use the features stored under the `data_feats` folder.

## DO THE JOBS Set 1 - task 1, 2, 3

In [15]:
# MFCC features for the datasets that live under the default data root.
for filename in ('ballroom_extended', 'gtzan_genre', 'gtzan_speechmusic'):
    get_mfcc(filename=filename)

In [16]:
# MFCC features for the datasets that need a specific data root
# (None -> default root).
tasks = ['emoMusic', 'jamendo_vd', 'urbansound']
# PATH_URBANSOUND already ends with 'audio'; joining 'audio' once more would
# point at '.../audio/audio', unlike the root used for the convnet features.
dataroots = [None,
             PATH_PROCESSED,
             PATH_URBANSOUND]
for filename, dr in zip(tasks, dataroots):
    get_mfcc(filename, dataroot=dr)