In [1]:
from scipy.io import wavfile
import numpy as np
import pandas as pd
from oma_spec import gen_mag_spectrogram_mod
from scipy.ndimage.interpolation import zoom
import cPickle as pickle

# Root directory of the local UrbanSound8K copy. The trailing slash matters
# because every path below is built by plain string concatenation.
base = '/media/michael/Seagate/engage/urban8k/'

# Per-clip metadata table. `base` already ends with '/', so the relative part
# must not start with one (avoids a doubled separator in the path).
metadata = pd.read_csv(base + 'UrbanSound8K/metadata/UrbanSound8K.csv')

In [2]:
# Convert every clip of each fold into a fixed-size spectrogram and pickle
# the result as one file per fold (<base>/specs/fold<N>.pkl).

# Target spectrogram size: rows are padded/cropped to `required_height`,
# then the whole array is resized to required_height x required_width.
required_height = 224
required_width = 224
# Every clip is tiled/truncated to exactly this many seconds.
clip_seconds = 4.0

for fold in range(1, 11):
    X = []
    fold_meta = metadata[metadata['fold'] == fold]

    for _, row in fold_meta.iterrows():
        # fold_meta only holds rows of this fold, so `fold` is the row's fold.
        loadpath = base + 'UrbanSound8K/audio/fold%d/%s' % (
            fold, row['slice_file_name'])

        # reading wav and force mono (keep only the first channel)
        sr, wav = wavfile.read(loadpath)
        if wav.ndim == 2:
            wav = wav[:, 0]

        if wav.shape[0] == 0:
            # An empty clip cannot be tiled to the target length; fail with
            # the file name instead of an anonymous ZeroDivisionError.
            raise ValueError('empty audio file: %s' % loadpath)

        # force to be `clip_seconds` long: repeat the clip until it is at
        # least that long, then truncate. np.ceil returns a float, but
        # np.tile and slicing need integers, hence the explicit casts.
        repeats = int(np.ceil(clip_seconds / (wav.shape[0] / float(sr))))
        required_units = int(sr * clip_seconds)
        wav = np.tile(wav, repeats)[:required_units]

        # generating spectrogram
        # NOTE(review): gen_mag_spectrogram_mod is project-local; presumably
        # axis 0 is frequency, so the crop below acts as the low-pass filter
        # mentioned in the original comment. Confirm against oma_spec.
        spec = gen_mag_spectrogram_mod(wav, sr, 0.04)

        # padding (with zero rows) or cropping to required height
        if spec.shape[0] < required_height:
            pad = np.zeros((required_height - spec.shape[0], spec.shape[1]))
            spec = np.vstack((spec, pad))
        elif spec.shape[0] > required_height:
            spec = spec[:required_height]

        # resizing with bilinear interpolation (order=1); float() keeps the
        # division exact under Python 2 integer-division rules
        factor = (float(required_height) / spec.shape[0],
                  float(required_width) / spec.shape[1])
        spec = zoom(spec, factor, order=1)
        X.append(spec)

    # saving: pickle with a binary protocol must go to a file opened in
    # binary mode, and the context manager guarantees the handle is
    # flushed and closed before the next fold starts
    savepath = base + 'specs/fold%d.pkl' % fold
    tosave = {'X': X, 'classID': fold_meta['classID'], 'class': fold_meta['class']}
    with open(savepath, 'wb') as outfile:
        pickle.dump(tosave, outfile, pickle.HIGHEST_PROTOCOL)

  return c.reshape(shape_out)


[  4.08480759e-02  -5.83154627e-02   1.82654872e-02  -1.94148262e-01
   1.59455968e-01  -5.44458109e-02  -1.00938341e-01  -1.23143515e-01
  -3.17000883e-02  -1.43235687e-01  -2.48194638e-02   1.01153552e-01
  -3.16286939e-02  -1.80417604e-01   6.76418870e-02   1.37170912e-01
   4.97393339e-02   5.00280013e-02  -2.12403204e-01  -4.56284217e-02
  -2.26952756e-01  -2.78554353e-02  -2.90242499e-02   3.54965059e-02
  -1.46158195e-01  -3.47729866e-02  -3.61761178e-02  -1.08996364e-01
  -1.84600443e-03  -6.92052211e-02   1.15057534e-01  -3.81966693e-02
  -1.55295388e-02  -1.54077618e-01   5.23570148e-02   1.19465478e-01
   9.65533122e-02  -2.52628670e-02  -7.51170516e-03   2.05267146e-01
  -1.45400734e-02  -3.72065385e-03   7.81131346e-02  -7.10841390e-03
   1.27566893e-01  -4.59285718e-02  -1.20344231e-01   1.09866852e-01
  -7.86655874e-02  -1.09372089e-01  -1.75785580e-01  -6.23361217e-02
   2.56738065e-02  -1.30097509e-01   1.00350506e-01   3.50878103e-01
  -8.00187200e-03  -5.02681816e-02