# UrbanSound8K pre-processing

It creates three files, `train`, `valid`, and `test` + `.h5`

Split: folder 1-8: train, 9:valid, 10:test

By Keunwoo Choi. 29 Nov 2016.

### Load things

In [52]:
%matplotlib notebook
import matplotlib.pyplot as plt
import numpy as np
import h5py
import librosa
import os, sys
import pandas as pd 

### Setup path

**Change these to your setup**

* **`PATH_US`**: audio folder path
* **`path_csv`**: csv file path
* **`PATH_HDF`**: output HDF folder path 


In [12]:
PATH_US = '/misc/kcgscratch1/ChoGroup/keunwoo/UrbanSound8K/audio/'
path_csv = '/misc/kcgscratch1/ChoGroup/keunwoo/UrbanSound8K/metadata/UrbanSound8K.csv'
PATH_HDF = '/misc/kcgscratch1/ChoGroup/keunwoo/urbansound8k_hdf/'
# slice_file_name	fsID	start	end	salience	fold	classID	class
# 100032-3-0-0.wav	100032	0	0.317551	1	5	3	dog_bark
# 100263-2-0-117.wav	100263	58.5	62.5	1	5	2	children_playing
# 100263-2-0-121.wav	100263	60.5	64.5	1	5	2	children_playing

In [45]:
fold_folders = ['fold%d/' % i for i in range(1, 11)]
n_label = 10 # 0 - 9

### Audio stuff
* **Modify these if you want. **

In [53]:
# audio
SR = 12000 # [Hz]
max_len = 4.0 # [Seconds]. should be < 4.0. I recommend not to change it. 
n_mels = 96
n_fft = 512
n_hop = 256

def process_audio(src, sr):
    melgram = librosa.feature.melspectrogram(src, sr, n_fft=n_fft, hop_length=n_hop, n_mels=n_mels)
    melgram = np.abs(melgram)
    return librosa.logamplitude(melgram**2)

In [47]:
mel_shape = librosa.feature.melspectrogram(np.zeros(SR*max_len), SR, n_fft=n_fft, hop_length=n_hop, n_mels=n_mels).shape
print mel_shape
n_mel_fr = mel_shape[1]

(96, 188)


  if __name__ == '__main__':


## Load csv

In [51]:
df = pd.read_csv(path_csv, header=0)
print df.shape
n_data_all = df.shape[0]
n_valid = len(df[df['fold']==9])
n_test = len(df[df['fold']==10])
n_train = n_data_all - n_valid - n_test
print n_train, n_valid, n_test

(8732, 8)
7079 816 837


### A function to load audio, compute melgram, and store into HDF

In [41]:
def set_to_hdf(hdf_filepath, df_subset):
    num_data = len(df_subset)
    with h5py.File(hdf_filepath, 'w') as f_hdf:
        ds_melgram = f_hdf.create_dataset('melgram', (num_data, n_mels, n_mel_fr), dtype='float32')
        ds_y = f_hdf.create_dataset('y', (num_data, n_label), dtype='bool')
        for row_idx, row in enumerate(df_subset.itertuples()):
            fname, fold, y = row[1], row[6], row[7]
            folder = fold_folders[fold-1]
            src, sr = librosa.load(PATH_US + folder + fname, SR)

            melgram = process_audio(src, SR)
            shape_here = melgram.shape
            ds_y[row_idx, y] = True
            ds_melgram[row_idx, :, :min(n_mel_fr, shape_here[1])] = melgram[:, :n_mel_fr]
            if row_idx % 20 == 0:
                sys.stdout.write('\r%d/%d-th sample was written just now.' % (row_idx+1, num_data))
    print '\n'


### Do it!

In [43]:
set_to_hdf(PATH_HDF+'valid.h5', df[df['fold']==9], )
set_to_hdf(PATH_HDF+'test.h5', df[df['fold']==10], )
set_to_hdf(PATH_HDF+'train.h5', df[df['fold'] < 9], )

7061/7079-th sample was written just now.

## Done.