In [1]:
# Counting sounds in audio

In [94]:
%load_ext autoreload
%autoreload 2
%matplotlib inline
import matplotlib.pyplot as plt
import csv
import os
import sys
import collections
import numpy as np

sys.path.append(os.path.expanduser('~/projects/engaged_hackathon/'))
from engaged.features import frequency
from engaged.features import wave
from engaged.features import features

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [80]:
base_path = '/home/michael/projects/engaged_hackathon_data/raw_data/one_minute_files/'
dataset_csv = csv.reader(open(base_path + 'urban_sounds_labels.csv'))

annotations = []  # list of all class info

def try_number(s):
    """Converts s to float if possible, else leaves as is"""
    try:
        return float(s)
    except ValueError:
        return s

# I'm basically reinventing pandas here - very silly
for count, line in enumerate(dataset_csv):
    if count == 0:
        header = line
        continue
        
    annotation = {label:try_number(item) for label, item in zip(header, line)}    
    annotation['length'] = \
        annotation['LabelEndTime_Seconds'] - annotation['LabelStartTime_Seconds']
    annotation['Label'] = annotation['Label'].strip().lower()
    annotations.append(annotation)

In [64]:
spectrogram_parameters = {
    'nfft': 1024,
    'window_width': 0.03,
    'overlap': 0.01
    }

In [58]:
# group annotations by filename
file_annotations = collections.defaultdict(list)
for annotation in annotations:
    file_annotations[annotation['Filename']].append(annotation)

In [150]:
# The audio files are too big to load in all at once, so we will loop over them and process them one at a time
# loop through the csv file - (could be better to only do this once at the top of the notebook)
import cPickle as pickle

where_to_save = '/home/michael/projects/engaged_hackathon_data/detection/training_data/'

bird_snippets = {}
non_bird_snippets = {}

# from skimage.util import view_as_blocks
# slice_height = 16
# def vertical_slice(array, slice_height):
#     """
#     Slice up an array in the vertical direction
#     """
#     new_height = slice_height * np.floor(array.shape[0] / float(slice_height))
#     array = features.force_spectrogram_length(array.T, new_height).T

#     view = view_as_blocks(array, (slice_height, array.shape[1]))
# #     print "Shape is ", view.shape
#     if view.shape[0] > 0:
#         return view.reshape(view.shape[0], -1)
#     else:
#         return np.array([])
    

def list_of_snippets_to_np(snippet_list):
    """
    Reshapes a list of snippets to a single numpy array
    """
    slices = [snip.series for snip in snippet_list]
    
    if slices:
        return np.vstack(slices)
    else:
        return np.array([])
        
count = 0
for filename, annotations in file_annotations.iteritems():
    
    # load in the audio signal and the annotations
    wav = wave.LabelledWave()
    wav.load_from_wav(base_path + '25_Random/' + filename)
    wav.set_annotations(annotations)
    
    # convert to a spectrogram and remove the median
    spec = wave.LabelledSpectrogram()
    spec.from_wave(wav, **spectrogram_parameters)    
    spec.series -= np.median(spec.series, axis=0)
    
    # extract bird and non-bird from the spectrogram
    bird_snippets[filename] = spec.snippets_with_labels('bird')
    non_bird_snippets[filename] = spec.snippets_except_labels('bird')
    
    # combine each list
    bird_slices_np = list_of_snippets_to_np(bird_snippets[filename])
    non_bird_slices_np = list_of_snippets_to_np(non_bird_snippets[filename])
    
    # possibly save these to disk if memory becomes tight...
    fname = where_to_save + filename.replace('.wav', '_bird_snippets.pkl')
    pickle.dump(bird_slices_np, open(fname, 'w'))
    
    fname = where_to_save + filename.replace('.wav', '_non_bird_snippets.pkl')
    pickle.dump(non_bird_slices_np, open(fname, 'w'), protocol=pickle.HIGHEST_PROTOCOL)
    
    # keep us updated about how well we are doing
    print "Done %d of %d" % (count, len(file_annotations))
    count += 1

Done 0 of 362
Done 1 of 362
Done 2 of 362
Done 3 of 362
Done 4 of 362
Done 5 of 362
Done 6 of 362
Done 7 of 362
Done 8 of 362
Done 9 of 362
Done 10 of 362
Done 11 of 362
Done 12 of 362
Done 13 of 362
Done 14 of 362
Done 15 of 362
Done 16 of 362
Done 17 of 362
Done 18 of 362
Done 19 of 362
Done 20 of 362
Done 21 of 362
Done 22 of 362
Done 23 of 362
Done 24 of 362
Done 25 of 362
Done 26 of 362
Done 27 of 362
Done 28 of 362
Done 29 of 362
Done 30 of 362
Done 31 of 362
Done 32 of 362
Done 33 of 362
Done 34 of 362
Done 35 of 362
Done 36 of 362
Done 37 of 362
Done 38 of 362
Done 39 of 362
Done 40 of 362
Done 41 of 362
Done 42 of 362
Done 43 of 362
Done 44 of 362
Done 45 of 362
Done 46 of 362
Done 47 of 362
Done 48 of 362
Done 49 of 362
Done 50 of 362
Done 51 of 362
Done 52 of 362
Done 53 of 362
Done 54 of 362
Done 55 of 362
Done 56 of 362
Done 57 of 362
Done 58 of 362
Done 59 of 362
Done 60 of 362
Done 61 of 362
Done 62 of 362
Done 63 of 362
Done 64 of 362
Done 65 of 362
Done 66 of 362
Done 

In [None]:
# now load in training data for bird and non-bird
all_birds, all_non_birds = [], []

for filename in file_annotations:

    # load bird training instance
    fname = where_to_save + filename.replace('.wav', '_bird_snippets.pkl')
    all_birds_np = pickle.load(open(fname))
    if all_birds_np.shape[0] > 0:
        all_birds.append(all_birds_np)
    
    # load non-bird training instance
    fname = where_to_save + filename.replace('.wav', '_non_bird_snippets.pkl')
    all_non_birds.append(pickle.load(open(fname)))
    
    
print np.vstack(all_birds).shape
print np.vstack(all_non_birds).shape

In [152]:
# now train a model 
from sklearn.
# print [b.shape for b in all_birds]

[(0,), (0,), (1031, 512), (183, 512), (98, 512), (0,), (0,), (1054, 512), (0,), (1216, 512), (398, 512), (0,), (733, 512), (72, 512), (367, 512), (0,), (0,), (0,), (159, 512), (368, 512), (1142, 512), (0,), (0,), (0,), (132, 512), (0,), (19, 512), (0,), (567, 512), (1649, 512), (1627, 512), (0,), (0,), (189, 512), (437, 512), (0,), (1273, 512), (152, 512), (0,), (0,), (0,), (193, 512), (0,), (0,), (0,), (0,), (0,), (1460, 512), (0,), (0,), (776, 512), (0,), (0,), (569, 512), (0,), (0,), (1531, 512), (0,), (0,), (969, 512), (0,), (1342, 512), (1292, 512), (0,), (563, 512), (0,), (0,), (0,), (0,), (0,), (923, 512), (0,), (0,), (0,), (0,), (242, 512), (0,), (0,), (37, 512), (0,), (290, 512), (542, 512), (0,), (0,), (37, 512), (85, 512), (3647, 512), (0,), (433, 512), (0,), (68, 512), (0,), (0,), (0,), (0,), (289, 512), (0,), (0,), (0,), (0,), (0,), (0,), (0,), (0,), (67, 512), (0,), (377, 512), (87, 512), (0,), (612, 512), (0,), (0,), (121, 512), (0,), (0,), (0,), (2342, 512), (5, 512), (

In [40]:
# now do train/test split at file level...
from sklearn.cross_validation import train_test_split
train_files, test_files = train_test_split(file_annotations.keys())

In [53]:
# maybe split the spectrograms into slices and use each slice as a training example

# now extract all the bird/non-bird from the training set
train_birds = [x for train_f in train_files for x in bird_snippets[train_f] ]
train_non_birds = [x for train_f in train_files for x in non_bird_snippets[train_f] ]

test_birds = [x for test_f in test_files for x in bird_snippets[test_f] ]
test_non_birds = [x for test_f in test_files for x in non_bird_snippets[test_f]]

# todo - flip this table so train/test is columns
print "SNIPPETS"
print (' ' * 15) + 'Birds, non-birds'
print "Training: ".ljust(15), len(train_birds), len(train_non_birds)
print "Testing: ".ljust(15), len(test_birds), len(test_non_birds)

SNIPPETS
               Birds, non-birds
Training:       3816 2462
Testing:        982 748


In [54]:
# now convert each snippet to an array of windows
# (make this a function)
slices = []
for snippet in train_birds:
    if snippet.wav.shape[0] < 1000:
        continue
        
    i
        
    

17041
16800
23759
19920
13440
12480
49200
24720
27840
30239
46800
50640
14400
21600
25920
22800
29040
13680
14880
14880
15360
15600
27840
18480
20640
37440
27359
35040
32641
19200
12000
9360
28559
17041
29280
21120
17040
32880
27600
25680
24960
5520
30240
49200
13440
10560
15120
14160
4320
10800
11760
15120
8399
11760
15360
6480
7920
15600
15360
20879
24240
25920
24960
16800
24720
15120
9360
30240
17760
9360
7680
6240
5280
8160
55920
6720
24960
6480
33360
65520
18720
16079
13440
4800
9600
5520
44640
47040
5040
6239
17280
15840
10800
5760
19440
17760
13440
9600
7439
6480
13200
10800
11039
12720
8641
11040
14160
13200
15840
12240
11040
10800
11760
12960
11520
10079
13680
14640
10320
7200
4800
10080
6480
16800
7680
10320
9360
5281
6960
5520
8161
9120
6960
7440
8640
6480
6240
9360
7920
7440
7440
6480
7201
6239
6480
3360
6960
4080
20640
7440
8400
7681
8640
11760
9120
5520
6241
4320
10801
8400
7200
9120
7200
8400
8880
11760
17280
14400
15360
19440
15360
6720
6960
6480
6000
3120
3840
3120
264