In [251]:
# Load various imports 
import pandas as pd
import os
import librosa
import librosa.display
from sklearn import preprocessing
import numpy as np
import IPython


In [252]:
path_train = 'data/train_labels.csv'
path_test = 'data/test_files.csv'
num_cc = 50
df = pd.read_csv(path_train)
df['filename'] = df['filename'].map(lambda x: 'data/data/%04d.wav' % x)
le = preprocessing.LabelEncoder()
df['height'] = le.fit_transform(df['height'])
le = preprocessing.LabelEncoder()
df['pasture'] = le.fit_transform(df['pasture'])
print(len(df.index))
df.head(2)

1532


Unnamed: 0,filename,pasture,height,label
0,data/data/0004.wav,0,1,2
1,data/data/0009.wav,0,1,2


In [253]:
IPython.display.Audio(df.loc[4]['filename'])

In [255]:
max_pad_len = 174
def extract_features(file_name):
    try:
        audio, sample_rate = librosa.load(file_name, res_type='kaiser_fast') 
        mfccs = librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=num_cc)
        pad_width = max_pad_len - mfccs.shape[1]
        mfccs = np.pad(mfccs, pad_width=((0, 0), (0, pad_width)), mode='constant')
        
    except Exception as e:
        print("Error encountered while parsing file: ", file)
        return None 
     
    return mfccs

In [256]:

features = []
for k,row in df.iterrows():
    class_label = row["label"]
    data = extract_features(row['filename'])
    features.append([data, class_label])
df_features = pd.DataFrame(features, columns=['feature','class_label'])
print('Finished feature extraction from ', len(df_features), ' files')
df_features['pasture'] = row['pasture']
df_features['height'] = row['height']
df_features.head(2)

Finished feature extraction from  1532  files


Unnamed: 0,feature,class_label,pasture,height
0,"[[-431.2328, -403.76685, -400.59433, -425.5229...",2,1,1
1,"[[-475.67163, -462.17108, -454.94983, -450.248...",2,1,1


In [257]:
from sklearn.preprocessing import LabelEncoder
from keras.utils import to_categorical

# Convert features and corresponding classification labels into numpy arrays
X = np.array(df_features.feature.tolist())
y = np.array(df_features.class_label.tolist())

# Encode the classification labels
le = LabelEncoder()
yy = to_categorical(le.fit_transform(y)) 

# split the dataset 
from sklearn.model_selection import train_test_split 

x_train, x_test, y_train, y_test = train_test_split(X, yy, test_size=0.2, random_state = 42)


In [258]:
np.array(df_features.pasture.tolist())

array([1, 1, 1, ..., 1, 1, 1])

In [259]:
import numpy as np
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Convolution2D, Conv2D, MaxPooling2D, GlobalAveragePooling2D
from keras.optimizers import Adam
from keras.utils import np_utils
from sklearn import metrics 

num_rows = num_cc
num_columns = 174
num_channels = 1

x_train = x_train.reshape(x_train.shape[0], num_rows, num_columns, num_channels)
x_test = x_test.reshape(x_test.shape[0], num_rows, num_columns, num_channels)

num_labels = yy.shape[1]
filter_size = 2

# Construct model 
model = Sequential()
model.add(Conv2D(filters=16, kernel_size=2, input_shape=(num_rows, num_columns, num_channels), activation='relu'))
model.add(MaxPooling2D(pool_size=2))
model.add(Dropout(0.2))

model.add(Conv2D(filters=32, kernel_size=2, activation='relu'))
model.add(MaxPooling2D(pool_size=2))
model.add(Dropout(0.2))

model.add(Conv2D(filters=64, kernel_size=2, activation='relu'))
model.add(MaxPooling2D(pool_size=2))
model.add(Dropout(0.2))

model.add(Conv2D(filters=128, kernel_size=2, activation='relu'))
model.add(MaxPooling2D(pool_size=2))
model.add(Dropout(0.2))
model.add(GlobalAveragePooling2D())

model.add(Dense(num_labels, activation='softmax'))

model.compile(loss='categorical_crossentropy', metrics=['accuracy'], optimizer='adam')


In [260]:
# Display model architecture summary 
model.summary()
# Calculate pre-training accuracy 
score = model.evaluate(x_test, y_test, verbose=1)
accuracy = 100*score[1]
print("Pre-training accuracy: %.4f%%" % accuracy) 


Model: "sequential_8"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_22 (Conv2D)           (None, 49, 173, 16)       80        
_________________________________________________________________
max_pooling2d_19 (MaxPooling (None, 24, 86, 16)        0         
_________________________________________________________________
dropout_19 (Dropout)         (None, 24, 86, 16)        0         
_________________________________________________________________
conv2d_23 (Conv2D)           (None, 23, 85, 32)        2080      
_________________________________________________________________
max_pooling2d_20 (MaxPooling (None, 11, 42, 32)        0         
_________________________________________________________________
dropout_20 (Dropout)         (None, 11, 42, 32)        0         
_________________________________________________________________
conv2d_24 (Conv2D)           (None, 10, 41, 64)       

In [None]:
#https://medium.com/@mikesmales/sound-classification-using-deep-learning-8bc2aa1990b7
from keras.callbacks import ModelCheckpoint 
from datetime import datetime 

num_epochs = 72
num_batch_size = 256

checkpointer = ModelCheckpoint(filepath='saved_models/weights.best.basic_cnn.hdf5', 
                               verbose=1, save_best_only=True)
start = datetime.now()

model.fit(x_train, y_train, batch_size=num_batch_size, epochs=num_epochs, validation_data=(x_test, y_test), verbose=1)


duration = datetime.now() - start
print("Training completed in time: ", duration)


Train on 1225 samples, validate on 307 samples
Epoch 1/72
Epoch 2/72
Epoch 3/72
Epoch 4/72
Epoch 5/72
Epoch 6/72
Epoch 7/72
Epoch 8/72
Epoch 9/72
Epoch 10/72
Epoch 11/72
Epoch 12/72
Epoch 13/72
Epoch 14/72
Epoch 15/72
Epoch 16/72
Epoch 17/72
Epoch 18/72
Epoch 19/72
Epoch 20/72
Epoch 21/72
Epoch 22/72
Epoch 23/72
Epoch 24/72
Epoch 25/72
Epoch 26/72
Epoch 27/72
Epoch 28/72
Epoch 29/72
Epoch 30/72
Epoch 31/72
Epoch 32/72
Epoch 33/72
Epoch 34/72
Epoch 35/72
Epoch 36/72
Epoch 37/72
Epoch 38/72
Epoch 39/72
Epoch 40/72
Epoch 41/72
Epoch 42/72
Epoch 43/72
Epoch 44/72
Epoch 45/72
Epoch 46/72
Epoch 47/72
Epoch 48/72
Epoch 49/72
Epoch 50/72


In [None]:
df_test_ = pd.read_csv(path_test)
df_test = df_test_.copy()
df_test['filename'] = df_test['filename'].map(lambda x: 'data/data/%04d.wav' % x)
print(len(df_test.index))
df_test.head(2)

In [141]:
features = []
for k,row in df_test.iterrows():
    data = extract_features(row['filename'])
    features.append([data])
df_features_test = pd.DataFrame(features, columns=['feature'])
print('Finished feature extraction from ', len(df_features_test), ' files')


Finished feature extraction from  1551  files


In [142]:
X_test = np.array(df_features_test.feature.tolist())
X_test = X_test.reshape(X_test.shape[0], num_rows, num_columns, num_channels)

In [143]:
y_prob = model.predict(X_test)
y_classes = y_prob.argmax(axis=-1)


In [144]:
len(X_test)

1551

In [145]:
y_classes

array([1, 2, 1, ..., 1, 1, 0])

In [146]:
df_test_['label'] = y_classes

In [147]:
df_test_.to_csv('data/res.csv', index=None, header=None)