In [1]:
import numpy as np
import pandas as pd
import os
import librosa
from sklearn.preprocessing import LabelEncoder
from keras.utils import to_categorical
from sklearn.model_selection import train_test_split 

Using TensorFlow backend.


In [2]:
def get_features(file_name, pad_len=None):
    """Load an audio file and return its MFCC matrix with a fixed frame count.

    Parameters
    ----------
    file_name : str
        Path of the audio file handed to ``librosa.load``.
    pad_len : int, optional
        Number of frames (second axis) in the returned matrix. When omitted,
        the module-level ``max_pad_len`` is used, which matches the original
        behaviour of this function.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(40, pad_len)``: 40 MFCC coefficients per frame.
        Clips with fewer frames are zero-padded on the right; clips with more
        frames are truncated to the first ``pad_len`` frames.
    """
    if pad_len is None:
        # Fall back to the notebook-level setting so existing calls keep working.
        pad_len = max_pad_len

    audio, sample_rate = librosa.load(file_name, res_type='kaiser_fast')
    mfcc_feat = librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=40)

    # Truncate first: without this a long clip gives a negative pad width,
    # which np.pad rejects with a ValueError.
    mfcc_feat = mfcc_feat[:, :pad_len]
    pad_width = pad_len - mfcc_feat.shape[1]
    mfcc_feat = np.pad(mfcc_feat, pad_width=((0, 0), (0, pad_width)), mode='constant')

    return mfcc_feat

In [3]:
# Root directory of the challenge data. Set the MLCHALLENGE_HOME environment
# variable to run the notebook on another machine; the default is the original
# location. Joining with '' guarantees a trailing separator, so code that
# concatenates file names directly onto home_path keeps working.
home_path = os.path.join(
    os.environ.get('MLCHALLENGE_HOME', '/Users/manishatakale/Downloads/mlchallenge/'),
    '',
)

# Relative paths below are resolved against the data root.
os.chdir(home_path)

# NOTE(review): allow_pickle=True unpickles object arrays, which can execute
# arbitrary code. Only load .npy files from a trusted source.
feat = np.load('feat.npy', allow_pickle=True)
path = np.load('path.npy', allow_pickle=True)

print(feat.shape)
print(path.shape)


(105835,)
(105835,)


In [4]:
# Number of MFCC frames every clip is padded to; read by get_features().
max_pad_len = 99
features = []

# Training index: one row per clip, with its relative path and spoken word.
data = pd.read_csv(os.path.join(home_path, 'train.csv'))

# Resolve the clip directory once instead of on every iteration.
wav_dir = os.path.abspath(home_path + '/wav/')

for rel_path, class_label in zip(data["path"], data["word"]):
    file_name = os.path.join(wav_dir, str(rel_path))

    # Keep the MFCC matrix under its own name: rebinding `data` here would
    # replace the training DataFrame with the last clip's array.
    mfcc = get_features(file_name)

    features.append([mfcc, class_label])


In [5]:
# Pack the [MFCC matrix, label] pairs into a two-column pandas DataFrame.
new_features = pd.DataFrame(data=features, columns=['feature', 'class_label'])

# Report how many clips were processed.
print('Finished feature extraction from ', new_features.shape[0], ' files')

Finished feature extraction from  94824  files


In [6]:
# Preview the first rows of the (feature, class_label) table.
new_features.head()

Unnamed: 0,feature,class_label
0,"[[-537.57043, -522.28406, -524.77997, -546.343...",one
1,"[[-519.0905, -513.495, -514.77966, -514.9921, ...",one
2,"[[-558.69073, -559.25275, -558.4838, -557.4996...",one
3,"[[-541.83777, -535.3056, -527.8647, -532.6044,...",one
4,"[[-606.06976, -609.54034, -615.7957, -618.5272...",one


In [7]:
# Stack the per-clip MFCC matrices into one array and collect the labels
# into a parallel array.
X = np.array(new_features['feature'].tolist())
y = np.array(new_features['class_label'].tolist())

In [8]:
# Sanity check: feature tensor shape, then label vector shape.
print(X.shape, y.shape, sep='\n')

(94824, 40, 99)
(94824,)


In [9]:
# Map the string labels to integer ids, then one-hot encode those ids.
le = LabelEncoder()
le.fit(y)
yy = to_categorical(le.transform(y))

In [10]:
# Hold out 20% of the clips; the fixed seed keeps the split reproducible.
x_train, x_test, y_train, y_test = train_test_split(
    X,
    yy,
    test_size=0.2,
    random_state=42,
)

In [11]:
# Shapes of the four splits, in order: x_train, x_test, y_train, y_test.
print(x_train.shape, x_test.shape, y_train.shape, y_test.shape, sep='\n')

(75859, 40, 99)
(18965, 40, 99)
(75859, 35)
(18965, 35)


In [12]:
# Input geometry expected by the models below: (frames, coefficients, channels).
num_rows = 40       # MFCC coefficients per frame
num_columns = 99    # frames per clip (the padded length)
num_channels = 1


def _to_cnn_input(batch):
    """Convert a (samples, coefficients, frames) batch to (samples, frames, coefficients, 1).

    The axes are swapped with ``transpose``. A plain ``reshape`` to the target
    shape would only reinterpret the flat memory, so each output row would mix
    values from different frames and coefficients. A batch that is already
    4-D is returned unchanged so the cell can be re-run safely.
    """
    if batch.ndim == 4:
        return batch
    return batch.transpose(0, 2, 1)[..., np.newaxis]


x_train = _to_cnn_input(x_train)
x_test = _to_cnn_input(x_test)

# Fail early if the data does not match the geometry declared above.
assert x_train.shape[1:] == (num_columns, num_rows, num_channels)
assert x_test.shape[1:] == (num_columns, num_rows, num_channels)

# Number of classes = width of the one-hot label matrix.
num_labels = yy.shape[1]



In [None]:
from sklearn.preprocessing import StandardScaler

# StandardScaler works on 2-D (samples, features) input, so each clip is
# flattened for scaling and restored to its original shape afterwards.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(
    x_train.reshape(x_train.shape[0], -1)
).reshape(x_train.shape)

# Reuse the statistics learned on the training split: re-fitting on the test
# split would scale it differently and leak test information.
X_test_scaled = scaler.transform(
    x_test.reshape(x_test.shape[0], -1)
).reshape(x_test.shape)

In [13]:
# Number of target classes (width of the one-hot label matrix yy).
num_labels

35

In [14]:
# Training tensor shape after adding the channel axis.
x_train.shape

(75859, 99, 40, 1)

In [15]:
# Held-out tensor shape after adding the channel axis.
x_test.shape

(18965, 99, 40, 1)

In [16]:
import numpy as np
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Convolution2D, Conv2D, MaxPooling2D, GlobalAveragePooling2D
from keras.optimizers import Adam
from keras.utils import np_utils
from sklearn import metrics 


In [None]:
# Baseline: flatten the input and feed it straight into a softmax layer.
M = Sequential([
    Flatten(input_shape=(num_columns, num_rows, num_channels)),
    Dense(num_labels, activation='softmax'),
])
M.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Train the baseline for 50 epochs, validating on the held-out split.
M.fit(
    x_train,
    y_train,
    epochs=50,
    validation_data=(x_test, y_test),
)

In [None]:
# One convolutional block (conv + max-pool) followed by a dense classifier.
M = Sequential([
    Conv2D(32, (3, 3), activation="relu",
           input_shape=(num_columns, num_rows, num_channels)),
    MaxPooling2D(pool_size=(2, 2)),
    Flatten(),
    Dense(128, activation='relu'),
    Dense(num_labels, activation='softmax'),
])

M.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)


In [None]:
# Train the one-conv-block model for 10 epochs, validating on the held-out split.
M.fit(
    x_train,
    y_train,
    epochs=10,
    validation_data=(x_test, y_test),
)

In [17]:
# Two stacked conv + max-pool blocks followed by a dense classifier.
M = Sequential([
    Conv2D(32, (3, 3), activation="relu",
           input_shape=(num_columns, num_rows, num_channels)),
    MaxPooling2D(pool_size=(2, 2)),
    Conv2D(32, (3, 3), activation="relu"),
    MaxPooling2D(pool_size=(2, 2)),
    Flatten(),
    Dense(128, activation='relu'),
    Dense(num_labels, activation='softmax'),
])
M.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [18]:
# Train the two-conv-block model for 10 epochs, validating on the held-out split.
M.fit(
    x_train,
    y_train,
    epochs=10,
    validation_data=(x_test, y_test),
)

Train on 75859 samples, validate on 18965 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.callbacks.History at 0x1cf1ee8810>