In [None]:
from utils import *
from models import *

# train_test_split lives in sklearn.model_selection since scikit-learn 0.18;
# sklearn.cross_validation was removed in 0.20, so it is only a fallback for
# very old installations.
try:
    from sklearn.model_selection import train_test_split
except ImportError:
    from sklearn.cross_validation import train_test_split

import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
# Load the data set from disk; data_loader returns a pair that is unpacked
# into the inputs (X_data) and their labels (Y_data).
# NOTE(review): this is an absolute, machine-specific path — consider making
# it configurable before sharing the notebook.
data_dir = 'C:\\Users\\Nildip.mukherjee\\AnacondaProjects\\Others\\USC\\UrbanSound\\data'
X_data, Y_data = data_loader(data_dir)

In [None]:
# Pad the sound clip vectors. padder also returns a length, but that value is
# superseded a few lines below by the concatenated-feature length.
X_data, max_lenX = padder(X_data)

# Concatenated features; max_lenX becomes the length of the longest vector.
X_train_concat = feat_extract_concat(X_data)
max_lenX = max(len(vec) for vec in X_train_concat)

# Separated features, one collection per feature family.
X_stft, X_mfccs, X_chroma, X_mel, X_contrast, X_tonnetz = feat_extract_sep(X_data)

# Record, for every family, the longest per-sample vector under 'len_<name>'.
feature_names = ('stft', 'mfccs', 'chroma', 'mel', 'contrast', 'tonnetz')
feature_sets = (X_stft, X_mfccs, X_chroma, X_mel, X_contrast, X_tonnetz)
input_shape_dict = {
    'len_{0}'.format(name): max(len(row) for row in rows)
    for name, rows in zip(feature_names, feature_sets)
}

# Label-encode the classes (the fitted encoder is kept in `le`).
le = preprocessing.LabelEncoder()
Y_data = np.array(le.fit_transform(Y_data))

# One-hot encode the integer class labels.
Y_train = np_utils.to_categorical(Y_data)

In [None]:
# One figure per feature family, with one smoothed curve per encoded class.
labels = np.asarray(Y_data)
feature_families = [('stft', X_stft), ('mfccs', X_mfccs), ('chroma', X_chroma),
                    ('mel', X_mel), ('contrast', X_contrast), ('tonnetz', X_tonnetz)]
for family_name, k in feature_families:
    plt.figure(figsize=(17, 10))
    # np.unique yields the class ids in sorted order, so the legend order is
    # the same on every run.
    for i in np.unique(labels):
        idx = np.where(labels == i)[0]
        x_idx = np.array([k[j] for j in idx])
        # Smooth with a rolling mean whose window is a third of the class size.
        # NOTE(review): pd.Series needs 1-D data, so this assumes each k[j] is
        # a scalar — confirm against feat_extract_sep.
        x_idx = pd.Series(x_idx).rolling(int(len(x_idx) / 3)).mean()
        plt.plot(x_idx, label=i)
    plt.title(family_name)
    # The legend must be built after all curves are drawn: calling it before
    # plt.plot (as before) warned on the first pass and omitted the last class.
    plt.legend()
    plt.show()

In [None]:
# Convert every feature collection to an array and append a new axis at
# position 2, in the same order as before (concatenated features first).
# NOTE(review): re-running this cell adds yet another axis, since the inputs
# are overwritten in place.
(X_train_concat,
 X_stft, X_mfccs, X_chroma,
 X_mel, X_contrast, X_tonnetz) = [
    np.expand_dims(np.array(features), axis=2)
    for features in (X_train_concat,
                     X_stft, X_mfccs, X_chroma,
                     X_mel, X_contrast, X_tonnetz)
]

In [None]:
# Train/test split (80-20) with a fixed seed. All arrays go through a single
# train_test_split call, which returns a (train, test) pair for each input in
# the order given.
# NOTE(review): the inputs are overwritten with their train parts, so
# re-running this cell splits the already-reduced training data again.
arrays_to_split = (X_train_concat, X_stft, X_mfccs, X_chroma,
                   X_mel, X_contrast, X_tonnetz, Y_train)
(X_train_concat, X_test_concat,
 X_stft, X_stft_test,
 X_mfccs, X_mfccs_test,
 X_chroma, X_chroma_test,
 X_mel, X_mel_test,
 X_contrast, X_contrast_test,
 X_tonnetz, X_tonnetz_test,
 Y_train, Y_test) = train_test_split(*arrays_to_split, test_size=0.2, random_state=1)

In [None]:
# Build the 1D CNN that consumes the concatenated feature vectors.
# NOTE(review): the first argument (10) is presumably the number of classes —
# confirm against models.concat_1d_CNN.
model = concat_1d_CNN(10, max_lenX)
model.summary()
# write the model structure to a png file
plot_model(model, to_file='model_concat_1Dcnn.png', show_shapes=True)
# compile the model
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
# Fit the model. Early stopping watches a validation slice taken from the
# training data (the last 20%, selected by Keras before shuffling). Previously
# the test set itself was passed as validation_data, so the stopping epoch was
# chosen on the very data used for the final score, biasing it optimistically.
early_stopping = EarlyStopping(monitor='val_loss', patience=2)
model.fit(X_train_concat, Y_train, validation_split=0.2, epochs=200, batch_size=256,
          shuffle=True, callbacks=[early_stopping])
# Final evaluation on the held-out test set; with metrics=['accuracy'],
# scores is [loss, accuracy].
scores = model.evaluate(X_test_concat, Y_test, verbose=0)
print("Model Error: %.2f%%" % (100-scores[1]*100))

In [None]:
# Build the 1D CNN that takes one input per feature family.
# NOTE(review): the first argument (10) is presumably the number of classes —
# confirm against models.sep_1d_CNN.
model = sep_1d_CNN(10, input_shape_dict)
model.summary()
# write the model structure to a png file
plot_model(model, to_file='model_sep_1Dcnn.png', show_shapes=True)
# compile the model
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# The train and test inputs are listed in the same family order.
train_inputs = [X_stft, X_mfccs, X_chroma, X_mel, X_contrast, X_tonnetz]
test_inputs = [X_stft_test, X_mfccs_test, X_chroma_test,
               X_mel_test, X_contrast_test, X_tonnetz_test]

# Fit the model. Early stopping watches a validation slice taken from the
# training data (the last 20%, selected by Keras before shuffling). Previously
# the test set itself was passed as validation_data, so the stopping epoch was
# chosen on the very data used for the final score, biasing it optimistically.
early_stopping = EarlyStopping(monitor='val_loss', patience=2)
model.fit(train_inputs, Y_train, validation_split=0.2,
          epochs=200, batch_size=256, shuffle=True, callbacks=[early_stopping])
# Final evaluation on the held-out test set; with metrics=['accuracy'],
# scores is [loss, accuracy].
scores = model.evaluate(test_inputs, Y_test, verbose=0)
print("Model Error: %.2f%%" % (100-scores[1]*100))