In [20]:
import librosa
import librosa.display 
import pandas as pd
import numpy as np
from glob import glob
import cv2
import os
import matplotlib.pyplot as pltimport 
%matplotlib inline
from timeit import default_timer as timer


**Creating the Models**

In [21]:
from keras.models import Sequential
from keras.layers.normalization import BatchNormalization
from keras.layers.convolutional import Conv2D
from keras.layers.convolutional import MaxPooling2D
from keras.layers.core import Activation
from keras.layers.core import Dropout
from keras.layers.core import Dense
from keras.layers import Flatten
from keras.layers import Input
from keras.models import Model
def create_cnn(width, height, depth, filters=(16, 32, 64)):
    # First CONV layer
    inputShape = (height, width, depth)
    inputs = Input(shape=inputShape)
    x = inputs
    x = Conv2D(32, (3, 3), padding="same")(x)
    x = Activation("relu")(x)   
    # 2nd CONV
    x = Conv2D(64, (3, 3))(x)
    x = Activation("relu")(x)
    x = MaxPooling2D(pool_size=(2, 2))(x)
    x = Dropout(0.25)(x)  
    # 3rd CONV
    x = Conv2D(64, (3, 3), padding="same")(x)
    x = Activation("relu")(x)
    # 4th CONV
    x = Conv2D(64, (3, 3))(x)
    x = Activation("relu")(x)
    x = MaxPooling2D(pool_size=(2, 2))(x)
    x = Dropout(0.5)(x)
    # 5th CONV
    x = Conv2D(128, (3, 3), padding="same")(x)
    x = Activation("relu")(x) 
    # 6th CONV
    x = Conv2D(128, (3, 3))(x)
    x = Activation("relu")(x)
    x = MaxPooling2D(pool_size=(2, 2))(x)
    x = Dropout(0.7)(x)                             #0.5
    # Flatten and Output
    x = Flatten()(x)
    x = Dense(512)(x)
    x = Activation("relu")(x)
    x = Dropout(0.7)(x)                             #0.5
    model = Model(inputs, x)
    return model

**Audio Features: Train**

In [22]:
traindf = pd.read_csv('train.csv',dtype=str)
trainAudioPath = ""

**Auido Features: Test**

In [23]:
testdf = pd.read_csv('test.csv', dtype=str)
testAudioPath = ""

**Load Audio Features**

In [24]:
trainAudioFeatures = np.load('trainAudioFeature277.npy', allow_pickle=True)
testAudioFeatures = np.load('testAudioFeature277.npy', allow_pickle=True)

In [25]:
trainAudioFeatures = np.array(trainAudioFeatures)
testAudioFeatures = np.array(testAudioFeatures)

In [26]:
import sklearn as sk 
scaler1 = sk.preprocessing.StandardScaler().fit(trainAudioFeatures)
trainAudioFeatures = scaler1.transform(trainAudioFeatures)
testAudioFeatures = scaler1.transform(testAudioFeatures)

**Labels**

In [27]:
#from keras.utils.np_utils import to_categorical
# LABELS
train_labels = traindf["Class"]
test_labels = testdf["Class"]
#labels = to_categorical(labels, 10)

In [28]:
from sklearn import preprocessing
le = preprocessing.LabelEncoder()

le.fit(train_labels)
train_labels = le.transform(train_labels)
test_labels = le.transform(test_labels)

In [29]:
train_labels

array([5, 3, 9, 3, 5, 6, 0, 1, 4, 7, 2, 0, 7, 6, 6, 0, 9, 3, 7, 1, 5, 8,
       7, 6, 5, 8, 2, 1, 4, 6, 4, 5, 8, 2, 9, 1, 3, 4, 4, 2, 2, 8, 2, 8,
       3, 7, 7, 1, 7, 7, 2, 8, 5, 6, 6, 2, 7, 0, 8, 1, 5, 6, 5, 5, 5, 0,
       6, 7, 3, 6, 2, 0, 9, 3, 8, 2, 0, 8, 6, 7, 5, 2, 5, 4, 0, 0, 1, 1,
       9, 5, 9, 0, 4, 4, 3, 8, 7, 3, 4, 6, 5, 2, 6, 1, 9, 6, 3, 6, 4, 7,
       5, 1, 1, 9, 2, 9, 9, 9, 2, 1, 8, 1, 2, 4, 2, 3, 3, 1, 6, 5, 9, 0,
       7, 1, 0, 2, 4, 9, 7, 7, 9, 1, 8, 1, 4, 9, 0, 1, 2, 7, 3, 8, 2, 2,
       3, 1, 0, 4, 1, 3, 7, 3, 1, 9, 8, 4, 8, 0, 3, 0, 3, 4, 4, 5, 5, 3,
       4, 6, 2, 0, 5, 5, 6, 1, 8, 3, 3, 7, 8, 9, 9, 6, 7, 0, 8, 7, 1, 6,
       0, 1, 6, 8, 0, 0, 8, 2, 6, 4, 4, 8, 9, 1, 2, 8, 3, 5, 3, 4, 7, 9,
       0, 7, 4, 3, 4, 4, 8, 0, 5, 7, 8, 2, 4, 5, 7, 1, 7, 1, 9, 2, 2, 9,
       2, 9, 5, 6, 8, 6, 3, 5, 3, 2, 5, 3, 9, 6, 7, 9, 9, 5, 0, 9, 1, 0,
       6, 6, 7, 6, 0, 5, 0, 3, 8, 4, 4, 8, 4, 8, 0, 9])

## 2. Training CNN (1-D Feature)

In [30]:
from sklearn.model_selection import train_test_split
from keras.layers.core import Dense
from keras.models import Model
from keras.optimizers import Adam
from keras.layers import concatenate
import numpy as np
import argparse
import locale
import os
from keras.layers import Dense, Activation, Flatten, Dropout, BatchNormalization
from keras.models import Sequential, Model
from keras.layers import Conv2D, MaxPooling2D, Conv1D, MaxPool1D
from keras import regularizers, optimizers
from keras.utils.np_utils import to_categorical


print("[INFO] loading data...")
#features, images, labels = features, images, labels

print("[INFO] processing data...")
#split = train_test_split(features, images, labels, test_size=0.25, random_state=42)
#(trainFeatureX, testFeatureX, trainImagesX, testImagesX, trainY, testY) = split


trainFeatureX, testFeatureX, trainY, testY = trainAudioFeatures, testAudioFeatures, train_labels, test_labels

# For prediction and Confusion Matrix
trueY = testY

trainY = to_categorical(trainY, 16)
testY = to_categorical(testY, 16)


# define model
n_steps = 1
n_features = 277

# model = Sequential()
# model.add(Conv1D(filters=64, kernel_size=1, activation='relu', input_shape=(n_steps, n_features)))
# model.add(MaxPooling1D(pool_size=2))
# model.add(Flatten())
# model.add(Dense(50, activation='relu'))
# model.add(Dense(16,activation="softmax"))
# model.compile(optimizer='adam', loss='mse')


model = Sequential()
model.add(Conv1D(nb_filter=512, filter_length=1, input_shape=(n_features, 1)))
model.add(Activation('relu'))
model.add(Flatten())
model.add(Dropout(0.4))
model.add(Dense(1024, activation='relu'))
model.add(Dense(512, activation='relu'))
model.add(Dense(16))
model.add(Activation('softmax'))

model.summary()

# # create the MLP and CNN models
# #mlp = create_mlp()
# cnn = create_cnn(120, 1, 1)

# # create the input to our final set of layers as the *output* of both the MLP and CNN
# #combinedInput = concatenate([mlp.output, cnn.output])

# Input = cnn.output

# # The final FC layer head will have two dense layers, the final one being softmax layer
# x = Dense(512, activation="relu")(Input)
# x = Dense(16, activation="softmax")(x)

# # The final model will accept audio on the MLP input and SPEC on the CNN input, outputting prediction
# model_cnn = Model(inputs=cnn.input, outputs=x)

[INFO] loading data...
[INFO] processing data...
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv1d_2 (Conv1D)            (None, 277, 512)          1024      
_________________________________________________________________
activation_3 (Activation)    (None, 277, 512)          0         
_________________________________________________________________
flatten_2 (Flatten)          (None, 141824)            0         
_________________________________________________________________
dropout_2 (Dropout)          (None, 141824)            0         
_________________________________________________________________
dense_4 (Dense)              (None, 1024)              145228800 
_________________________________________________________________
dense_5 (Dense)              (None, 512)               524800    
_________________________________________________________________
dense_6 (Dense)            



In [32]:
# Compile the model
model.compile(optimizers.rmsprop(lr=0.0005, decay=1e-6),loss="categorical_crossentropy",metrics=["accuracy"]) #lr 0.0005

# Train the model
print("[INFO] training model...")
history = model.fit(trainFeatureX.reshape(480, 277,1), trainY, validation_data=(testFeatureX.reshape(120, 277,1), testY), epochs=250, batch_size=32)  # 32

[INFO] training model...


ValueError: cannot reshape array of size 77560 into shape (480,277,1)

In [None]:
# Make predictions
print("[INFO] Evaluating...")
model.evaluate(testFeatureX.reshape(120,277,1), testY)