In [22]:
import librosa
import librosa.feature
import librosa.display
import glob
import numpy as np
import matplotlib.pyplot as plt
from keras.models import Sequential
from keras.layers import Dense, Activation
from keras.utils.np_utils import to_categorical

In [48]:
def display_mfcc(src):
    """Plot the MFCC matrix of an audio file as a heat map.

    Parameters
    ----------
    src : str
        Path of the audio file to load. It is also used as the figure title.

    Returns
    -------
    None
        The figure is rendered with ``plt.show()``.
    """
    y, sr = librosa.load(src)
    # Keyword arguments: recent librosa releases reject positional audio input.
    mfcc = librosa.feature.mfcc(y=y, sr=sr)

    plt.figure(figsize=(10, 4))
    # Rows of an MFCC matrix are cepstral coefficient indices, not mel
    # frequencies, so no frequency scale is applied to the y axis.
    librosa.display.specshow(mfcc, sr=sr, x_axis='time')
    plt.ylabel('MFCC coefficient')
    plt.colorbar()
    plt.title(src)
    plt.tight_layout()
    plt.show()
    

def extract_features_song(src, n_features=25000):
    """Turn one audio file into a fixed-length, peak-normalised MFCC vector.

    Parameters
    ----------
    src : str
        Path of the audio file to load.
    n_features : int, optional
        Length of the returned vector (default 25000, the original cut-off).

    Returns
    -------
    numpy.ndarray
        1-D array of exactly ``n_features`` values: the row-major flattened
        MFCC matrix scaled into [-1, 1], truncated if longer and zero-padded
        if shorter.
    """
    y, sr = librosa.load(src)

    # Keyword arguments: recent librosa releases reject positional audio input.
    mfcc = librosa.feature.mfcc(y=y, sr=sr)

    # Normalise between -1 and 1. A peak of 0 would turn every value into
    # NaN, so the division is skipped in that case.
    peak = np.amax(np.absolute(mfcc))
    if peak > 0:
        mfcc = mfcc / peak

    flat = mfcc.flatten()[:n_features]
    # Short clips are zero-padded so every song yields the same length and
    # the vectors can be stacked into one matrix.
    if flat.size < n_features:
        flat = np.pad(flat, (0, n_features - flat.size), mode='constant')
    return flat


def generate_features_and_labels(data_dir='genres', genres=None):
    """Build the feature matrix and one-hot labels for a genre dataset.

    Every ``<data_dir>/<genre>/*.wav`` file is passed through
    ``extract_features_song``.

    Parameters
    ----------
    data_dir : str, optional
        Directory holding one sub-directory per genre (default ``'genres'``).
    genres : sequence of str, optional
        Genre sub-directory names. Column ``i`` of the label matrix
        corresponds to ``genres[i]``. Defaults to the ten genre names
        hard-coded below.

    Returns
    -------
    (numpy.ndarray, numpy.ndarray)
        ``(features, onehot_labels)`` with one row per song. The label matrix
        always has ``len(genres)`` columns, even if a genre has no files.

    Raises
    ------
    ValueError
        If no ``.wav`` file is found for any of the genres.
    """
    if genres is None:
        genres = ['blues', 'classical', 'country', 'disco', 'hiphop',
                  'jazz', 'metal', 'pop', 'reggae', 'rock']
    genres = list(genres)

    all_features = []
    all_label_ids = []

    for genre_id, genre in enumerate(genres):
        # glob order depends on the filesystem; sort for a reproducible order.
        sound_files = sorted(glob.glob(f'{data_dir}/{genre}/*.wav'))
        print(f'Processing {len(sound_files)} songs in {genre} genres ...')
        for f in sound_files:
            all_features.append(extract_features_song(f))
            # Encode by position in `genres` so class indices stay stable
            # even when some genre directory is empty or missing.
            all_label_ids.append(genre_id)

    if not all_features:
        raise ValueError(
            f"no .wav files found under '{data_dir}' for genres {genres}")

    label_ids = np.asarray(all_label_ids, dtype=np.int32)
    onehot_labels = to_categorical(label_ids, len(genres))
    return np.stack(all_features), onehot_labels

In [44]:
# Build the dataset: one flattened MFCC vector per song plus its one-hot
# genre label. This reads every matched .wav file, one by one.
features, labels = generate_features_and_labels()

Processing 100 songs in blues genres ...
Processing 100 songs in classical genres ...
Processing 100 songs in country genres ...
Processing 100 songs in disco genres ...
Processing 100 songs in hiphop genres ...
Processing 100 songs in jazz genres ...
Processing 100 songs in metal genres ...
Processing 100 songs in pop genres ...
Processing 100 songs in reggae genres ...
Processing 100 songs in rock genres ...


In [45]:
# Sanity check: rows are songs; columns are MFCC values / one-hot classes.
print(features.shape)
print(labels.shape)

(1000, 25000)
(1000, 10)


In [46]:
# Quick visual check of both arrays (NumPy abbreviates large arrays with "...").
print(features, labels)

[[-0.81999075 -0.81014305 -0.75184417 ... -0.01818394  0.01827242
   0.01200242]
 [-0.6975779  -0.49866176 -0.34792846 ... -0.02948674 -0.01857815
  -0.00916902]
 [-0.17461018 -0.20773984 -0.2802357  ...  0.00612161  0.00597738
   0.00513895]
 ...
 [-0.54192156 -0.45409912 -0.4490566  ... -0.00735237 -0.00230275
   0.01251489]
 [-0.35908    -0.39048278 -0.44910252 ... -0.00476223 -0.0132246
  -0.01023068]
 [-0.56411064 -0.59761524 -0.64209867 ...  0.00814686  0.00670453
   0.00592516]] [[1. 0. 0. ... 0. 0. 0.]
 [1. 0. 0. ... 0. 0. 0.]
 [1. 0. 0. ... 0. 0. 0.]
 ...
 [0. 0. 0. ... 0. 0. 1.]
 [0. 0. 0. ... 0. 0. 1.]
 [0. 0. 0. ... 0. 0. 1.]]


In [64]:
training_split = 0.8
split_seed = 42  # fixed so the train/test partition is the same on every run
n_classes = labels.shape[1]  # width of the one-hot block appended to each row

# Glue features and one-hot labels together so that a single row shuffle
# keeps every sample aligned with its label.
alldata = np.column_stack((features, labels))
np.random.default_rng(split_seed).shuffle(alldata)  # in-place row shuffle
splitidx = int(len(alldata) * training_split)
train, test = alldata[:splitidx, :], alldata[splitidx:, :]

print(np.shape(train))
print(np.shape(test))

# The last n_classes columns are the one-hot labels; everything before them
# is the MFCC input.
train_input = train[:, :-n_classes]
train_labels = train[:, -n_classes:]

test_input = test[:, :-n_classes]
test_labels = test[:, -n_classes:]


print(np.shape(train_input))
print(np.shape(test_input))

(800, 25010)
(200, 25010)
(800, 25000)
(200, 25000)


In [69]:
# One output unit per class, taken from the one-hot label width.
n_classes = np.shape(train_labels)[1]

# Single hidden layer classifier: flattened MFCC vector -> 100 ReLU units ->
# softmax over the genre classes.
model = Sequential([
    Dense(100, input_dim=np.shape(train_input)[1]),
    Activation('relu'),
    Dense(n_classes),
    Activation('softmax'),
])

model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

# summary() prints the table itself and returns None, so wrapping it in
# print() would emit a stray "None" line.
model.summary()

# 20% of the training rows are held out by Keras for per-epoch validation.
model.fit(train_input, train_labels, epochs=10, batch_size=32,
          validation_split=0.2)

# Final score on the test rows, which were not passed to fit().
loss, acc = model.evaluate(test_input, test_labels, batch_size=32)

print('Done!')
print('Loss: %.4f, accuracy: %.4f' % (loss, acc))

Model: "sequential_9"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_18 (Dense)             (None, 100)               2500100   
_________________________________________________________________
activation_18 (Activation)   (None, 100)               0         
_________________________________________________________________
dense_19 (Dense)             (None, 10)                1010      
_________________________________________________________________
activation_19 (Activation)   (None, 10)                0         
Total params: 2,501,110
Trainable params: 2,501,110
Non-trainable params: 0
_________________________________________________________________
None
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Done!
Loss: 1.6341, accuracy: 0.4800


In [70]:
# Persist the trained network in two parts.
# 1) Architecture: the layer graph as a JSON document.
model_json = model.to_json()
with open("model.json", "w") as arch_file:
    arch_file.write(model_json)

# 2) Parameters: the learned weights in an HDF5 file.
model.save_weights("model.h5")

print("Saved model to disk")

Saved model to disk
