In [12]:
!python --version

Python 3.7.7


## 加载数据到内存

In [13]:
import librosa
import numpy as np
import os
genres = 'blues classical country disco hiphop jazz metal pop reggae rock'.split()
data_set = []
label_set = []
label2id = {genre:i for i,genre in enumerate(genres)}
id2label = {i:genre for i,genre in enumerate(genres)}
print(label2id)
for g in genres:
    print(g)
    for filename in os.listdir(f'../dataset/genres/{g}/'):
        songname = f'../dataset/genres/{g}/{filename}'
        y, sr = librosa.load(songname, mono=True, duration=30)
        chroma_stft = librosa.feature.chroma_stft(y=y, sr=sr)
        rmse = librosa.feature.rms(y=y)
        spec_cent = librosa.feature.spectral_centroid(y=y, sr=sr)
        spec_bw = librosa.feature.spectral_bandwidth(y=y, sr=sr)
        rolloff = librosa.feature.spectral_rolloff(y=y, sr=sr)
        zcr = librosa.feature.zero_crossing_rate(y)
        mfcc = librosa.feature.mfcc(y=y, sr=sr)
        to_append = f'{np.mean(chroma_stft)} {np.mean(rmse)} {np.mean(spec_cent)} {np.mean(spec_bw)} {np.mean(rolloff)} {np.mean(zcr)}'    
        for e in mfcc:
            to_append += f' {np.mean(e)}'
        data_set.append([float(i) for i in to_append.split(" ")])
        label_set.append(label2id[g])

{'blues': 0, 'classical': 1, 'country': 2, 'disco': 3, 'hiphop': 4, 'jazz': 5, 'metal': 6, 'pop': 7, 'reggae': 8, 'rock': 9}
blues
classical
country
disco
hiphop
jazz
metal
pop
reggae
rock


## 创建数据集

In [14]:
from sklearn.preprocessing import StandardScaler
from keras.utils import np_utils
scaler = StandardScaler()
X = scaler.fit_transform(np.array(data_set, dtype = float))
y = np_utils.to_categorical(np.array(label_set))

Using TensorFlow backend.


In [15]:
print("X.shape: ", X.shape, " Y.shape:", y.shape)

X.shape:  (1000, 26)  Y.shape: (1000, 10)


### 将测试集和训练集分隔

In [16]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

## 创建模型

In [17]:
from keras import models
from keras.layers import Dense, Dropout
def create_model():
    model = models.Sequential()
    model.add(Dense(256, activation='relu', input_shape=(X_train.shape[1],)))
    model.add(Dense(128, activation='relu'))
    model.add(Dense(64, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(10, activation='softmax'))
    return model
model = create_model()

这里创建了一个包含三个隐藏层的神经网络，最后一层输出的是分类层，因为是10类，所以最后一层是10个单元。（这里增加了一层Dropout减少数据过拟合）

## 编译模型

这里是一个分类问题，所以使用类别交叉熵函数`categorical_crossentropy`，然后优化器选择`Adam`，评价指标选择正确率。

In [18]:
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

## 训练与评估

接下来使用`fit`方法进行训练，训练50轮。

In [19]:
model.fit(X_train, y_train, epochs=50, batch_size=128)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<keras.callbacks.callbacks.History at 0x7fdfb01a9350>

使用`evaluate`方法进行评估。

In [20]:
test_loss, test_acc = model.evaluate(X_test,y_test)
print('test_acc: ',test_acc)

test_acc:  0.6349999904632568
