In [None]:
import librosa
import librosa.display
import os
import numpy as np
import pandas as pd
from tensorflow import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPooling2D
from keras.layers import LSTM
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split
from sklearn.model_selection import StratifiedKFold
from keras.callbacks import EarlyStopping

In [None]:
# Load every recording in the CleanTrack folder and convert it to per-frame MFCC rows.
data_dir = r'C:\Users\micky\Downloads\CleanTrack\CleanTrack'

# os.listdir returns entries in arbitrary order. Sorting makes the class blocks in
# `df` appear in a deterministic (alphabetical) order, which the later cells rely on
# when they assign labels purely by row position.
filename = sorted(os.listdir(data_dir))

clip_frames = []
for file in filename:
    # The part of the file name before the first '.' is used as the class label.
    label = file.split('.')[0]

    wavfile = os.path.join(data_dir, file)
    # sr=None keeps the file's native sampling rate; `sr` holds that rate.
    wave, sr = librosa.load(wavfile, mono=True, sr=None)

    # 32 MFCCs per frame with a 25 ms analysis window and a 10 ms hop, both derived
    # from the rate the audio was actually loaded at (not a hard-coded 44100).
    # The signal is passed as `y=` because recent librosa releases accept it by
    # keyword only.
    mfcc = librosa.feature.mfcc(y=wave, sr=sr, n_mfcc=32, hop_length=int(0.01*sr), n_fft=int(0.025*sr))

    # Rows = frames, columns = coefficients; keep at most the first 24000 frames
    # (240 s at a 10 ms hop) of each recording.
    tmp = pd.DataFrame(mfcc).T.head(24000)
    tmp['target'] = label
    clip_frames.append(tmp)

# Concatenate once instead of growing the frame inside the loop; this also removes
# the special case on 'bus.wav' that reset `df` and dropped earlier recordings
# whenever that file was not listed first.
df = pd.concat(clip_frames) if clip_frames else pd.DataFrame()

In [None]:
# CNN
# Input layout (from the original notes): g_size * rows = 72000 and
# each_cat = rows / 3.

# The network is fed `rows` patches of shape (g_size, 32); each class
# contributes `each_cat` of them.
g_size = 80
rows = 900
each_cat = 300


def build_cnn(input_shape):
    """Build and compile a fresh 3-class CNN for inputs of `input_shape`.

    Layers: one 64-filter 4x4 convolution, 4x4 max pooling, dropout,
    flatten, three dense ReLU layers (128, 64, 32) each followed by
    dropout, and a 3-way softmax output. Compiled with categorical
    cross-entropy, Adam (learning rate 0.001) and accuracy as the metric.
    """
    net = Sequential([
        # Convolution: 64 filters, 4x4 kernel, ReLU activation.
        Conv2D(64, kernel_size=(4, 4), activation='relu', input_shape=input_shape),
        # 4x4 max pooling.
        MaxPooling2D(pool_size=(4, 4)),
        # Dropout randomly disconnects 25% of the inputs to limit overfitting.
        Dropout(0.25),
        # Flatten the feature maps for the fully connected layers.
        Flatten(),
        Dense(128, activation='relu'),
        Dropout(0.2),
        Dense(64, activation='relu'),
        Dropout(0.2),
        Dense(32, activation='relu'),
        Dropout(0.2),
        # Output layer: one probability per class.
        Dense(3, activation='softmax'),
    ])
    # Loss function, optimizer and evaluation metric.
    net.compile(loss=keras.losses.categorical_crossentropy,
                optimizer=keras.optimizers.Adam(learning_rate=0.001),
                metrics=['accuracy'])
    return net


# Every column except the last one is a feature; regroup the rows into patches.
X = df.iloc[:, :-1].values.reshape(rows, g_size, 32, 1)
# Integer class ids assigned by position: each_cat patches of 0, then 1, then 2.
y = np.repeat([0, 1, 2], each_cat)

kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=123)
cvscores = []
# Stop a fit once the training loss has not improved for 5 epochs.
early_stop = EarlyStopping(monitor='loss', patience=5, verbose=2)

for train_index, test_index in kfold.split(X, y):
    X_train, y_train = X[train_index], y[train_index]
    X_test, y_test = X[test_index], y[test_index]

    # Convert the integer class ids to one-hot vectors.
    y_train_hot = to_categorical(y_train)
    y_test_hot = to_categorical(y_test)

    # A new, untrained model for every fold.
    model = build_cnn((g_size, 32, 1))

    # Fit the model
    history = model.fit(X_train, y_train_hot, epochs=500, batch_size=32, verbose=2, callbacks=[early_stop])

    # loss plot
    # pd.DataFrame(history.history['loss'], columns=['train']).plot()

    # Evaluate on the held-out fold; scores[1] is the compiled metric.
    scores = model.evaluate(X_test, y_test_hot, verbose=0)
    fold_accuracy = scores[1] * 100
    print("%s: %.2f%%" % (model.metrics_names[1], fold_accuracy))
    cvscores.append(fold_accuracy)

# Mean and standard deviation of the per-fold scores.
print("%.2f%% (+/- %.2f%%)" % (np.mean(cvscores), np.std(cvscores)))

In [None]:
# DNN

# Fully connected classifier on flattened 80*32 inputs: three ReLU layers
# (256, 128, 64), each followed by 20% dropout, and a 3-way softmax output.
nn = Sequential([
    Dense(256, activation='relu', input_shape=(80*32,)),
    Dropout(0.2),
    Dense(128, activation='relu'),
    Dropout(0.2),
    Dense(64, activation='relu'),
    Dropout(0.2),
    Dense(3, activation='softmax'),
])
nn.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Stop training once the validation loss has not improved for 5 epochs.
early_stop = EarlyStopping(monitor='val_loss', min_delta=0, patience=5)

def get_target(num):
    """Return a one-column label frame with `num` rows per class.

    The 'target' column holds `num` copies of 'bus', then `num` copies of
    'gym', then `num` copies of 'river'. The per-class frames are stacked
    without resetting the index, so the index runs 0..num-1 three times.
    """
    class_order = ('bus', 'gym', 'river')
    per_class = [pd.DataFrame([name] * num, columns=['target']) for name in class_order]
    return pd.concat(per_class, axis=0)

# Features: every column except the last one, with each run of 80 consecutive
# rows (32 values each) flattened into a single vector.
X = df.iloc[:, :-1].to_numpy().reshape(-1, 80*32)
# One label per flattened vector, 300 per class.
y = get_target(300)

# Stratified 70/30 split with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42, stratify=y
)
# One-hot encode the string labels of each split.
y_train, y_test = pd.get_dummies(y_train).values, pd.get_dummies(y_test).values

# Keras holds out 20% of the training split for validation; early_stop watches it.
nn.fit(X_train, y_train, epochs=100, batch_size=32, validation_split=0.2, callbacks=[early_stop])
nn.evaluate(X_test, y_test)

In [None]:
# LSTM

# Regroup the per-frame MFCC rows into 900 windows of 80 frames x 32 coefficients.
sample = df.drop('target',axis=1)
sample = sample.values.reshape((900, 80, 32))

# Frame-level one-hot labels (one row per MFCC frame).
y = pd.get_dummies(df.iloc[:, -1])
# One label row per window: take the first frame of every 80-frame window, so that
# labels and features are indexed at the same granularity.
y_windows = y.iloc[::80]
assert len(y_windows) == len(sample), "expected exactly one label row per window"

# Assumes `df` holds three classes as contiguous blocks of 300 windows each
# (verify against the loading cell). Within each block the first 210 windows are
# used for training and the remaining 90 for testing.
train_idx = np.concatenate([np.arange(start, start + 210) for start in (0, 300, 600)])
test_idx = np.concatenate([np.arange(start + 210, start + 300) for start in (0, 300, 600)])

# Features and labels are selected with the SAME window indices. Previously the
# labels were sliced by frame position (e.g. rows 0:90 for the test set), which
# only matched because every frame of a class carries the same label.
X_train, X_test = sample[train_idx], sample[test_idx]
y_train, y_test = y_windows.iloc[train_idx], y_windows.iloc[test_idx]

# Build a simple sequential model.
model = Sequential()
# First LSTM layer; returns the full sequence for the next LSTM layer.
model.add(LSTM(units = 50, return_sequences = True, input_shape = (X_train.shape[1], 32)))
model.add(Dropout(0.2))
# Second LSTM layer.
model.add(LSTM(units = 50, return_sequences = True))
model.add(Dropout(0.2))

# Third LSTM layer; returns only its final output.
model.add(LSTM(units = 50))
model.add(Dropout(0.2))

# 3-way softmax output.
model.add(Dense(units = 3, activation='softmax'))
model.summary()
# Compiling
model.compile(optimizer = 'adam', loss = 'categorical_crossentropy', metrics=['accuracy'])

# Train, then evaluate on the held-out windows.
model.fit(X_train, y_train, epochs = 20, batch_size = 32)
model.evaluate(X_test, y_test)