In [1]:
%load_ext lab_black

In [2]:
import matplotlib.pyplot as plt

import pandas as pd
import numpy as np
import librosa
import random
import os
from random import random, uniform
from tqdm import tqdm


%matplotlib inline
# Expected layout, as read by the code below: ./<split>/<class folder>/<audio file>
dirName = ["train", "valid", "pitchShiftTest"]


def _listClassFolders(split: str) -> list:
    """Return the sorted sub-directory names of ./<split>/.

    Plain files sitting next to the class folders are skipped, because calling
    os.listdir() on them further down would raise.
    """
    root = f"./{split}/"
    return sorted(
        entry for entry in os.listdir(root) if os.path.isdir(os.path.join(root, entry))
    )


trainFolders: list = _listClassFolders(dirName[0])
validFolders: list = _listClassFolders(dirName[1])
pitchShiftFolders: list = _listClassFolders(dirName[2])

# Full paths ("./<split>/<folder>/<file>") of every file in every split.
allFilesList: list = []
# Per-split paths relative to the split directory ("<folder>/<file>").
trainList: list = []
validList: list = []
pitchShiftList: list = []
# categorys: list = [f"{i}".rjust(2, "0") for i in range(1, 11)]
# Number of files per class folder, keyed by split.
categorysCounts: dict = {"train": {}, "valid": {}, "pitch_shift": {}}

# One loop replaces the three copy-pasted ones; each row is
# (split directory, its class folders, relative-path list to fill, counts key).
for split, folders, relativeList, countKey in (
    (dirName[0], trainFolders, trainList, "train"),
    (dirName[1], validFolders, validList, "valid"),
    (dirName[2], pitchShiftFolders, pitchShiftList, "pitch_shift"),
):
    for folder in tqdm(folders):
        files = os.listdir(f"./{split}/{folder}")
        for file in files:
            allFilesList.append(f"./{split}/{folder}/{file}")
            relativeList.append(f"{folder}/{file}")
        # As before, an empty folder gets no entry in the counts.
        if files:
            categorysCounts[countKey][folder] = len(files)

trainList.sort()
validList.sort()
pitchShiftList.sort()
allFilesList.sort()


def extractFeatures(
    path: str, ps: bool = False, ts: bool = False, st: int = 4
) -> np.ndarray:
    """Extract one fixed-length feature vector from an audio file.

    Every librosa feature matrix is transposed and averaged along axis 0, and
    the resulting vectors are concatenated with np.hstack.

    Arguments:
        path {str} -- path of the audio file to load
        ps {bool} -- when True, pitch-shift the signal by `st` before extraction
        ts {bool} -- unused; kept so existing callers keep working
        st {int} -- value passed to librosa as `n_steps` (only read when `ps`)
    Returns:
        np.ndarray -- concatenation, in this order, of
               [
                mfccs,
                mfcc_delta,
                mfcc_delta2,
                chroma,
                mel,
                contrast,
                tonnetz,
                cent,
                flatness,
                rolloff,
                rms,
                ]
        None -- when loading or any feature computation raises; the failing
                path and the error are printed instead of propagating.
    """
    try:
        X, sampleRate = librosa.load(
            path, offset=0.0, res_type="kaiser_best", dtype=np.float32,
        )
        if ps:
            # `sr` must be passed by keyword: librosa >= 0.10 made it
            # keyword-only, and older releases accept the keyword as well.
            X = librosa.effects.pitch_shift(X, sr=sampleRate, n_steps=st)

        # Same reason for `y=`: the signal is keyword-only in librosa >= 0.10.
        mel = np.mean(librosa.feature.melspectrogram(y=X, sr=sampleRate).T, axis=0)
        tonnetz = np.mean(
            librosa.feature.tonnetz(y=librosa.effects.harmonic(X), sr=sampleRate).T,
            axis=0,
        )
        mfccs = np.mean(librosa.feature.mfcc(y=X, sr=sampleRate, n_mfcc=40).T, axis=0)
        # NOTE(review): the deltas are taken on the already averaged MFCC
        # vector, i.e. across the 40 coefficients rather than across frames.
        # Left unchanged so previously saved features stay comparable -- confirm
        # whether this is intended.
        mfcc_delta = librosa.feature.delta(mfccs)  # TONY
        mfcc_delta2 = librosa.feature.delta(mfccs, order=2)  # TONY
        # Magnitude spectrogram shared by the S=... features below.
        stft = np.abs(librosa.stft(X))
        chroma = np.mean(librosa.feature.chroma_stft(S=stft, sr=sampleRate).T, axis=0)
        contrast = np.mean(
            librosa.feature.spectral_contrast(S=stft, sr=sampleRate).T, axis=0
        )
        ###### ADD NEW FEATURES (SPECTRAL RELATED)##### 24-SEP
        cent = np.mean(librosa.feature.spectral_centroid(y=X, sr=sampleRate).T, axis=0)
        flatness = np.mean(librosa.feature.spectral_flatness(y=X).T, axis=0)
        rolloff = np.mean(
            librosa.feature.spectral_rolloff(S=stft, sr=sampleRate).T, axis=0
        )
        rms = np.mean(librosa.feature.rms(S=stft).T, axis=0)
        ext_features = np.hstack(
            [
                mfccs,
                mfcc_delta,
                mfcc_delta2,
                chroma,
                mel,
                contrast,
                tonnetz,
                cent,
                flatness,
                rolloff,
                rms,
            ]
        )

    except Exception as e:
        print("Error encountered while parsing file:%s" % (path))
        # Also report the cause; previously the exception was bound but dropped.
        print("  %s: %s" % (type(e).__name__, e))
        return None

    return np.array(ext_features)


def creatSets(
    path: str, dataList: list, shape: int, ps: bool = False, st: float = 4
) -> (np.ndarray, np.ndarray):
    """Build the feature matrix and label vector for a list of audio files.

    Arguments:
        path {str} -- prefix prepended to every entry of `dataList`
        dataList {list} -- relative paths of the form "<label folder>/<file name>"
        shape {int} -- length of one feature vector (column count of the result)
        ps {bool} -- forwarded to extractFeatures
        st {float} -- forwarded to extractFeatures
    Returns:
        [(np.ndarray, np.ndarray)] -- (features, labels): features has shape
        [len(dataList), shape] and labels has shape [len(dataList)]. Both start
        as zeros; a file whose processing raises ValueError is reported and
        keeps whatever was written before the error.
    """
    xArray = np.zeros([len(dataList), shape])
    yArray = np.zeros([len(dataList)])

    # enumerate() has no length, so give tqdm the total to get a real bar.
    for index, file in tqdm(enumerate(dataList), total=len(dataList)):
        file = path + file
        try:
            # NOTE(review): extractFeatures returns None on failure; what the
            # assignment below does with None is not handled here -- confirm.
            xArray[index] = extractFeatures(file, ps=ps, st=st)
            # Label = name of the folder directly containing the file; it is
            # stored in a float array, so the folder name has to be numeric.
            yArray[index] = file.rsplit("/", 2)[1]
        except ValueError as err:
            # Print the caught error itself, not the ValueError class object.
            print(index, file, repr(err))
    return (xArray, yArray)


def creatAugmentSets(
    path: str, dataList: list, shape: int, percent: float = 0
) -> (np.ndarray, np.ndarray):
    """Build a training set holding each file plus four pitch-shifted variants.

    The result is laid out in five consecutive blocks of len(dataList) rows:
    block 0 holds the features of the unmodified files, blocks 1-4 hold the
    variants shifted by a random amount drawn from [1, 2], [2, 3], [3, 5] and
    [5, 7] respectively.

    Arguments:
        path {str} -- prefix prepended to every entry of `dataList`
        dataList {list} -- relative paths of the form "<label folder>/<file name>"
        shape {int} -- length of one feature vector (column count of the result)
        percent {float} -- a file is pitch-shifted only when random() > percent;
            otherwise its four variants are extracted without shifting
    Returns:
        [(np.ndarray, np.ndarray)] -- (features, labels): features has shape
        [5 * len(dataList), shape], labels is int8 with shape
        [5 * len(dataList)]. A file whose processing raises ValueError is
        reported and keeps whatever was written before the error.
    """
    fileCounts = len(dataList)
    xArray = np.zeros([fileCounts * 5, shape])
    yArray = np.zeros([fileCounts * 5], dtype=np.int8)

    # enumerate() has no length, so give tqdm the total to get a real bar.
    for index, file in tqdm(enumerate(dataList), total=fileCounts):
        file = path + file
        try:
            # Draw order (four shift amounts, then the on/off decision) is the
            # same as before, so a seeded run still produces the same values.
            shifts = [
                uniform(1.0, 2.0),
                uniform(2.0, 3.0),
                uniform(3.0, 5.0),
                uniform(5.0, 7.0),
            ]
            # One decision per file, shared by all four variants.
            ps = random() > percent

            xArray[index] = extractFeatures(file)
            # Label = name of the folder directly containing the file.
            label = np.int8(file.rsplit("/", 2)[1])
            yArray[index] = label
            for block, st in enumerate(shifts, start=1):
                row = fileCounts * block + index
                xArray[row] = extractFeatures(file, ps=ps, st=st)
                yArray[row] = label
        except ValueError as err:
            # Print the caught error itself, not the ValueError class object.
            print(index, file, repr(err))
    return (xArray, yArray)


def zScore(x):
    """Standardize `x` along axis 0: subtract the mean and divide by the std.

    A column whose standard deviation is zero (every value identical) is
    divided by 1 instead, so it becomes all zeros rather than NaN.

    Arguments:
        x -- array-like of numbers; statistics are taken over axis 0
    Returns:
        np.ndarray -- standardized values, same shape as `x`
    """
    mean = np.mean(x, axis=0)
    std = np.std(x, axis=0)
    # Guard against 0/0 for constant columns.
    safeStd = np.where(std == 0, 1.0, std)
    return (x - mean) / safeStd

100%|██████████| 10/10 [00:00<00:00, 10187.77it/s]
100%|██████████| 10/10 [00:00<00:00, 10721.64it/s]
100%|██████████| 10/10 [00:00<00:00, 10138.52it/s]


In [None]:
# The feature-vector length of each split is probed from its first file.
trainFeatureDim = extractFeatures("./train/" + trainList[0]).shape[0]
trainData, trainLabel = creatAugmentSets(
    path="./train/", dataList=trainList, shape=trainFeatureDim, percent=0
)


# Validation set: features of the files as they are, no pitch shift.
validFeatureDim = extractFeatures("./valid/" + validList[0]).shape[0]
validData, validLabel = creatSets(
    path="./valid/", dataList=validList, shape=validFeatureDim, ps=False
)


# Pitch-shift test set: every file is shifted with st=3 before extraction.
pitchShiftFeatureDim = extractFeatures(
    "./pitchShiftTest/" + pitchShiftList[0]
).shape[0]
pitchShiftTestData, pitchShiftTestLabel = creatSets(
    path="./pitchShiftTest/",
    dataList=pitchShiftList,
    shape=pitchShiftFeatureDim,
    ps=True,
    st=3,
)

200it [04:57,  1.49s/it]
74it [00:17,  4.39it/s]

# 儲存資料
- 訓練資料
- 驗證資料
- 音高偏移測試資料

In [7]:
# Shuffle every split; features and labels are indexed with the same
# permutation so each row keeps its label.
indices = np.random.permutation(trainData.shape[0])
trainData = trainData[indices]
trainLabel = trainLabel[indices]

indices = np.random.permutation(validData.shape[0])
validData = validData[indices]
validLabel = validLabel[indices]

indices = np.random.permutation(pitchShiftTestData.shape[0])
pitchShiftTestData = pitchShiftTestData[indices]
pitchShiftTestLabel = pitchShiftTestLabel[indices]

# NOTE(review): each split is standardized with its own mean/std rather than
# with the training statistics -- confirm this is intended.
# `np.int` was removed in NumPy 1.24; the builtin `int` is what it aliased.
np.save("trainData.npy", zScore(trainData))
np.save("trainLabel.npy", trainLabel.astype(int))

np.save("validData.npy", zScore(validData))
np.save("validLabel.npy", validLabel.astype(int))

np.save("pitchShiftTestData.npy", zScore(pitchShiftTestData))
np.save("pitchShiftTestLabel.npy", pitchShiftTestLabel.astype(int))

In [None]:
# Disabled code: mel-spectrogram extraction and dataset creation, kept commented out.
# def extractMelSpec(
#     path: str, flip: bool = False, ps: bool = False, st: int = 4
# ) -> np.ndarray:
#     """[Extract the mel spectrogram]

#     Arguments:
#         path {str} -- [path]

#     Keyword Arguments:
#         flip {bool} -- [flip the matrix] (default: {False})
#         ps {bool} -- [whether to shift the pitch] (default: {False})
#         st {int} -- [number of steps to shift] (default: {4})

#     Returns:
#         np.ndarray -- [mel spectrogram]
#     """
#     try:
#         sig, rate = librosa.load(
#             path, offset=0.0, res_type="kaiser_fast", dtype=np.float32
#         )
#         if len(sig) < 22050:  # pad shorter than 1 sec audio with ramp to zero
#             sig = np.pad(sig, (0, 22050 - len(sig)), "linear_ramp")
#         if ps:
#             sig = librosa.effects.pitch_shift(sig, rate, n_steps=st)
#         db = librosa.amplitude_to_db(
#             librosa.stft(sig[:22050], hop_length=256, center=False), ref=np.max
#         )
#         spec = librosa.feature.melspectrogram(S=db, n_mels=128).T
#         if flip:
#             spec = np.flipud(spec)
#     except Exception as e:
#         print("Error encountered while parsing file:%s" % (path))
#         return None
#     return spec.astype(np.float32)



# def creatMelSpecSets(
#     path: str, dataList: list, shape: tuple
# ) -> (np.ndarray, np.ndarray):
#     """[Create mel-spectrogram training data]

#     Arguments:
#         path {str} -- [path]
#         dataList {list} -- [file list]
#         shape {tuple} -- [matrix shape]
#     Returns:
#         [(np.ndarray, np.ndarray)] -- [(features, labels)]
#     """
#     shape1, shape2 = shape
#     xArray = np.zeros([len(dataList), shape1, shape2])
#     yArray = np.zeros([len(dataList)])
#     for index, file in enumerate(dataList):
#         file = path + file
#         try:
#             xArray[index] = extractMelSpec(file)
#             yArray[index] = file.rsplit("/", 2)[1]
#         except ValueError:
#             print(index, file, ValueError)
#     return (xArray, yArray)




# melTrainData, melTrainLabel = creatMelSpecSets(
#     "./train/", trainList, extractMelSpec("./train/" + trainList[0]).shape
# )

# melValidData, melValidLabel = creatMelSpecSets(
#     "./valid/", validList, extractMelSpec("./valid/" + validList[0]).shape
# )

In [6]:
# Append a trailing axis of length 1. The sizes come from the array itself
# instead of the hard-coded 1000 x 277, so the cell keeps working when the
# number of files or the feature length changes.
trainData.reshape(*trainData.shape, 1)

array([[[-2.68393677e+02],
        [ 1.70615356e+02],
        [-8.51688538e+01],
        ...,
        [ 1.22741458e-05],
        [ 2.69229898e+03],
        [ 4.38340552e-02]],

       [[-2.77341370e+02],
        [ 1.77488068e+02],
        [-8.89495392e+01],
        ...,
        [ 2.11430211e-06],
        [ 2.40316880e+03],
        [ 4.97998814e-02]],

       [[-2.76884827e+02],
        [ 1.72680237e+02],
        [-8.73820877e+01],
        ...,
        [ 6.79598379e-05],
        [ 2.55603262e+03],
        [ 5.02216361e-02]],

       ...,

       [[-2.80193146e+02],
        [ 1.24102409e+02],
        [-1.14348129e+02],
        ...,
        [ 1.65024758e-04],
        [ 3.30756037e+03],
        [ 3.22950003e-02]],

       [[-2.95152283e+02],
        [ 1.48257584e+02],
        [-9.92873383e+01],
        ...,
        [ 4.32145389e-05],
        [ 2.99937012e+03],
        [ 3.22624434e-02]],

       [[-2.68486206e+02],
        [ 1.37189514e+02],
        [-1.15576912e+02],
        ...,
        