In [1]:
import librosa
import numpy as np
from scipy.fftpack import dct
import csv
import scipy.io.wavfile
import scipy.signal
from scipy import signal
import soundfile as sf
import warnings
from scipy.io.wavfile import WavFileWarning

###-----------------Zero-crossings

def ZCR(audio_file):
    """Count the zero crossings in an audio file.

    The file is read with ``soundfile``; audio with more than one dimension
    is averaged across axis 1 (the channels) before counting.  A crossing is
    counted for each pair of consecutive samples where one sample is
    negative and the other is non-negative (zero counts as non-negative).

    Args:
        audio_file: File to analyse, passed straight to ``soundfile.read``.

    Returns:
        int: Total number of zero crossings over the whole signal.  This is
        a raw count; it is not normalised by the signal length.
    """
    # Read the audio file (the sample rate is not needed here).
    audio, _ = sf.read(audio_file)

    # Down-mix to mono.
    if audio.ndim > 1:
        audio = np.mean(audio, axis=1)

    # Vectorised form of the per-sample comparison: a crossing happens when
    # the previous sample is >= 0 and the current one is < 0, or vice versa.
    # Both masks are kept explicitly so NaN samples (neither >= 0 nor < 0)
    # never register as a crossing.
    non_negative = audio >= 0
    negative = audio < 0
    crossings = (non_negative[:-1] & negative[1:]) | (negative[:-1] & non_negative[1:])
    return int(np.count_nonzero(crossings))


###-----------------Average-Energy

def AE(audio_file):
    """Return the average short-time energy of an audio file.

    The signal is loaded with ``librosa.load(..., sr=None)`` and cut into
    half-overlapping frames.  The frame length is 2048 samples at 44100 Hz
    (about 46 ms) and is scaled proportionally to the file's sample rate.
    The energy of a frame is the sum of its squared samples; the result is
    the mean of those per-frame energies.

    Args:
        audio_file: File to analyse, passed straight to ``librosa.load``.

    Returns:
        numpy.float64: Mean frame energy.
    """
    # Load the audio together with the sample rate reported by librosa.
    audio, sr = librosa.load(audio_file, sr=None)

    # 2048 samples at 44.1 kHz, scaled to the actual sample rate; the hop is
    # half a frame, so consecutive frames overlap by 50 %.
    frame_length = int(2048 * sr / 44100)
    hop_length = frame_length // 2
    frames = librosa.util.frame(audio, frame_length=frame_length, hop_length=hop_length)

    # Each column of `frames` is one frame.  Square and sum in float64 with a
    # single vectorised reduction instead of the builtin sum(), which loops
    # in Python and accumulates rounding error in the input precision.
    energies = np.sum(np.square(frames, dtype=np.float64), axis=0)

    # Average the per-frame energies.
    return np.mean(energies)


###-----------------Spectral-centroid

def SC(audio_file):
    """Return the mean spectral centroid of an audio file.

    The file is loaded with ``librosa.load`` and the centroid track computed
    by ``librosa.feature.spectral_centroid`` is averaged into one number.

    NOTE(review): ``librosa.load`` is called without ``sr`` here, whereas
    ``AE`` passes ``sr=None``; librosa's default target sample rate therefore
    applies to this feature -- confirm that this difference is intended.

    Args:
        audio_file: File to analyse, passed straight to ``librosa.load``.

    Returns:
        Mean of all values returned by ``spectral_centroid``.
    """
    samples, rate = librosa.load(audio_file)
    centroid_track = librosa.feature.spectral_centroid(y=samples, sr=rate)
    return np.mean(centroid_track)


###-----------------MFCC

def _mel_filterbank(nfilt, nfft, sample_rate):
    """Build an (nfilt, nfft // 2 + 1) bank of triangular mel-spaced filters."""
    low_freq_mel = 0
    high_freq_mel = 2595 * np.log10(1 + (sample_rate / 2) / 700)  # Hz -> mel
    mel_points = np.linspace(low_freq_mel, high_freq_mel, nfilt + 2)  # equally spaced in mel
    hz_points = 700 * (10 ** (mel_points / 2595) - 1)  # mel -> Hz
    bins = np.floor((nfft + 1) * hz_points / sample_rate).astype(int)

    fbank = np.zeros((nfilt, nfft // 2 + 1))
    for m in range(1, nfilt + 1):
        left, center, right = bins[m - 1], bins[m], bins[m + 1]
        # Rising slope; the range is empty when two edges share an FFT bin,
        # so the division below can never be by zero.
        for k in range(left, center):
            fbank[m - 1, k] = (k - left) / (center - left)
        # Falling slope.
        for k in range(center, right):
            fbank[m - 1, k] = (right - k) / (right - center)
    return fbank


def mfcc(audio_file, n_mfccs):
    """Return the time-averaged MFCC vector of a WAV file.

    Pipeline: read the WAV with ``scipy.io.wavfile``, down-mix to mono,
    apply pre-emphasis (0.97), cut into 25 ms frames with a 10 ms step,
    apply a Hamming window, take the power spectrum, apply 40 triangular mel
    filters, convert to a log scale (``20 * log10``), apply an orthonormal
    DCT-II and average every coefficient over all frames.

    Args:
        audio_file: WAV file to analyse, passed to ``scipy.io.wavfile.read``.
        n_mfccs: Number of cepstral coefficients to keep.  Coefficient 0 is
            always dropped, so coefficients ``1 .. n_mfccs`` are returned.

    Returns:
        numpy.ndarray: 1-D array with one mean value per kept coefficient.

    Notes:
        * The frame count follows ``ceil(|signal_len - frame_len| / step)``
          (with a minimum of one frame), so up to one step of trailing
          samples is not covered by any frame.
        * The FFT size is 512 unless a frame is longer than that, in which
          case the next power of two is used so frames are never cropped.
    """
    # Silence WavFileWarning only for this read instead of permanently
    # editing the process-wide warning filters.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", category=WavFileWarning)
        sample_rate, signal = scipy.io.wavfile.read(audio_file)

    # Work in float64 and down-mix multi-channel data; without this,
    # np.append below would flatten (samples, channels) into one interleaved
    # stream and the frames would mix channels.
    signal = signal.astype(np.float64)
    if signal.ndim > 1:
        signal = signal.mean(axis=1)

    # Pre-emphasis.
    pre_emphasis = 0.97
    emphasized_signal = np.append(signal[0], signal[1:] - pre_emphasis * signal[:-1])

    # Framing: 25 ms frames every 10 ms.
    frame_size = 0.025
    frame_stride = 0.01
    frame_length = int(round(frame_size * sample_rate))
    frame_step = int(round(frame_stride * sample_rate))
    signal_length = len(emphasized_signal)
    # max(1, ...) keeps one frame when the signal is exactly one frame long;
    # otherwise the frame set would be empty and the final mean would be NaN.
    num_frames = max(1, int(np.ceil(abs(signal_length - frame_length) / frame_step)))

    # Zero-pad so that every frame index is valid.
    pad_signal_length = num_frames * frame_step + frame_length
    pad_signal = np.append(emphasized_signal, np.zeros(pad_signal_length - signal_length))

    # Row r of `indices` holds the sample positions of frame r.
    frame_starts = np.arange(0, num_frames * frame_step, frame_step)
    indices = frame_starts[:, np.newaxis] + np.arange(frame_length)[np.newaxis, :]
    frames = pad_signal[indices] * np.hamming(frame_length)

    # Power spectrum.  rfft crops its input when n is smaller than the frame,
    # so the FFT size must be at least the frame length (e.g. 1102 samples at
    # 44.1 kHz); 512 is kept as the minimum.
    nfft = max(512, 1 << (frame_length - 1).bit_length())
    mag_frames = np.absolute(np.fft.rfft(frames, nfft))
    pow_frames = (1.0 / nfft) * mag_frames ** 2

    # Mel filter-bank energies on a log scale.
    fbank = _mel_filterbank(40, nfft, sample_rate)
    filter_banks = np.dot(pow_frames, fbank.T)
    # Replace exact zeros so the logarithm stays finite.
    filter_banks = np.where(filter_banks == 0, np.finfo(float).eps, filter_banks)
    filter_banks = 20 * np.log10(filter_banks)

    # DCT; drop coefficient 0 and keep the next n_mfccs coefficients.
    coefficients = dct(filter_banks, type=2, axis=1, norm='ortho')[:, 1:(n_mfccs + 1)]

    # Average over frames to get one value per coefficient.
    return np.mean(coefficients, axis=0)


# Folder names double as class labels and as the file-name prefix of every
# recording inside the folder.
audio_folder = ["DonTau", "HoaTau", "SongTau"]

for instrument in audio_folder:
    audio_type = "\\" + instrument
    audio_link = "C:\\Users\\Admin\\Desktop\\CSDLDPT_BAOCAO\\Audio"

    # Build (and print) the paths "<root>\<label>\<label> (<n>).wav".
    # range(1, 30) covers files numbered 1 through 29.
    linkAudioArr = []
    for file_number in range(1, 30):
        audio_full = audio_link + audio_type + audio_type + " (" + str(file_number) + ").wav"
        linkAudioArr.append(audio_full)
        print(audio_full)

    # One feature row per file: 5 MFCCs, zero-crossing count, average
    # energy, spectral centroid, then the class label.
    listFeature = []
    for item in linkAudioArr:
        # Each extractor reads the file itself, so no separate read is done
        # here (the earlier one also rebound the imported `signal` module).
        mfccs = mfcc(item, 5)
        zc = ZCR(item)
        ae = AE(item)
        sc = SC(item)
        # Wrap the scalar features in 1-element arrays so they can be
        # concatenated with the MFCC vector.
        zc = np.array([zc])
        ae = np.array([ae])
        sc = np.array([sc])
        arr = np.concatenate((mfccs, zc, ae, sc))
        # Appending the label turns the row into an array of strings, which
        # is the form written to the CSV.
        arr = np.append(arr, instrument)
        listFeature.append(arr)

    # Append this folder's rows; `with` closes the file even if writing fails.
    with open("C:\\Users\\Admin\\Desktop\\CSDLDPT_BAOCAO\\TrichRutDT.csv", "a", newline="") as fileCSV:
        writer = csv.writer(fileCSV)
        writer.writerows(listFeature)

C:\Users\Admin\Desktop\CSDLDPT_BAOCAO\Audio\DonTau\DonTau (1).wav
C:\Users\Admin\Desktop\CSDLDPT_BAOCAO\Audio\DonTau\DonTau (2).wav
C:\Users\Admin\Desktop\CSDLDPT_BAOCAO\Audio\DonTau\DonTau (3).wav
C:\Users\Admin\Desktop\CSDLDPT_BAOCAO\Audio\DonTau\DonTau (4).wav
C:\Users\Admin\Desktop\CSDLDPT_BAOCAO\Audio\DonTau\DonTau (5).wav
C:\Users\Admin\Desktop\CSDLDPT_BAOCAO\Audio\DonTau\DonTau (6).wav
C:\Users\Admin\Desktop\CSDLDPT_BAOCAO\Audio\DonTau\DonTau (7).wav
C:\Users\Admin\Desktop\CSDLDPT_BAOCAO\Audio\DonTau\DonTau (8).wav
C:\Users\Admin\Desktop\CSDLDPT_BAOCAO\Audio\DonTau\DonTau (9).wav
C:\Users\Admin\Desktop\CSDLDPT_BAOCAO\Audio\DonTau\DonTau (10).wav
C:\Users\Admin\Desktop\CSDLDPT_BAOCAO\Audio\DonTau\DonTau (11).wav
C:\Users\Admin\Desktop\CSDLDPT_BAOCAO\Audio\DonTau\DonTau (12).wav
C:\Users\Admin\Desktop\CSDLDPT_BAOCAO\Audio\DonTau\DonTau (13).wav
C:\Users\Admin\Desktop\CSDLDPT_BAOCAO\Audio\DonTau\DonTau (14).wav
C:\Users\Admin\Desktop\CSDLDPT_BAOCAO\Audio\DonTau\DonTau (15).wav
C:\U