In [4]:
import os
import numpy as np
import scipy
from scipy.io import wavfile
import scipy.fftpack as fft
from scipy.signal import get_window
import pandas
import matplotlib.pyplot as plt
import speechpy

In [5]:

def normalize_audio(audio):
    """Scale an audio signal so that its largest absolute sample becomes 1.0.

    Parameters
    ----------
    audio : array_like
        Audio samples, integer PCM or floating point.

    Returns
    -------
    numpy.ndarray
        ``audio`` divided by its peak absolute value. Integer input is
        converted to float64 first; floating-point input keeps its dtype.
        Empty or all-zero (silent) input is returned unscaled instead of
        raising or producing NaN.
    """
    audio = np.asarray(audio)
    # Integer PCM must be widened before taking the absolute value: np.abs()
    # wraps around for the most negative integer (abs(int16(-32768)) is
    # -32768), which would hide the true peak and leave samples outside [-1, 1].
    if not np.issubdtype(audio.dtype, np.inexact):
        audio = audio.astype(np.float64)
    if audio.size == 0:
        return audio.copy()
    peak = np.max(np.abs(audio))
    # A silent recording has no peak to scale by; dividing would give NaN.
    if peak == 0:
        return audio.copy()
    return audio / peak

def frame_audio(audio, FFT_size = 2048, hop_size = 512, sample_rate=44100):
    """Cut a 1-D signal into fixed-length frames.

    The signal is reflect-padded by ``FFT_size / 2`` samples on both sides,
    then frames of ``FFT_size`` samples are taken at a constant step.

    Parameters
    ----------
    audio : 1-D array
        Input samples.
    FFT_size : int
        Number of samples per frame.
    hop_size : number
        Step between frame starts. It is converted to samples as
        ``round(sample_rate * hop_size / 1000)``, i.e. it is treated as a
        duration in milliseconds.
        NOTE(review): with the defaults (512, 44100) the step is 22579
        samples, larger than ``FFT_size`` - confirm that the value is meant
        to be milliseconds and not samples.
    sample_rate : int
        Sampling rate used for the conversion above.

    Returns
    -------
    numpy.ndarray
        Float array of shape ``(frame_count, FFT_size)``.
    """
    half_window = int(FFT_size/2)
    padded = np.pad(audio, half_window, mode='reflect')

    # Step between consecutive frame starts, in samples.
    step = np.round(sample_rate * hop_size / 1000).astype(int)
    total = int((len(padded) - FFT_size) / step) + 1

    frames = np.zeros((total, FFT_size))
    for row, offset in enumerate(np.arange(total) * step):
        frames[row] = padded[offset : offset + FFT_size]
    return frames

def freq_to_mel(freq):
    """Map a frequency (scalar or array) to the mel scale.

    Computes ``2595 * log10(1 + freq / 700)``.
    """
    scaled = freq / 700.0
    return np.log10(scaled + 1.0) * 2595.0

def met_to_freq(mels):
    """Map mel values (scalar or array) back to frequency.

    Computes ``700 * (10 ** (mels / 2595) - 1)``.
    """
    exponent = mels / 2595.0
    return (10.0**exponent - 1.0) * 700.0

def get_filter_points(fmin, fmax, mel_filter_num, FFT_size, sample_rate=44100):
    """Compute the edge positions of a bank of mel filters.

    ``mel_filter_num + 2`` points are spaced evenly on the mel scale between
    ``fmin`` and ``fmax`` and converted back to frequencies.

    Returns
    -------
    tuple
        ``(bin_indices, freqs)``: the frequencies scaled by
        ``(FFT_size + 1) / sample_rate`` and floored to integers, and the
        frequencies themselves.
    """
    mel_edges = np.linspace(freq_to_mel(fmin), freq_to_mel(fmax), num=mel_filter_num+2)
    freqs = met_to_freq(mel_edges)

    bins_per_unit = (FFT_size + 1) / sample_rate
    bin_indices = np.floor(bins_per_unit * freqs).astype(int)
    return bin_indices, freqs

def get_filters(filter_points, FFT_size):
    """Build triangular filters from consecutive edge positions.

    Filter ``k`` ramps linearly from 0 to 1 over
    ``filter_points[k]:filter_points[k + 1]`` and from 1 back to 0 over
    ``filter_points[k + 1]:filter_points[k + 2]``.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(len(filter_points) - 2, FFT_size / 2 + 1)``.
    """
    n_filters = len(filter_points) - 2
    bank = np.zeros((n_filters, int((FFT_size/2)+1)))

    for row in range(n_filters):
        left = filter_points[row]
        peak = filter_points[row + 1]
        right = filter_points[row + 2]
        bank[row, left:peak] = np.linspace(0, 1, peak - left)   # rising edge
        bank[row, peak:right] = np.linspace(1, 0, right - peak)  # falling edge
    return bank

def dct(dct_filter_num, filter_len):
    """Return a cosine basis matrix of shape ``(dct_filter_num, filter_len)``.

    Row 0 is the constant ``1 / sqrt(filter_len)``. Row ``i >= 1`` is
    ``cos(i * (2k + 1) * pi / (2 * filter_len)) * sqrt(2 / filter_len)``
    for ``k = 0 .. filter_len - 1``.
    """
    basis = np.empty((dct_filter_num, filter_len))
    basis[0, :] = 1.0 / np.sqrt(filter_len)

    # Odd multiples of pi / (2 * filter_len): the sample angles of the cosines.
    angles = np.arange(1, 2 * filter_len, 2) * np.pi / (2.0 * filter_len)
    scale = np.sqrt(2.0 / filter_len)

    # All remaining rows at once: one cosine order per row.
    orders = np.arange(1, dct_filter_num)
    basis[1:, :] = np.cos(orders[:, np.newaxis] * angles) * scale

    return basis

In [6]:
# Root folder of the dataset; the per-class prefixes below are appended to it.
trainpath = "C:/Users/Dataset/"

# Path prefix of each class' recordings, relative to `trainpath`.
# The position in this list is the numeric class label written to the output.
filename = [
    "DataLatih/Lock/Lock",
    "DataLatih/Unlock/Unlock",
]

In [None]:
# MFCC feature extraction for the training data.
# For every recording: normalize, trim silence, frame, window, FFT, power
# spectrum, mel filter bank, log, DCT, CMVN, then average each coefficient
# over all frames. One row per recording is written to an Excel sheet.

files_per_class = 720                          # recordings " (1).wav" .. " (720).wav" of each class
count = 0
datacount = files_per_class * len(filename)    # number of training samples

# Pipeline settings (identical for every file).
threshold = 0.1        # amplitude, after normalization, below which a sample counts as silence
hop_size = 512         # NOTE(review): frame_audio computes the hop as sample_rate * hop_size / 1000 - confirm the unit
FFT_size = 2048
freq_min = 0
mel_filter_num = 10
dct_filter_num = 40

# Loop-invariant pieces, built once instead of once per file.
window = get_window("hamming", FFT_size, fftbins=True)
dct_filters = dct(dct_filter_num, mel_filter_num)

# One column per recording: dct_filter_num averaged coefficients + class label.
fiturmean = np.empty((dct_filter_num + 1, datacount))

for kategori in range(len(filename)):
    for i in range(files_per_class):
        # open file & get sample rate
        wav_name = filename[kategori] + " ("+ str(i+1) +").wav"
        sample_rate, audio = wavfile.read(trainpath + wav_name)
        print(wav_name)

        # normalize audio (only the first channel of multi-channel files is used)
        if audio.ndim > 1:
            audio1 = normalize_audio(audio[:,0])
        else:
            audio1 = normalize_audio(audio)

        # crop the silent lead-in and tail: keep the span between the first
        # and the last sample whose magnitude reaches the threshold
        loud = np.flatnonzero(np.abs(audio1) >= threshold)
        if loud.size < 2:
            # Without this check the end index would be left over from the
            # previous file (or undefined), or the trimmed signal would be
            # empty and fail later inside the padding step.
            raise ValueError("No usable signal in " + wav_name +
                             ": fewer than two samples reach the threshold")
        awal = loud[0]                     # first sample at/above the threshold
        audiohasil = audio1[awal:]         # signal from that sample to the end
        akhir = loud[-1] - awal            # last sample at/above the threshold, relative to awal
        # NOTE(review): the end index is exclusive, so the last above-threshold
        # sample itself is not included - confirm whether that is intended.
        audiohasil2 = audiohasil[:akhir]

        # audio framing
        audio_framed = frame_audio(audiohasil2, FFT_size=FFT_size, hop_size=hop_size, sample_rate=sample_rate)

        # windowing
        audio_win = audio_framed * window

        # fft: keep the non-negative frequency bins of every frame; the result
        # is stored in single precision (complex64), shape (frames, bins)
        n_bins = 1 + FFT_size // 2
        audio_fft = fft.fft(audio_win, axis=1)[:, :n_bins].astype(np.complex64)

        # power spectrum
        audio_power = np.square(np.abs(audio_fft))

        # mel filter bank, each filter scaled by 2 / (width between its outer edges)
        freq_high = sample_rate / 2
        filter_points, mel_freqs = get_filter_points(freq_min, freq_high, mel_filter_num, FFT_size, sample_rate)
        filters = get_filters(filter_points, FFT_size)
        enorm = 2.0 / (mel_freqs[2:mel_filter_num+2] - mel_freqs[:mel_filter_num])
        filters *= enorm[:, np.newaxis]

        audio_filtered = np.dot(filters, np.transpose(audio_power))
        # Floor exact zeros at the smallest positive float so log10 never
        # returns -inf (which would turn into NaN in the normalization below).
        audio_log = 10.0 * np.log10(np.maximum(audio_filtered, np.finfo(np.float64).tiny))

        # DCT & cepstral coefficients, shape (dct_filter_num, frames)
        cepstral_coefficents = np.dot(dct_filters, audio_log)

        # normalize the cepstral coefficients with CMVN (mean and variance normalization)
        # NOTE(review): speechpy documents the cmvn input as
        # (observations, features); here rows are coefficients and columns
        # are frames - confirm the normalization axis is the intended one.
        cepstral_coefficents = speechpy.processing.cmvn(cepstral_coefficents,True)

        # average every cepstral coefficient over all frames
        fiturmean[:-1, count] = np.mean(cepstral_coefficents, axis=1)
        fiturmean[-1,count] = kategori     # class label in the last row

        count+=1

# Column names: fitur1 .. fiturN followed by the class label column.
indextable = ["fitur" + str(k+1) for k in range(dct_filter_num)]
indextable.append("klasifikasi")

df = pandas.DataFrame(np.transpose(fiturmean),columns=indextable)
df.to_excel("ekstraksilatih.xlsx", index=False)

DataLatih/Lock/Lock (1).wav
DataLatih/Lock/Lock (2).wav
DataLatih/Lock/Lock (3).wav
DataLatih/Lock/Lock (4).wav
DataLatih/Lock/Lock (5).wav
DataLatih/Lock/Lock (6).wav
DataLatih/Lock/Lock (7).wav
DataLatih/Lock/Lock (8).wav
DataLatih/Lock/Lock (9).wav
DataLatih/Lock/Lock (10).wav
DataLatih/Lock/Lock (11).wav
DataLatih/Lock/Lock (12).wav
DataLatih/Lock/Lock (13).wav
DataLatih/Lock/Lock (14).wav
DataLatih/Lock/Lock (15).wav
DataLatih/Lock/Lock (16).wav
DataLatih/Lock/Lock (17).wav
DataLatih/Lock/Lock (18).wav
DataLatih/Lock/Lock (19).wav
DataLatih/Lock/Lock (20).wav
DataLatih/Lock/Lock (21).wav
DataLatih/Lock/Lock (22).wav
DataLatih/Lock/Lock (23).wav
DataLatih/Lock/Lock (24).wav
DataLatih/Lock/Lock (25).wav
DataLatih/Lock/Lock (26).wav
DataLatih/Lock/Lock (27).wav
DataLatih/Lock/Lock (28).wav
DataLatih/Lock/Lock (29).wav
DataLatih/Lock/Lock (30).wav
DataLatih/Lock/Lock (31).wav
DataLatih/Lock/Lock (32).wav
DataLatih/Lock/Lock (33).wav
DataLatih/Lock/Lock (34).wav
DataLatih/Lock/Lock (35

DataLatih/Lock/Lock (278).wav
DataLatih/Lock/Lock (279).wav
DataLatih/Lock/Lock (280).wav
DataLatih/Lock/Lock (281).wav
DataLatih/Lock/Lock (282).wav
DataLatih/Lock/Lock (283).wav
DataLatih/Lock/Lock (284).wav
DataLatih/Lock/Lock (285).wav
DataLatih/Lock/Lock (286).wav
DataLatih/Lock/Lock (287).wav
DataLatih/Lock/Lock (288).wav
DataLatih/Lock/Lock (289).wav
DataLatih/Lock/Lock (290).wav
DataLatih/Lock/Lock (291).wav
DataLatih/Lock/Lock (292).wav
DataLatih/Lock/Lock (293).wav
DataLatih/Lock/Lock (294).wav
DataLatih/Lock/Lock (295).wav
DataLatih/Lock/Lock (296).wav
DataLatih/Lock/Lock (297).wav
DataLatih/Lock/Lock (298).wav
DataLatih/Lock/Lock (299).wav
DataLatih/Lock/Lock (300).wav
DataLatih/Lock/Lock (301).wav
DataLatih/Lock/Lock (302).wav
DataLatih/Lock/Lock (303).wav
DataLatih/Lock/Lock (304).wav
DataLatih/Lock/Lock (305).wav
DataLatih/Lock/Lock (306).wav
DataLatih/Lock/Lock (307).wav
DataLatih/Lock/Lock (308).wav
DataLatih/Lock/Lock (309).wav
DataLatih/Lock/Lock (310).wav
DataLatih/

In [9]:
# MFCC feature extraction for the test data.
# For every recording: normalize, trim silence, frame, window, FFT, power
# spectrum, mel filter bank, log, DCT, CMVN, then average each coefficient
# over all frames. One row per recording is written to an Excel sheet.

files_per_class = 180                          # recordings " (721).wav" .. " (900).wav" of each class
first_file_no = 721                            # number of the first test recording
count = 0
datacount = files_per_class * len(filename)    # number of test samples

# Pipeline settings (identical for every file).
threshold = 0.1        # amplitude, after normalization, below which a sample counts as silence
hop_size = 512         # NOTE(review): frame_audio computes the hop as sample_rate * hop_size / 1000 - confirm the unit
FFT_size = 2048
freq_min = 0
mel_filter_num = 10
dct_filter_num = 40

# Loop-invariant pieces, built once instead of once per file.
window = get_window("hamming", FFT_size, fftbins=True)
dct_filters = dct(dct_filter_num, mel_filter_num)

# One column per recording: dct_filter_num averaged coefficients + class label.
fiturmean = np.empty((dct_filter_num + 1, datacount))

for kategori in range(len(filename)):
    for i in range(files_per_class):
        # open file & get sample rate
        wav_name = filename[kategori] + " ("+ str(i+first_file_no) +").wav"
        sample_rate, audio = wavfile.read(trainpath + wav_name)
        print(wav_name)

        # normalize audio (only the first channel of multi-channel files is used)
        if audio.ndim > 1:
            audio1 = normalize_audio(audio[:,0])
        else:
            audio1 = normalize_audio(audio)

        # cut the silent lead-in and tail: keep the span between the first
        # and the last sample whose magnitude reaches the threshold
        loud = np.flatnonzero(np.abs(audio1) >= threshold)
        if loud.size < 2:
            # Without this check the end index would be left over from the
            # previous file (or undefined), or the trimmed signal would be
            # empty and fail later inside the padding step.
            raise ValueError("No usable signal in " + wav_name +
                             ": fewer than two samples reach the threshold")
        awal = loud[0]                     # first sample at/above the threshold
        audiohasil = audio1[awal:]         # signal from that sample to the end
        akhir = loud[-1] - awal            # last sample at/above the threshold, relative to awal
        # NOTE(review): the end index is exclusive, so the last above-threshold
        # sample itself is not included - confirm whether that is intended.
        audiohasil2 = audiohasil[:akhir]

        # audio framing
        audio_framed = frame_audio(audiohasil2, FFT_size=FFT_size, hop_size=hop_size, sample_rate=sample_rate)

        # windowing
        audio_win = audio_framed * window

        # fft: keep the non-negative frequency bins of every frame; the result
        # is stored in single precision (complex64), shape (frames, bins)
        n_bins = 1 + FFT_size // 2
        audio_fft = fft.fft(audio_win, axis=1)[:, :n_bins].astype(np.complex64)

        # power spectrum
        audio_power = np.square(np.abs(audio_fft))

        # mel filter bank, each filter scaled by 2 / (width between its outer edges)
        freq_high = sample_rate / 2
        filter_points, mel_freqs = get_filter_points(freq_min, freq_high, mel_filter_num, FFT_size, sample_rate)
        filters = get_filters(filter_points, FFT_size)
        enorm = 2.0 / (mel_freqs[2:mel_filter_num+2] - mel_freqs[:mel_filter_num])
        filters *= enorm[:, np.newaxis]

        audio_filtered = np.dot(filters, np.transpose(audio_power))
        # Floor exact zeros at the smallest positive float so log10 never
        # returns -inf (which would turn into NaN in the normalization below).
        audio_log = 10.0 * np.log10(np.maximum(audio_filtered, np.finfo(np.float64).tiny))

        # DCT & cepstral coefficients, shape (dct_filter_num, frames)
        cepstral_coefficents = np.dot(dct_filters, audio_log)

        # normalize the cepstral coefficients with CMVN (mean and variance normalization)
        # NOTE(review): speechpy documents the cmvn input as
        # (observations, features); here rows are coefficients and columns
        # are frames - confirm the normalization axis is the intended one.
        cepstral_coefficents = speechpy.processing.cmvn(cepstral_coefficents,True)

        # average every cepstral coefficient over all frames
        fiturmean[:-1, count] = np.mean(cepstral_coefficents, axis=1)
        fiturmean[-1,count] = kategori     # class label in the last row

        count+=1

# Column names: fitur1 .. fiturN followed by the class label column.
indextable = ["fitur" + str(k+1) for k in range(dct_filter_num)]
indextable.append("klasifikasi")

df = pandas.DataFrame(np.transpose(fiturmean),columns=indextable)
df.to_excel("ekstraksiuji.xlsx", index=False)

DataLatih/Lock/Lock (721).wav
DataLatih/Lock/Lock (722).wav
DataLatih/Lock/Lock (723).wav
DataLatih/Lock/Lock (724).wav
DataLatih/Lock/Lock (725).wav
DataLatih/Lock/Lock (726).wav
DataLatih/Lock/Lock (727).wav
DataLatih/Lock/Lock (728).wav
DataLatih/Lock/Lock (729).wav
DataLatih/Lock/Lock (730).wav
DataLatih/Lock/Lock (731).wav
DataLatih/Lock/Lock (732).wav
DataLatih/Lock/Lock (733).wav
DataLatih/Lock/Lock (734).wav
DataLatih/Lock/Lock (735).wav
DataLatih/Lock/Lock (736).wav
DataLatih/Lock/Lock (737).wav
DataLatih/Lock/Lock (738).wav
DataLatih/Lock/Lock (739).wav
DataLatih/Lock/Lock (740).wav
DataLatih/Lock/Lock (741).wav
DataLatih/Lock/Lock (742).wav
DataLatih/Lock/Lock (743).wav
DataLatih/Lock/Lock (744).wav
DataLatih/Lock/Lock (745).wav
DataLatih/Lock/Lock (746).wav
DataLatih/Lock/Lock (747).wav
DataLatih/Lock/Lock (748).wav
DataLatih/Lock/Lock (749).wav
DataLatih/Lock/Lock (750).wav
DataLatih/Lock/Lock (751).wav
DataLatih/Lock/Lock (752).wav
DataLatih/Lock/Lock (753).wav
DataLatih/

DataLatih/Unlock/Unlock (804).wav
DataLatih/Unlock/Unlock (805).wav
DataLatih/Unlock/Unlock (806).wav
DataLatih/Unlock/Unlock (807).wav
DataLatih/Unlock/Unlock (808).wav
DataLatih/Unlock/Unlock (809).wav
DataLatih/Unlock/Unlock (810).wav
DataLatih/Unlock/Unlock (811).wav
DataLatih/Unlock/Unlock (812).wav
DataLatih/Unlock/Unlock (813).wav
DataLatih/Unlock/Unlock (814).wav
DataLatih/Unlock/Unlock (815).wav
DataLatih/Unlock/Unlock (816).wav
DataLatih/Unlock/Unlock (817).wav
DataLatih/Unlock/Unlock (818).wav
DataLatih/Unlock/Unlock (819).wav
DataLatih/Unlock/Unlock (820).wav
DataLatih/Unlock/Unlock (821).wav
DataLatih/Unlock/Unlock (822).wav
DataLatih/Unlock/Unlock (823).wav
DataLatih/Unlock/Unlock (824).wav
DataLatih/Unlock/Unlock (825).wav
DataLatih/Unlock/Unlock (826).wav
DataLatih/Unlock/Unlock (827).wav
DataLatih/Unlock/Unlock (828).wav
DataLatih/Unlock/Unlock (829).wav
DataLatih/Unlock/Unlock (830).wav
DataLatih/Unlock/Unlock (831).wav
DataLatih/Unlock/Unlock (832).wav
DataLatih/Unlo