In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from keras import Sequential,Model
from keras.layers import concatenate,Activation, Dense, Dropout, Conv2D, Flatten, MaxPooling2D, GlobalMaxPooling2D, GlobalAveragePooling2D, AveragePooling2D, Input, Add, BatchNormalization
from keras.preprocessing.image import ImageDataGenerator
from keras.models import model_from_json
from sklearn.metrics import roc_curve
from keras.utils import np_utils
from tqdm.auto import tqdm
import pandas as pd
import numpy as np

import librosa 
import librosa.display
import pylab
import matplotlib.pyplot as plt
import cv2
import os

In [None]:
import wave
import os
import shutil

# The parameters are prerequisite information. More specifically,
# channels, bit_depth, sampling_rate must be known to use this function.
def pcm2wav( pcm_file, wav_file, channels=1, bit_depth=16, sampling_rate=44100 ):

    # Check if the options are valid.
    if bit_depth % 8 != 0:
        raise ValueError("bit_depth "+str(bit_depth)+" must be a multiple of 8.")
        
    # Read the .pcm file as a binary file and store the data to pcm_data
    with open( pcm_file, 'rb') as opened_pcm_file:
        pcm_data = opened_pcm_file.read();
        
        obj2write = wave.open( wav_file, 'wb')
        obj2write.setnchannels( channels )
        obj2write.setsampwidth( bit_depth // 8 )
        obj2write.setframerate( sampling_rate )
        obj2write.writeframes( pcm_data )
        obj2write.close()


In [9]:
def load_wav(path, sr):
    return librosa.core.load(path, sr=sr)[0]

def trim_silence(wav, top_db=23, fft_size=512, hop_size=128):
    return librosa.effects.trim(wav, top_db= top_db, frame_length=fft_size, hop_length=hop_size)[0]


def spectral_features(y, sr, n_fft, window_size):
    S, phase = librosa.magphase(librosa.stft(y=y,n_fft=n_fft, hop_length=window_size+1))

    centroid = librosa.feature.spectral_centroid(S=S,n_fft=n_fft)
    bandwidth = librosa.feature.spectral_bandwidth(S=S)
    flatness = librosa.feature.spectral_flatness(S=S)
    rolloff = librosa.feature.spectral_rolloff(S=S, sr=sr)

    S = np.abs(librosa.stft(y,n_fft=n_fft,hop_length=window_size+1))
    contrast = librosa.feature.spectral_contrast(S=S, sr=sr)
    
    return np.concatenate([centroid, bandwidth, flatness, rolloff, contrast], axis = 0)

In [10]:
def segment_cough(x, fs, cough_padding=0.2, min_cough_len=0.2, th_l_multiplier=0.1, th_h_multiplier=2):
    cough_mask = np.array([False] * len(x))

    # Define hysteresis thresholds
    rms = np.sqrt(np.mean(np.square(x)))
    seg_th_l = th_l_multiplier * rms
    seg_th_h = th_h_multiplier * rms
    # 기침소리 크기 컷팅을 위한 최저,최고 임계값

    # Segment coughs-> 기침소리 + 작은기침소리(+잡음)에서 앞부분만 자르기 위함
    coughSegments = []
    padding = round(fs * cough_padding)
    # 기침 컷팅 후 전후의 추가되는 샘플수
    min_cough_samples = round(fs * min_cough_len)
    # 기침의 최소샘플수
    cough_start = 0
    cough_end = 0
    cough_in_progress = False
    tolerance = round(0.01 * fs)
    below_th_counter = 0
    # 기침 진행 중 sample 값이 low임계값보다 작은 횟수

    for i, sample in enumerate(x ** 2):
        # enumerate를 사용해 현재 반복 횟수(i)와 현재 반복의 항목 값(sample) 저장
        if cough_in_progress:
            if sample < seg_th_l:
                below_th_counter += 1
                if below_th_counter > tolerance:
                    cough_end = i + padding if (i + padding < len(x)) else len(x) - 1
                    cough_in_progress = False
                    if (cough_end + 1 - cough_start - 2 * padding > min_cough_samples):
                        coughSegments.append(x[cough_start:cough_end + 1])
                        cough_mask[cough_start:cough_end + 1] = True
            elif i == (len(x) - 1):
                cough_end = i
                cough_in_progress = False
                if (cough_end + 1 - cough_start - 2 * padding > min_cough_samples):
                    coughSegments.append(x[cough_start:cough_end + 1])
            else:
                below_th_counter = 0
        else:
            if sample > seg_th_h:
                cough_start = i - padding if (i - padding >= 0) else 0
                cough_in_progress = True

            # think- 앞에 패딩을 해주기위한 과정인 거 같은데, 만약 기침 시작을 녹음과 동시에 시작으로 해서 기침 시작 sample의 index가 padding의 못 미치면?

    return coughSegments, cough_mask

def compute_SNR(x, fs):
    """Compute the Signal-to-Noise ratio of the audio signal x (np.array) with sampling frequency fs (float)"""
    segments, cough_mask = segment_cough(x, fs)
    RMS_signal = 0 if len(x[cough_mask]) == 0 else np.sqrt(np.mean(np.square(x[cough_mask])))
    RMS_noise = np.sqrt(np.mean(np.square(x[~cough_mask])))
    SNR = 0 if (RMS_signal == 0 or np.isnan(RMS_noise)) else 20 * np.log10(RMS_signal / RMS_noise)
    return SNR

record_covid segement,record_healthy segement

In [11]:
sr = 24000
rec_covid_dir="/content/drive/MyDrive/COVID19/dataset/normalized_covid/"

covid_file_list = os.listdir(rec_covid_dir)

covid_path = []
for i in covid_file_list:
    covid_path.append(rec_covid_dir + str(i))

rec_data_seg_c = {}
rec_data_seg_c['id'] = []
rec_data_seg_c['data'] = []
rec_data_seg_c['covid_status'] = []


for i in tqdm(range(len(covid_path))):       
    
    file = covid_path[i]
    y = load_wav(file, sr)
    cough_segments, cough_mask = segment_cough(y,sr, cough_padding=0)
    for j in range(len(cough_segments)):
        rec_data_seg_c['id'].append(covid_path[j])
        rec_data_seg_c['data'].append(cough_segments[j])
        rec_data_seg_c['covid_status'].append('COVID-19')


  0%|          | 0/652 [00:00<?, ?it/s]

KeyboardInterrupt: ignored

In [None]:
sr = 24000
rec_healthy_dir="/content/drive/MyDrive/COVID19/dataset/normalized_healthy/"

healthy_file_list = os.listdir(rec_healthy_dir)

healthy_path = []
for i in healthy_file_list:
    healthy_path.append(rec_healthy_dir + str(i))
    

rec_data_seg_h = {}
rec_data_seg_h['id'] = []
rec_data_seg_h['data'] = []
rec_data_seg_h['covid_status'] = []


for i in tqdm(range(len(healthy_path))):       
    
    file = healthy_path[i]
    y = load_wav(file, sr)
    cough_segments, cough_mask = segment_cough(y,sr, cough_padding=0)
    for j in range(len(cough_segments)):
        rec_data_seg_h['id'].append(healthy_path[j])
        rec_data_seg_h['data'].append(cough_segments[j])
        rec_data_seg_h['covid_status'].append('healthy')


In [None]:
import pickle
pickle.dump( rec_data_seg_c, open( "/content/drive/MyDrive/COVID19/separability_seg_rec_c.pickle", "wb" ) ) ##자른 데이터 저장
pickle.dump( rec_data_seg_h, open( "/content/drive/MyDrive/COVID19/separability_seg_rec_h.pickle", "wb" ) ) ##자른 데이터 저장


In [None]:
ds_rec_h = pd.DataFrame.from_dict(rec_data_seg_h)
ds_rec_c = pd.DataFrame.from_dict(rec_data_seg_c)
#data segement를 받아 DataFrame반환 = ds


is_rec_healthy_seg = ds_rec_h['covid_status'] == 'healthy'
rec_healthy_seg = ds_rec_h[is_rec_healthy_seg]
len_rec_healthy_seg = len(rec_healthy_seg)

is_rec_covid_seg = ds_rec_c['covid_status'] == 'COVID-19'
rec_covid_seg = ds_rec_c[is_rec_covid_seg]
len_rec_covid_seg = len(rec_covid_seg)

print(len_rec_healthy_seg)
print(len_rec_covid_seg)

2232
1642


record_all segement

In [12]:
import pickle #저장해놓은 데이터 열기

rec_data_seg_h = {}
rec_data_seg_h['data'] = []
rec_data_seg_h['covid_status'] = []

with open('/content/drive/MyDrive/COVID19/separability_seg_rec_h.pickle', 'rb') as f:
    #파일을 열고 닫는 것을 자동으로 처리
    rec_data_seg_h = pickle.load(f)
    
rec_data_seg_c = {}
rec_data_seg_c['data'] = []
rec_data_seg_c['covid_status'] = []

with open('/content/drive/MyDrive/COVID19/separability_seg_rec_c.pickle', 'rb') as f:
    #파일을 열고 닫는 것을 자동으로 처리
    rec_data_seg_c = pickle.load(f)



In [13]:
ds_rec_h = pd.DataFrame.from_dict(rec_data_seg_h)
ds_rec_c = pd.DataFrame.from_dict(rec_data_seg_c)
#data segement를 받아 DataFrame반환 = ds


is_rec_healthy_seg = ds_rec_h['covid_status'] == 'healthy'
rec_healthy_seg = ds_rec_h[is_rec_healthy_seg]
len_rec_healthy_seg = len(rec_healthy_seg)

is_rec_covid_seg = ds_rec_c['covid_status'] == 'COVID-19'
rec_covid_seg = ds_rec_c[is_rec_covid_seg]
len_rec_covid_seg = len(rec_covid_seg)

print(len_rec_healthy_seg)
print(len_rec_covid_seg)

2232
1642


In [None]:
sr = 24000

rec_data_seg = {}
rec_data_seg['id'] = []
rec_data_seg['data'] = []
rec_data_seg['covid_status'] = []


for i in tqdm(range(2232)):       
    rec_data_seg['id'].append('healthy'+str(i))
    rec_data_seg['data'].append(rec_data_seg_h['data'][i])
    rec_data_seg['covid_status'].append(rec_data_seg_h['covid_status'][i])

for i in tqdm(range(1642)):       
    rec_data_seg['id'].append('covid'+str(i))
    rec_data_seg['data'].append(rec_data_seg_c['data'][i])
    rec_data_seg['covid_status'].append(rec_data_seg_c['covid_status'][i])

  0%|          | 0/2232 [00:00<?, ?it/s]

  0%|          | 0/1642 [00:00<?, ?it/s]

In [None]:
import pickle
pickle.dump( rec_data_seg, open( "/content/drive/MyDrive/COVID19/rec_seg.pickle", "wb" ) ) ##자른 데이터 저장

In [3]:
import pickle #저장해놓은 데이터 열기

rec_data_seg = {}
rec_data_seg['data'] = []
rec_data_seg['covid_status'] = []

with open('/content/drive/MyDrive/COVID19/rec_seg.pickle', 'rb') as f:
    #파일을 열고 닫는 것을 자동으로 처리
    rec_data_seg = pickle.load(f)

In [4]:
ds_rec = pd.DataFrame.from_dict(rec_data_seg)
#data segement를 받아 DataFrame반환 = ds


is_healthy_seg = ds_rec['covid_status'] == 'healthy'
healthy_seg = ds_rec[is_healthy_seg]
len_healthy_seg = len(healthy_seg)

is_covid_seg = ds_rec['covid_status'] == 'COVID-19'
covid_seg = ds_rec[is_covid_seg]
len_covid_seg = len(covid_seg)

print(len_healthy_seg)
print(len_covid_seg)

2232
1642


---
# 1-Prepare Data
---



## Prepare dataset

+  coughvid 

In [None]:
import pickle #저장해놓은 데이터 열기

data_seg_h = {}
data_seg_h['id'] = []
data_seg_h['data'] = []
data_seg_h['covid_status'] = []

with open('/content/drive/MyDrive/COVID19/separability_seg_h.pickle', 'rb') as f:
    #파일을 열고 닫는 것을 자동으로 처리
    data_seg_h = pickle.load(f)
    
data_seg_c = {}
data_seg_c['id'] = []
data_seg_c['data'] = []
data_seg_c['covid_status'] = []

with open('/content/drive/MyDrive/COVID19/separability_seg_c.pickle', 'rb') as f:
    #파일을 열고 닫는 것을 자동으로 처리
    data_seg_c = pickle.load(f)

**covid 데이터, healthy 데이터**



In [None]:
ds_h = pd.DataFrame.from_dict(data_seg_h)
ds_c = pd.DataFrame.from_dict(data_seg_c)
#data segement를 받아 DataFrame반환 = ds


is_healthy_seg = ds_h['covid_status'] == 'healthy'
healthy_seg = ds_h[is_healthy_seg]
len_healthy_seg = len(healthy_seg)

is_covid_seg = ds_c['covid_status'] == 'COVID-19'
covid_seg = ds_c[is_covid_seg]
len_covid_seg = len(covid_seg)

print(len_healthy_seg)
print(len_covid_seg)


1339
1140


In [None]:
ds_h

Unnamed: 0,id,data,covid_status
0,16f6c8ca-d05c-49f7-9dd1-2552302422b5,"[-0.5618683, -0.29613346, -0.10295212, -0.1602...",healthy
1,16f6c8ca-d05c-49f7-9dd1-2552302422b5,"[-0.71100324, -0.42267305, -0.23587985, 0.1221...",healthy
2,16f6c8ca-d05c-49f7-9dd1-2552302422b5,"[-0.57699513, -0.5523702, -0.36642045, -0.0569...",healthy
3,16f6c8ca-d05c-49f7-9dd1-2552302422b5,"[-0.52435374, -0.46784508, -0.35949245, -0.052...",healthy
4,16f6c8ca-d05c-49f7-9dd1-2552302422b5,"[-0.61304903, -0.588954, -0.16296703, 0.391249...",healthy
...,...,...,...
1334,0ff759b9-5711-406d-a7a1-1d40ffc14317,"[0.67883795, 0.81053376, 0.8389153, 0.7409207,...",healthy
1335,0ff759b9-5711-406d-a7a1-1d40ffc14317,"[0.58560896, 0.63914764, 0.65668136, 0.6246286...",healthy
1336,19c0e1a7-90d3-451f-9091-8ba9cf93b181,"[-0.42083806, -0.41979614, -0.3212672, -0.1525...",healthy
1337,19c0e1a7-90d3-451f-9091-8ba9cf93b181,"[-0.4260451, -0.39163622, -0.2800445, -0.43138...",healthy


In [None]:
ds_c

**record_covid 데이터, record_healthy 데이터**

In [14]:
import pickle #저장해놓은 데이터 열기

rec_data_seg_h = {}
rec_data_seg_h['data'] = []
rec_data_seg_h['covid_status'] = []

with open('/content/drive/MyDrive/COVID19/separability_seg_rec_h.pickle', 'rb') as f:
    #파일을 열고 닫는 것을 자동으로 처리
    data_seg_rec_h = pickle.load(f)
    
rec_data_seg_c = {}
rec_data_seg_c['data'] = []
rec_data_seg_c['covid_status'] = []

with open('/content/drive/MyDrive/COVID19/separability_seg_rec_c.pickle', 'rb') as f:
    #파일을 열고 닫는 것을 자동으로 처리
    data_seg_rec_c = pickle.load(f)



In [15]:
ds_rec_h = pd.DataFrame.from_dict(data_seg_rec_h)
ds_rec_c = pd.DataFrame.from_dict(data_seg_rec_c)
#data segement를 받아 DataFrame반환 = ds


is_rec_healthy_seg = ds_rec_h['covid_status'] == 'healthy'
rec_healthy_seg = ds_rec_h[is_rec_healthy_seg]
len_rec_healthy_seg = len(rec_healthy_seg)

is_rec_covid_seg = ds_rec_c['covid_status'] == 'COVID-19'
rec_covid_seg = ds_rec_c[is_rec_covid_seg]
len_rec_covid_seg = len(rec_covid_seg)

print(len_rec_healthy_seg)
print(len_rec_covid_seg)

2232
1642


In [16]:
ds_rec_h

Unnamed: 0,id,data,covid_status
0,/content/drive/MyDrive/COVID19/dataset/normali...,"[0.411839, 0.15610811, -0.06748126, -0.3013237...",healthy
1,/content/drive/MyDrive/COVID19/dataset/normali...,"[0.45728722, 0.28256226, 0.07147584, -0.069038...",healthy
2,/content/drive/MyDrive/COVID19/dataset/normali...,"[0.39101714, 0.28078565, -0.03763746, -0.41127...",healthy
3,/content/drive/MyDrive/COVID19/dataset/normali...,"[0.43599373, 0.21280429, 0.08150704, -0.025913...",healthy
4,/content/drive/MyDrive/COVID19/dataset/normali...,"[0.5230554, 0.32768637, 0.102763444, -0.073806...",healthy
...,...,...,...
2227,/content/drive/MyDrive/COVID19/dataset/normali...,"[0.34473768, 0.31424418, 0.16304643, 0.0776310...",healthy
2228,/content/drive/MyDrive/COVID19/dataset/normali...,"[-0.40200678, -0.29110616, -0.02898339, 0.1048...",healthy
2229,/content/drive/MyDrive/COVID19/dataset/normali...,"[0.38482198, 0.3355954, 0.15460977, -0.1471588...",healthy
2230,/content/drive/MyDrive/COVID19/dataset/normali...,"[-0.39665648, -0.31453294, -0.09761539, 0.2654...",healthy


------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
**mfcc, cent, bandwidth, contrast, flatness, rolloff, chroma, zero crossing, mfcc_delta, mfcc_delta2, Rms energy, onset 함수**
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
2차원 형변환 필요: cent, bandwidth, flatness, rolloff, zero crossing, Rms energy, onset

In [6]:
sr=24000
def mfccs(row):

    name     = row[0]
    try:
        audio = row[1]
       
        mfccs    = librosa.feature.mfcc(y=audio,sr=sr,n_mfcc=13)  #20
        mfccsscaled = np.mean(mfccs.T,axis=0)
        
    except:
        print('File cannot open:',name)
        return None
    return mfccsscaled

def cent(row):

    name     = row[0]
    try:
        audio = row[1]
       
        cent = librosa.feature.spectral_centroid(y=audio,sr=sr) #1
        centscaled = np.mean(cent.T,axis=0)

    except:
        print('File cannot open:',name)
        return None
    return centscaled

def bandwidth(row):

    name     = row[0]
    try:
        audio = row[1]
       
        bandwidth = librosa.feature.spectral_bandwidth(y=audio,sr=sr)   #1
        bandwidthscaled = np.mean(bandwidth.T, axis=0)
        
    except:
        print('File cannot open:',name)
        return None
    return bandwidthscaled

def contrast(row):

    name     = row[0]
    try:
        audio = row[1]
       
        contrast = librosa.feature.spectral_contrast(y=audio,sr=sr) #7
        contrastscaled = np.mean(contrast.T, axis=0)
     
    except:
        print('File cannot open:',name)
        return None
    return contrastscaled

def flatness(row):

    name     = row[0]
    try:
        audio = row[1]
       
        flatness = librosa.feature.spectral_flatness(y=audio)   #1
        flatnessscaled = np.mean(flatness.T, axis=0)

    except:
        print('File cannot open:',name)
        return None
    return flatnessscaled


def rolloff(row):

    name     = row[0]
    try:
        audio = row[1]
       
        rolloff = librosa.feature.spectral_rolloff(y=audio,sr=sr)   #1
        rolloffscaled = np.mean(rolloff.T, axis=0)      
        
    except:
        print('File cannot open:',name)
        return None
    return rolloffscaled


def chroma_vec(row):

    name     = row[0]
    try:
        audio = row[1]
       
        chroma_vec = librosa.feature.chroma_stft(y=audio,sr=sr)   #12
        chroma_vecscaled = np.mean(chroma_vec.T, axis=0)
      
    except:
        print('File cannot open:',name)
        return None
    return chroma_vecscaled

def zero(row):

    name     = row[0]
    spec=[]
    try:
        audio = row[1]
        zero = librosa.feature.zero_crossing_rate(y=audio)   #1
        zeroscaled = np.mean(zero.T, axis=0)
        
    except:
        print('File cannot open:',name)
        print(spec)
        return None
    return zeroscaled

def mfcc_delta(row):

    name     = row[0]
    try:
        audio = row[1]
       
        mfccs    = librosa.feature.mfcc(y=audio,sr=sr)   #20
        mfcc_delta = librosa.feature.delta(mfccs)
        deltascaled= np.mean(mfcc_delta.T, axis=0)

        
    except:
        print('File cannot open:',name)
        return None
    return deltascaled

def mfcc_delta2(row):

    name     = row[0]
    try:
        audio = row[1]

        mfccs    = librosa.feature.mfcc(y=audio,sr=sr)   #20
        mfcc_delta2 = librosa.feature.delta(mfccs, order=2)
        delta2scaled = np.mean(mfcc_delta2.T, axis=0)

        
    except:
        print('File cannot open:',name)
        return None
    return delta2scaled


def rms(row):

    name     = row[0]
    try:
        audio = row[1]

        rms = librosa.feature.rms(y=audio)   #1
        rmsscaled = np.mean(rms.T, axis=0)
        
    except:
        print('File cannot open:',name)
        return None
    return rmsscaled


def onset(row):

    name     = row[0]
    try:
        audio = row[1]

        onset = librosa.onset.onset_detect(y=audio,sr=sr)   #1
        onsetscaled = np.mean(onset.T, axis=0)

    except:
        print('File cannot open:',name)
        return None
    return onsetscaled

----------------------------------------------------------------------------------------------------------------------------------------------------------------
**mfcc**
-------------------------------------------------------------------------------




In [17]:
mfcc_h = []
mfcc_c = []
mfcc_rec_h = []
mfcc_rec_c = []

'''
for row in tqdm(ds_h.values[0:]):
    feature_set_h  = mfccs(row)
    mfcc_h.append(feature_set_h)

for row in tqdm(ds_c.values[0:]):
    feature_set_c  = mfccs(row)
    mfcc_c.append(feature_set_c)
'''
for row in tqdm(ds_rec_h.values[0:]):
    feature_set_h  = mfccs(row)
    mfcc_rec_h.append(feature_set_h)

for row in tqdm(ds_rec_c.values[0:]):
    feature_set_c  = mfccs(row)
    mfcc_rec_c.append(feature_set_c)



  0%|          | 0/2232 [00:00<?, ?it/s]

  0%|          | 0/1642 [00:00<?, ?it/s]

In [18]:
mfcc_h_mean=[np.mean(mfcc_h,axis=0)]
mfcc_h_mean=np.array(mfcc_h_mean)
mfcc_h_covariance=np.cov(mfcc_h, rowvar=False)

mfcc_c_mean=[np.mean(mfcc_c,axis=0)]
mfcc_c_mean=np.array(mfcc_c_mean)
mfcc_c_covariance=np.cov(mfcc_c, rowvar=False)

mfcc_rec_h_mean=[np.mean(mfcc_rec_h,axis=0)]
mfcc_rec_h_mean=np.array(mfcc_rec_h_mean)
mfcc_rec_h_covariance=np.cov(mfcc_rec_h, rowvar=False)

mfcc_rec_c_mean=[np.mean(mfcc_rec_c,axis=0)]
mfcc_rec_c_mean=np.array(mfcc_rec_c_mean)
mfcc_rec_c_covariance=np.cov(mfcc_rec_c, rowvar=False)


  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  avg = a.mean(axis)
  ret, rcount, out=ret, casting='unsafe', subok=False)
  This is separate from the ipykernel package so we can avoid doing imports until
  c *= np.true_divide(1, fact)
  c *= np.true_divide(1, fact)
  import sys


In [None]:
print(np.shape(np.array(mfcc_h_mean)))
print(np.shape(np.array(mfcc_h_covariance)))
print(np.shape(np.array(mfcc_rec_h_mean)))
print(np.shape(np.array(mfcc_rec_h_covariance)))

(1, 20)
(20, 20)
(1, 20)
(20, 20)


----------------------------------------------------------------------------------------------------------------------------------------------------------------
**mfcc_delta**
----------------------------------------------------------------------------------------------------------------------------------------------------------------

In [None]:
mfcc_delta_h = []
mfcc_delta_c = []
mfcc_delta_rec_h = []
mfcc_delta_rec_c = []


for row in tqdm(ds_h.values[0:]):
    feature_set_h  = mfcc_delta(row)
    mfcc_delta_h.append(feature_set_h)

for row in tqdm(ds_c.values[0:]):
    feature_set_c  = mfcc_delta(row)
    mfcc_delta_c.append(feature_set_c)

for row in tqdm(ds_rec_h.values[0:]):
    feature_set_h  = mfcc_delta(row)
    mfcc_delta_rec_h.append(feature_set_h)

for row in tqdm(ds_rec_c.values[0:]):
    feature_set_c  = mfcc_delta(row)
    mfcc_delta_rec_c.append(feature_set_c)

  0%|          | 0/1339 [00:00<?, ?it/s]

  0%|          | 0/1140 [00:00<?, ?it/s]

  0%|          | 0/2232 [00:00<?, ?it/s]

  0%|          | 0/1642 [00:00<?, ?it/s]

In [None]:
mfcc_delta_h_mean=[np.mean(mfcc_delta_h,axis=0)]
mfcc_delta_h_mean=np.array(mfcc_delta_h_mean)
mfcc_delta_h_covariance=np.cov(mfcc_delta_h, rowvar=False)

mfcc_delta_c_mean=[np.mean(mfcc_delta_c,axis=0)]
mfcc_delta_c_mean=np.array(mfcc_delta_c_mean)
mfcc_delta_c_covariance=np.cov(mfcc_delta_c, rowvar=False)

mfcc_delta_rec_h_mean=[np.mean(mfcc_delta_rec_h,axis=0)]
mfcc_delta_rec_h_mean=np.array(mfcc_delta_rec_h_mean)
mfcc_delta_rec_h_covariance=np.cov(mfcc_delta_rec_h, rowvar=False)

mfcc_delta_rec_c_mean=[np.mean(mfcc_delta_rec_c,axis=0)]
mfcc_delta_rec_c_mean=np.array(mfcc_delta_rec_c_mean)
mfcc_delta_rec_c_covariance=np.cov(mfcc_delta_rec_c, rowvar=False)


In [None]:
print(np.shape(np.array(mfcc_delta_h_mean)))
print(np.shape(np.array(mfcc_delta_h_covariance)))

print(np.shape(np.array(mfcc_delta_c_mean)))
print(np.shape(np.array(mfcc_delta_c_covariance)))

print(np.shape(np.array(mfcc_delta_rec_h_mean)))
print(np.shape(np.array(mfcc_delta_rec_h_covariance)))

print(np.shape(np.array(mfcc_delta_rec_c_mean)))
print(np.shape(np.array(mfcc_delta_rec_c_covariance)))

(1, 20)
(20, 20)
(1, 20)
(20, 20)
(1, 20)
(20, 20)
(1, 20)
(20, 20)


----------------------------------------------------------------------------------------------------------------------------------------------------------------
**mfcc_delta2**
----------------------------------------------------------------------------------------------------------------------------------------------------------------

In [None]:
mfcc_delta2_h = []
mfcc_delta2_c = []
mfcc_delta2_rec_h = []
mfcc_delta2_rec_c = []

for row in tqdm(ds_h.values[0:]):
    feature_set_h  = mfcc_delta2(row)
    mfcc_delta2_h.append(feature_set_h)

for row in tqdm(ds_c.values[0:]):
    feature_set_c  = mfcc_delta2(row)
    mfcc_delta2_c.append(feature_set_c)

    
for row in tqdm(ds_rec_h.values[0:]):
    feature_set_h  = mfcc_delta2(row)
    mfcc_delta2_rec_h.append(feature_set_h)

for row in tqdm(ds_rec_c.values[0:]):
    feature_set_c  = mfcc_delta2(row)
    mfcc_delta2_rec_c.append(feature_set_c)

  0%|          | 0/1339 [00:00<?, ?it/s]

  0%|          | 0/1140 [00:00<?, ?it/s]

  0%|          | 0/2232 [00:00<?, ?it/s]

  0%|          | 0/1642 [00:00<?, ?it/s]

In [None]:
mfcc_delta2_h_mean=[np.mean(mfcc_delta2_h,axis=0)]
mfcc_delta2_h_mean=np.array(mfcc_delta2_h_mean)
mfcc_delta2_h_covariance=np.cov(mfcc_delta2_h, rowvar=False)

mfcc_delta2_c_mean=[np.mean(mfcc_delta2_c,axis=0)]
mfcc_delta2_c_mean=np.array(mfcc_delta2_c_mean)
mfcc_delta2_c_covariance=np.cov(mfcc_delta2_c, rowvar=False)

mfcc_delta2_rec_h_mean=[np.mean(mfcc_delta2_rec_h,axis=0)]
mfcc_delta2_rec_h_mean=np.array(mfcc_delta2_rec_h_mean)
mfcc_delta2_rec_h_covariance=np.cov(mfcc_delta2_rec_h, rowvar=False)

mfcc_delta2_rec_c_mean=[np.mean(mfcc_delta2_rec_c,axis=0)]
mfcc_delta2_rec_c_mean=np.array(mfcc_delta2_rec_c_mean)
mfcc_delta2_rec_c_covariance=np.cov(mfcc_delta2_rec_c, rowvar=False)


In [None]:
print(np.shape(np.array(mfcc_delta2_h_mean)))
print(np.shape(np.array(mfcc_delta2_h_covariance)))

print(np.shape(np.array(mfcc_delta2_c_mean)))
print(np.shape(np.array(mfcc_delta2_c_covariance)))

print(np.shape(np.array(mfcc_delta2_rec_h_mean)))
print(np.shape(np.array(mfcc_delta2_rec_h_covariance)))

print(np.shape(np.array(mfcc_delta2_rec_c_mean)))
print(np.shape(np.array(mfcc_delta2_rec_c_covariance)))

(1, 20)
(20, 20)
(1, 20)
(20, 20)
(1, 20)
(20, 20)
(1, 20)
(20, 20)


----------------------------------------------------------------------------------------------------------------------------------------------------------------
**cent**
----------------------------------------------------------------------------------------------------------------------------------------------------------------

In [None]:
cent_h = []
cent_c = []
cent_rec_h = []
cent_rec_c = []

for row in tqdm(ds_h.values[0:]):
    feature_set_h  = cent(row)
    cent_h.append(feature_set_h)

for row in tqdm(ds_c.values[0:]):
    feature_set_c  = cent(row)
    cent_c.append(feature_set_c)

for row in tqdm(ds_rec_h.values[0:]):
    feature_set_h  = cent(row)
    cent_rec_h.append(feature_set_h)

for row in tqdm(ds_rec_c.values[0:]):
    feature_set_c  = cent(row)
    cent_rec_c.append(feature_set_c)

  0%|          | 0/1339 [00:00<?, ?it/s]

  0%|          | 0/1140 [00:00<?, ?it/s]

  0%|          | 0/2232 [00:00<?, ?it/s]

  0%|          | 0/1642 [00:00<?, ?it/s]

In [None]:
cent_h_mean=[np.mean(cent_h,axis=0)]
cent_h_mean=np.array(cent_h_mean)
cent_h_covariance=np.cov(cent_h, rowvar=False)
cent_h_covariance= np.array([cent_h_covariance])
cent_h_covariance= np.array([cent_h_covariance])

cent_c_mean=[np.mean(cent_c,axis=0)]
cent_c_mean=np.array(cent_c_mean)
cent_c_covariance=np.cov(cent_c, rowvar=False)
cent_c_covariance= np.array([cent_c_covariance])
cent_c_covariance= np.array([cent_c_covariance])

cent_rec_h_mean=[np.mean(cent_rec_h,axis=0)]
cent_rec_h_mean=np.array(cent_rec_h_mean)
cent_rec_h_covariance=np.cov(cent_rec_h, rowvar=False)
cent_rec_h_covariance= np.array([cent_rec_h_covariance])
cent_rec_h_covariance= np.array([cent_rec_h_covariance])

cent_rec_c_mean=[np.mean(cent_rec_c,axis=0)]
cent_rec_c_mean=np.array(cent_rec_c_mean)
cent_rec_c_covariance=np.cov(cent_rec_c, rowvar=False)
cent_rec_c_covariance= np.array([cent_rec_c_covariance])
cent_rec_c_covariance= np.array([cent_rec_c_covariance])


----------------------------------------------------------------------------------------------------------------------------------------------------------------
**bandwidth**
----------------------------------------------------------------------------------------------------------------------------------------------------------------

In [None]:
bandwidth_h = []
bandwidth_c = []
bandwidth_rec_h = []
bandwidth_rec_c = []



for row in tqdm(ds_h.values[0:]):
    feature_set_h  = bandwidth(row)
    bandwidth_h.append(feature_set_h)

for row in tqdm(ds_c.values[0:]):
    feature_set_c  = bandwidth(row)
    bandwidth_c.append(feature_set_c)
    
for row in tqdm(ds_rec_h.values[0:]):
    feature_set_h  = bandwidth(row)
    bandwidth_rec_h.append(feature_set_h)

for row in tqdm(ds_rec_c.values[0:]):
    feature_set_c  = bandwidth(row)
    bandwidth_rec_c.append(feature_set_c)

  0%|          | 0/1339 [00:00<?, ?it/s]

  0%|          | 0/1140 [00:00<?, ?it/s]

  0%|          | 0/2232 [00:00<?, ?it/s]

  0%|          | 0/1642 [00:00<?, ?it/s]

In [None]:
bandwidth_h_mean=[np.mean(bandwidth_h,axis=0)]
bandwidth_h_mean=np.array(bandwidth_h_mean)
bandwidth_h_covariance=np.cov(bandwidth_h, rowvar=False)
bandwidth_h_covariance= np.array([bandwidth_h_covariance])
bandwidth_h_covariance= np.array([bandwidth_h_covariance])

bandwidth_c_mean=[np.mean(bandwidth_c,axis=0)]
bandwidth_c_mean=np.array(bandwidth_c_mean)
bandwidth_c_covariance=np.cov(bandwidth_c, rowvar=False)
bandwidth_c_covariance= np.array([bandwidth_c_covariance])
bandwidth_c_covariance= np.array([bandwidth_c_covariance])

bandwidth_rec_h_mean=[np.mean(bandwidth_rec_h,axis=0)]
bandwidth_rec_h_mean=np.array(bandwidth_rec_h_mean)
bandwidth_rec_h_covariance=np.cov(bandwidth_rec_h, rowvar=False)
bandwidth_rec_h_covariance= np.array([bandwidth_rec_h_covariance])
bandwidth_rec_h_covariance= np.array([bandwidth_rec_h_covariance])

bandwidth_rec_c_mean=[np.mean(bandwidth_rec_c,axis=0)]
bandwidth_rec_c_mean=np.array(bandwidth_rec_c_mean)
bandwidth_rec_c_covariance=np.cov(bandwidth_rec_c, rowvar=False)
bandwidth_rec_c_covariance= np.array([bandwidth_rec_c_covariance])
bandwidth_rec_c_covariance= np.array([bandwidth_rec_c_covariance])

(1, 1)
(1, 1)
(1, 1)
(1, 1)


----------------------------------------------------------------------------------------------------------------------------------------------------------------
**contrast**
----------------------------------------------------------------------------------------------------------------------------------------------------------------

In [None]:
contrast_h = []
contrast_c = []
contrast_rec_h = []
contrast_rec_c = []


for row in tqdm(ds_h.values[0:]):
    feature_set_h  = contrast(row)
    contrast_h.append(feature_set_h)

for row in tqdm(ds_c.values[0:]):
    feature_set_c  = contrast(row)
    contrast_c.append(feature_set_c)
    
for row in tqdm(ds_rec_h.values[0:]):
    feature_set_h  = contrast(row)
    contrast_rec_h.append(feature_set_h)

for row in tqdm(ds_rec_c.values[0:]):
    feature_set_c  = contrast(row)
    contrast_rec_c.append(feature_set_c)

  0%|          | 0/1339 [00:00<?, ?it/s]

  0%|          | 0/1140 [00:00<?, ?it/s]

  0%|          | 0/2232 [00:00<?, ?it/s]

  0%|          | 0/1642 [00:00<?, ?it/s]

In [None]:
contrast_h_mean=[np.mean(contrast_h,axis=0)]
contrast_h_mean=np.array(contrast_h_mean)
contrast_h_covariance=np.cov(contrast_h, rowvar=False)

contrast_c_mean=[np.mean(contrast_c,axis=0)]
contrast_c_mean=np.array(contrast_c_mean)
contrast_c_covariance=np.cov(contrast_c, rowvar=False)

contrast_rec_h_mean=[np.mean(contrast_rec_h,axis=0)]
contrast_rec_h_mean=np.array(contrast_rec_h_mean)
contrast_rec_h_covariance=np.cov(contrast_rec_h, rowvar=False)

contrast_rec_c_mean=[np.mean(contrast_rec_c,axis=0)]
contrast_rec_c_mean=np.array(contrast_rec_c_mean)
contrast_rec_c_covariance=np.cov(contrast_rec_c, rowvar=False)


In [None]:
print(np.shape(np.array(contrast_h_mean)))
print(np.shape(np.array(contrast_h_covariance)))

print(np.shape(np.array(contrast_c_mean)))
print(np.shape(np.array(contrast_c_covariance)))

print(np.shape(np.array(contrast_rec_h_mean)))
print(np.shape(np.array(contrast_rec_h_covariance)))

print(np.shape(np.array(contrast_rec_c_mean)))
print(np.shape(np.array(contrast_rec_c_covariance)))

(1, 7)
(7, 7)
(1, 7)
(7, 7)
(1, 7)
(7, 7)
(1, 7)
(7, 7)


----------------------------------------------------------------------------------------------------------------------------------------------------------------
**flatness**
----------------------------------------------------------------------------------------------------------------------------------------------------------------

In [None]:
flatness_h = []
flatness_c = []
flatness_rec_h = []
flatness_rec_c = []

for row in tqdm(ds_h.values[0:]):
    feature_set_h  = flatness(row)
    flatness_h.append(feature_set_h)

for row in tqdm(ds_c.values[0:]):
    feature_set_c  = flatness(row)
    flatness_c.append(feature_set_c)
    
for row in tqdm(ds_rec_h.values[0:]):
    feature_set_h  = flatness(row)
    flatness_rec_h.append(feature_set_h)

for row in tqdm(ds_rec_c.values[0:]):
    feature_set_c  = flatness(row)
    flatness_rec_c.append(feature_set_c)

  0%|          | 0/1339 [00:00<?, ?it/s]

  0%|          | 0/1140 [00:00<?, ?it/s]

  0%|          | 0/2232 [00:00<?, ?it/s]

  0%|          | 0/1642 [00:00<?, ?it/s]

In [None]:
flatness_h_mean=[np.mean(flatness_h,axis=0)]
flatness_h_mean=np.array(flatness_h_mean)
flatness_h_covariance=np.cov(flatness_h, rowvar=False)
flatness_h_covariance= np.array([flatness_h_covariance])
flatness_h_covariance= np.array([flatness_h_covariance])

flatness_c_mean=[np.mean(flatness_c,axis=0)]
flatness_c_mean=np.array(flatness_c_mean)
flatness_c_covariance=np.cov(flatness_c, rowvar=False)
flatness_c_covariance= np.array([flatness_c_covariance])
flatness_c_covariance= np.array([flatness_c_covariance])

flatness_rec_h_mean=[np.mean(flatness_rec_h,axis=0)]
flatness_rec_h_mean=np.array(flatness_rec_h_mean)
flatness_rec_h_covariance=np.cov(flatness_rec_h, rowvar=False)
flatness_rec_h_covariance= np.array([flatness_rec_h_covariance])
flatness_rec_h_covariance= np.array([flatness_rec_h_covariance])

flatness_rec_c_mean=[np.mean(flatness_rec_c,axis=0)]
flatness_rec_c_mean=np.array(flatness_rec_c_mean)
flatness_rec_c_covariance=np.cov(flatness_rec_c, rowvar=False)
flatness_rec_c_covariance= np.array([flatness_rec_c_covariance])
flatness_rec_c_covariance= np.array([flatness_rec_c_covariance])


----------------------------------------------------------------------------------------------------------------------------------------------------------------
**rolloff**
----------------------------------------------------------------------------------------------------------------------------------------------------------------

In [None]:
rolloff_h = []
rolloff_c = []
rolloff_rec_h = []
rolloff_rec_c = []

for row in tqdm(ds_h.values[0:]):
    feature_set_h  = rolloff(row)
    rolloff_h .append(feature_set_h)

for row in tqdm(ds_c.values[0:]):
    feature_set_c  = rolloff(row)
    rolloff_c.append(feature_set_c)
    
for row in tqdm(ds_rec_h.values[0:]):
    feature_set_h  = rolloff(row)
    rolloff_rec_h.append(feature_set_h)

for row in tqdm(ds_rec_c.values[0:]):
    feature_set_c  = rolloff(row)
    rolloff_rec_c.append(feature_set_c)

  0%|          | 0/1339 [00:00<?, ?it/s]

  0%|          | 0/1140 [00:00<?, ?it/s]

  0%|          | 0/2232 [00:00<?, ?it/s]

  0%|          | 0/1642 [00:00<?, ?it/s]

In [None]:
rolloff_h_mean=[np.mean(rolloff_h,axis=0)]
rolloff_h_mean=np.array(rolloff_h_mean)
rolloff_h_covariance=np.cov(rolloff_h, rowvar=False)
rolloff_h_covariance= np.array([rolloff_h_covariance])
rolloff_h_covariance= np.array([rolloff_h_covariance])

rolloff_c_mean=[np.mean(rolloff_c,axis=0)]
rolloff_c_mean=np.array(rolloff_c_mean)
rolloff_c_covariance=np.cov(rolloff_c, rowvar=False)
rolloff_c_covariance= np.array([rolloff_c_covariance])
rolloff_c_covariance= np.array([rolloff_c_covariance])

rolloff_rec_h_mean=[np.mean(rolloff_rec_h,axis=0)]
rolloff_rec_h_mean=np.array(rolloff_rec_h_mean)
rolloff_rec_h_covariance=np.cov(rolloff_rec_h, rowvar=False)
rolloff_rec_h_covariance= np.array([rolloff_rec_h_covariance])
rolloff_rec_h_covariance= np.array([rolloff_rec_h_covariance])

rolloff_rec_c_mean=[np.mean(rolloff_rec_c,axis=0)]
rolloff_rec_c_mean=np.array(rolloff_rec_c_mean)
rolloff_rec_c_covariance=np.cov(rolloff_rec_c, rowvar=False)
rolloff_rec_c_covariance= np.array([rolloff_rec_c_covariance])
rolloff_rec_c_covariance= np.array([rolloff_rec_c_covariance])



----------------------------------------------------------------------------------------------------------------------------------------------------------------
**chroma**
----------------------------------------------------------------------------------------------------------------------------------------------------------------

In [None]:
chroma_h = []
chroma_c = []
chroma_rec_h = []
chroma_rec_c = []

for row in tqdm(ds_h.values[0:]):
    feature_set_h  = chroma_vec(row)
    chroma_h.append(feature_set_h)

for row in tqdm(ds_c.values[0:]):
    feature_set_c  = chroma_vec(row)
    chroma_c.append(feature_set_c)
    
for row in tqdm(ds_rec_h.values[0:]):
    feature_set_h  = chroma_vec(row)
    chroma_rec_h.append(feature_set_h)

for row in tqdm(ds_rec_c.values[0:]):
    feature_set_c  = chroma_vec(row)
    chroma_rec_c.append(feature_set_c)

  0%|          | 0/1339 [00:00<?, ?it/s]

  0%|          | 0/1140 [00:00<?, ?it/s]

  0%|          | 0/2232 [00:00<?, ?it/s]



  0%|          | 0/1642 [00:00<?, ?it/s]

In [None]:
chroma_h_mean=[np.mean(chroma_h,axis=0)]
chroma_h_mean=np.array(chroma_h_mean)
chroma_h_covariance=np.cov(chroma_h, rowvar=False)

chroma_c_mean=[np.mean(chroma_c,axis=0)]
chroma_c_mean=np.array(chroma_c_mean)
chroma_c_covariance=np.cov(chroma_c, rowvar=False)

chroma_rec_h_mean=[np.mean(chroma_rec_h,axis=0)]
chroma_rec_h_mean=np.array(chroma_rec_h_mean)
chroma_rec_h_covariance=np.cov(chroma_rec_h, rowvar=False)

chroma_rec_c_mean=[np.mean(chroma_rec_c,axis=0)]
chroma_rec_c_mean=np.array(chroma_rec_c_mean)
chroma_rec_c_covariance=np.cov(chroma_rec_c, rowvar=False)


----------------------------------------------------------------------------------------------------------------------------------------------------------------
**zero crossing**
----------------------------------------------------------------------------------------------------------------------------------------------------------------

In [None]:
zero_h = []
zero_c = []
zero_rec_h = []
zero_rec_c = []

for row in tqdm(ds_h.values[0:]):
    feature_set_h  = zero(row)
    zero_h.append(feature_set_h)

for row in tqdm(ds_c.values[0:]):
    feature_set_c  = zero(row)
    zero_c.append(feature_set_c)
    
for row in tqdm(ds_rec_h.values[0:]):
    feature_set_h  = zero(row)
    zero_rec_h.append(feature_set_h)

for row in tqdm(ds_rec_c.values[0:]):
    feature_set_c  = zero(row)
    zero_rec_c.append(feature_set_c)

NameError: ignored

In [None]:
zero_h_mean=[np.mean(zero_h,axis=0)]
zero_h_mean=np.array(zero_h_mean)
zero_h_covariance=np.cov(zero_h, rowvar=False)
zero_h_covariance= np.array([zero_h_covariance])
zero_h_covariance= np.array([zero_h_covariance])


zero_c_mean=[np.mean(zero_c,axis=0)]
zero_c_mean=np.array(zero_c_mean)
zero_c_covariance=np.cov(zero_c, rowvar=False)
zero_c_covariance= np.array([zero_c_covariance])
zero_c_covariance= np.array([zero_c_covariance])


zero_rec_h_mean=[np.mean(zero_rec_h,axis=0)]
zero_rec_h_mean=np.array(zero_rec_h_mean)
zero_rec_h_covariance=np.cov(zero_rec_h, rowvar=False)
zero_rec_h_covariance= np.array([zero_rec_h_covariance])
zero_rec_h_covariance= np.array([zero_rec_h_covariance])


zero_rec_c_mean=[np.mean(zero_rec_c,axis=0)]
zero_rec_c_mean=np.array(zero_rec_c_mean)
zero_rec_c_covariance=np.cov(zero_rec_c, rowvar=False)
zero_rec_c_covariance= np.array([zero_rec_c_covariance])
zero_rec_c_covariance= np.array([zero_rec_c_covariance])



----------------------------------------------------------------------------------------------------------------------------------------------------------------
**Rms energy**
----------------------------------------------------------------------------------------------------------------------------------------------------------------

In [None]:
rms_h = []
rms_c = []
rms_rec_h = []
rms_rec_c = []

for row in tqdm(ds_h.values[0:]):
    feature_set_h  = rms(row)
    rms_h.append(feature_set_h)

for row in tqdm(ds_c.values[0:]):
    feature_set_c  = rms(row)
    rms_c.append(feature_set_c)

for row in tqdm(ds_rec_h.values[0:]):
    feature_set_h  = rms(row)
    rms_rec_h.append(feature_set_h)

for row in tqdm(ds_rec_c.values[0:]):
    feature_set_c  = rms(row)
    rms_rec_c.append(feature_set_c)

  0%|          | 0/1339 [00:00<?, ?it/s]

  0%|          | 0/1140 [00:00<?, ?it/s]

  0%|          | 0/2232 [00:00<?, ?it/s]

  0%|          | 0/1642 [00:00<?, ?it/s]

In [None]:
rms_h_mean=[np.mean(rms_h,axis=0)]
rms_h_mean=np.array(rms_h_mean)
rms_h_covariance=np.cov(rms_h, rowvar=False)
rms_h_covariance= np.array([rms_h_covariance])
rms_h_covariance= np.array([rms_h_covariance])


rms_c_mean=[np.mean(rms_c,axis=0)]
rms_c_mean=np.array(rms_c_mean)
rms_c_covariance=np.cov(rms_c, rowvar=False)
rms_c_covariance= np.array([rms_c_covariance])
rms_c_covariance= np.array([rms_c_covariance])


rms_rec_h_mean=[np.mean(rms_rec_h,axis=0)]
rms_rec_h_mean=np.array(rms_rec_h_mean)
rms_rec_h_covariance=np.cov(rms_rec_h, rowvar=False)
rms_rec_h_covariance= np.array([rms_rec_h_covariance])
rms_rec_h_covariance= np.array([rms_rec_h_covariance])


rms_rec_c_mean=[np.mean(rms_rec_c,axis=0)]
rms_rec_c_mean=np.array(rms_rec_c_mean)
rms_rec_c_covariance=np.cov(rms_rec_c, rowvar=False)
rms_rec_c_covariance= np.array([rms_rec_c_covariance])
rms_rec_c_covariance= np.array([rms_rec_c_covariance])



----------------------------------------------------------------------------------------------------------------------------------------------------------------
**onset**
----------------------------------------------------------------------------------------------------------------------------------------------------------------

In [None]:
onset_h = []
onset_c = []
onset_rec_h = []
onset_rec_c = []

for row in tqdm(ds_h.values[0:]):
    feature_set_h  = onset(row)
    onset_h.append(feature_set_h)

for row in tqdm(ds_c.values[0:]):
    feature_set_c  = onset(row)
    onset_c.append(feature_set_c)    

for row in tqdm(ds_rec_h.values[0:]):
    feature_set_h  = onset(row)
    onset_rec_h.append(feature_set_h)

for row in tqdm(ds_rec_c.values[0:]):
    feature_set_c  = onset(row)
    onset_rec_c.append(feature_set_c)

  0%|          | 0/1339 [00:00<?, ?it/s]

  0%|          | 0/1140 [00:00<?, ?it/s]

  0%|          | 0/2232 [00:00<?, ?it/s]

  0%|          | 0/1642 [00:00<?, ?it/s]

In [None]:
onset_h_mean=[np.mean(onset_h,axis=0)]
onset_h_mean=np.array(onset_h_mean)
onset_h_mean= np.array([onset_h_mean])
onset_h_covariance=np.cov(onset_h, rowvar=False)
onset_h_covariance= np.array([onset_h_covariance])
onset_h_covariance= np.array([onset_h_covariance])


onset_c_mean=[np.mean(onset_c,axis=0)]
onset_c_mean=np.array(onset_c_mean)
onset_c_mean= np.array([onset_c_mean])
onset_c_covariance=np.cov(onset_c, rowvar=False)
onset_c_covariance= np.array([onset_c_covariance])
onset_c_covariance= np.array([onset_c_covariance])


onset_rec_h_mean=[np.mean(onset_rec_h,axis=0)]
onset_rec_h_mean=np.array(onset_rec_h_mean)
onset_rec_h_mean= np.array([onset_rec_h_mean])
onset_rec_h_covariance=np.cov(onset_rec_h, rowvar=False)
onset_rec_h_covariance= np.array([onset_rec_h_covariance])
onset_rec_h_covariance= np.array([onset_rec_h_covariance])


onset_rec_c_mean=[np.mean(onset_rec_c,axis=0)]
onset_rec_c_mean=np.array(onset_rec_c_mean)
onset_rec_c_mean= np.array([onset_rec_c_mean])
onset_rec_c_covariance=np.cov(onset_rec_c, rowvar=False)
onset_rec_c_covariance= np.array([onset_rec_c_covariance])
onset_rec_c_covariance= np.array([onset_rec_c_covariance])



In [None]:
print(np.shape(np.array(onset_rec_c_mean)))
print(np.shape(np.array(onset_rec_c_covariance)))
print(np.shape(np.array(onset_rec_h_mean)))
print(np.shape(np.array(onset_rec_h_covariance)))

(1,)
(1, 1)
(1,)
(1, 1)


--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
# **분리도 계산**
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------

In [19]:
def bhattacharyya_gaussian_distance(distribution1: "dict", distribution2: "dict",) -> int:
    """ Estimate Bhattacharyya Distance (between Gaussian Distributions)
    
    Args:
        distribution1: a sample gaussian distribution 1
        distribution2: a sample gaussian distribution 2
    
    Returns:
        Bhattacharyya distance
    """
    mean1 = distribution1["mean"]
    cov1 = distribution1["covariance"]

    mean2 = distribution2["mean"]
    cov2 = distribution2["covariance"]

    cov = (1 / 2) * (cov1 + cov2)

    T1 = (1 / 8) * (
        np.sqrt((mean1 - mean2) @ np.linalg.inv(cov) @ (mean1 - mean2).T)[0][0]
    )
    T2 = (1 / 2) * np.log(
        np.linalg.det(cov) / np.sqrt(np.linalg.det(cov1) * np.linalg.det(cov2))
    )

    return T1+T2

def seperability_distance(healthy_mean, covid_mean, healthy_covariance, covid_covariance):
    # Distribution 1
    distribution1 = {
        "mean": covid_mean,
        "covariance": covid_covariance,
    }


    # Distribution 2
    distribution2 = {
        "mean": healthy_mean,
        "covariance": healthy_covariance,
    }

    distance=bhattacharyya_gaussian_distance(distribution1,distribution2)

    return distance

--------------------------------------------------------------------------------
**원본데이터**
--------------------------------------------------------------------------------

In [None]:

mfcc_distance=seperability_distance(mfcc_h_mean, mfcc_c_mean, mfcc_h_covariance, mfcc_c_covariance)
mfcc_delta_distance=seperability_distance(mfcc_delta_h_mean, mfcc_delta_c_mean, mfcc_delta_h_covariance, mfcc_delta_c_covariance)
mfcc_delta2_distance=seperability_distance(mfcc_delta2_h_mean, mfcc_delta2_c_mean, mfcc_delta2_h_covariance, mfcc_delta2_c_covariance)
cent_distance=seperability_distance(cent_h_mean, cent_c_mean, cent_h_covariance, cent_c_covariance)
bandwidth_distance=seperability_distance( bandwidth_h_mean, bandwidth_c_mean, bandwidth_h_covariance, bandwidth_c_covariance)
contrast_distance=seperability_distance(contrast_h_mean, contrast_c_mean, contrast_h_covariance, contrast_c_covariance)
flatness_distance=seperability_distance(flatness_h_mean, flatness_c_mean, flatness_h_covariance, flatness_c_covariance)
rolloff_distance=seperability_distance(rolloff_h_mean, rolloff_c_mean, rolloff_h_covariance, rolloff_c_covariance)
chroma_distance=seperability_distance(chroma_h_mean, chroma_c_mean, chroma_h_covariance, chroma_c_covariance)
zero_distance=seperability_distance(zero_h_mean, zero_c_mean, zero_h_covariance, zero_c_covariance)
rms_distance=seperability_distance(rms_h_mean, rms_c_mean, rms_h_covariance, rms_c_covariance)
onset_distance=seperability_distance(onset_h_mean, onset_c_mean, onset_h_covariance,onset_c_covariance)


In [None]:
bahttacharyya_result = {}
bahttacharyya_result['mfcc']=mfcc_distance
bahttacharyya_result['mfcc_delta']=mfcc_delta_distance
bahttacharyya_result['mfcc_delta2']=mfcc_delta2_distance
bahttacharyya_result['cent']=cent_distance
bahttacharyya_result['bandwidth']=bandwidth_distance
bahttacharyya_result['contrast']=contrast_distance
bahttacharyya_result['flatness']=flatness_distance
bahttacharyya_result['rolloff']=rolloff_distance
bahttacharyya_result['chroma']=chroma_distance
bahttacharyya_result['zero']=zero_distance
bahttacharyya_result['rms']=rms_distance
bahttacharyya_result['onset']=onset_distance

In [None]:
bahttacharyya_result

{'mfcc': 0.24105369517574549,
 'mfcc_delta': 0.21735220629336324,
 'mfcc_delta2': 0.23487640474607283,
 'cent': 0.027497290213598414,
 'bandwidth': 0.043192327408589595,
 'contrast': 0.04141744317798871,
 'flatness': 0.08680578860357001,
 'rolloff': 0.03086076317415566,
 'chroma': 0.05913325749139768,
 'zero': 0.01656492921943696,
 'rms': 0.02211691695499817,
 'onset': 0.013689682709757237}

--------------------------------------------------------------------------------
**record데이터**
--------------------------------------------------------------------------------

In [None]:

rec_mfcc_distance=seperability_distance(mfcc_rec_h_mean, mfcc_rec_c_mean, mfcc_rec_h_covariance, mfcc_rec_c_covariance)
rec_mfcc_delta_distance=seperability_distance(mfcc_delta_rec_h_mean, mfcc_delta_rec_c_mean, mfcc_delta_rec_h_covariance, mfcc_delta_rec_c_covariance)
rec_mfcc_delta2_distance=seperability_distance(mfcc_delta2_rec_h_mean, mfcc_delta2_rec_c_mean, mfcc_delta2_rec_h_covariance, mfcc_delta2_rec_c_covariance)
rec_cent_distance=seperability_distance(cent_rec_h_mean, cent_rec_c_mean, cent_rec_h_covariance, cent_rec_c_covariance)
rec_bandwidth_distance=seperability_distance( bandwidth_rec_h_mean, bandwidth_rec_c_mean, bandwidth_rec_h_covariance, bandwidth_rec_c_covariance)
rec_contrast_distance=seperability_distance(contrast_rec_h_mean, contrast_rec_c_mean, contrast_rec_h_covariance, contrast_rec_c_covariance)
rec_flatness_distance=seperability_distance(flatness_rec_h_mean, flatness_rec_c_mean, flatness_rec_h_covariance, flatness_rec_c_covariance)
rec_rolloff_distance=seperability_distance(rolloff_rec_h_mean, rolloff_rec_c_mean, rolloff_rec_h_covariance, rolloff_rec_c_covariance)
rec_chroma_distance=seperability_distance(chroma_rec_h_mean, chroma_rec_c_mean, chroma_rec_h_covariance, chroma_rec_c_covariance)
rec_zero_distance=seperability_distance(zero_rec_h_mean, zero_rec_c_mean, zero_rec_h_covariance, zero_rec_c_covariance)
rec_rms_distance=seperability_distance(rms_rec_h_mean, rms_rec_c_mean, rms_rec_h_covariance, rms_rec_c_covariance)
rec_onset_distance=seperability_distance(onset_rec_h_mean, onset_rec_c_mean, onset_rec_h_covariance,onset_rec_c_covariance)


In [20]:
rec_mfcc_distance=seperability_distance(mfcc_rec_h_mean, mfcc_rec_c_mean, mfcc_rec_h_covariance, mfcc_rec_c_covariance)


In [None]:
rec_bahttacharyya_result = {}
rec_bahttacharyya_result['mfcc']=rec_mfcc_distance
rec_bahttacharyya_result['mfcc_delta']=rec_mfcc_delta_distance
rec_bahttacharyya_result['mfcc_delta2']=rec_mfcc_delta2_distance
rec_bahttacharyya_result['cent']=rec_cent_distance
rec_bahttacharyya_result['bandwidth']=rec_bandwidth_distance
rec_bahttacharyya_result['contrast']=rec_contrast_distance
rec_bahttacharyya_result['flatness']=rec_flatness_distance
rec_bahttacharyya_result['rolloff']=rec_rolloff_distance
rec_bahttacharyya_result['chroma']=rec_chroma_distance
rec_bahttacharyya_result['zero']=rec_zero_distance
rec_bahttacharyya_result['rms']=rec_rms_distance
rec_bahttacharyya_result['onset']=rec_onset_distance

In [22]:
print(rec_mfcc_distance)

0.35276120030685787


In [None]:
rec_bahttacharyya_result

{'mfcc': 0.5525202959574513,
 'mfcc_delta': 0.18498231556933442,
 'mfcc_delta2': 0.2734030219626781,
 'cent': 0.013747707640000684,
 'bandwidth': 0.08373366819254187,
 'contrast': 0.093360296095243,
 'flatness': 0.06439260760418529,
 'rolloff': 0.01895610413183236,
 'chroma': 0.12998467222845916,
 'zero': 0.026312180304328334,
 'rms': 0.006780021716321854,
 'onset': 0.02032794654200262}