In [1]:
import os
import glob
import librosa
import matplotlib.pyplot as plt
import numpy as np
import sys
from tqdm import tqdm
from scipy.spatial.distance import cosine
import pandas as pd

In [2]:
# Root of the dataset, resolved against the notebook's working directory.
dataset_maindir = os.path.join(os.getcwd(), 'dataset')

# Unlabeled clips. The pattern is built from dataset_maindir instead of a
# Windows drive-relative literal ('C:./dataset/...') so it works on any
# drive/OS, and the result is sorted because glob returns matches in an
# unspecified order while later cells label clips by their list position.
audio_path_unlabeled = sorted(
    glob.glob(os.path.join(dataset_maindir, 'audioonly', 'unlabeled', '*.wav'))
)

# Immediate sub-directories of the dataset root (displayed as the cell output).
sub_dir_name = next(os.walk(dataset_maindir))[1]
sub_dir_name

['.ipynb_checkpoints', 'audioonly', 'spectrograms']

In [3]:
# Select the audio folder by name. Indexing the directory listing by position
# only worked while '.ipynb_checkpoints' happened to occupy index 0.
sub_dir_path = os.path.join(dataset_maindir, 'audioonly')

# Sorted so the listing does not depend on filesystem enumeration order.
sub_dir_subpath = sorted(next(os.walk(sub_dir_path))[1])
sub_dir_subpath

['labeled', 'unlabeled']

In [4]:
# Select the labeled folder by name rather than by its position in the listing.
class_folder_path = os.path.join(sub_dir_path, 'labeled')

# One sub-folder per class. Sorted so the class order is deterministic, and
# hidden folders (e.g. '.ipynb_checkpoints') are skipped so they are never
# mistaken for a class.
class_lst = sorted(
    name for name in next(os.walk(class_folder_path))[1]
    if not name.startswith('.')
)

# Full path of every class folder, in the same order as class_lst.
# (The loop variable no longer shadows the builtin `type`.)
class_path = [os.path.join(class_folder_path, class_name) for class_name in class_lst]
class_path

['C:\\Users\\dave\\aiffel\\EUANGGG\\maincode\\data\\dataset\\audioonly\\labeled\\belly_pain',
 'C:\\Users\\dave\\aiffel\\EUANGGG\\maincode\\data\\dataset\\audioonly\\labeled\\burping',
 'C:\\Users\\dave\\aiffel\\EUANGGG\\maincode\\data\\dataset\\audioonly\\labeled\\discomfort',
 'C:\\Users\\dave\\aiffel\\EUANGGG\\maincode\\data\\dataset\\audioonly\\labeled\\hungry',
 'C:\\Users\\dave\\aiffel\\EUANGGG\\maincode\\data\\dataset\\audioonly\\labeled\\tired']

In [5]:
# For every class folder, collect the .wav files it contains
# (one inner list per entry of class_path, in the same order).
audio_path_labeled = [
    glob.glob(os.path.join(class_dir, '*.wav'))
    for class_dir in class_path
]

In [6]:
# Look each class up by its folder name instead of by list position.
# audio_path_labeled is built from class_path, which is built from class_lst,
# so the two lists are aligned; keying by name means a different directory
# enumeration order (or an extra folder) can no longer silently swap classes.
# A missing class folder now fails loudly with a KeyError.
paths_by_class = dict(zip(class_lst, audio_path_labeled))

bellypain_path = paths_by_class['belly_pain']
burping_path = paths_by_class['burping']
discomfort_path = paths_by_class['discomfort']
hungry_path = paths_by_class['hungry']
tired_path = paths_by_class['tired']

In [7]:
# Belly-pain clips -> one MFCC matrix per file (each file loaded with sr=44100).
spectro_belly = [
    librosa.feature.mfcc(y=signal, sr=rate)
    for signal, rate in (librosa.load(wav, sr=44100) for wav in bellypain_path)
]

In [8]:
# Burping clips -> one MFCC matrix per file (each file loaded with sr=44100).
spectro_burp = [
    librosa.feature.mfcc(y=signal, sr=rate)
    for signal, rate in (librosa.load(wav, sr=44100) for wav in burping_path)
]

In [9]:
# Discomfort clips -> one MFCC matrix per file (each file loaded with sr=44100).
spectro_discomfort = [
    librosa.feature.mfcc(y=signal, sr=rate)
    for signal, rate in (librosa.load(wav, sr=44100) for wav in discomfort_path)
]

In [10]:
# Hungry clips -> one MFCC matrix per file (each file loaded with sr=44100).
spectro_hungry = [
    librosa.feature.mfcc(y=signal, sr=rate)
    for signal, rate in (librosa.load(wav, sr=44100) for wav in hungry_path)
]

In [11]:
# Tired clips -> one MFCC matrix per file (each file loaded with sr=44100).
spectro_tired = [
    librosa.feature.mfcc(y=signal, sr=rate)
    for signal, rate in (librosa.load(wav, sr=44100) for wav in tired_path)
]

In [12]:
# Unlabeled clips -> one MFCC matrix per file (each file loaded with sr=44100).
# tqdm wraps the path list so progress is shown while the files are processed.
spectro_unlabeled = [
    librosa.feature.mfcc(y=signal, sr=rate)
    for signal, rate in (
        librosa.load(wav, sr=44100) for wav in tqdm(audio_path_unlabeled)
    )
]

100%|████████████████████████████████████████████████████████████████████████████| 18190/18190 [04:06<00:00, 73.66it/s]


In [13]:
# Resize and vectorize the labeled and unlabeled MFCCs: every matrix is
# flattened and only its first 160 values are kept.
# NOTE(review): if each matrix is laid out as (coefficients, frames), a
# row-major flatten followed by [:160] keeps mostly the first coefficient's
# frames rather than a slice across all coefficients — confirm this is intended.
(
    spectro_belly_resized,
    spectro_burp_resized,
    spectro_discomfort_resized,
    spectro_hungry_resized,
    spectro_tired_resized,
    spectro_unlabeled_resized,
) = (
    [np.array(matrix).flatten()[:160] for matrix in group]
    for group in (
        spectro_belly,
        spectro_burp,
        spectro_discomfort,
        spectro_hungry,
        spectro_tired,
        spectro_unlabeled,
    )
)

In [14]:
# Mean-vector extraction
def mean_vector(vectors):
    """Return the element-wise mean of a collection of equal-length vectors.

    Args:
        vectors: sequence of vectors (or a 2-D array) with one vector per row.

    Returns:
        The average taken along axis 0, i.e. one value per vector position.
    """
    centroid = np.mean(vectors, axis=0)
    return centroid

# One mean vector per labeled class, computed from that class's resized vectors.
(
    mean_bellypain,
    mean_burp,
    mean_discomfort,
    mean_hungry,
    mean_tired,
) = map(
    mean_vector,
    (
        spectro_belly_resized,
        spectro_burp_resized,
        spectro_discomfort_resized,
        spectro_hungry_resized,
        spectro_tired_resized,
    ),
)

In [16]:
def euclidean_distance(vec1, vec2):
    """Return the Euclidean (L2) distance between two vectors.

    Args:
        vec1: first vector (any array-like).
        vec2: second vector (any array-like compatible with vec1).

    Returns:
        The norm of the element-wise difference vec1 - vec2.
    """
    delta = np.array(vec1) - np.array(vec2)
    return np.linalg.norm(delta)

In [18]:
# For every unlabeled vector, store its distance to each class mean vector
# as one dictionary (one record per clip, labeled by its list position).
distance_result = [
    {
        'Index': f'unlabeled_audio{position}',
        'Bellypain': euclidean_distance(vector, mean_bellypain),
        'Burp': euclidean_distance(vector, mean_burp),
        'Discomfort': euclidean_distance(vector, mean_discomfort),
        'Hungry': euclidean_distance(vector, mean_hungry),
        'Tired': euclidean_distance(vector, mean_tired),
    }
    for position, vector in enumerate(spectro_unlabeled_resized)
]

In [19]:
# Tabulate the per-clip distance records: one row per unlabeled clip,
# one column per dictionary key.
dist_df = pd.DataFrame(data=distance_result)
dist_df

Unnamed: 0,Index,Bellypain,Burp,Discomfort,Hungry,Tired
0,unlabeled_audio0,4943.313965,4056.430664,4684.721191,4582.785156,4944.341797
1,unlabeled_audio1,4517.753418,3739.772461,4354.932617,4208.364258,4542.059570
2,unlabeled_audio2,4900.624512,4499.251465,4890.862305,4719.550781,4968.603516
3,unlabeled_audio3,1550.205688,1735.352905,1509.697388,1488.472412,1526.140015
4,unlabeled_audio4,5002.562500,4163.729980,4792.176270,4672.087402,5014.145508
...,...,...,...,...,...,...
18185,unlabeled_audio18185,4593.085449,3983.428711,4520.574219,4361.925781,4642.105957
18186,unlabeled_audio18186,5077.975586,4348.554688,4950.985840,4799.630371,5117.282715
18187,unlabeled_audio18187,4670.651855,3991.191406,4568.742676,4419.137695,4708.166504
18188,unlabeled_audio18188,4703.713379,4016.013428,4592.695312,4442.621582,4741.041992


In [20]:
# Summary statistics of dist_df (displayed as the cell output).
dist_df.describe()

Unnamed: 0,Bellypain,Burp,Discomfort,Hungry,Tired
count,18190.0,18190.0,18190.0,18190.0,18190.0
mean,4768.387207,4038.070068,4556.151367,4470.983398,4758.709473
std,1075.59082,661.28302,940.138489,925.432983,1063.633545
min,262.516205,568.653564,297.728851,271.154938,308.292664
25%,4728.594604,3858.164246,4486.509155,4390.183228,4729.316895
50%,5100.633789,4171.277344,4823.122803,4733.789062,5089.319092
75%,5370.275391,4427.498413,5069.781372,4999.195557,5346.262817
max,6210.287109,5628.781738,6088.222656,5936.377441,6253.121582


In [21]:
# Keep only the clips whose distance to every one of the five class mean
# vectors is below 1000.
cond = (
    dist_df[['Bellypain', 'Burp', 'Discomfort', 'Hungry', 'Tired']] < 1000
).all(axis=1)
filtered_df = dist_df.loc[cond]
filtered_df

Unnamed: 0,Index,Bellypain,Burp,Discomfort,Hungry,Tired
345,unlabeled_audio345,652.893799,926.90802,533.046814,527.121765,651.57312
3417,unlabeled_audio3417,573.102539,803.121826,354.866516,271.154938,503.653259
3421,unlabeled_audio3421,516.566406,945.776917,497.487701,381.996124,546.382751
5680,unlabeled_audio5680,799.442261,918.438904,729.831482,521.970459,821.797058
6225,unlabeled_audio6225,911.008301,918.341431,611.794006,641.56897,823.112183
7370,unlabeled_audio7370,555.038269,925.298767,409.079773,395.413025,554.760681
8580,unlabeled_audio8580,610.177612,879.654785,446.242828,420.297852,576.224731
8586,unlabeled_audio8586,793.117981,923.623779,603.117554,656.065552,737.624146
9688,unlabeled_audio9688,554.476807,959.599731,480.716217,482.265778,531.025452
9963,unlabeled_audio9963,842.661682,951.055115,732.651306,734.493896,809.848816


In [22]:
# Lower and upper bounds used when scaling the distances. They roughly match
# the smallest and largest values reported by dist_df.describe().
min_val, max_val = 262, 6253

# Define the inverse scaling function
def inverse_scaling(x, min_val, max_val):
    """Map a value onto [0, 1] so that smaller inputs receive larger scores.

    The input is first clipped to [min_val, max_val]; min_val then maps to 1.0
    and max_val maps to 0.0, linearly in between.

    Args:
        x: value(s) to scale (scalar or anything np.clip accepts).
        min_val: lower bound; inputs at or below it score 1.0.
        max_val: upper bound; inputs at or above it score 0.0.

    Returns:
        (max_val - clipped_x) / (max_val - min_val).

    Raises:
        ValueError: if max_val is not strictly greater than min_val. The range
            would be empty and the division would otherwise yield nan/inf or
            a ZeroDivisionError.
    """
    if np.any(np.less_equal(max_val, min_val)):
        raise ValueError(
            f"max_val ({max_val}) must be greater than min_val ({min_val})"
        )
    clipped = np.clip(x, min_val, max_val)
    # Apply the inverse scaling
    return (max_val - clipped) / (max_val - min_val)

In [24]:
# Convert the five distance columns into scores in [0, 1] (closer -> higher).
# inverse_scaling only uses np.clip and arithmetic, so it is applied to the
# whole sub-frame in one vectorised call instead of cell by cell through
# DataFrame.applymap, which pandas has deprecated (it emits a FutureWarning).
scaled_df = inverse_scaling(
    dist_df[['Bellypain', 'Burp', 'Discomfort', 'Hungry', 'Tired']],
    min_val,
    max_val,
)
scaled_df

  'Tired']].applymap(lambda x: inverse_scaling(x, min_val, max_val))


Unnamed: 0,Bellypain,Burp,Discomfort,Hungry,Tired
0,0.218609,0.366645,0.261772,0.278787,0.218437
1,0.289642,0.419501,0.316820,0.341285,0.285585
2,0.225735,0.292731,0.227364,0.255959,0.214388
3,0.784977,0.754072,0.791738,0.795281,0.788993
4,0.208719,0.348735,0.243836,0.263881,0.206786
...,...,...,...,...,...
18185,0.277068,0.378830,0.289171,0.315653,0.268886
18186,0.196132,0.317884,0.217328,0.242592,0.189571
18187,0.264121,0.377534,0.281131,0.306103,0.257859
18188,0.258602,0.373391,0.277133,0.302183,0.252372


In [34]:
# Rows where only the 'Tired' score exceeds 0.80 while the other four class
# scores all stay below 0.80.
cond_1 = (
    (scaled_df[['Bellypain', 'Hungry', 'Burp', 'Discomfort']] < 0.80).all(axis=1)
    & (scaled_df['Tired'] > 0.80)
)
scaled_df.loc[cond_1]

Unnamed: 0,Bellypain,Burp,Discomfort,Hungry,Tired
10870,0.792489,0.654569,0.773577,0.745996,0.802572


In [37]:
# Persist the scaled scores as CSV.
# NOTE(review): absolute, machine-specific path — consider making it relative.
scaled_df.to_csv(
    path_or_buf=r'C:\Users\dave\aiffel\EUANGGG\maincode\data\dist_btw_lab_unlab.csv'
)