# 필요한 라이브러리만 임포트
import numpy as np  # 다차원 배열을 다루기 위한 라이브러리
import librosa  # 음악 및 오디오 분석을 위한 라이브러리
import os  # 운영 체제와 상호 작용하기 위한 라이브러리, 파일 경로 조작 등
import tensorflow as tf  # 머신러닝 및 신경망을 위한 라이브러리
import pandas as pd  # 데이터 분석 및 조작을 위한 라이브러리
from IPython.display import clear_output  # Jupyter 노트북의 출력을 지우기 위한 함수
import matplotlib.pyplot as plt  # 데이터 시각화를 위한 라이브러리

# Jupyter 노트북에서 matplotlib의 그래프를 인라인으로 표시하도록 설정
%matplotlib inline  

import seaborn as sns  # matplotlib 기반의 고급 시각화 라이브러리
import torch  # 딥러닝 프레임워크 중 하나
from sklearn.model_selection import train_test_split  # 데이터를 훈련 세트와 테스트 세트로 분할하기 위한 함수
from sklearn.metrics import accuracy_score, precision_score, recall_score  # 모델 평가를 위한 성능 지표 계산 함수
from tensorflow.keras import layers, losses  # TensorFlow의 케라스 API를 사용하여 신경망의 층과 손실 함수를 정의
from tensorflow.keras.models import Model  # TensorFlow의 케라스 API를 사용하여 모델을 정의 및 관리
from tensorflow.python.ops.numpy_ops import np_config  # TensorFlow에서 numpy와의 호환성을 위한 설정
np_config.enable_numpy_behavior()  # TensorFlow에서 numpy와 유사한 동작을 활성화

# 사용자 정의 유틸리티 함수 임포트
from util import play_audio, load_audio, get_features, get_mfcc, get_lmfe, get_chroma, plot_chroma, plot_mfcc, plot_lmfe
# 오디오 데이터를 다루기 위한 사용자 정의 함수들 (재생, 로드, 특징 추출 등)

# Seaborn style configuration
sns.set(style="white", palette="muted")  # Set the default seaborn theme: "white" style with the "muted" palette
sns.set_style("ticks", {"xtick.major.size": 8, "ytick.major.size": 8})  # Switch to the "ticks" style and set the major tick size to 8 on both axes

import json
from collections import Counter

def summarize_json(data, indent=0, key_prefix=""):
    """
    Print a compact structural summary of JSON-like data.

    Dicts are printed as ``{ ... }`` with one ``"key": `` label per entry;
    the summary of the value continues on the same line, after its own
    indentation. Lists are printed as ``[ ... ]`` with one
    ``<type name> x <count>`` line per element type, followed by the
    summary of the first element as an example and ``...`` when the list
    has more than one element. Any other value is printed as its type name.

    Args:
        data: The value to summarize (dict, list, or any other object).
        indent: Number of leading spaces for this nesting level.
        key_prefix: Slash-separated path of the dict keys leading to
            ``data``. It is only threaded through the recursion and is
            never printed.

    Returns:
        None. The summary is written to standard output.
    """
    pad = ' ' * indent
    inner_pad = ' ' * (indent + 2)

    if isinstance(data, dict):
        print(pad + '{')
        for key, value in data.items():
            print(inner_pad + f'"{key}": ', end='')
            # Format the key into the path instead of concatenating it, so
            # non-string keys (e.g. ints in a plain Python dict) do not
            # raise TypeError.
            summarize_json(value, indent + 2, key_prefix=f"{key_prefix}{key}/")
        print(pad + '}')
    elif isinstance(data, list):
        print(pad + '[')
        if data:
            # Count elements per type name, in order of first appearance.
            item_types = Counter(type(item).__name__ for item in data)
            for item_type, count in item_types.items():
                print(inner_pad + f'{item_type} x {count}')
            # Only the first element is expanded as a representative example.
            summarize_json(data[0], indent + 2, key_prefix=key_prefix)
            if len(data) > 1:
                print(inner_pad + '...')
        print(pad + ']')
    else:
        print(pad + f'{type(data).__name__}')


위에 안 돌리고 prepare_dataset 을 테스트해보자

In [1]:
import sys
sys.path.append("python/")

from prepare_dataset import get_data_paths_and_labels_from_machine as get_data_from_machine

In [2]:
# from prepare_dataset import get_from_mimii
# "/mnt/d/silofox/paad/anomaly-example/exploring-AAD/notebooks/data/MIMII/data_-6_db/fan/id_04/normal/"

In [3]:
# base_dir = "/mnt/d/silofox/paad/anomaly-example/exploring-AAD/notebooks/data/"

In [4]:
fan_data_path, fan_data_label = get_data_from_machine("fan")

base_dir is  /mnt/d/silofox/paad/anomaly-example/exploring-AAD/notebooks/data
['data', 'DCASE', '2020', 'dev', 'fan', 'train']
['data', 'DCASE', '2020', 'dev', 'fan', 'test']
['data', 'DCASE', '2020', 'eval', 'fan', 'test']
['data', 'DCASE', '2020', 'add', 'fan', 'train']
['data', 'DCASE', '2021', 'dev', 'fan', 'train']
['data', 'DCASE', '2021', 'dev', 'fan', 'test']
['data', 'DCASE', '2021', 'eval', 'fan', 'test']
['data', 'DCASE', '2021', 'add', 'fan', 'train']
['data', 'DCASE', '2022', 'dev', 'fan', 'train']
['data', 'DCASE', '2022', 'dev', 'fan', 'test']
['data', 'DCASE', '2022', 'eval', 'fan', 'test']
['data', 'DCASE', '2022', 'add', 'fan', 'train']
['data', 'DCASE', '2023', 'dev', 'fan', 'train']
['data', 'DCASE', '2023', 'dev', 'fan', 'test']
['data', 'DCASE', '2024', 'dev', 'fan', 'train']
['data', 'DCASE', '2024', 'dev', 'fan', 'test']
['data', 'MIMII', 'data_-6_db', 'fan', 'id_00', 'normal']
['data', 'MIMII', 'data_-6_db', 'fan', 'id_00', 'abnormal']
['data', 'MIMII', 'data_-

In [5]:
specific_fan_data = fan_data_path["MIMII"]["data_-6_db"]

The selected data is organized by:

condition: normal or abnormal

machine id: id_00, id_02, id_04, id_06

In [6]:
specific_fan_data.keys()

dict_keys(['normal', 'abnormal', 'unknown'])

In [7]:
specific_fan_data

{'normal': ['/mnt/d/silofox/paad/anomaly-example/exploring-AAD/notebooks/data/MIMII/data_-6_db/fan/id_00/normal/00000305.wav',
  '/mnt/d/silofox/paad/anomaly-example/exploring-AAD/notebooks/data/MIMII/data_-6_db/fan/id_00/normal/00000611.wav',
  '/mnt/d/silofox/paad/anomaly-example/exploring-AAD/notebooks/data/MIMII/data_-6_db/fan/id_00/normal/00000000.wav',
  '/mnt/d/silofox/paad/anomaly-example/exploring-AAD/notebooks/data/MIMII/data_-6_db/fan/id_00/normal/00000001.wav',
  '/mnt/d/silofox/paad/anomaly-example/exploring-AAD/notebooks/data/MIMII/data_-6_db/fan/id_00/normal/00000002.wav',
  '/mnt/d/silofox/paad/anomaly-example/exploring-AAD/notebooks/data/MIMII/data_-6_db/fan/id_00/normal/00000003.wav',
  '/mnt/d/silofox/paad/anomaly-example/exploring-AAD/notebooks/data/MIMII/data_-6_db/fan/id_00/normal/00000004.wav',
  '/mnt/d/silofox/paad/anomaly-example/exploring-AAD/notebooks/data/MIMII/data_-6_db/fan/id_00/normal/00000005.wav',
  '/mnt/d/silofox/paad/anomaly-example/exploring-AAD/n

In [8]:
type(specific_fan_data["normal"])

list

In [9]:
# Merge the "normal" and "abnormal" path lists into one flat list (normal first).
# The "unknown" entry of the dict is not included.
tmp_list = specific_fan_data["normal"] + specific_fan_data["abnormal"]
# Rebind the name: from here on specific_fan_data is a list, no longer a dict.
specific_fan_data = tmp_list

In [10]:
type(specific_fan_data)

list

In [11]:
specific_fan_data[100:110]

['/mnt/d/silofox/paad/anomaly-example/exploring-AAD/notebooks/data/MIMII/data_-6_db/fan/id_00/normal/00000086.wav',
 '/mnt/d/silofox/paad/anomaly-example/exploring-AAD/notebooks/data/MIMII/data_-6_db/fan/id_00/normal/00000087.wav',
 '/mnt/d/silofox/paad/anomaly-example/exploring-AAD/notebooks/data/MIMII/data_-6_db/fan/id_00/normal/00000088.wav',
 '/mnt/d/silofox/paad/anomaly-example/exploring-AAD/notebooks/data/MIMII/data_-6_db/fan/id_00/normal/00000090.wav',
 '/mnt/d/silofox/paad/anomaly-example/exploring-AAD/notebooks/data/MIMII/data_-6_db/fan/id_00/normal/00000091.wav',
 '/mnt/d/silofox/paad/anomaly-example/exploring-AAD/notebooks/data/MIMII/data_-6_db/fan/id_00/normal/00000092.wav',
 '/mnt/d/silofox/paad/anomaly-example/exploring-AAD/notebooks/data/MIMII/data_-6_db/fan/id_00/normal/00000093.wav',
 '/mnt/d/silofox/paad/anomaly-example/exploring-AAD/notebooks/data/MIMII/data_-6_db/fan/id_00/normal/00000094.wav',
 '/mnt/d/silofox/paad/anomaly-example/exploring-AAD/notebooks/data/MIMII

In [12]:
len(specific_fan_data)

5550

In [13]:
specific_fan_data_label = fan_data_label["MIMII"]["data_-6_db"]

In [14]:
type(specific_fan_data_label)

dict

In [15]:
len(specific_fan_data_label)

5550

In [16]:
list(specific_fan_data_label.keys())

['/mnt/d/silofox/paad/anomaly-example/exploring-AAD/notebooks/data/MIMII/data_-6_db/fan/id_00/normal/00000305.wav',
 '/mnt/d/silofox/paad/anomaly-example/exploring-AAD/notebooks/data/MIMII/data_-6_db/fan/id_00/normal/00000611.wav',
 '/mnt/d/silofox/paad/anomaly-example/exploring-AAD/notebooks/data/MIMII/data_-6_db/fan/id_00/normal/00000000.wav',
 '/mnt/d/silofox/paad/anomaly-example/exploring-AAD/notebooks/data/MIMII/data_-6_db/fan/id_00/normal/00000001.wav',
 '/mnt/d/silofox/paad/anomaly-example/exploring-AAD/notebooks/data/MIMII/data_-6_db/fan/id_00/normal/00000002.wav',
 '/mnt/d/silofox/paad/anomaly-example/exploring-AAD/notebooks/data/MIMII/data_-6_db/fan/id_00/normal/00000003.wav',
 '/mnt/d/silofox/paad/anomaly-example/exploring-AAD/notebooks/data/MIMII/data_-6_db/fan/id_00/normal/00000004.wav',
 '/mnt/d/silofox/paad/anomaly-example/exploring-AAD/notebooks/data/MIMII/data_-6_db/fan/id_00/normal/00000005.wav',
 '/mnt/d/silofox/paad/anomaly-example/exploring-AAD/notebooks/data/MIMII

In [17]:
# Replace the path -> label dict with a list of labels ordered like specific_fan_data,
# so both lists can be indexed in parallel. Raises KeyError if a path has no label entry.
specific_fan_data_label = [specific_fan_data_label[raw_path] for raw_path in specific_fan_data]

In [18]:
# Print 7 consecutive (path, label) pairs starting at index 4070. In the recorded
# output below, this window covers the switch from "normal" paths (label 1) to
# "abnormal" paths (label -1).
offset = 4070
for idx in range(7) :
    print(specific_fan_data[idx + offset])
    print(specific_fan_data_label[idx + offset])
    print("---")

/mnt/d/silofox/paad/anomaly-example/exploring-AAD/notebooks/data/MIMII/data_-6_db/fan/id_06/normal/00001010.wav
1
---
/mnt/d/silofox/paad/anomaly-example/exploring-AAD/notebooks/data/MIMII/data_-6_db/fan/id_06/normal/00001011.wav
1
---
/mnt/d/silofox/paad/anomaly-example/exploring-AAD/notebooks/data/MIMII/data_-6_db/fan/id_06/normal/00001012.wav
1
---
/mnt/d/silofox/paad/anomaly-example/exploring-AAD/notebooks/data/MIMII/data_-6_db/fan/id_06/normal/00001013.wav
1
---
/mnt/d/silofox/paad/anomaly-example/exploring-AAD/notebooks/data/MIMII/data_-6_db/fan/id_06/normal/00001014.wav
1
---
/mnt/d/silofox/paad/anomaly-example/exploring-AAD/notebooks/data/MIMII/data_-6_db/fan/id_00/abnormal/00000000.wav
-1
---
/mnt/d/silofox/paad/anomaly-example/exploring-AAD/notebooks/data/MIMII/data_-6_db/fan/id_00/abnormal/00000001.wav
-1
---


In [19]:
from pd_preprocess_from_dataset import get_specific_data

In [20]:
data = get_specific_data(fan_data_path)

In [21]:
type(data)

dict

In [22]:
data.keys()

dict_keys(['normal', 'abnormal', 'unknown'])

In [23]:
type(data["normal"])

list

In [24]:
len(data["normal"])

4075

In [25]:
len(data["abnormal"])

1475

In [26]:
len(data["normal"]) + len(data["abnormal"])

5550

In [27]:
from pd_preprocess_from_dataset import get_flatten_data

In [28]:
flatten_data = get_flatten_data(data)

In [29]:
type(flatten_data)

list

In [30]:
len(flatten_data)

5550

In [31]:
specific_label_data = get_specific_data(fan_data_label)

In [32]:
# NOTE(review): this inspects `specific_fan_data_label` (the label list built in cell 17),
# not the `specific_label_data` assigned in the previous cell — confirm which was intended.
type(specific_fan_data_label)

list

In [33]:
len(specific_fan_data_label)

5550

In [34]:
from pd_preprocess_from_dataset import get_label_from_flatten_specifics

In [35]:
specific_label = get_specific_data(fan_data_label)

In [36]:
label_data = get_label_from_flatten_specifics(flatten_data, specific_label)

In [37]:
label_data

[1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,


In [38]:
def view_wtf_sample(offset=0):
    """
    Find samples whose file path and label disagree about being abnormal.

    A sample is flagged when exactly one of the following holds: its path in
    the module-level ``flatten_data`` contains "abnormal", or its entry in
    the module-level ``label_data`` equals -1.

    Args:
        offset: Index of the first sample to check; the scan runs from there
            to the end of the data.

    Returns:
        List with the indices of the mismatching samples. The number of
        mismatches is also printed.
    """
    # Bound the scan by the actual data length instead of a hard-coded sample
    # count, so the tail of the data is not skipped and short data or a large
    # offset cannot raise IndexError.
    total = min(len(flatten_data), len(label_data))
    wtf_arr = [
        idx
        for idx in range(offset, total)
        if ("abnormal" in flatten_data[idx]) != (label_data[idx] == -1)
    ]

    print(f"wtf count : {len(wtf_arr)}")

    return wtf_arr


In [39]:
wtf_arr = view_wtf_sample()

wtf count : 0


In [40]:
wtf_arr

[]

In [41]:
def view_data_sample(list_of_idx):
    """
    Print the path and label of each requested sample.

    For every index, the entry of the module-level ``flatten_data`` is
    printed, then the entry of the module-level ``label_data``, then a
    "---" separator line.

    Args:
        list_of_idx: Iterable of sample indices to display.
    """
    separator = "---"
    for sample_idx in list_of_idx:
        # Path first, then label, each on its own line.
        for source in (flatten_data, label_data):
            print(source[sample_idx])
        print(separator)

In [42]:
view_data_sample(wtf_arr)

In [43]:
import pandas as pd
import numpy as np

In [44]:
df = pd.DataFrame({"filename" : flatten_data, "label" : label_data})

In [45]:
df.head()

Unnamed: 0,filename,label
0,/mnt/d/silofox/paad/anomaly-example/exploring-...,1
1,/mnt/d/silofox/paad/anomaly-example/exploring-...,1
2,/mnt/d/silofox/paad/anomaly-example/exploring-...,1
3,/mnt/d/silofox/paad/anomaly-example/exploring-...,1
4,/mnt/d/silofox/paad/anomaly-example/exploring-...,1


In [46]:
# Machine types to look for in the file paths.
data_type = ["fan", "pump", "slider", "valve"]

# Tag the whole DataFrame with a machine type, decided from the first row only:
# every type whose name occurs as a substring of the first filename overwrites
# the "type" column for all rows (there is no break, so the last match wins).
# NOTE(review): assumes every row belongs to the same machine type and that the
# type name does not occur elsewhere in the path — confirm before reusing.
for each_type in data_type : 
    if each_type in df.iloc[0]["filename"] : 
        df["type"] = each_type

In [47]:
df.head()

Unnamed: 0,filename,label,type
0,/mnt/d/silofox/paad/anomaly-example/exploring-...,1,fan
1,/mnt/d/silofox/paad/anomaly-example/exploring-...,1,fan
2,/mnt/d/silofox/paad/anomaly-example/exploring-...,1,fan
3,/mnt/d/silofox/paad/anomaly-example/exploring-...,1,fan
4,/mnt/d/silofox/paad/anomaly-example/exploring-...,1,fan


In [48]:
if df.iloc[0]["type"] == "fan" :
    print("yes")

yes


In [49]:
df.iloc[0]["filename"].split("/")

['',
 'mnt',
 'd',
 'silofox',
 'paad',
 'anomaly-example',
 'exploring-AAD',
 'notebooks',
 'data',
 'MIMII',
 'data_-6_db',
 'fan',
 'id_00',
 'normal',
 '00000305.wav']

In [50]:
# Index of the first "/"-separated component of the first row's filename that contains
# "id_" (the "id_XX" directory). Raises IndexError if no component matches.
id_idx = [idx for idx, each_in_path in enumerate(df.iloc[0]["filename"].split("/")) if "id_" in each_in_path][0]

In [51]:
id_idx

12

In [52]:
# For every row, take the path component at id_idx and keep the piece after the
# first underscore (e.g. "id_00" -> "00"). This reuses the index computed from the
# first row, so it assumes all paths have the same directory depth.
df_tmp = df["filename"].apply(lambda path : path.split("/")[id_idx].split("_")[1])

In [53]:
df_tmp.head()

0    00
1    00
2    00
3    00
4    00
Name: filename, dtype: object

In [54]:
df_tmp.tail()

5545    06
5546    06
5547    06
5548    06
5549    06
Name: filename, dtype: object

In [55]:
df_tmp.value_counts()

00    1418
04    1381
06    1376
02    1375
Name: filename, dtype: int64

In [56]:
from pd_preprocess_from_dataset import get_dataframe_from_flatten_data_and_label

In [57]:
df = get_dataframe_from_flatten_data_and_label(flatten_data, label_data)

In [58]:
df.head()

Unnamed: 0,filename,label,type,model
0,/mnt/d/silofox/paad/anomaly-example/exploring-...,1,fan,0
1,/mnt/d/silofox/paad/anomaly-example/exploring-...,1,fan,0
2,/mnt/d/silofox/paad/anomaly-example/exploring-...,1,fan,0
3,/mnt/d/silofox/paad/anomaly-example/exploring-...,1,fan,0
4,/mnt/d/silofox/paad/anomaly-example/exploring-...,1,fan,0


In [59]:
df.tail()

Unnamed: 0,filename,label,type,model
5545,/mnt/d/silofox/paad/anomaly-example/exploring-...,-1,fan,6
5546,/mnt/d/silofox/paad/anomaly-example/exploring-...,-1,fan,6
5547,/mnt/d/silofox/paad/anomaly-example/exploring-...,-1,fan,6
5548,/mnt/d/silofox/paad/anomaly-example/exploring-...,-1,fan,6
5549,/mnt/d/silofox/paad/anomaly-example/exploring-...,-1,fan,6


In [60]:
from utils import get_dir_path_default_dataframe

In [61]:
df.iloc[0]["filename"]

'/mnt/d/silofox/paad/anomaly-example/exploring-AAD/notebooks/data/MIMII/data_-6_db/fan/id_00/normal/00000305.wav'

In [62]:
dir_info = get_dir_path_default_dataframe(df)

In [63]:
dir_info

'fan/data_-6_db'

In [64]:
from utils import get_abs_dir_path
from utils import make_dir_from_abs_path

In [65]:
abs_path = get_abs_dir_path(dir_info)

base_dir is  /mnt/d/silofox/paad/anomaly-example/exploring-AAD/notebooks/data


In [66]:
import os 

# Point abs_path at a "raw_data" subdirectory of the directory resolved above.
abs_path = os.path.join(abs_path, "raw_data")
# Per the recorded output below, this helper warns that it may create every directory
# in the path and asks for (y/n) confirmation.
make_dir_from_abs_path(abs_path)

Be careful with the path, This can make all directories in the path.
Path :  /mnt/d/silofox/paad/anomaly-example/exploring-AAD/notebooks/data/working/pkl/fan/data_-6_db/raw_data
Are you sure you want to make directories in the path? (y/n)


In [67]:
abs_path

'/mnt/d/silofox/paad/anomaly-example/exploring-AAD/notebooks/data/working/pkl/fan/data_-6_db/raw_data'

In [68]:
from utils import save_dataframe

In [69]:
# Save the DataFrame under abs_path; the returned value is later passed to pd.read_pickle.
# NOTE(review): "metatdata" looks like a typo for "metadata"; it is left as-is here
# because it becomes part of the saved file name (see the recorded output).
data_path = save_dataframe(df, abs_path, filename = "metatdata")

Dataframe is saved in  /mnt/d/silofox/paad/anomaly-example/exploring-AAD/notebooks/data/working/pkl/fan/data_-6_db/raw_data/20240626_170325_metatdata.pkl


In [70]:
import pandas as pd

df_tmp = pd.read_pickle(data_path)

In [71]:
df_tmp.head()

Unnamed: 0,filename,label,type,model
0,/mnt/d/silofox/paad/anomaly-example/exploring-...,1,fan,0
1,/mnt/d/silofox/paad/anomaly-example/exploring-...,1,fan,0
2,/mnt/d/silofox/paad/anomaly-example/exploring-...,1,fan,0
3,/mnt/d/silofox/paad/anomaly-example/exploring-...,1,fan,0
4,/mnt/d/silofox/paad/anomaly-example/exploring-...,1,fan,0


In [72]:
from utils_for_preprocess import load_audio

In [73]:
# Load the audio file referenced by the first row of the reloaded DataFrame.
sample_file_path = df_tmp.iloc[0]["filename"]
# In the recorded outputs below, load_audio returned a torch.Tensor of shape [1, 160000]
# and an int sample rate of 16000 for this file.
waveform, sample_rate = load_audio(sample_file_path)

In [74]:
type(waveform)

torch.Tensor

In [75]:
type(sample_rate)

int

In [76]:
sample_rate

16000

In [77]:
waveform.shape

torch.Size([1, 160000])