# CNN + 분류 모델 (학습데이터셋: final_training_data_original)

In [1]:
import tensorflow as tf
tf.config.set_visible_devices([], 'GPU')

In [2]:
import os
import numpy as np
import pandas as pd
import librosa
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv1D, MaxPooling1D, Flatten, Dropout
from tensorflow.keras.utils import to_categorical
from tqdm import tqdm

# 데이터 준비 및 전처리
- combined_result.csv (raw_data + 기타소음4)

In [3]:
#학습 데이터 준비
df = pd.read_csv("final_training_data_original.csv", encoding='utf-8')
feature_cols = [f'mfcc_{i}' for i in range(1, 51)]
df = df[feature_cols + ['category_03']]

In [4]:
df.shape

(30395, 51)

In [5]:
df.info

<bound method DataFrame.info of            mfcc_1      mfcc_2     mfcc_3     mfcc_4     mfcc_5     mfcc_6  \
0     -391.004300  107.641975  64.204640  33.718952  20.630340  13.337987   
1     -233.662840   87.917620  45.434906  19.435001   9.098841  32.400368   
2     -318.161530  126.123180  43.729805 -11.429483  -5.223235  13.667998   
3     -385.209260  140.265410  57.263832  -7.932145  -3.877982  14.155136   
4     -382.124500  150.004900  61.439040 -17.890905  -7.740589  15.947204   
...           ...         ...        ...        ...        ...        ...   
30390 -344.941956   75.622345 -10.009164 -10.861253   1.282245  25.479780   
30391 -342.402252   94.272285  26.308794  19.576403  14.986108  21.159178   
30392 -433.578156  146.280182  -3.445537  32.104584  -4.695651  21.684990   
30393 -381.544830  146.417786  10.526478  13.691162  16.856730   2.699800   
30394 -269.841431  172.992615 -36.046337  22.788210  -5.915753  10.119747   

          mfcc_7     mfcc_8     mfcc_9    m

In [6]:
df['category_03'].value_counts()

이륜차주행음    4735
이륜차경적     4560
차량경적      3189
항타기       3036
콘크리트펌프    2730
공구        2216
발전기       2164
개         2077
고양이       2016
차량사이렌     1990
차량주행음     1682
Name: category_03, dtype: int64

# 소음 카테고리 변환

In [7]:
# 소음 카테고리 변환 함수
def categorize_noise(category):
    if category in ['이륜차경적']:
        return '이륜차경적'
    elif category in ['이륜차주행음']:
        return '이륜차주행음'
    elif category in ['차량사이렌']:
        return '차량사이렌'
    elif category in ['차량주행음']:
        return '차량주행음'
    elif category in ['차량경적']:
        return '차량경적'
    else:
        return '기타소음'

df['label'] = df['category_03'].apply(categorize_noise)
df = df.drop('category_03', axis=1)

X = df[feature_cols].values  # MFCC 특징 값
y = df['label'].values       # 레이블

# 레이블 인코딩 (문자 → 숫자)
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)
y_categorical = to_categorical(y_encoded)  # One-hot 인코딩

# 데이터셋 분할 (훈련:테스트 = 80:20)
X_train, X_test, y_train, y_test = train_test_split(X, y_categorical, test_size=0.2, random_state=42, stratify=y)

# CNN 입력 형태로 변환 (3D 텐서: 샘플 수 x 시간 축 x 특징 수)
X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], 1)
X_test = X_test.reshape(X_test.shape[0], X_test.shape[1], 1)

In [8]:
df['label'].value_counts()

기타소음      14239
이륜차주행음     4735
이륜차경적      4560
차량경적       3189
차량사이렌      1990
차량주행음      1682
Name: label, dtype: int64

# 모델 생성

## CNN

In [9]:
# CNN 모델 정의
model = Sequential([
    Conv1D(64, kernel_size=3, activation='relu', input_shape=(X_train.shape[1], 1)),
    MaxPooling1D(pool_size=2),
    Dropout(0.3),
    
    Conv1D(128, kernel_size=3, activation='relu'),
    MaxPooling1D(pool_size=2),
    Dropout(0.3),
    
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.3),
    Dense(y_categorical.shape[1], activation='softmax')  # 출력층 (카테고리 수만큼 출력)
])

In [10]:
# 모델 컴파일 및 학습
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
history = model.fit(X_train, y_train, epochs=30, batch_size=32, validation_data=(X_test, y_test))

# 테스트 정확도 출력
test_loss, test_accuracy = model.evaluate(X_test, y_test)

# 테스트 데이터에 대한 예측 수행
y_pred = model.predict(X_test)
y_pred_classes = np.argmax(y_pred, axis=1)  # 예측된 클래스 인덱스
y_true_classes = np.argmax(y_test, axis=1)  # 실제 클래스 인덱스

# 정확도 계산
accuracy = accuracy_score(y_true_classes, y_pred_classes)
print(f"Accuracy: {accuracy:.4f}")

# 분류 보고서 출력
class_names = label_encoder.classes_
report = classification_report(y_true_classes, y_pred_classes, target_names=class_names)
print("Classification Report:\n", report)

# 혼동 행렬 출력
conf_matrix = confusion_matrix(y_true_classes, y_pred_classes)
print("Confusion Matrix:\n", conf_matrix)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
Accuracy: 0.9710
Classification Report:
               precision    recall  f1-score   support

        기타소음       0.99      1.00      0.99      2848
       이륜차경적       0.99      0.96      0.97       912
      이륜차주행음       0.94      0.95      0.95       947
        차량경적       0.94      0.97      0.95       638
       차량사이렌       0.99      0.95      0.97       398
       차량주행음       0.86      0.90      0.88       336

    accuracy                           0.97      6079
   macro avg       0.95      0.95      0.95      6079
weighted avg       0.97      0.97      0.97      6079

Confusion Matrix:
 [[2835    1    6    2    2    2]
 [   0  872  

In [11]:
# # 모델 저장
# model.save('cnn_model_6classfication.h5')
# print("모델이 cnn_model_6classfication.h5 파일로 저장되었습니다.")

### 성능 개선
- 배치 정규화 (Batch Normalisation) 레이어 Conv1D 와 Dense 추가
- 앙상블 기법 적용
- ReduceROnPlateau 콜백 사용 
- 모델 구조 함수화

In [12]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Dense, Conv1D, MaxPooling1D, Dropout, Flatten, Input, BatchNormalization
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ReduceLROnPlateau
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# 데이터 준비 (기존 코드와 동일)
df = pd.read_csv("combined_result.csv", encoding='utf-8')
feature_cols = [f'mfcc_{i}' for i in range(1, 51)]
df = df[feature_cols + ['category_03']]

def categorize_noise(category):
    if category in ['이륜차경적']:
        return '이륜차경적'
    elif category in ['이륜차주행음']:
        return '이륜차주행음'
    elif category in ['차량사이렌']:
        return '차량사이렌'
    elif category in ['차량주행음']:
        return '차량주행음'
    elif category in ['차량경적']:
        return '차량경적'
    else:
        return '기타소음'

df['label'] = df['category_03'].apply(categorize_noise)
df = df.drop('category_03', axis=1)

X = df[feature_cols].values
y = df['label'].values

label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)
y_categorical = to_categorical(y_encoded)

X_train, X_test, y_train, y_test = train_test_split(X, y_categorical, test_size=0.2, random_state=42, stratify=y)

X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], 1)
X_test = X_test.reshape(X_test.shape[0], X_test.shape[1], 1)

# 개선된 CNN 모델 정의
def create_model():
    inputs = Input(shape=(X_train.shape[1], 1))
    x = Conv1D(64, kernel_size=3, activation='relu')(inputs)
    x = BatchNormalization()(x)
    x = MaxPooling1D(pool_size=2)(x)
    x = Dropout(0.3)(x)
    
    x = Conv1D(128, kernel_size=3, activation='relu')(x)
    x = BatchNormalization()(x)
    x = MaxPooling1D(pool_size=2)(x)
    x = Dropout(0.3)(x)
    
    x = Flatten()(x)
    x = Dense(128, activation='relu')(x)
    x = BatchNormalization()(x)
    x = Dropout(0.3)(x)
    outputs = Dense(y_categorical.shape[1], activation='softmax')(x)
    
    model = Model(inputs=inputs, outputs=outputs)
    return model

# 앙상블 모델 생성
def create_ensemble(num_models=3):
    models = []
    for _ in range(num_models):
        model = create_model()
        models.append(model)
    return models

# 앙상블 모델 학습
ensemble = create_ensemble()

# 학습률 스케줄링
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=5, min_lr=0.0001)

# 모델 학습
histories = []
for i, model in enumerate(ensemble):
    print(f"Training model {i+1}/{len(ensemble)}")
    model.compile(optimizer=Adam(learning_rate=0.001), loss='categorical_crossentropy', metrics=['accuracy'])
    history = model.fit(X_train, y_train, epochs=30, batch_size=32, validation_data=(X_test, y_test), callbacks=[reduce_lr])
    histories.append(history)

# 앙상블 예측
def ensemble_predict(models, X):
    predictions = [model.predict(X) for model in models]
    return np.mean(predictions, axis=0)

# 테스트 데이터에 대한 앙상블 예측
y_pred_ensemble = ensemble_predict(ensemble, X_test)
y_pred_classes = np.argmax(y_pred_ensemble, axis=1)
y_true_classes = np.argmax(y_test, axis=1)

# 정확도 계산
accuracy = accuracy_score(y_true_classes, y_pred_classes)
print(f"Ensemble Accuracy: {accuracy:.4f}")

# 분류 보고서 출력
class_names = label_encoder.classes_
report = classification_report(y_true_classes, y_pred_classes, target_names=class_names)
print("Classification Report:\n", report)

# 혼동 행렬 출력
conf_matrix = confusion_matrix(y_true_classes, y_pred_classes)
print("Confusion Matrix:\n", conf_matrix)


Training model 1/3
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
Training model 2/3
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
Training model 3/3
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/3

In [13]:
# 모델 저장
model.save('cnn2_6classfication_original.h5')
print("모델이 cnn2_6classfication_original.h5 파일로 저장되었습니다.")

모델이 cnn2_6classfication_original.h5 파일로 저장되었습니다.


### 차량 주행음 개선

In [14]:
# from sklearn.utils import class_weight
# from tensorflow.keras.optimizers import Adam

# # 클래스 가중치 계산
# class_weights = class_weight.compute_class_weight('balanced',
#                                                  classes=np.unique(np.argmax(y_train, axis=1)),
#                                                  y=np.argmax(y_train, axis=1))
# class_weight_dict = dict(enumerate(class_weights))

# # 모델 컴파일 및 학습
# model.compile(optimizer=Adam(learning_rate=0.001), loss='categorical_crossentropy', metrics=['accuracy'])
# history = model.fit(X_train, y_train, epochs=30, batch_size=32, validation_data=(X_test, y_test), class_weight=class_weight_dict, callbacks=[reduce_lr])


In [15]:
# import numpy as np
# import pandas as pd
# from sklearn.model_selection import train_test_split
# from sklearn.preprocessing import LabelEncoder
# from tensorflow.keras.utils import to_categorical
# from tensorflow.keras.models import Sequential, Model
# from tensorflow.keras.layers import Dense, Conv1D, MaxPooling1D, Dropout, Flatten, Input, BatchNormalization
# from tensorflow.keras.optimizers import Adam
# from tensorflow.keras.callbacks import ReduceLROnPlateau
# from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
# from sklearn.utils import class_weight

# # 데이터 준비 (기존 코드와 동일)
# df = pd.read_csv("combined_result.csv", encoding='utf-8')
# feature_cols = [f'mfcc_{i}' for i in range(1, 51)]
# df = df[feature_cols + ['category_03']]

# def categorize_noise(category):
#     if category in ['이륜차경적']:
#         return '이륜차경적'
#     elif category in ['이륜차주행음']:
#         return '이륜차주행음'
#     elif category in ['차량사이렌']:
#         return '차량사이렌'
#     elif category in ['차량주행음']:
#         return '차량주행음'
#     elif category in ['차량경적']:
#         return '차량경적'
#     else:
#         return '기타소음'

# df['label'] = df['category_03'].apply(categorize_noise)
# df = df.drop('category_03', axis=1)

# X = df[feature_cols].values
# y = df['label'].values

# label_encoder = LabelEncoder()
# y_encoded = label_encoder.fit_transform(y)
# y_categorical = to_categorical(y_encoded)

# X_train, X_test, y_train, y_test = train_test_split(X, y_categorical, test_size=0.2, random_state=42, stratify=y)

# X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], 1)
# X_test = X_test.reshape(X_test.shape[0], X_test.shape[1], 1)

# # 클래스 가중치 계산
# class_weights = class_weight.compute_class_weight('balanced',
#                                                  classes=np.unique(np.argmax(y_train, axis=1)),
#                                                  y=np.argmax(y_train, axis=1))
# class_weight_dict = dict(enumerate(class_weights))

# # 개선된 CNN 모델 정의
# def create_model():
#     inputs = Input(shape=(X_train.shape[1], 1))
#     x = Conv1D(64, kernel_size=3, activation='relu')(inputs)
#     x = BatchNormalization()(x)
#     x = MaxPooling1D(pool_size=2)(x)
#     x = Dropout(0.3)(x)
    
#     x = Conv1D(128, kernel_size=3, activation='relu')(x)
#     x = BatchNormalization()(x)
#     x = MaxPooling1D(pool_size=2)(x)
#     x = Dropout(0.3)(x)
    
#     x = Flatten()(x)
#     x = Dense(128, activation='relu')(x)
#     x = BatchNormalization()(x)
#     x = Dropout(0.3)(x)
#     outputs = Dense(y_categorical.shape[1], activation='softmax')(x)
    
#     model = Model(inputs=inputs, outputs=outputs)
#     return model

# # 앙상블 모델 생성
# def create_ensemble(num_models=3):
#     models = []
#     for _ in range(num_models):
#         model = create_model()
#         models.append(model)
#     return models

# # 앙상블 모델 학습
# ensemble = create_ensemble()

# # 학습률 스케줄링
# reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=5, min_lr=0.0001)

# # 모델 학습
# histories = []
# for i, model in enumerate(ensemble):
#     print(f"Training model {i+1}/{len(ensemble)}")
#     model.compile(optimizer=Adam(learning_rate=0.001), loss='categorical_crossentropy', metrics=['accuracy'])
#     history = model.fit(X_train, y_train, epochs=30, batch_size=32, validation_data=(X_test, y_test), 
#                         class_weight=class_weight_dict, callbacks=[reduce_lr])
#     histories.append(history)

# # 앙상블 예측
# def ensemble_predict(models, X):
#     predictions = [model.predict(X) for model in models]
#     return np.mean(predictions, axis=0)

# # 테스트 데이터에 대한 앙상블 예측
# y_pred_ensemble = ensemble_predict(ensemble, X_test)
# y_pred_classes = np.argmax(y_pred_ensemble, axis=1)
# y_true_classes = np.argmax(y_test, axis=1)

# # 정확도 계산
# accuracy = accuracy_score(y_true_classes, y_pred_classes)
# print(f"Ensemble Accuracy: {accuracy:.4f}")

# # 분류 보고서 출력
# class_names = label_encoder.classes_
# report = classification_report(y_true_classes, y_pred_classes, target_names=class_names)
# print("Classification Report:\n", report)

# # 혼동 행렬 출력
# conf_matrix = confusion_matrix(y_true_classes, y_pred_classes)
# print("Confusion Matrix:\n", conf_matrix)


In [16]:
# # 모델 저장
# model.save('updated2_cnn_model_6classfication.h5')
# print("모델이 updated2_cnn_model_6classfication.h5 파일로 저장되었습니다.")

### 차량주행음 - Precision 과 f1-score 개선
- 오버샘플링

In [17]:
# from imblearn.over_sampling import RandomOverSampler
# import numpy as np
# import pandas as pd
# from sklearn.model_selection import train_test_split
# from sklearn.preprocessing import LabelEncoder
# from tensorflow.keras.utils import to_categorical
# from tensorflow.keras.models import Model
# from tensorflow.keras.layers import Dense, Conv1D, MaxPooling1D, Dropout, Flatten, Input, BatchNormalization
# from tensorflow.keras.optimizers import Adam
# from tensorflow.keras.callbacks import ReduceLROnPlateau
# from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# # 데이터 준비
# df = pd.read_csv("combined_result.csv", encoding='utf-8')
# feature_cols = [f'mfcc_{i}' for i in range(1, 51)]
# df = df[feature_cols + ['category_03']]

# def categorize_noise(category):
#     if category in ['이륜차경적']:
#         return '이륜차경적'
#     elif category in ['이륜차주행음']:
#         return '이륜차주행음'
#     elif category in ['차량사이렌']:
#         return '차량사이렌'
#     elif category in ['차량주행음']:
#         return '차량주행음'
#     elif category in ['차량경적']:
#         return '차량경적'
#     else:
#         return '기타소음'

# df['label'] = df['category_03'].apply(categorize_noise)
# df = df.drop('category_03', axis=1)

# X = df[feature_cols].values
# y = df['label'].values

# # 레이블 인코딩 및 데이터 분할
# label_encoder = LabelEncoder()
# y_encoded = label_encoder.fit_transform(y)

# X_train, X_test, y_train, y_test = train_test_split(X, y_encoded, test_size=0.2, random_state=42, stratify=y)

# # 오버샘플링 적용 (훈련 데이터만)
# ros = RandomOverSampler(random_state=42)
# X_train_resampled, y_train_resampled = ros.fit_resample(X_train, y_train)

# # 원-핫 인코딩
# y_train_categorical = to_categorical(y_train_resampled)
# y_test_categorical = to_categorical(y_test)

# # 데이터 형태 변환
# X_train_resampled = X_train_resampled.reshape(X_train_resampled.shape[0], X_train_resampled.shape[1], 1)
# X_test = X_test.reshape(X_test.shape[0], X_test.shape[1], 1)

# # CNN 모델 정의
# def create_model():
#     inputs = Input(shape=(X_train_resampled.shape[1], 1))
#     x = Conv1D(64, kernel_size=3, activation='relu')(inputs)
#     x = BatchNormalization()(x)
#     x = MaxPooling1D(pool_size=2)(x)
#     x = Dropout(0.3)(x)
    
#     x = Conv1D(128, kernel_size=3, activation='relu')(x)
#     x = BatchNormalization()(x)
#     x = MaxPooling1D(pool_size=2)(x)
#     x = Dropout(0.3)(x)
    
#     x = Flatten()(x)
#     x = Dense(128, activation='relu')(x)
#     x = BatchNormalization()(x)
#     x = Dropout(0.3)(x)
#     outputs = Dense(y_test_categorical.shape[1], activation='softmax')(x)
    
#     model = Model(inputs=inputs, outputs=outputs)
#     return model

# # 앙상블 모델 생성
# def create_ensemble(num_models=3):
#     models = []
#     for _ in range(num_models):
#         model = create_model()
#         models.append(model)
#     return models

# # 앙상블 모델 학습
# ensemble = create_ensemble()

# # 학습률 스케줄링 콜백 정의
# reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=5, min_lr=0.0001)

# # 모델 학습 (오버샘플링된 데이터 사용)
# histories = []
# for i, model in enumerate(ensemble):
#     print(f"Training model {i+1}/{len(ensemble)}")
#     model.compile(optimizer=Adam(learning_rate=0.001), loss='categorical_crossentropy', metrics=['accuracy'])
#     history = model.fit(X_train_resampled, y_train_categorical,
#                         epochs=30,
#                         batch_size=32,
#                         validation_data=(X_test, y_test_categorical),
#                         callbacks=[reduce_lr])
#     histories.append(history)

# # 앙상블 예측 함수 정의
# def ensemble_predict(models, X):
#     predictions = [model.predict(X) for model in models]
#     return np.mean(predictions, axis=0)

# # 테스트 데이터에 대한 앙상블 예측 수행
# y_pred_ensemble = ensemble_predict(ensemble, X_test)

# # 클래스별로 가장 높은 확률을 가진


In [18]:
# # 클래스별로 가장 높은 확률을 가진 클래스 인덱스를 선택
# y_pred_classes = np.argmax(y_pred_ensemble, axis=1)
# y_true_classes = np.argmax(y_test_categorical, axis=1)

# # 정확도 계산 및 출력
# accuracy = accuracy_score(y_true_classes, y_pred_classes)
# print(f"Ensemble Accuracy: {accuracy:.4f}")

# # 분류 보고서 출력
# class_names = label_encoder.classes_
# report = classification_report(y_true_classes, y_pred_classes, target_names=class_names)
# print("Classification Report:\n", report)

# # 혼동 행렬 출력
# conf_matrix = confusion_matrix(y_true_classes, y_pred_classes)
# print("Confusion Matrix:\n", conf_matrix)

# 새로운 wav 파일 분류 함수

In [19]:
print(model.summary())

Model: "model_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_3 (InputLayer)         [(None, 50, 1)]           0         
_________________________________________________________________
conv1d_6 (Conv1D)            (None, 48, 64)            256       
_________________________________________________________________
batch_normalization_6 (Batch (None, 48, 64)            256       
_________________________________________________________________
max_pooling1d_6 (MaxPooling1 (None, 24, 64)            0         
_________________________________________________________________
dropout_9 (Dropout)          (None, 24, 64)            0         
_________________________________________________________________
conv1d_7 (Conv1D)            (None, 22, 128)           24704     
_________________________________________________________________
batch_normalization_7 (Batch (None, 22, 128)           512 

In [20]:
def classify_audio(audio_path):
    try:
        # 음원 파일 로드
        y, sr = librosa.load(audio_path, sr=44100)

        # MFCC 특징 추출
        mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=50)

        # 추출된 MFCC 특징의 평균값을 사용 (각 MFCC 차수별로 하나의 값)
        mfccs_processed = np.mean(mfccs, axis=1)

        # CNN 입력 형태로 변환 (3D 텐서)
        new_data = mfccs_processed.reshape(1, -1, 1)

        # 예측 수행
        predicted_label_encoded = np.argmax(model.predict(new_data), axis=-1)[0]
        predicted_label = label_encoder.inverse_transform([predicted_label_encoded])[0]

        return predicted_label

    except Exception as e:
        print(f"Error processing audio file: {e}")
        return "Unknown"


## 기타소음

### 고양이

In [21]:
folder_path = "/home/ubuntu/data/etc_noise_data_test_final/동물/15.고양이"
results = []

for filename in tqdm(os.listdir(folder_path)):
    if filename.endswith(".wav"):
        file_path = os.path.join(folder_path, filename)
        predicted_label = classify_audio(file_path)
        results.append({"File": filename, "Predicted Label": predicted_label})

# 결과를 DataFrame으로 변환
results_df_cat = pd.DataFrame(results)

100%|██████████| 274/274 [00:53<00:00,  5.12it/s]


In [22]:
results_df_cat['Predicted Label'].value_counts()

기타소음      272
이륜차주행음      1
차량사이렌       1
Name: Predicted Label, dtype: int64

In [23]:
# '기타소음'의 개수와 비율 계산
total_files_cat = len(results_df_cat)  # 전체 파일 개수
etc_noise_count_cat = results_df_cat[results_df_cat['Predicted Label'] == '기타소음'].shape[0]  # '기타소음'으로 분류된 파일 개수
etc_noise_ratio_cat = (etc_noise_count_cat / total_files_cat) * 100  # 비율 계산

# 결과 출력
print(f"Total Files: {total_files_cat}")
print(f"'기타소음' Count: {etc_noise_count_cat}")
print(f"'기타소음' Ratio: {etc_noise_ratio_cat:.2f}%")

Total Files: 274
'기타소음' Count: 272
'기타소음' Ratio: 99.27%


In [24]:
# #CSV로 저장
# results_df_cat.to_csv("cat.csv", index=False)
# print("Classification completed. Results saved to 'cat.csv'")

### 강아지

In [25]:
folder_path = "/home/ubuntu/data/etc_noise_data_test_final/동물/14.개"
results = []

for filename in tqdm(os.listdir(folder_path)):
    if filename.endswith(".wav"):
        file_path = os.path.join(folder_path, filename)
        predicted_label = classify_audio(file_path)
        results.append({"File": filename, "Predicted Label": predicted_label})

# 결과를 DataFrame으로 변환
results_df_dog = pd.DataFrame(results)

100%|██████████| 217/217 [01:00<00:00,  3.59it/s]


In [26]:
results_df_dog['Predicted Label'].value_counts()

기타소음     214
차량사이렌      3
Name: Predicted Label, dtype: int64

In [27]:
# '기타소음'의 개수와 비율 계산
total_files_dog = len(results_df_dog)  # 전체 파일 개수
etc_noise_count_dog = results_df_dog[results_df_dog['Predicted Label'] == '기타소음'].shape[0]  # '기타소음'으로 분류된 파일 개수
etc_noise_ratio_dog = (etc_noise_count_dog / total_files_dog) * 100  # 비율 계산

# 결과 출력
print(f"Total Files: {total_files_dog}")
print(f"'기타소음' Count: {etc_noise_count_dog}")
print(f"'기타소음' Ratio: {etc_noise_ratio_dog:.2f}%")

Total Files: 217
'기타소음' Count: 214
'기타소음' Ratio: 98.62%


In [28]:
# # CSV로 저장
# results_df_dog.to_csv("dog.csv", index=False)
# print("Classification completed. Results saved to 'dog.csv'")

### 항타기

In [29]:
folder_path = "/home/ubuntu/data/etc_noise_data_test_final/공사장/18.항타기"
results = []

for filename in tqdm(os.listdir(folder_path)):
    if filename.endswith(".wav"):
        file_path = os.path.join(folder_path, filename)
        predicted_label = classify_audio(file_path)
        results.append({"File": filename, "Predicted Label": predicted_label})

# 결과를 DataFrame으로 변환
results_df_piledriver = pd.DataFrame(results)

100%|██████████| 133/133 [00:52<00:00,  2.52it/s]


In [30]:
results_df_piledriver['Predicted Label'].value_counts()

기타소음      126
이륜차주행음      6
차량경적        1
Name: Predicted Label, dtype: int64

In [31]:
# '기타소음'의 개수와 비율 계산
total_files_piledriver = len(results_df_piledriver)  # 전체 파일 개수
etc_noise_count_piledriver = results_df_piledriver[results_df_piledriver['Predicted Label'] == '기타소음'].shape[0]  # '기타소음'으로 분류된 파일 개수
etc_noise_ratio_piledriver = (etc_noise_count_piledriver / total_files_piledriver) * 100  # 비율 계산

# 결과 출력
print(f"Total Files: {total_files_piledriver}")
print(f"'기타소음' Count: {etc_noise_count_piledriver}")
print(f"'기타소음' Ratio: {etc_noise_ratio_piledriver:.2f}%")

Total Files: 133
'기타소음' Count: 126
'기타소음' Ratio: 94.74%


In [32]:
# # 결과를 CSV로 저장
# results_df_piledriver.to_csv("piledriver.csv", index=False)
# print("Classification completed. Results saved to 'piledriver.csv'")

### 콘크리트펌프

In [33]:
folder_path = "/home/ubuntu/data/etc_noise_data_test_final/공사장/20.콘크리트펌프"
results = []

for filename in tqdm(os.listdir(folder_path)):
    if filename.endswith(".wav"):
        file_path = os.path.join(folder_path, filename)
        predicted_label = classify_audio(file_path)
        results.append({"File": filename, "Predicted Label": predicted_label})

# 결과를 DataFrame으로 변환
results_df_concrete = pd.DataFrame(results)

100%|██████████| 93/93 [01:54<00:00,  1.23s/it]


In [34]:
results_df_concrete['Predicted Label'].value_counts()

기타소음      80
이륜차주행음     9
차량경적       2
차량사이렌      2
Name: Predicted Label, dtype: int64

In [35]:
results_df_concrete['Predicted Label'].value_counts()

# '기타소음'의 개수와 비율 계산
total_files_concrete = len(results_df_concrete)  # 전체 파일 개수
etc_noise_count_concrete = results_df_concrete[results_df_concrete['Predicted Label'] == '기타소음'].shape[0]  # '기타소음'으로 분류된 파일 개수
etc_noise_ratio_concrete = (etc_noise_count_concrete / total_files_concrete) * 100  # 비율 계산

# 결과 출력
print(f"Total Files: {total_files_concrete}")
print(f"'기타소음' Count: {etc_noise_count_concrete}")
print(f"'기타소음' Ratio: {etc_noise_ratio_concrete:.2f}%")

Total Files: 93
'기타소음' Count: 80
'기타소음' Ratio: 86.02%


In [36]:
# # CSV로 저장
# results_df_vacuum.to_csv("concrete.csv", index=False)
# print("Classification completed. Results saved to 'concrete.csv'")

### 발전기

In [37]:
folder_path = "/home/ubuntu/data/etc_noise_data_test_final/공사장/20.콘크리트펌프"
results = []

for filename in tqdm(os.listdir(folder_path)):
    if filename.endswith(".wav"):
        file_path = os.path.join(folder_path, filename)
        predicted_label = classify_audio(file_path)
        results.append({"File": filename, "Predicted Label": predicted_label})

# 결과를 DataFrame으로 변환
results_df_generator = pd.DataFrame(results)

100%|██████████| 93/93 [01:51<00:00,  1.20s/it]


In [38]:
results_df_generator['Predicted Label'].value_counts()

기타소음      80
이륜차주행음     9
차량경적       2
차량사이렌      2
Name: Predicted Label, dtype: int64

In [39]:
# '기타소음'의 개수와 비율 계산
total_files_generator = len(results_df_generator)  # 전체 파일 개수
etc_noise_count_generator = results_df_generator[results_df_generator['Predicted Label'] == '기타소음'].shape[0]  # '기타소음'으로 분류된 파일 개수
etc_noise_ratio_generator = (etc_noise_count_generator / total_files_generator) * 100  # 비율 계산

# 결과 출력
print(f"Total Files: {total_files_generator}")
print(f"'기타소음' Count: {etc_noise_count_generator}")
print(f"'기타소음' Ratio: {etc_noise_ratio_generator:.2f}%")

Total Files: 93
'기타소음' Count: 80
'기타소음' Ratio: 86.02%


In [40]:
# # CSV로 저장
# results_df_vacuum.to_csv("generator.csv", index=False)
# print("Classification completed. Results saved to 'generator.csv'")

### 공구

In [41]:
folder_path = "/home/ubuntu/data/etc_noise_data_test_final/공구"
results = []

for filename in tqdm(os.listdir(folder_path)):
    if filename.endswith(".wav"):
        file_path = os.path.join(folder_path, filename)
        predicted_label = classify_audio(file_path)
        results.append({"File": filename, "Predicted Label": predicted_label})

# 결과를 DataFrame으로 변환
results_df_tools = pd.DataFrame(results)

100%|██████████| 172/172 [00:51<00:00,  3.31it/s]


In [42]:
results_df_tools['Predicted Label'].value_counts()

기타소음      125
이륜차주행음     30
차량사이렌      11
차량주행음       4
이륜차경적       1
차량경적        1
Name: Predicted Label, dtype: int64

In [43]:
# '기타소음'의 개수와 비율 계산
total_files_tools = len(results_df_tools)  # 전체 파일 개수
etc_noise_count_tools = results_df_tools[results_df_tools['Predicted Label'] == '기타소음'].shape[0]  # '기타소음'으로 분류된 파일 개수
etc_noise_ratio_tools = (etc_noise_count_tools / total_files_tools) * 100  # 비율 계산

# 결과 출력
print(f"Total Files: {total_files_tools}")
print(f"'기타소음' Count: {etc_noise_count_tools}")
print(f"'기타소음' Ratio: {etc_noise_ratio_tools:.2f}%")

Total Files: 172
'기타소음' Count: 125
'기타소음' Ratio: 72.67%


In [44]:
# # CSV로 저장
# results_df_vacuum.to_csv("tools.csv", index=False)
# print("Classification completed. Results saved to 'tools.csv'")

## 교통소음

### 차량 사이렌

In [45]:
folder_path = "/home/ubuntu/data/etc_noise_data_test_final/교통소음/2.차량사이렌"
results = []

for filename in tqdm(os.listdir(folder_path)):
    if filename.endswith(".wav"):
        file_path = os.path.join(folder_path, filename)
        predicted_label = classify_audio(file_path)
        results.append({"File": filename, "Predicted Label": predicted_label})

# 결과를 DataFrame으로 변환
results_df_car_siren = pd.DataFrame(results)

100%|██████████| 249/249 [00:44<00:00,  5.61it/s]


In [46]:
results_df_car_siren['Predicted Label'].value_counts()

차량사이렌     241
차량주행음       5
이륜차주행음      2
차량경적        1
Name: Predicted Label, dtype: int64

In [47]:
# '기타소음'의 개수와 비율 계산
total_files_car_siren = len(results_df_car_siren)  # 전체 파일 개수
etc_noise_count_car_siren = results_df_car_siren[results_df_car_siren['Predicted Label'] == '차량사이렌'].shape[0]  # '기타소음'으로 분류된 파일 개수
etc_noise_ratio_car_siren = (etc_noise_count_car_siren / total_files_car_siren) * 100  # 비율 계산

# 결과 출력
print(f"Total Files: {total_files_car_siren}")
print(f"'차량사이렌' Count: {etc_noise_count_car_siren}")
print(f"'차량사이렌' Ratio: {etc_noise_ratio_car_siren:.2f}%")

Total Files: 249
'차량사이렌' Count: 241
'차량사이렌' Ratio: 96.79%


In [48]:
# #CSV로 저장
# results_df_car_siren.to_csv("car_siren.csv", index=False)
# print("Classification completed. Results saved to 'car_siren.csv'")

### 차량 경적

In [49]:
folder_path = "/home/ubuntu/data/raw_data/1.Car/1.horn_of_car"
results = []

for filename in tqdm(os.listdir(folder_path)):
    if filename.endswith(".wav"):
        file_path = os.path.join(folder_path, filename)
        predicted_label = classify_audio(file_path)
        results.append({"File": filename, "Predicted Label": predicted_label})

# 5. 결과를 DataFrame으로 변환
results_df_car_horn = pd.DataFrame(results)

100%|██████████| 3189/3189 [08:33<00:00,  6.21it/s]


In [50]:
results_df_car_horn['Predicted Label'].value_counts()

차량경적      3113
이륜차경적       37
이륜차주행음      30
차량주행음        7
기타소음         1
차량사이렌        1
Name: Predicted Label, dtype: int64

In [51]:
# '기타소음'의 개수와 비율 계산
total_files_car_horn = len(results_df_car_horn)  # 전체 파일 개수
etc_noise_count_car_horn = results_df_car_horn[results_df_car_horn['Predicted Label'] == '차량경적'].shape[0]  # '기타소음'으로 분류된 파일 개수
etc_noise_ratio_car_horn = (etc_noise_count_car_horn / total_files_car_horn) * 100  # 비율 계산

# 결과 출력
print(f"Total Files: {total_files_car_horn}")
print(f"'차량경적' Count: {etc_noise_count_car_horn}")
print(f"'차량경적' Ratio: {etc_noise_ratio_car_horn:.2f}%")

Total Files: 3189
'차량경적' Count: 3113
'차량경적' Ratio: 97.62%


In [52]:
# # CSV로 저장
# results_df_car_horn.to_csv("car_horn.csv", index=False)
# print("Classification completed. Results saved to 'car_horn.csv'")

### 차량 주행음

In [53]:
folder_path = "/home/ubuntu/data/etc_noise_data_test_final/교통소음/3.차량주행음"
results = []

for filename in tqdm(os.listdir(folder_path)):
    if filename.endswith(".wav"):
        file_path = os.path.join(folder_path, filename)
        predicted_label = classify_audio(file_path)
        results.append({"File": filename, "Predicted Label": predicted_label})

# 결과를 DataFrame으로 변환
results_df_car_driving = pd.DataFrame(results)

100%|██████████| 227/227 [00:50<00:00,  4.51it/s]


In [54]:
results_df_car_driving['Predicted Label'].value_counts()

차량주행음     199
이륜차주행음     21
기타소음        6
차량경적        1
Name: Predicted Label, dtype: int64

In [55]:
# '기타소음'의 개수와 비율 계산
total_files_car_driving = len(results_df_car_driving)  # 전체 파일 개수
etc_noise_count_car_driving = results_df_car_driving[results_df_car_driving['Predicted Label'] == '차량주행음'].shape[0]  # '기타소음'으로 분류된 파일 개수
etc_noise_ratio_car_driving = (etc_noise_count_car_driving / total_files_car_driving) * 100  # 비율 계산

# 결과 출력
print(f"Total Files: {total_files_car_driving}")
print(f"'차량주행음' Count: {etc_noise_count_car_driving}")
print(f"'차량주행음' Ratio: {etc_noise_ratio_car_driving:.2f}%")

Total Files: 227
'차량주행음' Count: 199
'차량주행음' Ratio: 87.67%


In [56]:
# # CSV로 저장
# results_df_car_driving.to_csv("car_driving.csv", index=False)
# print("Classification completed. Results saved to 'car_driving.csv'")

### 이륜차 경적

In [57]:
folder_path = "/home/ubuntu/data/test_data/raw_data_test/2.Motorcycle/4.horn_of_motorcycle"
results = []

for filename in tqdm(os.listdir(folder_path)):
    if filename.endswith(".wav"):
        file_path = os.path.join(folder_path, filename)
        predicted_label = classify_audio(file_path)
        results.append({"File": filename, "Predicted Label": predicted_label})

# 결과를 DataFrame으로 변환
results_df_motorcycle_horn = pd.DataFrame(results)

100%|██████████| 519/519 [01:19<00:00,  6.56it/s]


In [58]:
results_df_motorcycle_horn['Predicted Label'].value_counts()

이륜차경적     498
차량경적       18
이륜차주행음      3
Name: Predicted Label, dtype: int64

In [59]:
# '기타소음'의 개수와 비율 계산
total_files_motorcycle_horn = len(results_df_motorcycle_horn)  # 전체 파일 개수
etc_noise_count_motorcycle_horn = results_df_motorcycle_horn[results_df_motorcycle_horn['Predicted Label'] == '이륜차경적'].shape[0]  # '기타소음'으로 분류된 파일 개수
etc_noise_ratio_motorcycle_horn = (etc_noise_count_motorcycle_horn / total_files_motorcycle_horn) * 100  # 비율 계산

# 결과 출력
print(f"Total Files: {total_files_motorcycle_horn}")
print(f"'이륜차경적' Count: {etc_noise_count_motorcycle_horn}")
print(f"'이륜차경적' Ratio: {etc_noise_ratio_motorcycle_horn:.2f}%")

Total Files: 519
'이륜차경적' Count: 498
'이륜차경적' Ratio: 95.95%


In [60]:
# # CSV로 저장
# results_df_motorcycle_horn.to_csv("motorcycle_horn.csv", index=False)
# print("Classification completed. Results saved to 'motorcycle_horn.csv'")

### 이륜차 주행음

In [61]:
folder_path = "/home/ubuntu/data/test_data/raw_data_test/2.Motorcycle/5.driving_sound_of_motorcycle"
results = []

for filename in tqdm(os.listdir(folder_path)):
    if filename.endswith(".wav"):
        file_path = os.path.join(folder_path, filename)
        predicted_label = classify_audio(file_path)
        results.append({"File": filename, "Predicted Label": predicted_label})

# 결과를 DataFrame으로 변환
results_df_motorcycle_driving = pd.DataFrame(results)

100%|██████████| 498/498 [01:13<00:00,  6.78it/s]


In [62]:
results_df_motorcycle_driving['Predicted Label'].value_counts()

이륜차주행음    465
차량주행음      22
기타소음        8
차량사이렌       2
차량경적        1
Name: Predicted Label, dtype: int64

In [63]:
# '기타소음'의 개수와 비율 계산
total_files_motorcycle_driving = len(results_df_motorcycle_driving)  # 전체 파일 개수
etc_noise_count_motorcycle_driving = results_df_motorcycle_driving[results_df_motorcycle_driving['Predicted Label'] == '이륜차주행음'].shape[0]  # '기타소음'으로 분류된 파일 개수
etc_noise_ratio_motorcycle_driving = (etc_noise_count_motorcycle_driving / total_files_motorcycle_driving) * 100  # 비율 계산

# 결과 출력
print(f"Total Files: {total_files_motorcycle_driving}")
print(f"'이륜차주행음' Count: {etc_noise_count_motorcycle_driving}")
print(f"'이륜차주행음' Ratio: {etc_noise_ratio_motorcycle_driving:.2f}%")

Total Files: 498
'이륜차주행음' Count: 465
'이륜차주행음' Ratio: 93.37%


In [64]:
# # CSV로 저장
# results_df_motorcycle_driving.to_csv("motorcycle_driving.csv", index=False)
# print("Classification completed. Results saved to 'motorcycle_driving.csv'")

# 결과 비교

In [65]:
from tensorflow import keras
import numpy as np

# 저장된 모델 로드
loaded_model = keras.models.load_model('cnn2_6classfication_original.h5')

# 모델 구조 확인
loaded_model.summary()

# 테스트 데이터로 예측 수행
y_pred = loaded_model.predict(X_test)
y_pred_classes = np.argmax(y_pred, axis=1)
y_true_classes = np.argmax(y_test, axis=1)

# 정확도 계산
from sklearn.metrics import accuracy_score
accuracy = accuracy_score(y_true_classes, y_pred_classes)
print(f"Loaded Model Accuracy: {accuracy:.4f}")

# 분류 보고서 출력
from sklearn.metrics import classification_report
class_names = label_encoder.classes_
report = classification_report(y_true_classes, y_pred_classes, target_names=class_names)
print("Classification Report:\n", report)


Model: "model_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_3 (InputLayer)         [(None, 50, 1)]           0         
_________________________________________________________________
conv1d_6 (Conv1D)            (None, 48, 64)            256       
_________________________________________________________________
batch_normalization_6 (Batch (None, 48, 64)            256       
_________________________________________________________________
max_pooling1d_6 (MaxPooling1 (None, 24, 64)            0         
_________________________________________________________________
dropout_9 (Dropout)          (None, 24, 64)            0         
_________________________________________________________________
conv1d_7 (Conv1D)            (None, 22, 128)           24704     
_________________________________________________________________
batch_normalization_7 (Batch (None, 22, 128)           512 

## 기타 소음

In [66]:
# 결과 출력
print(f"고양이 Total Files: {total_files_cat}")
print(f"'기타소음' Count: {etc_noise_count_cat}")
print(f"'기타소음' Ratio: {etc_noise_ratio_cat:.2f}%")
print('*'*30)

# 결과 출력
print(f"강아지 Total Files: {total_files_dog}")
print(f"'기타소음' Count: {etc_noise_count_dog}")
print(f"'기타소음' Ratio: {etc_noise_ratio_dog:.2f}%")
print('*'*30)

# 결과 출력
print(f"향타기 Total Files: {total_files_piledriver}")
print(f"'기타소음' Count: {etc_noise_count_piledriver}")
print(f"'기타소음' Ratio: {etc_noise_ratio_piledriver:.2f}%")
print('*'*30)

# 결과 출력
print(f"콘크리트펌프 Total Files: {total_files_concrete}")
print(f"'기타소음' Count: {etc_noise_count_concrete}")
print(f"'기타소음' Ratio: {etc_noise_ratio_concrete:.2f}%")
print('*'*30)

# 결과 출력
print(f"발전기 Total Files: {total_files_generator}")
print(f"'기타소음' Count: {etc_noise_count_generator}")
print(f"'기타소음' Ratio: {etc_noise_ratio_generator:.2f}%")
print('*'*30)

# 결과 출력
print(f"공구 Total Files: {total_files_tools}")
print(f"'기타소음' Count: {etc_noise_count_tools}")
print(f"'기타소음' Ratio: {etc_noise_ratio_tools:.2f}%")

고양이 Total Files: 274
'기타소음' Count: 272
'기타소음' Ratio: 99.27%
******************************
강아지 Total Files: 217
'기타소음' Count: 214
'기타소음' Ratio: 98.62%
******************************
향타기 Total Files: 133
'기타소음' Count: 126
'기타소음' Ratio: 94.74%
******************************
콘크리트펌프 Total Files: 93
'기타소음' Count: 80
'기타소음' Ratio: 86.02%
******************************
발전기 Total Files: 93
'기타소음' Count: 80
'기타소음' Ratio: 86.02%
******************************
공구 Total Files: 172
'기타소음' Count: 125
'기타소음' Ratio: 72.67%


## 교통 소음

In [67]:
# 결과 출력
print(f"차량사이렌 Total Files: {total_files_car_siren}")
print(f"'차량사이렌' Count: {etc_noise_count_car_siren}")
print(f"'차량사이렌' Ratio: {etc_noise_ratio_car_siren:.2f}%")
print('*'*30)

# 결과 출력
print(f"차량경적 Total Files: {total_files_car_horn}")
print(f"'차량경적' Count: {etc_noise_count_car_horn}")
print(f"'차량경적' Ratio: {etc_noise_ratio_car_horn:.2f}%")
print('*'*30)

# 결과 출력
print(f"차량주행음 Total Files: {total_files_car_driving}")
print(f"'차량주행음' Count: {etc_noise_count_car_driving}")
print(f"'차량주행음' Ratio: {etc_noise_ratio_car_driving:.2f}%")
print('*'*30)

# 결과 출력
print(f"이륜차경적 Total Files: {total_files_motorcycle_horn}")
print(f"'이륜차경적' Count: {etc_noise_count_motorcycle_horn}")
print(f"'이륜차경적' Ratio: {etc_noise_ratio_motorcycle_horn:.2f}%")
print('*'*30)

# 결과 출력
print(f"이륜차주행음 Total Files: {total_files_motorcycle_driving}")
print(f"'이륜차주행음' Count: {etc_noise_count_motorcycle_driving}")
print(f"'이륜차주행음' Ratio: {etc_noise_ratio_motorcycle_driving:.2f}%")

차량사이렌 Total Files: 249
'차량사이렌' Count: 241
'차량사이렌' Ratio: 96.79%
******************************
차량경적 Total Files: 3189
'차량경적' Count: 3113
'차량경적' Ratio: 97.62%
******************************
차량주행음 Total Files: 227
'차량주행음' Count: 199
'차량주행음' Ratio: 87.67%
******************************
이륜차경적 Total Files: 519
'이륜차경적' Count: 498
'이륜차경적' Ratio: 95.95%
******************************
이륜차주행음 Total Files: 498
'이륜차주행음' Count: 465
'이륜차주행음' Ratio: 93.37%
