In [7]:
import os
import librosa
import numpy as np
import pandas as pd



audio_folder = "./Training/TS"




# 장르 리스트 (TS 디렉토리 내의 폴더들)
genres = [d for d in os.listdir(audio_folder) if os.path.isdir(os.path.join(audio_folder, d))]

# 장르 리스트 출력 (확인용)
print("장르 리스트:", genres)

# 오디오 파일 리스트 초기화
audio_files = []

# os.walk()를 사용하여 디렉토리 트리를 순회하면서 모든 오디오 파일을 찾기
for root, dirs, files in os.walk(audio_folder):
    for file in files:
        if file.lower().endswith('.wav'):  # .wav 파일만 필터링
            # 파일의 전체 경로를 생성하여 audio_files 리스트에 추가
            audio_files.append(os.path.join(root, file))

# 오디오 분석 결과 저장 리스트
data = []

# 오디오 파일에서 특징 추출하는 함수
def extract_audio_features(audio_files):
    for audio_file in audio_files:
        # 오디오 파일 로드
        y, sr = librosa.load(audio_file, sr=None)  # 원본 샘플링 레이트 유지

        # 장르 추출 (폴더명에서 가져오기)
        genre = "Unknown"
        for g in genres:
            if f"\\{g}\\" in audio_file or audio_file.startswith(os.path.join(audio_folder, g)):
                genre = g
                break

        # 오디오 특징 추출
        mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
        chroma = librosa.feature.chroma_stft(y=y, sr=sr)
        spectral_centroid = librosa.feature.spectral_centroid(y=y, sr=sr)
        zero_crossings = librosa.feature.zero_crossing_rate(y=y)

        # MFCC의 13개의 계수별 평균값과 분산값 계산 (각 계수를 따로 열로 뽑기)
        mfcc_mean = [round(np.mean(mfccs[i, :]), 4) for i in range(mfccs.shape[0])]
        mfcc_var = [round(np.var(mfccs[i, :]), 4) for i in range(mfccs.shape[0])]

        # Chroma 평균값과 분산값 계산
        chroma_mean = [round(np.mean(chroma[i, :]), 4) for i in range(chroma.shape[0])]
        chroma_var = [round(np.var(chroma[i, :]), 4) for i in range(chroma.shape[0])]

        # 특징값 계산 (소수점 4자리로 반올림)
        features = {
            **{f"mfcc_{i+1}_mean": mfcc_mean[i] for i in range(13)},  # 13개의 MFCC 평균값을 개별 열로 추가
            **{f"mfcc_{i+1}_var": mfcc_var[i] for i in range(13)},    # 13개의 MFCC 분산값을 개별 열로 추가
            **{f"chroma_{i+1}_mean": chroma_mean[i] for i in range(12)},  # 12개의 Chroma 평균값을 개별 열로 추가
            **{f"chroma_{i+1}_var": chroma_var[i] for i in range(12)},    # 12개의 Chroma 분산값을 개별 열로 추가
            "spectral_centroid_mean": round(np.mean(spectral_centroid), 4),
            "spectral_centroid_var": round(np.var(spectral_centroid), 4),
            "zero_crossing_rate_mean": round(np.mean(zero_crossings), 4),
            "zero_crossing_rate_var": round(np.var(zero_crossings), 4),
        }

        # 파일 이름 및 장르 추가
        file_features = {
            "file_name": os.path.basename(audio_file),
            "genre": genre,  # 장르 추가
            **features,  # 추출한 특징들 추가
        }

        data.append(file_features)

# 오디오 파일 분석 실행
extract_audio_features(audio_files)

# pandas DataFrame으로 변환
df = pd.DataFrame(data)

# DataFrame 확인
print(df.head())



장르 리스트: ['Ballade', 'RnB', 'Trot', 'Rock', 'Dance', 'Hiphop']
                                 file_name    genre  mfcc_1_mean  mfcc_2_mean  \
0  Similar_Ballade_00452_Cover_Genre_A.wav  Ballade   -56.141102    90.430298   
1  Similar_Ballade_00464_Cover_Genre_A.wav  Ballade  -192.632904   114.043800   
2  Similar_Ballade_00232_Cover_Genre_A.wav  Ballade  -124.557297    95.328400   
3  Similar_Ballade_00463_Cover_Genre_A.wav  Ballade  -208.412506   110.276901   
4  Similar_Ballade_00098_Cover_Genre_A.wav  Ballade  -190.539093   114.569801   

   mfcc_3_mean  mfcc_4_mean  mfcc_5_mean  mfcc_6_mean  mfcc_7_mean  \
0     9.702100    15.721100      -0.4799       0.8627     -10.2275   
1    17.224600     5.660000       5.9338       7.4559      -6.1413   
2    -9.202200    31.275801      -8.3722       9.1876      -7.7682   
3   -23.985001    34.171902      -5.5253      -1.3022     -12.9066   
4    -7.740300    16.924601       1.5354       7.3480      -3.9347   

   mfcc_8_mean  ...  chroma_7_

In [19]:
import pandas as pd
import sklearn
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder



# 특징과 라벨 분리
x = df.drop(columns=['file_name', 'genre'])
y = df['genre']

# 정규화
scaler = sklearn.preprocessing.MinMaxScaler()
x_scaled = scaler.fit_transform(x)

# 라벨 인코딩
le = LabelEncoder()
y_encoded = le.fit_transform(y)

# 학습/테스트 데이터 분할
x_train, x_test, y_train, y_test = train_test_split(x_scaled, y_encoded, test_size=0.2, random_state=42)


In [20]:
# Naive Bayes 모델 학습
nb = GaussianNB()
nb.fit(x_train, y_train)

# 예측 및 평가
y_pred = nb.predict(x_test)
print('Naive Bayes 정확도: %.2f' % accuracy_score(y_test, y_pred))

Naive Bayes 정확도: 0.40


In [21]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score

# 의사결정트리 모델 학습
dt = DecisionTreeClassifier(random_state=42)
dt.fit(x_train, y_train)

# 예측 및 평가
y_pred = dt.predict(x_test)
print('Decision Tree 정확도: %.2f' % accuracy_score(y_test, y_pred))

Decision Tree 정확도: 0.30


In [22]:
from sklearn.linear_model import SGDClassifier
from sklearn.metrics import accuracy_score

# 확률적 경사 하강법모델 학습
sgd = SGDClassifier(max_iter=1000, random_state=42)
sgd.fit(x_train, y_train)

# 예측 및 평가
y_pred = sgd.predict(x_test)
print('SGD 정확도: %.2f' % accuracy_score(y_test, y_pred))

SGD 정확도: 0.37


In [23]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

# 랜덤 포레스트 모델 학습
rf = RandomForestClassifier(n_estimators=100, random_state=42)
rf.fit(x_train, y_train)

# 예측 및 평가
y_pred = rf.predict(x_test)
print('Random Forest 정확도: %.2f' % accuracy_score(y_test, y_pred))

Random Forest 정확도: 0.40


In [24]:
from sklearn.neighbors import KNeighborsClassifier
# KNN 모델 학습 (k=5 사용)
knn = KNeighborsClassifier(n_neighbors=5)
knn.fit(x_train, y_train)

# 예측 및 정확도 평가
y_pred = knn.predict(x_test)
accuracy = accuracy_score(y_test, y_pred)

print(f"KNN 정확도: {accuracy:.4f}")

KNN 정확도: 0.3315


In [25]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

# 로지스틱 회귀 모델 학습
lr = LogisticRegression(max_iter=1000, random_state=42)
lr.fit(x_train, y_train)

# 예측 및 평가
y_pred = lr.predict(x_test)
print('Logistic Regression 정확도: %.2f' % accuracy_score(y_test, y_pred))

Logistic Regression 정확도: 0.43


In [26]:
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score

# 신경망 모델 학습
nn = MLPClassifier(hidden_layer_sizes=(100,), max_iter=1000, random_state=42)
nn.fit(x_train, y_train)

# 예측 및 평가
y_pred = nn.predict(x_test)
print('Neural Network 정확도: %.2f' % accuracy_score(y_test, y_pred))

Neural Network 정확도: 0.31




In [27]:
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

# svm 모델 학습
svm = SVC(kernel='linear', random_state=42)
svm.fit(x_train, y_train)

# 예측 및 평가
y_pred = svm.predict(x_test)
print('SVM 정확도: %.2f' % accuracy_score(y_test, y_pred))

SVM 정확도: 0.39


In [17]:
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score

# xgboost 모델 학습
xgb = XGBClassifier(n_estimators=1000, learning_rate=0.05)
xgb.fit(x_train, y_train)

# 예측 및 평가
y_pred = xgb.predict(x_test)
print('XGBoost 정확도: %.2f' % accuracy_score(y_test, y_pred))

XGBoost 정확도: 0.34


In [18]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler, LabelEncoder
from sklearn.metrics.pairwise import cosine_similarity


# 곡 제목(파일명) 따로 저장
song_names = df['file_name']

# 특징 벡터 추출
x = df.drop(columns=['file_name','genre'])

# 정규화
scaler = MinMaxScaler()
x_scaled = scaler.fit_transform(x)

# 유사도 계산
similarity_matrix = cosine_similarity(x_scaled)

# 유사도 매트릭스를 DataFrame으로 변환
similarity_df = pd.DataFrame(similarity_matrix, index=song_names, columns=song_names)

# 추천 함수 정의
def recommend(song_name, top_n=5):
    if song_name not in similarity_df.index:
        print(f"'{song_name}' 이 데이터에 없습니다.")
        return

    sims = similarity_df.loc[song_name].drop(index=song_name)
    top_similar = sims.sort_values(ascending=False).head(top_n)

    for i, (title, score) in enumerate(top_similar.items(), 1):
        print(f"{i}. {title} (유사도: {score:.3f})")

# 예시: 특정 곡과 유사한 곡 추천
recommend('Similar_Ballade_00058_Cover_Genre_A.wav')

1. Similar_Trot_00765_Cover_Genre_A.wav (유사도: 0.979)
2. Similar_Trot_00685_Cover_Genre_A.wav (유사도: 0.975)
3. Similar_Trot_00690_Cover_Genre_A.wav (유사도: 0.973)
4. Similar_Hiphop_00676_Cover_Genre_A.wav (유사도: 0.970)
5. Similar_RnB_00964_Cover_Genre_A.wav (유사도: 0.969)


In [None]:
import os
import librosa
import numpy as np
import pandas as pd

audio_folder = "./Training/TS"  # 데이터셋 경로
genres = [d for d in os.listdir(audio_folder) if os.path.isdir(os.path.join(audio_folder, d))]
print("장르 리스트:", genres)

audio_files = []
for root, dirs, files in os.walk(audio_folder):
    for file in files:
        if file.lower().endswith('.wav'):
            audio_files.append(os.path.join(root, file))

data = []

def extract_audio_features(audio_files):
    for audio_file in audio_files:
        y, sr = librosa.load(audio_file, sr=None)
        genre = "Unknown"
        for g in genres:
            if f"/{g}/" in audio_file or audio_file.startswith(os.path.join(audio_folder, g)):
                genre = g
                break
        
        # 피처 추출
        mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
        chroma = librosa.feature.chroma_stft(y=y, sr=sr)
        spectral_centroid = librosa.feature.spectral_centroid(y=y, sr=sr)
        spectral_contrast = librosa.feature.spectral_contrast(y=y, sr=sr)
        tonnetz = librosa.feature.tonnetz(y=librosa.effects.harmonic(y), sr=sr)
        rms = librosa.feature.rms(y=y)
        zcr = librosa.feature.zero_crossing_rate(y=y)

        min_frames = min(mfccs.shape[1], chroma.shape[1], spectral_centroid.shape[1],
                         spectral_contrast.shape[1], tonnetz.shape[1], rms.shape[1], zcr.shape[1])

        # 피처 행렬 생성 (프레임별 데이터 유지)
        features_matrix = np.vstack([
            mfccs[:, :min_frames],
            chroma[:, :min_frames],
            spectral_centroid[:, :min_frames],
            spectral_contrast[:, :min_frames],
            tonnetz[:, :min_frames],
            rms[:, :min_frames],
            zcr[:, :min_frames]
        ])

        # 구간별 요약 (평균, 표준편차)
        summary_features = {
            **{f"mfcc_{i+1}_mean": np.mean(mfccs[i, :]) for i in range(mfccs.shape[0])},
            **{f"mfcc_{i+1}_std": np.std(mfccs[i, :]) for i in range(mfccs.shape[0])},
            **{f"chroma_{i+1}_mean": np.mean(chroma[i, :]) for i in range(chroma.shape[0])},
            **{f"chroma_{i+1}_std": np.std(chroma[i, :]) for i in range(chroma.shape[0])},
            "spectral_centroid_mean": np.mean(spectral_centroid),
            "spectral_centroid_std": np.std(spectral_centroid),
            "spectral_contrast_mean": np.mean(spectral_contrast),
            "spectral_contrast_std": np.std(spectral_contrast),
            "tonnetz_mean": np.mean(tonnetz),
            "tonnetz_std": np.std(tonnetz),
            "rms_mean": np.mean(rms),
            "rms_std": np.std(rms),
            "zcr_mean": np.mean(zcr),
            "zcr_std": np.std(zcr),
        }

        file_features = {
            "file_name": os.path.basename(audio_file),
            "genre": genre,
            **summary_features
        }

        data.append(file_features)

# 개선된 피처 추출 실행
extract_audio_features(audio_files)
df = pd.DataFrame(data)
df.to_csv("improved_audio_features.csv", index=False)



장르 리스트: ['Ballade', 'RnB', 'Trot', 'Rock', 'Dance', 'Hiphop']
개선된 데이터셋 생성 완료!


In [30]:
df=pd.read_csv("improved_audio_features.csv")


# 특징과 라벨 분리
x = df.drop(columns=['file_name', 'genre'])
y = df['genre']

# 정규화
scaler = sklearn.preprocessing.MinMaxScaler()
x_scaled = scaler.fit_transform(x)

# 라벨 인코딩
le = LabelEncoder()
y_encoded = le.fit_transform(y)

# 학습/테스트 데이터 분할
x_train, x_test, y_train, y_test = train_test_split(x_scaled, y_encoded, test_size=0.2, random_state=42)


In [31]:
# Naive Bayes 모델 학습
nb = GaussianNB()
nb.fit(x_train, y_train)

# 예측 및 평가
y_pred = nb.predict(x_test)
print('Naive Bayes 정확도: %.2f' % accuracy_score(y_test, y_pred))


from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score

# 의사결정트리 모델 학습
dt = DecisionTreeClassifier(random_state=42)
dt.fit(x_train, y_train)

# 예측 및 평가
y_pred = dt.predict(x_test)
print('Decision Tree 정확도: %.2f' % accuracy_score(y_test, y_pred))


from sklearn.linear_model import SGDClassifier
from sklearn.metrics import accuracy_score

# 확률적 경사 하강법모델 학습
sgd = SGDClassifier(max_iter=1000, random_state=42)
sgd.fit(x_train, y_train)

# 예측 및 평가
y_pred = sgd.predict(x_test)
print('SGD 정확도: %.2f' % accuracy_score(y_test, y_pred))


from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

# 랜덤 포레스트 모델 학습
rf = RandomForestClassifier(n_estimators=100, random_state=42)
rf.fit(x_train, y_train)

# 예측 및 평가
y_pred = rf.predict(x_test)
print('Random Forest 정확도: %.2f' % accuracy_score(y_test, y_pred))


from sklearn.neighbors import KNeighborsClassifier
# KNN 모델 학습 (k=5 사용)
knn = KNeighborsClassifier(n_neighbors=5)
knn.fit(x_train, y_train)

# 예측 및 정확도 평가
y_pred = knn.predict(x_test)
accuracy = accuracy_score(y_test, y_pred)

print(f"KNN 정확도: {accuracy:.4f}")


from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

# 로지스틱 회귀 모델 학습
lr = LogisticRegression(max_iter=1000, random_state=42)
lr.fit(x_train, y_train)

# 예측 및 평가
y_pred = lr.predict(x_test)
print('Logistic Regression 정확도: %.2f' % accuracy_score(y_test, y_pred))


from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score

# 신경망 모델 학습
nn = MLPClassifier(hidden_layer_sizes=(100,), max_iter=1000, random_state=42)
nn.fit(x_train, y_train)

# 예측 및 평가
y_pred = nn.predict(x_test)
print('Neural Network 정확도: %.2f' % accuracy_score(y_test, y_pred))



from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

# svm 모델 학습
svm = SVC(kernel='linear', random_state=42)
svm.fit(x_train, y_train)

# 예측 및 평가
y_pred = svm.predict(x_test)
print('SVM 정확도: %.2f' % accuracy_score(y_test, y_pred))



from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score

# xgboost 모델 학습
xgb = XGBClassifier(n_estimators=1000, learning_rate=0.05)
xgb.fit(x_train, y_train)

# 예측 및 평가
y_pred = xgb.predict(x_test)
print('XGBoost 정확도: %.2f' % accuracy_score(y_test, y_pred))

Naive Bayes 정확도: 0.36
Decision Tree 정확도: 0.23
SGD 정확도: 0.36
Random Forest 정확도: 0.41
KNN 정확도: 0.3596
Logistic Regression 정확도: 0.39




Neural Network 정확도: 0.39
SVM 정확도: 0.39
XGBoost 정확도: 0.42


In [34]:
from xgboost import XGBClassifier
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
# 데이터 로드
df = pd.read_csv("improved_audio_features.csv")
X = df.drop(columns=["file_name", "genre"])  
y = df["genre"]  

# LabelEncoder로 y(장르) 인코딩
le = LabelEncoder()
y = le.fit_transform(y)

# 학습/테스트 데이터 분리
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# XGBoost 분류기 학습
model = XGBClassifier(use_label_encoder=False, eval_metric='mlogloss')
model.fit(X_train, y_train)

# 특성 중요도 추출
importances = model.feature_importances_
feature_names = X.columns
importance_df = pd.DataFrame({
    "feature": feature_names,
    "importance": importances
}).sort_values(by="importance", ascending=False)

# 상위 20개 피처 선택
n = 20  
top_features = importance_df.head(n)["feature"].tolist()



# 상위 20개 피처만 남긴 데이터셋 생성
X_top = X[top_features]
df_20 = pd.concat([df[["file_name", "genre"]], X_top], axis=1)

print(df_20.columns)

Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Index(['file_name', 'genre', 'tonnetz_std', 'spectral_contrast_mean',
       'mfcc_2_mean', 'chroma_11_mean', 'mfcc_12_mean', 'chroma_4_std',
       'spectral_contrast_std', 'chroma_3_mean', 'mfcc_5_std', 'zcr_std',
       'rms_std', 'mfcc_8_mean', 'mfcc_9_mean', 'mfcc_11_mean', 'chroma_5_std',
       'chroma_1_std', 'chroma_8_mean', 'mfcc_6_std', 'mfcc_4_std',
       'mfcc_3_std'],
      dtype='object')


In [35]:
# 특징과 라벨 분리
x = df_20.drop(columns=['file_name', 'genre'])
y = df_20['genre']

# 정규화
scaler = sklearn.preprocessing.MinMaxScaler()
x_scaled = scaler.fit_transform(x)

# 학습/테스트 데이터 분할
x_train, x_test, y_train, y_test = train_test_split(x_scaled, y_encoded, test_size=0.2, random_state=42)

# Naive Bayes 모델 학습
nb = GaussianNB()
nb.fit(x_train, y_train)

# 예측 및 평가
y_pred = nb.predict(x_test)
print('Naive Bayes 정확도: %.2f' % accuracy_score(y_test, y_pred))


from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score

# 의사결정트리 모델 학습
dt = DecisionTreeClassifier(random_state=42)
dt.fit(x_train, y_train)

# 예측 및 평가
y_pred = dt.predict(x_test)
print('Decision Tree 정확도: %.2f' % accuracy_score(y_test, y_pred))


from sklearn.linear_model import SGDClassifier
from sklearn.metrics import accuracy_score

# 확률적 경사 하강법모델 학습
sgd = SGDClassifier(max_iter=1000, random_state=42)
sgd.fit(x_train, y_train)

# 예측 및 평가
y_pred = sgd.predict(x_test)
print('SGD 정확도: %.2f' % accuracy_score(y_test, y_pred))


from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

# 랜덤 포레스트 모델 학습
rf = RandomForestClassifier(n_estimators=100, random_state=42)
rf.fit(x_train, y_train)

# 예측 및 평가
y_pred = rf.predict(x_test)
print('Random Forest 정확도: %.2f' % accuracy_score(y_test, y_pred))


from sklearn.neighbors import KNeighborsClassifier
# KNN 모델 학습 (k=5 사용)
knn = KNeighborsClassifier(n_neighbors=5)
knn.fit(x_train, y_train)

# 예측 및 정확도 평가
y_pred = knn.predict(x_test)
accuracy = accuracy_score(y_test, y_pred)

print(f"KNN 정확도: {accuracy:.4f}")


from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

# 로지스틱 회귀 모델 학습
lr = LogisticRegression(max_iter=1000, random_state=42)
lr.fit(x_train, y_train)

# 예측 및 평가
y_pred = lr.predict(x_test)
print('Logistic Regression 정확도: %.2f' % accuracy_score(y_test, y_pred))


from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score

# 신경망 모델 학습
nn = MLPClassifier(hidden_layer_sizes=(100,), max_iter=1000, random_state=42)
nn.fit(x_train, y_train)

# 예측 및 평가
y_pred = nn.predict(x_test)
print('Neural Network 정확도: %.2f' % accuracy_score(y_test, y_pred))



from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

# svm 모델 학습
svm = SVC(kernel='linear', random_state=42)
svm.fit(x_train, y_train)

# 예측 및 평가
y_pred = svm.predict(x_test)
print('SVM 정확도: %.2f' % accuracy_score(y_test, y_pred))



from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score

# xgboost 모델 학습
xgb = XGBClassifier(n_estimators=1000, learning_rate=0.05)
xgb.fit(x_train, y_train)

# 예측 및 평가
y_pred = xgb.predict(x_test)
print('XGBoost 정확도: %.2f' % accuracy_score(y_test, y_pred))

Naive Bayes 정확도: 0.38
Decision Tree 정확도: 0.29
SGD 정확도: 0.24
Random Forest 정확도: 0.39
KNN 정확도: 0.4045
Logistic Regression 정확도: 0.43




Neural Network 정확도: 0.39
SVM 정확도: 0.42
XGBoost 정확도: 0.38
