In [2]:
import pandas as pd
import sklearn
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

df=pd.read_csv("audio_features_with_genre.csv")

# 특징과 라벨 분리
x = df.drop(columns=['file_name', 'genre'])
y = df['genre']

# 정규화
scaler = sklearn.preprocessing.MinMaxScaler()
x_scaled = scaler.fit_transform(x)

# 라벨 인코딩
le = LabelEncoder()
y_encoded = le.fit_transform(y)

# 학습/테스트 데이터 분할
x_train, x_test, y_train, y_test = train_test_split(x_scaled, y_encoded, test_size=0.2, random_state=42)


In [3]:
# Naive Bayes 모델 학습
nb = GaussianNB()
nb.fit(x_train, y_train)

# 예측 및 평가
y_pred = nb.predict(x_test)
print('Naive Bayes 정확도: %.2f' % accuracy_score(y_test, y_pred))

Naive Bayes 정확도: 0.40


In [4]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score

# 의사결정트리 모델 학습
dt = DecisionTreeClassifier(random_state=42)
dt.fit(x_train, y_train)

# 예측 및 평가
y_pred = dt.predict(x_test)
print('Decision Tree 정확도: %.2f' % accuracy_score(y_test, y_pred))

Decision Tree 정확도: 0.30


In [5]:
from sklearn.linear_model import SGDClassifier
from sklearn.metrics import accuracy_score

# 확률적 경사 하강법모델 학습
sgd = SGDClassifier(max_iter=1000, random_state=42)
sgd.fit(x_train, y_train)

# 예측 및 평가
y_pred = sgd.predict(x_test)
print('SGD 정확도: %.2f' % accuracy_score(y_test, y_pred))

SGD 정확도: 0.37


In [6]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

# 랜덤 포레스트 모델 학습
rf = RandomForestClassifier(n_estimators=100, random_state=42)
rf.fit(x_train, y_train)

# 예측 및 평가
y_pred = rf.predict(x_test)
print('Random Forest 정확도: %.2f' % accuracy_score(y_test, y_pred))

Random Forest 정확도: 0.40


In [7]:
from sklearn.neighbors import KNeighborsClassifier
# KNN 모델 학습 (k=5 사용)
knn = KNeighborsClassifier(n_neighbors=5)
knn.fit(x_train, y_train)

# 예측 및 정확도 평가
y_pred = knn.predict(x_test)
accuracy = accuracy_score(y_test, y_pred)

print(f"KNN 정확도: {accuracy:.4f}")

KNN 정확도: 0.3315


In [9]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

# 로지스틱 회귀 모델 학습
lr = LogisticRegression(max_iter=1000, random_state=42)
lr.fit(x_train, y_train)

# 예측 및 평가
y_pred = lr.predict(x_test)
print('Logistic Regression 정확도: %.2f' % accuracy_score(y_test, y_pred))

Logistic Regression 정확도: 0.43


In [10]:
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score

# 신경망 모델 학습
nn = MLPClassifier(hidden_layer_sizes=(100,), max_iter=1000, random_state=42)
nn.fit(x_train, y_train)

# 예측 및 평가
y_pred = nn.predict(x_test)
print('Neural Network 정확도: %.2f' % accuracy_score(y_test, y_pred))

Neural Network 정확도: 0.31




In [11]:
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

# svm 모델 학습
svm = SVC(kernel='linear', random_state=42)
svm.fit(x_train, y_train)

# 예측 및 평가
y_pred = svm.predict(x_test)
print('SVM 정확도: %.2f' % accuracy_score(y_test, y_pred))

SVM 정확도: 0.39


In [14]:
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score

# xgboost 모델 학습
xgb = XGBClassifier(n_estimators=1000, learning_rate=0.05)
xgb.fit(x_train, y_train)

# 예측 및 평가
y_pred = xgb.predict(x_test)
print('XGBoost 정확도: %.2f' % accuracy_score(y_test, y_pred))

XGBoost 정확도: 0.34


In [17]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler, LabelEncoder
from sklearn.metrics.pairwise import cosine_similarity


# 곡 제목(파일명) 따로 저장
song_names = df['file_name']

# 특징 벡터 추출
x = df.drop(columns=['file_name','genre'])

# 정규화
scaler = MinMaxScaler()
x_scaled = scaler.fit_transform(x)

# 유사도 계산
similarity_matrix = cosine_similarity(x_scaled)

# 유사도 매트릭스를 DataFrame으로 변환
similarity_df = pd.DataFrame(similarity_matrix, index=song_names, columns=song_names)

# 추천 함수 정의
def recommend(song_name, top_n=5):
    if song_name not in similarity_df.index:
        print(f"'{song_name}' 이 데이터에 없습니다.")
        return

    sims = similarity_df.loc[song_name].drop(index=song_name)
    top_similar = sims.sort_values(ascending=False).head(top_n)

    for i, (title, score) in enumerate(top_similar.items(), 1):
        print(f"{i}. {title} (유사도: {score:.3f})")

# 예시: 특정 곡과 유사한 곡 추천
recommend('Similar_Ballade_00058_Cover_Genre_A.wav')

1. Similar_Trot_00765_Cover_Genre_A.wav (유사도: 0.979)
2. Similar_Trot_00685_Cover_Genre_A.wav (유사도: 0.975)
3. Similar_Trot_00690_Cover_Genre_A.wav (유사도: 0.973)
4. Similar_Hiphop_00676_Cover_Genre_A.wav (유사도: 0.970)
5. Similar_RnB_00964_Cover_Genre_A.wav (유사도: 0.969)
