In [23]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report, accuracy_score
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler

# 載入資料
multi_data = pd.read_csv("data.csv")
binary_data = pd.read_csv("data_2genre.csv")

# 確保 feature 和 label 是分開的
features = [
    "tempo", "beats", "chroma_stft", "rmse", "spectral_centroid", 
    "spectral_bandwidth", "rolloff", "zero_crossing_rate", 
    "mfcc1", "mfcc2", "mfcc3", "mfcc4", "mfcc5", "mfcc6", "mfcc7",
    "mfcc8", "mfcc9", "mfcc10", "mfcc11", "mfcc12", "mfcc13", 
    "mfcc14", "mfcc15", "mfcc16", "mfcc17", "mfcc18", "mfcc19", "mfcc20"
]
# multi_data.info()
# binary_data.info()
# 多分類資料集
X_multi = multi_data[features]
y_multi = multi_data["label"]

# 二元分類資料集
X_binary = binary_data[features]
y_binary = binary_data["label"]

In [3]:
print(y_multi.value_counts())

label
blues        100
classical    100
country      100
disco        100
hiphop       100
jazz         100
metal        100
pop          100
reggae       100
rock         100
Name: count, dtype: int64


In [7]:
# 將資料分成訓練集與測試集
X_train_multi, X_test_multi, y_train_multi, y_test_multi = train_test_split(
    X_multi, y_multi, test_size=0.2, random_state=42)

X_train_binary, X_test_binary, y_train_binary, y_test_binary = train_test_split(
    X_binary, y_binary, test_size=0.2, random_state=42)
# 正規
scaler = StandardScaler()
X_train_multi = scaler.fit_transform(X_train_multi)
X_test_multi = scaler.transform(X_test_multi)

X_train_binary = scaler.fit_transform(X_train_binary)
X_test_binary = scaler.transform(X_test_binary)

In [9]:
print("------------------------KNeighborsClassifier----------------------")
# 初始化 KNeighborsClassifier 
knn_multi = KNeighborsClassifier(n_neighbors=3)
knn_binary = KNeighborsClassifier(n_neighbors=7)
# 訓練模型
knn_multi.fit(X_train_multi, y_train_multi)
knn_binary.fit(X_train_binary, y_train_binary)
# 預測並評估多分類資料集
y_pred_multi = knn_multi.predict(X_test_multi)
print("KNeighborsClassifier 多分類結果:")
print(classification_report(y_test_multi, y_pred_multi))
print("Accuracy:", accuracy_score(y_test_multi, y_pred_multi))
# 預測並評估二元分類資料集
y_pred_binary = knn_binary.predict(X_test_binary)
print("KNeighborsClassifier 二元分類結果:")
print(classification_report(y_test_binary, y_pred_binary))
print("Accuracy:", accuracy_score(y_test_binary, y_pred_binary))
print("--------------------KNeighborsClassifier--------------------")

-------------KNeighborsClassifier-----------
KNeighborsClassifier 多分類結果:
              precision    recall  f1-score   support

       blues       0.59      0.85      0.69        20
   classical       0.75      0.92      0.83        13
     country       0.50      0.78      0.61        27
       disco       0.32      0.29      0.30        21
      hiphop       0.35      0.40      0.38        15
        jazz       0.77      0.45      0.57        22
       metal       0.90      0.72      0.80        25
         pop       0.63      0.92      0.75        13
      reggae       0.73      0.35      0.47        23
        rock       0.43      0.29      0.34        21

    accuracy                           0.58       200
   macro avg       0.60      0.60      0.57       200
weighted avg       0.60      0.58      0.57       200

Accuracy: 0.58
KNeighborsClassifier 二元分類結果:
              precision    recall  f1-score   support

           1       0.95      1.00      0.98        21
           2   

In [20]:
print(X_train_multi.min())
print(X_train_binary.min())

-4.865954593721961
-4.148213095228537


In [25]:
print("------------------------MultinomialNB----------------------")
from sklearn.naive_bayes import MultinomialNB
from sklearn.preprocessing import MinMaxScaler

# 創建不同的 scaler
scaler_multi_nb = MinMaxScaler()
scaler_binary_nb = MinMaxScaler()

# 縮放多分類數據
X_train_multi_scaled = scaler_multi_nb.fit_transform(X_train_multi)
X_test_multi_scaled = scaler_multi_nb.transform(X_test_multi)

# 縮放二元分類數據
X_train_binary_scaled = scaler_binary_nb.fit_transform(X_train_binary)
X_test_binary_scaled = scaler_binary_nb.transform(X_test_binary)

# 初始化 MultinomialNB
multi_nb = MultinomialNB()
binary_nb = MultinomialNB()

# 訓練模型
multi_nb.fit(X_train_multi_scaled, y_train_multi)
binary_nb.fit(X_train_binary_scaled, y_train_binary)

# 預測並評估多分類資料集
y_pred_multi = multi_nb.predict(X_test_multi_scaled)
print("MultinomialNB 多分類結果:")
print(classification_report(y_test_multi, y_pred_multi))
print("Accuracy:", accuracy_score(y_test_multi, y_pred_multi))

# 預測並評估二元分類資料集
y_pred_binary = binary_nb.predict(X_test_binary_scaled)
print("MultinomialNB 二元分類結果:")
print(classification_report(y_test_binary, y_pred_binary))
print("Accuracy:", accuracy_score(y_test_binary, y_pred_binary))
print("--------------------MultinomialNB--------------------")


------------------------MultinomialNB----------------------
MultinomialNB 多分類結果:
              precision    recall  f1-score   support

       blues       0.15      0.25      0.19        20
   classical       0.39      0.85      0.54        13
     country       0.00      0.00      0.00        27
       disco       0.33      0.05      0.08        21
      hiphop       0.40      0.40      0.40        15
        jazz       0.45      0.23      0.30        22
       metal       0.51      0.92      0.66        25
         pop       0.28      1.00      0.43        13
      reggae       0.33      0.13      0.19        23
        rock       0.14      0.05      0.07        21

    accuracy                           0.34       200
   macro avg       0.30      0.39      0.29       200
weighted avg       0.29      0.34      0.27       200

Accuracy: 0.34
MultinomialNB 二元分類結果:
              precision    recall  f1-score   support

           1       1.00      1.00      1.00        21
           2  

In [12]:
print("------------------------DecisionTreeClassifier----------------------")
from sklearn.tree import DecisionTreeClassifier

# 初始化 DecisionTreeClassifier
tree_multi = DecisionTreeClassifier(max_depth=10)
tree_binary = DecisionTreeClassifier(max_depth=10)

# 訓練模型
tree_multi.fit(X_train_multi, y_train_multi)
tree_binary.fit(X_train_binary, y_train_binary)

# 預測並評估多分類資料集
y_pred_multi = tree_multi.predict(X_test_multi)
print("DecisionTreeClassifier 多分類結果:")
print(classification_report(y_test_multi, y_pred_multi))
print("Accuracy:", accuracy_score(y_test_multi, y_pred_multi))

# 預測並評估二元分類資料集
y_pred_binary = tree_binary.predict(X_test_binary)
print("DecisionTreeClassifier 二元分類結果:")
print(classification_report(y_test_binary, y_pred_binary))
print("Accuracy:", accuracy_score(y_test_binary, y_pred_binary))
print("--------------------DecisionTreeClassifier--------------------")


------------------------DecisionTreeClassifier----------------------
DecisionTreeClassifier 多分類結果:
              precision    recall  f1-score   support

       blues       0.42      0.25      0.31        20
   classical       0.71      0.77      0.74        13
     country       0.47      0.33      0.39        27
       disco       0.31      0.43      0.36        21
      hiphop       0.33      0.47      0.39        15
        jazz       0.33      0.45      0.38        22
       metal       0.76      0.76      0.76        25
         pop       0.56      0.69      0.62        13
      reggae       0.41      0.30      0.35        23
        rock       0.24      0.19      0.21        21

    accuracy                           0.45       200
   macro avg       0.46      0.46      0.45       200
weighted avg       0.45      0.45      0.44       200

Accuracy: 0.445
DecisionTreeClassifier 二元分類結果:
              precision    recall  f1-score   support

           1       1.00      0.95      0

In [13]:
print("------------------------LinearSVC----------------------")
from sklearn.svm import LinearSVC

# 初始化 LinearSVC
svc_multi = LinearSVC(max_iter=10000)
svc_binary = LinearSVC(max_iter=10000)

# 訓練模型
svc_multi.fit(X_train_multi, y_train_multi)
svc_binary.fit(X_train_binary, y_train_binary)

# 預測並評估多分類資料集
y_pred_multi = svc_multi.predict(X_test_multi)
print("LinearSVC 多分類結果:")
print(classification_report(y_test_multi, y_pred_multi))
print("Accuracy:", accuracy_score(y_test_multi, y_pred_multi))

# 預測並評估二元分類資料集
y_pred_binary = svc_binary.predict(X_test_binary)
print("LinearSVC 二元分類結果:")
print(classification_report(y_test_binary, y_pred_binary))
print("Accuracy:", accuracy_score(y_test_binary, y_pred_binary))
print("--------------------LinearSVC--------------------")


------------------------LinearSVC----------------------
LinearSVC 多分類結果:
              precision    recall  f1-score   support

       blues       0.65      0.55      0.59        20
   classical       0.72      1.00      0.84        13
     country       0.58      0.52      0.55        27
       disco       0.67      0.38      0.48        21
      hiphop       0.48      0.67      0.56        15
        jazz       0.67      0.73      0.70        22
       metal       0.79      0.88      0.83        25
         pop       0.61      0.85      0.71        13
      reggae       0.48      0.48      0.48        23
        rock       0.40      0.29      0.33        21

    accuracy                           0.61       200
   macro avg       0.60      0.63      0.61       200
weighted avg       0.60      0.61      0.60       200

Accuracy: 0.61
LinearSVC 二元分類結果:
              precision    recall  f1-score   support

           1       1.00      1.00      1.00        21
           2       1.00   

In [14]:
print("------------------------SVC----------------------")
from sklearn.svm import SVC

# 初始化 SVC
svc_multi = SVC(kernel='rbf')
svc_binary = SVC(kernel='rbf')

# 訓練模型
svc_multi.fit(X_train_multi, y_train_multi)
svc_binary.fit(X_train_binary, y_train_binary)

# 預測並評估多分類資料集
y_pred_multi = svc_multi.predict(X_test_multi)
print("SVC 多分類結果:")
print(classification_report(y_test_multi, y_pred_multi))
print("Accuracy:", accuracy_score(y_test_multi, y_pred_multi))

# 預測並評估二元分類資料集
y_pred_binary = svc_binary.predict(X_test_binary)
print("SVC 二元分類結果:")
print(classification_report(y_test_binary, y_pred_binary))
print("Accuracy:", accuracy_score(y_test_binary, y_pred_binary))
print("--------------------SVC--------------------")


------------------------SVC----------------------
SVC 多分類結果:
              precision    recall  f1-score   support

       blues       0.60      0.60      0.60        20
   classical       0.80      0.92      0.86        13
     country       0.54      0.56      0.55        27
       disco       0.50      0.52      0.51        21
      hiphop       0.39      0.60      0.47        15
        jazz       0.67      0.55      0.60        22
       metal       0.72      0.84      0.78        25
         pop       0.72      1.00      0.84        13
      reggae       0.50      0.30      0.38        23
        rock       0.38      0.24      0.29        21

    accuracy                           0.58       200
   macro avg       0.58      0.61      0.59       200
weighted avg       0.57      0.58      0.57       200

Accuracy: 0.585
SVC 二元分類結果:
              precision    recall  f1-score   support

           1       0.95      1.00      0.98        21
           2       1.00      0.95      0.97

In [26]:
print("------------------------MLPClassifier----------------------")
from sklearn.neural_network import MLPClassifier

# 初始化 MLPClassifier
mlp_multi = MLPClassifier(hidden_layer_sizes=(100,), max_iter=7000)
mlp_binary = MLPClassifier(hidden_layer_sizes=(100,), max_iter=7000)

# 訓練模型
mlp_multi.fit(X_train_multi, y_train_multi)
mlp_binary.fit(X_train_binary, y_train_binary)

# 預測並評估多分類資料集
y_pred_multi = mlp_multi.predict(X_test_multi)
print("MLPClassifier 多分類結果:")
print(classification_report(y_test_multi, y_pred_multi))
print("Accuracy:", accuracy_score(y_test_multi, y_pred_multi))

# 預測並評估二元分類資料集
y_pred_binary = mlp_binary.predict(X_test_binary)
print("MLPClassifier 二元分類結果:")
print(classification_report(y_test_binary, y_pred_binary))
print("Accuracy:", accuracy_score(y_test_binary, y_pred_binary))
print("--------------------MLPClassifier--------------------")


------------------------MLPClassifier----------------------
MLPClassifier 多分類結果:
              precision    recall  f1-score   support

       blues       0.73      0.55      0.63        20
   classical       0.82      0.69      0.75        13
     country       0.69      0.74      0.71        27
       disco       0.67      0.48      0.56        21
      hiphop       0.65      0.73      0.69        15
        jazz       0.77      0.77      0.77        22
       metal       0.80      0.80      0.80        25
         pop       0.65      1.00      0.79        13
      reggae       0.52      0.57      0.54        23
        rock       0.43      0.43      0.43        21

    accuracy                           0.67       200
   macro avg       0.67      0.68      0.67       200
weighted avg       0.67      0.67      0.66       200

Accuracy: 0.665
MLPClassifier 二元分類結果:
              precision    recall  f1-score   support

           1       1.00      1.00      1.00        21
           2 

In [18]:
print("------------------------RandomForestClassifier----------------------")
from sklearn.ensemble import RandomForestClassifier

# 初始化 RandomForestClassifier
rf_multi = RandomForestClassifier(n_estimators=100, max_depth=10)
rf_binary = RandomForestClassifier(n_estimators=100, max_depth=10)

# 訓練模型
rf_multi.fit(X_train_multi, y_train_multi)
rf_binary.fit(X_train_binary, y_train_binary)

# 預測並評估多分類資料集
y_pred_multi = rf_multi.predict(X_test_multi)
print("RandomForestClassifier 多分類結果:")
print(classification_report(y_test_multi, y_pred_multi))
print("Accuracy:", accuracy_score(y_test_multi, y_pred_multi))

# 預測並評估二元分類資料集
y_pred_binary = rf_binary.predict(X_test_binary)
print("RandomForestClassifier 二元分類結果:")
print(classification_report(y_test_binary, y_pred_binary))
print("Accuracy:", accuracy_score(y_test_binary, y_pred_binary))
print("--------------------RandomForestClassifier--------------------")


------------------------RandomForestClassifier----------------------
RandomForestClassifier 多分類結果:
              precision    recall  f1-score   support

       blues       0.58      0.55      0.56        20
   classical       0.68      1.00      0.81        13
     country       0.50      0.52      0.51        27
       disco       0.82      0.43      0.56        21
      hiphop       0.43      0.60      0.50        15
        jazz       0.62      0.59      0.60        22
       metal       0.75      0.84      0.79        25
         pop       0.61      0.85      0.71        13
      reggae       0.47      0.39      0.43        23
        rock       0.44      0.33      0.38        21

    accuracy                           0.58       200
   macro avg       0.59      0.61      0.59       200
weighted avg       0.59      0.58      0.58       200

Accuracy: 0.585
RandomForestClassifier 二元分類結果:
              precision    recall  f1-score   support

           1       1.00      0.95      0

In [19]:
print("------------------------GradientBoostingClassifier----------------------")
from sklearn.ensemble import GradientBoostingClassifier

# 初始化 GradientBoostingClassifier
gb_multi = GradientBoostingClassifier(n_estimators=100, learning_rate=0.1, max_depth=3)
gb_binary = GradientBoostingClassifier(n_estimators=100, learning_rate=0.1, max_depth=3)

# 訓練模型
gb_multi.fit(X_train_multi, y_train_multi)
gb_binary.fit(X_train_binary, y_train_binary)

# 預測並評估多分類資料集
y_pred_multi = gb_multi.predict(X_test_multi)
print("GradientBoostingClassifier 多分類結果:")
print(classification_report(y_test_multi, y_pred_multi))
print("Accuracy:", accuracy_score(y_test_multi, y_pred_multi))

# 預測並評估二元分類資料集
y_pred_binary = gb_binary.predict(X_test_binary)
print("GradientBoostingClassifier 二元分類結果:")
print(classification_report(y_test_binary, y_pred_binary))
print("Accuracy:", accuracy_score(y_test_binary, y_pred_binary))
print("--------------------GradientBoostingClassifier--------------------")


------------------------GradientBoostingClassifier----------------------
GradientBoostingClassifier 多分類結果:
              precision    recall  f1-score   support

       blues       0.59      0.65      0.62        20
   classical       0.72      1.00      0.84        13
     country       0.56      0.52      0.54        27
       disco       0.47      0.38      0.42        21
      hiphop       0.56      0.60      0.58        15
        jazz       0.62      0.59      0.60        22
       metal       0.83      0.80      0.82        25
         pop       0.58      0.85      0.69        13
      reggae       0.71      0.52      0.60        23
        rock       0.29      0.29      0.29        21

    accuracy                           0.59       200
   macro avg       0.59      0.62      0.60       200
weighted avg       0.59      0.59      0.59       200

Accuracy: 0.595
GradientBoostingClassifier 二元分類結果:
              precision    recall  f1-score   support

           1       1.00     