使用MFCC和ZCR作为特征，分类使用支持向量机和随机森林等机器学习模型进行分类

# 加载数据集

主要分为三步：
1. 读取文件和标签
2. 提取特征
3. 按照8:2划分训练集和验证集

In [1]:
import pandas as pd
import librosa
import numpy as np
from sklearn.model_selection import train_test_split


# 读取CSV文件
data = pd.read_csv('/home/nlp/songcw/data/ESC-50-master/meta/esc50.csv')  # 将'your_file.csv'替换为实际的文件名

# 获取音频文件名和对应的标签
audio_filenames = data['filename'].tolist()
labels = data['target'].tolist()

# 为音频文件名添加前缀路径
audio_files = ["/home/nlp/songcw/data/ESC-50-master/audio/" + filename for filename in audio_filenames]


# 提取MFCC和ZCR特征的函数
def extract_features(audio_files):
    all_mfcc = []
    all_zcr = []
    for file in audio_files:
        y, sr = librosa.load(file)
        # 提取MFCC特征
        mfcc = librosa.feature.mfcc(y=y, sr=sr)
        mfcc_mean = np.mean(mfcc, axis=1)
        all_mfcc.append(mfcc_mean)
        # 提取ZCR特征
        zcr = librosa.feature.zero_crossing_rate(y)
        mean_zcr = np.mean(zcr)
        all_zcr.append(mean_zcr)
    return np.array(all_mfcc), np.array(all_zcr)

# 划分数据集为训练集和测试集（这里按8:2划分，可按需调整）
X_mfcc, X_zcr = extract_features(audio_files)
X = np.concatenate((X_mfcc, X_zcr.reshape(-1, 1)), axis=1)  # 拼接MFCC和ZCR特征
y = np.array(labels)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

注意：这里还可以提取更多特征，另外还可以进行数据降维

# 使用支持向量机进行分类

In [2]:
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

# 使用SVM进行分类
svm_model = SVC(kernel='linear')  # 可尝试不同核函数，如'rbf'等
svm_model.fit(X_train, y_train)
svm_pred = svm_model.predict(X_test)
svm_accuracy = accuracy_score(y_test, svm_pred)
print("SVM准确率:", svm_accuracy)

SVM准确率: 0.3425


# 使用随机森林进行分类

In [3]:
from sklearn.ensemble import RandomForestClassifier

# 使用随机森林进行分类
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)  # 可调整树的数量等超参数
rf_model.fit(X_train, y_train)
rf_pred = rf_model.predict(X_test)
rf_accuracy = accuracy_score(y_test, rf_pred)
print("随机森林准确率:", rf_accuracy)

随机森林准确率: 0.45


# 使用K近邻算法

In [4]:
from sklearn.neighbors import KNeighborsClassifier
# 创建KNN分类器，这里假设K = 5，可根据实际情况调整
knn_model = KNeighborsClassifier(n_neighbors = 5)
knn_model.fit(X_train, y_train)
knn_pred = knn_model.predict(X_test)
knn_accuracy = accuracy_score(y_test, knn_pred)
print("KNN准确率:", knn_accuracy)

KNN准确率: 0.2725


# 使用朴素贝叶斯

In [5]:
from sklearn.naive_bayes import GaussianNB
# 创建高斯朴素贝叶斯分类器（假设特征服从高斯分布，还有其他类型朴素贝叶斯分类器）
nb_model = GaussianNB()
nb_model.fit(X_train, y_train)
nb_pred = nb_model.predict(X_test)
nb_accuracy = accuracy_score(y_test, nb_pred)
print("朴素贝叶斯准确率:", nb_accuracy)

朴素贝叶斯准确率: 0.2925


# 使用决策树

In [6]:
from sklearn.tree import DecisionTreeClassifier
# 创建决策树分类器，可调整参数如max_depth（树的最大深度）等来优化
dt_model = DecisionTreeClassifier(max_depth = 5)
dt_model.fit(X_train, y_train)
dt_pred = dt_model.predict(X_test)
dt_accuracy = accuracy_score(y_test, dt_pred)
print("决策树准确率:", dt_accuracy)

决策树准确率: 0.1125


# 使用梯度提升树

In [7]:
from sklearn.ensemble import GradientBoostingClassifier
# 创建梯度提升树分类器，可调整参数如n_estimators（树的数量）、learning_rate（学习率）等
gb_model = GradientBoostingClassifier(n_estimators = 100, learning_rate = 0.1)
gb_model.fit(X_train, y_train)
gb_pred = gb_model.predict(X_test)
gb_accuracy = accuracy_score(y_test, gb_pred)
print("梯度提升树准确率:", gb_accuracy)

梯度提升树准确率: 0.3275


# 使用XGBoost

# 使用LightGBM

In [9]:
import lightgbm as lgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
# 其他代码部分（数据读取、特征提取、数据集划分等）同上，此处省略重复代码

# 将训练集数据转换为LightGBM所需的数据格式（Dataset）
lgb_train = lgb.Dataset(X_train, label=y_train)
lgb_eval = lgb.Dataset(X_test, label=y_test, reference=lgb_train)

# 设置LightGBM的参数，可根据实际情况进行调优
params = {
    'task': 'train',
    'boosting_type': 'gbdt',
    'objective': 'multiclass',  # 假设是多分类任务，若是二分类可换为'binary'
    'num_class': len(np.unique(y_train)),  # 类别数量
    'learning_rate': 0.1,
    'max_depth': 3
}

# 训练LightGBM模型
gbm = lgb.train(params, lgb_train, num_boost_round=100, valid_sets=lgb_eval)

# 使用训练好的模型进行预测
lgb_pred = gbm.predict(X_test)
lgb_pred = np.argmax(lgb_pred, axis=-1)  # 获取预测类别索引
lgb_accuracy = accuracy_score(y_test, lgb_pred)
print("LightGBM准确率:", lgb_accuracy)

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000303 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 5355
[LightGBM] [Info] Number of data points in the train set: 1600, number of used features: 21
[LightGBM] [Info] Start training from score -3.851398
[LightGBM] [Info] Start training from score -3.851398
[LightGBM] [Info] Start training from score -3.943772
[LightGBM] [Info] Start training from score -4.081922
[LightGBM] [Info] Start training from score -4.010463
[LightGBM] [Info] Start training from score -4.045554
[LightGBM] [Info] Start training from score -4.045554
[LightGBM] [Info] Start training from score -3.851398
[LightGBM] [Info] Start training from score -3.976562
[LightGBM] [Info] Start training from score -3.851398
[LightGBM] [Info] Start training from score -3.822411
[LightGBM] [Info] Start training from score -4.045554
[LightGBM] [Info] Start training from score -3.943772
[LightGBM]

# 使用AdaBoost

In [10]:
from sklearn.ensemble import AdaBoostClassifier
# 其他代码部分（数据读取、特征提取、数据集划分等）同上，此处省略重复代码

# 使用AdaBoost进行分类，这里不指定base_estimator，用默认的决策树桩作为弱分类器
ada_model = AdaBoostClassifier(n_estimators=50)  # 可调整弱分类器数量等参数
ada_model.fit(X_train, y_train)
ada_pred = ada_model.predict(X_test)
ada_accuracy = accuracy_score(y_test, ada_pred)
print("AdaBoost准确率:", ada_accuracy)

AdaBoost准确率: 0.07


# 使用逻辑回归

In [11]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
# 其他代码部分（数据读取、特征提取、数据集划分等）同上，此处省略重复代码

# 创建逻辑回归模型，对于多分类设置'multi_class'参数为'ovr'（One-vs-Rest）或'multinomial'等，这里以'ovr'为例
logistic_reg_model = LogisticRegression(multi_class='ovr')
logistic_reg_model.fit(X_train, y_train)

# 在测试集上进行预测
y_pred = logistic_reg_model.predict(X_test)

# 计算准确率来评估模型性能
accuracy = accuracy_score(y_test, y_pred)
print("逻辑回归准确率:", accuracy)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


逻辑回归准确率: 0.285


# 使用K均值聚类

In [12]:
from sklearn.cluster import KMeans
from sklearn.metrics import adjusted_rand_score
# 其他代码部分（数据读取、特征提取、数据集划分等）同上，此处省略重复代码

# 假设我们事先知道大致的类别数量，设置聚类的簇数量K（这里假设为真实的类别数，实际中可能需要探索合适的K值）
n_clusters = len(np.unique(y_train))
kmeans_model = KMeans(n_clusters=n_clusters, random_state=42)

# 对数据进行聚类
kmeans_model.fit(X)

# 获取聚类后的标签（簇分配结果）
cluster_labels = kmeans_model.labels_

# 使用调整兰德系数（Adjusted Rand Score）来衡量聚类结果与真实标签的相似程度，范围在-1到1之间，越接近1表示越好
ari = adjusted_rand_score(y, cluster_labels)
print("K均值聚类调整兰德系数:", ari)

K均值聚类调整兰德系数: 0.0682501510680304
