In [26]:
import os
import librosa
import matplotlib
import numpy as np
import matplotlib.pyplot as plt
from scipy.fft import fft
import librosa.display

plt.figure(dpi=600)
matplotlib.rc("font")
matplotlib.rcParams['axes.unicode_minus']=False

<Figure size 3840x2880 with 0 Axes>

In [27]:
# 文件目录
FilePath = "/Users/qinjianxun/Downloads/audios/archive/Respiratory_Sound_Database/Respiratory_Sound_Database/audio_and_txt_files"
TargetPath = "./data/kaggle"

if not os.path.exists(TargetPath):
    os.makedirs(TargetPath)

# 所有文件名
files = os.listdir(FilePath)

# 所有txt文件
filenames = [f.split('.')[0] for f in files if f.endswith('txt')]

In [28]:
import io
import csv

def write_csv(filename:str, data_list:list):
    '''
    写入csv
    ''' 
    keys = data_list[0].keys()
    with open(filename, 'w', newline='') as output_file:
        dict_writer = csv.DictWriter(output_file, keys)
        dict_writer.writeheader()
        dict_writer.writerows(data_list)

def print_csv(results: list[dict]):
    '''
    输出csv到终端
    '''
    output = io.StringIO()
    writer = csv.DictWriter(output, fieldnames=results[0].keys(), delimiter=',')
    writer.writeheader()
    writer.writerows(results)

    print(output.getvalue())

# 获取整段音频特征

In [29]:
def run_one(group: str, filename: str):
    y, sr = librosa.load(filename, sr=None)
    
    chroma_stft = librosa.feature.chroma_stft(y=y, sr=sr)
    rmse = librosa.feature.rms(y=y)
    spectral_centroid = librosa.feature.spectral_centroid(y=y, sr=sr)
    spectral_bandwidth = librosa.feature.spectral_bandwidth(y=y, sr=sr)
    spectral_rolloff = librosa.feature.spectral_rolloff(y=y, sr=sr)
    zcr_nums = librosa.zero_crossings(y, pad=False)
    zcr = librosa.feature.zero_crossing_rate(y)
    
    result = {
        "group": f'type-{group}',
        "chroma_stft": np.mean(chroma_stft),
        "rmse": np.mean(rmse),
        "spectral_centroid": np.mean(spectral_centroid),
        "spectral_bandwidth": np.mean(spectral_bandwidth),
        "spectral_rolloff": np.mean(spectral_rolloff),
        "zcr_num": sum(zcr_nums),
        "zcr": np.mean(zcr),
    } 
    
    mfcc = librosa.feature.mfcc(y=y, sr=sr)
    idx = 0
    for e in mfcc:
        idx += 1
        result[f'mfcc_{idx}'] = np.mean(e)
        
    return result

In [30]:
def main():
    # 存储每个呼吸周期的峰值
    results = []
    path = './data/kaggle/'
    
    for g in ["00", "01", "10", "11"]:
        path_dir = os.path.join(path, g)
        tmp = list(g)
        for f in os.listdir(path_dir):  
            if not f.endswith('.wav'):
                continue
            file_wav = os.path.join(path_dir, f)

            result = run_one(g, file_wav)
            # 添加
            results.append(result)
        print('batch %s done', g)
        
    write_csv("features.csv", results)
    print('All Done')

In [31]:
main()

batch %s done 00
batch %s done 01
batch %s done 10
batch %s done 11
All Done


# 模型训练

In [145]:
import pandas as pd
import numpy as np

In [146]:
df = pd.read_csv("features.csv")
df.head(5)

Unnamed: 0,group,chroma_stft,rmse,spectral_centroid,spectral_bandwidth,spectral_rolloff,zcr_num,zcr,mfcc_1,mfcc_2,...,mfcc_11,mfcc_12,mfcc_13,mfcc_14,mfcc_15,mfcc_16,mfcc_17,mfcc_18,mfcc_19,mfcc_20
0,type-00,0.556985,0.085817,88.714968,123.714233,158.751371,1206,0.020379,-387.8893,152.08089,...,4.354294,7.36783,7.364809,3.39084,-1.11089,-0.61973,2.793594,4.918705,4.073652,2.081417
1,type-00,0.80808,0.078067,1673.894861,4072.014039,2972.356877,5484,0.006209,-374.35577,87.43144,...,10.431167,9.05808,7.436806,5.599691,3.854623,2.706923,2.44609,2.788692,3.249979,3.392334
2,type-00,0.739392,0.003408,1191.565172,3357.838078,1258.936284,6275,0.007106,-676.18475,153.1949,...,4.379502,3.920803,2.489913,4.005516,4.687814,6.088821,4.997719,4.488901,2.394674,2.082758
3,type-00,0.532201,0.050532,96.479585,135.272491,166.850843,1742,0.022281,-414.85135,144.87567,...,0.799416,4.034758,4.903068,2.081618,-1.771518,-1.294276,2.859949,5.483337,4.025387,2.403556
4,type-00,0.814269,0.063564,1708.851728,4089.351452,2992.052946,6213,0.00704,-392.15018,103.45956,...,12.531859,12.385639,11.274233,7.991354,5.295948,6.008972,6.626599,5.181288,3.607966,3.557192


In [151]:
# 分离feature和labels
GROUP_2_LABEL = {
    "type-00": 0,
    "type-01": 1,
    "type-10": 2,
    "type-11": 3,
}

labels = []
for i in df["group"]:
    labels.append(GROUP_2_LABEL[i])
    
# features
x = df.iloc[:,1:]
# labels
y = np.array(labels)
    
print(x.shape)
print(y.shape)

(920, 27)
(920,)


In [165]:
# 拆分训练接和测试集, 测试集占30%

from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(x,y,test_size=0.3)

print(x_train.shape)
print(x_test.shape)

(644, 27)
(276, 27)


## KNN模型

In [169]:
from sklearn import neighbors
from sklearn.metrics import accuracy_score

def KNN():
    # 创建模型
    classifier=neighbors.KNeighborsClassifier()
    # 训练模型
    classifier.fit(x_train,y_train)
    # 预测
    predictions=classifier.predict(x_test)
    # 评估准确率
    accuracy = accuracy_score(y_test, predictions)
    print(accuracy)

In [170]:
KNN()

0.5036231884057971


## 决策树模型

In [171]:
from sklearn import tree
from sklearn.metrics import accuracy_score

def DecisionTree():
    classifier=tree.DecisionTreeClassifier()
    classifier.fit(x_train,y_train)
    predictions=classifier.predict(x_test)
    accuracy = accuracy_score(y_test, predictions)
    print(accuracy)

In [172]:
DecisionTree()

0.5072463768115942


# 随机森林

In [177]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

def RandomForest():
    classifier=RandomForestClassifier()
    classifier.fit(x_train,y_train)
    predictions=classifier.predict(x_test)
    accuracy = accuracy_score(y_test, predictions)
    print(accuracy)

In [178]:
RandomForest()

0.6086956521739131
