In [2]:
### должны быть установлены перечисленные библиотеки, можно через pip install  ...
### в папке с ноутбуком должна лежать папка data_v_7_stc и всё её содержимое

import glob
import os
import librosa
import numpy as np
import pandas as pd
from tqdm import tqdm
%matplotlib inline

In [3]:
def extract_feature(file_name):
    """Compute time-averaged spectral descriptors for one audio file.

    The file is decoded with ``librosa.load`` using its default arguments.
    Five feature matrices are computed and each is averaged over its frame
    axis, giving one vector per feature family.

    Args:
        file_name: Path to an audio file readable by ``librosa.load``.

    Returns:
        Tuple ``(mfccs, chroma, mel, contrast, tonnetz)`` of averaged feature
        vectors. ``mfccs`` has 40 entries (``n_mfcc=40``); the sizes of the
        others follow librosa's defaults. ``parse_audio_files`` expects the
        five vectors to concatenate to 193 values.
    """
    X, sample_rate = librosa.load(file_name)
    # The magnitude spectrogram is shared by the chroma and contrast features.
    stft = np.abs(librosa.stft(X))
    mfccs = np.mean(librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=40).T, axis=0)
    chroma = np.mean(librosa.feature.chroma_stft(S=stft, sr=sample_rate).T, axis=0)
    # The signal must be passed as ``y=``: recent librosa releases made the
    # audio argument keyword-only, so the positional form raises TypeError.
    mel = np.mean(librosa.feature.melspectrogram(y=X, sr=sample_rate).T, axis=0)
    contrast = np.mean(librosa.feature.spectral_contrast(S=stft, sr=sample_rate).T, axis=0)
    # Tonnetz is computed on the harmonic component of the signal only.
    harmonic = librosa.effects.harmonic(X)
    tonnetz = np.mean(librosa.feature.tonnetz(y=harmonic, sr=sample_rate).T, axis=0)
    return mfccs, chroma, mel, contrast, tonnetz

def parse_audio_files(parent_dir, audio_folder, file_ext="*.wav", train=False):
    """Build the feature matrix for every audio file in a folder.

    Args:
        parent_dir: Root directory of the dataset.
        audio_folder: Sub-directory of ``parent_dir`` that holds the audio files.
        file_ext: Glob pattern selecting the files to process.
        train: Unused; kept so existing callers keep working.

    Returns:
        A float64 ``numpy.ndarray`` of shape ``(n_files, 193)``, one row per
        file in the order returned by ``glob.glob``. An empty ``(0, 193)``
        array is returned when no file matches.

    Raises:
        ValueError: If the extracted feature vectors are not 193 values long.
    """
    n_features = 193
    pattern = os.path.join(parent_dir, audio_folder, file_ext)

    # Collect the rows first and stack them once: calling np.vstack inside the
    # loop recopies the whole matrix for every file (quadratic in file count).
    # The glob order is deliberately left untouched so that rows stay aligned
    # with other code that enumerates the same pattern.
    rows = [np.hstack(extract_feature(file_name)) for file_name in glob.glob(pattern)]
    if not rows:
        return np.empty((0, n_features))

    features = np.vstack(rows)
    if features.shape[1] != n_features:
        raise ValueError(
            "expected %d features per file, got %d" % (n_features, features.shape[1])
        )
    # The original accumulator was a float64 array; keep that dtype.
    return features.astype(np.float64, copy=False)

def get_meta_labels(parent_dir, audio_folder, file_ext="*.wav"):
    """Look up the class label of every audio file in a folder.

    Args:
        parent_dir: Root directory of the dataset.
        audio_folder: Sub-directory of ``parent_dir`` that holds the audio files.
        file_ext: Glob pattern selecting the files to look up.

    Returns:
        List of labels, one per matched file, in the order returned by
        ``glob.glob``.

    Raises:
        KeyError: If a matched file has no entry in the metadata table.
    """
    # NOTE: the metadata path is fixed and does not follow ``parent_dir``.
    meta = pd.read_table("data_v_7_stc/meta/meta.txt", header=None)
    # Column 0 is used as the file-name key and column 4 as the label.
    train_label_dict = meta.set_index(0)[4].to_dict()

    pattern = os.path.join(parent_dir, audio_folder, file_ext)
    # os.path.basename extracts the file name on any platform and for any
    # directory depth; splitting on '\\' and taking index 2 only worked with
    # Windows separators and a root exactly one directory deep.
    return [
        train_label_dict[os.path.basename(file_name)]
        for file_name in tqdm(glob.glob(pattern))
    ]

In [8]:
# Training feature matrix: one row per clip found in data_v_7_stc/audio.
X = parse_audio_files(parent_dir='data_v_7_stc', audio_folder='audio')

In [5]:
# Wrap the feature matrix in a DataFrame (default integer column labels).
X = pd.DataFrame(data=X)

In [6]:
# Text labels of the training clips, in the same glob order as the feature rows.
y = get_meta_labels(parent_dir='data_v_7_stc', audio_folder='audio')

# Class name -> integer id; ids follow the order in which the names are listed.
y_map = {
    class_name: class_id
    for class_id, class_name in enumerate(
        ('background', 'bags', 'door', 'keyboard', 'knocking_door', 'ring', 'speech', 'tool')
    )
}
# Integer-encoded targets for the classifier.
y_classes = list(map(y_map.__getitem__, y))

100%|████████████████████████████████████████████████████████████████████████| 11307/11307 [00:00<00:00, 809521.29it/s]


### Модель

In [21]:
### требуется установить lightgbm, можно через pip install lightgbm

from lightgbm import LGBMClassifier

In [44]:
# Gradient-boosted classifier with library-default hyperparameters.
lgbm_cl = LGBMClassifier()
# The bare expression stays last so the notebook still echoes the fitted estimator.
lgbm_cl.fit(X=X, y=y_classes)

LGBMClassifier(boosting_type='gbdt', class_weight=None, colsample_bytree=1.0,
        learning_rate=0.1, max_depth=-1, min_child_samples=20,
        min_child_weight=0.001, min_split_gain=0.0, n_estimators=100,
        n_jobs=-1, num_leaves=31, objective=None, random_state=None,
        reg_alpha=0.0, reg_lambda=0.0, silent=True, subsample=1.0,
        subsample_for_bin=200000, subsample_freq=1)

In [36]:
# Integer id -> class name, used to decode the classifier's predictions.
y_reverse_map = dict(enumerate(
    ('background', 'bags', 'door', 'keyboard', 'knocking_door', 'ring', 'speech', 'tool')
))

## Получаем признаки для тестового множества

In [42]:
# Feature matrix for the unlabeled clips found in data_v_7_stc/test.
X_test = parse_audio_files(parent_dir='data_v_7_stc', audio_folder='test')

100%|████████████████████████████████████████████████████████████████████████████████| 610/610 [08:15<00:00,  1.23it/s]


In [45]:
# Run the model once and derive both outputs from the same probability matrix,
# instead of a second full inference pass through predict().
test_proba_matrix = lgbm_cl.predict_proba(X_test)
# Columns of predict_proba follow lgbm_cl.classes_, so the arg-max column index
# maps back to the encoded class id, which is then decoded to its name.
test_classes = [
    y_reverse_map[label]
    for label in lgbm_cl.classes_[test_proba_matrix.argmax(axis=1)]
]
# Confidence of each prediction is the probability of the winning class.
test_probas = test_proba_matrix.max(axis=1)

  if diff:


## Записываем ответ

In [48]:
# File names must be listed with the same glob pattern (and therefore the same
# order) that produced the rows of X_test, so each prediction lines up with its
# file. os.path.basename is used instead of splitting on '\\', which only
# worked with Windows path separators.
test_files_names = [
    os.path.basename(file_name)
    for file_name in glob.glob(os.path.join('data_v_7_stc', 'test', "*.wav"))
]
answer = pd.DataFrame(test_files_names, columns=['file_names'])
answer['probs'] = test_probas
answer['class'] = test_classes
# Tab-separated, no header and no index: file name, probability, class.
answer.to_csv('result.txt', header=False, sep='\t', index=False)