In [1]:
import os

import librosa
import numpy as np
import pandas as pd
from tqdm.notebook import tqdm

# Register tqdm's pandas integration so that `progress_apply` (used further
# down for feature extraction) is available on Series/DataFrame objects.
tqdm.pandas()


In [9]:
def get_mfcc(wav_file_path):
    """Return the MFCC matrix of an audio file as a NumPy array.

    The file is read with ``librosa.load(..., offset=0, duration=30)``, so at
    most the first 30 seconds are analysed. No sample rate is passed, so
    librosa's default applies.

    Args:
        wav_file_path: Path of the audio file to load.

    Returns:
        ``np.ndarray`` holding the output of ``librosa.feature.mfcc`` computed
        with that function's default settings.
    """
    signal, sample_rate = librosa.load(wav_file_path, offset=0, duration=30)
    return np.array(librosa.feature.mfcc(y=signal, sr=sample_rate))


def get_melspectrogram(wav_file_path):
    """Return the mel spectrogram of an audio file as a NumPy array.

    The file is read with ``librosa.load(..., offset=0, duration=30)``, so at
    most the first 30 seconds are analysed. No sample rate is passed, so
    librosa's default applies.

    Args:
        wav_file_path: Path of the audio file to load.

    Returns:
        ``np.ndarray`` holding the output of
        ``librosa.feature.melspectrogram`` computed with that function's
        default settings.
    """
    signal, sample_rate = librosa.load(wav_file_path, offset=0, duration=30)
    return np.array(
        librosa.feature.melspectrogram(y=signal, sr=sample_rate))


def get_chroma_vector(wav_file_path):
    """Return the STFT chromagram of an audio file as a NumPy array.

    The file is read with a plain ``librosa.load(path)`` call: neither
    ``offset`` nor ``duration`` is passed, so librosa's defaults apply.

    NOTE(review): ``get_mfcc`` and ``get_melspectrogram`` pass
    ``duration=30`` while this function does not, so the features may cover
    different spans of the recording -- confirm this is intended.

    Args:
        wav_file_path: Path of the audio file to load.

    Returns:
        ``np.ndarray`` holding the output of ``librosa.feature.chroma_stft``
        computed with that function's default settings.
    """
    signal, sample_rate = librosa.load(wav_file_path)
    return np.array(librosa.feature.chroma_stft(y=signal, sr=sample_rate))


def get_tonnetz(wav_file_path):
    """Return the tonnetz (tonal centroid) features of an audio file.

    The file is read with a plain ``librosa.load(path)`` call: neither
    ``offset`` nor ``duration`` is passed, so librosa's defaults apply.

    NOTE(review): ``get_mfcc`` and ``get_melspectrogram`` pass
    ``duration=30`` while this function does not, so the features may cover
    different spans of the recording -- confirm this is intended.

    Args:
        wav_file_path: Path of the audio file to load.

    Returns:
        ``np.ndarray`` holding the output of ``librosa.feature.tonnetz``
        computed with that function's default settings.
    """
    signal, sample_rate = librosa.load(wav_file_path)
    return np.array(librosa.feature.tonnetz(y=signal, sr=sample_rate))


def _summarize_rows(feature_matrix):
    """Reduce a 2-D array along axis 1 and return ``[mean | min | max]``.

    Each of the three statistics yields one value per row of
    ``feature_matrix``; the three vectors are concatenated in that order.
    """
    return np.concatenate((
        feature_matrix.mean(axis=1),
        feature_matrix.min(axis=1),
        feature_matrix.max(axis=1),
    ))


def get_feature(file_path):
    """Build one flat feature vector for an audio file.

    Four feature matrices are extracted from the file (chroma, mel
    spectrogram, MFCC, tonnetz). Each one is reduced along axis 1 to its
    per-row mean, min and max, and the resulting blocks are concatenated.

    The layout of the returned vector is::

        [chroma: mean|min|max][mel: mean|min|max]
        [mfcc: mean|min|max][tonnetz: mean|min|max]

    Args:
        file_path: Path of the audio file, forwarded unchanged to each
            extractor.

    Returns:
        1-D ``np.ndarray`` with the concatenated summary statistics.
    """
    # The tuple order defines the block order of the output vector; keep it
    # stable, since consumers of the vector depend on this layout.
    extractors = (
        get_chroma_vector,
        get_melspectrogram,
        get_mfcc,
        get_tonnetz,
    )
    return np.concatenate(
        [_summarize_rows(extract(file_path)) for extract in extractors])

In [10]:
# Root folder; the loop below expects the layout EVAL_DIR/<class_name>/<file>.
EVAL_DIR = "evaluation"

# Collect (audio_path, class_name) pairs, one per file found under each
# class folder.
data = []
# os.listdir returns entries in arbitrary order; sorting makes the row order
# of the resulting table reproducible across machines and runs.
for class_name in sorted(os.listdir(EVAL_DIR)):
    class_dir = os.path.join(EVAL_DIR, class_name)
    if not os.path.isdir(class_dir):
        # A stray file next to the class folders would otherwise make the
        # inner os.listdir call raise NotADirectoryError.
        continue
    for file in sorted(os.listdir(class_dir)):
        data.append((os.path.join(class_dir, file), class_name))


In [11]:
# Make `progress_apply` available on pandas objects.
tqdm.pandas()

# One row per collected (path, class folder name) pair.
eval_df = pd.DataFrame(data, columns=['audio_path', 'source_id'])

# Extract the feature vector for every recording, with a progress bar.
eval_df['audio_feature'] = eval_df['audio_path'].progress_apply(get_feature)

  0%|          | 0/15 [00:00<?, ?it/s]

In [5]:
# Show the evaluation table built above (audio_path, source_id, audio_feature).
eval_df

Unnamed: 0,audio_path,source_id,audio_feature
0,evaluation\maria\Record_0.wav,maria,"[0.22580918669700623, 0.23982486128807068, 0.1..."
1,evaluation\maria\Record_1.wav,maria,"[0.2748687267303467, 0.2674121558666229, 0.265..."
2,evaluation\maria\Record_2.wav,maria,"[0.21786867082118988, 0.25627416372299194, 0.3..."
3,evaluation\maria\Record_3.wav,maria,"[0.3032059073448181, 0.2491905242204666, 0.236..."
4,evaluation\maria\Record_4.wav,maria,"[0.17937365174293518, 0.1685197949409485, 0.26..."
5,evaluation\me\Record_0.wav,me,"[0.5005908012390137, 0.41948315501213074, 0.35..."
6,evaluation\me\Record_1.wav,me,"[0.3570057451725006, 0.391750305891037, 0.2994..."
7,evaluation\me\Record_2.wav,me,"[0.49438321590423584, 0.5340457558631897, 0.36..."
8,evaluation\me\Record_3.wav,me,"[0.333279013633728, 0.3944181501865387, 0.3920..."
9,evaluation\me\Record_4.wav,me,"[0.3709910213947296, 0.3542977273464203, 0.348..."
