In [10]:
import os

import librosa
import numpy as np
import pandas as pd
from tqdm.notebook import tqdm

# Register tqdm's pandas integration so that `progress_apply` (used further
# down when extracting features) is available on Series/DataFrame objects.
tqdm.pandas()


In [11]:
def get_mfcc(wav_file_path, offset=0, duration=30):
    """Load an audio file and return its MFCC matrix.

    Args:
        wav_file_path: Path of the audio file; passed to ``librosa.load``.
        offset: Start position forwarded to ``librosa.load`` (seconds, per the
            librosa documentation). Defaults to 0, the value that used to be
            hard-coded here.
        duration: Amount of audio to read, forwarded to ``librosa.load``
            (seconds; ``None`` reads to the end of the file). Defaults to 30,
            the value that used to be hard-coded here.

    Returns:
        numpy.ndarray: The result of ``librosa.feature.mfcc`` for the loaded
        signal, computed at the sample rate reported by ``librosa.load``.
    """
    y, sr = librosa.load(wav_file_path, offset=offset, duration=duration)
    # asarray avoids a needless copy when librosa already returns an ndarray.
    return np.asarray(librosa.feature.mfcc(y=y, sr=sr))


def get_melspectrogram(wav_file_path, offset=0, duration=30):
    """Load an audio file and return its mel spectrogram.

    Args:
        wav_file_path: Path of the audio file; passed to ``librosa.load``.
        offset: Start position forwarded to ``librosa.load`` (seconds, per the
            librosa documentation). Defaults to 0, the value that used to be
            hard-coded here.
        duration: Amount of audio to read, forwarded to ``librosa.load``
            (seconds; ``None`` reads to the end of the file). Defaults to 30,
            the value that used to be hard-coded here.

    Returns:
        numpy.ndarray: The result of ``librosa.feature.melspectrogram`` for
        the loaded signal, computed at the sample rate reported by
        ``librosa.load``.
    """
    y, sr = librosa.load(wav_file_path, offset=offset, duration=duration)
    # asarray avoids a needless copy when librosa already returns an ndarray.
    return np.asarray(librosa.feature.melspectrogram(y=y, sr=sr))


def get_chroma_vector(wav_file_path, offset=0.0, duration=None):
    """Load an audio file and return its chromagram.

    NOTE(review): with the default arguments this reads the entire file,
    whereas ``get_mfcc`` and ``get_melspectrogram`` read only the first
    30 seconds. The defaults keep the existing behaviour; pass
    ``duration=30`` to analyse the same window as those functions. Confirm
    which behaviour is intended before changing the default.

    Args:
        wav_file_path: Path of the audio file; passed to ``librosa.load``.
        offset: Start position forwarded to ``librosa.load`` (seconds, per the
            librosa documentation). 0.0 is librosa's own default.
        duration: Amount of audio to read, forwarded to ``librosa.load``
            (seconds). ``None`` is librosa's own default and reads to the end
            of the file.

    Returns:
        numpy.ndarray: The result of ``librosa.feature.chroma_stft`` for the
        loaded signal, computed at the sample rate reported by
        ``librosa.load``.
    """
    y, sr = librosa.load(wav_file_path, offset=offset, duration=duration)
    # asarray avoids a needless copy when librosa already returns an ndarray.
    return np.asarray(librosa.feature.chroma_stft(y=y, sr=sr))


def get_tonnetz(wav_file_path, offset=0.0, duration=None):
    """Load an audio file and return its tonnetz (tonal centroid) features.

    NOTE(review): with the default arguments this reads the entire file,
    whereas ``get_mfcc`` and ``get_melspectrogram`` read only the first
    30 seconds. The defaults keep the existing behaviour; pass
    ``duration=30`` to analyse the same window as those functions. Confirm
    which behaviour is intended before changing the default.

    Args:
        wav_file_path: Path of the audio file; passed to ``librosa.load``.
        offset: Start position forwarded to ``librosa.load`` (seconds, per the
            librosa documentation). 0.0 is librosa's own default.
        duration: Amount of audio to read, forwarded to ``librosa.load``
            (seconds). ``None`` is librosa's own default and reads to the end
            of the file.

    Returns:
        numpy.ndarray: The result of ``librosa.feature.tonnetz`` for the
        loaded signal, computed at the sample rate reported by
        ``librosa.load``.
    """
    y, sr = librosa.load(wav_file_path, offset=offset, duration=duration)
    # asarray avoids a needless copy when librosa already returns an ndarray.
    return np.asarray(librosa.feature.tonnetz(y=y, sr=sr))


def _summary_stats(matrix):
    """Join the axis-1 mean, min and max of a 2-D array into one 1-D vector."""
    return np.concatenate(
        (matrix.mean(axis=1), matrix.min(axis=1), matrix.max(axis=1)))


def get_feature(file_path):
    """Build one flat feature vector for an audio file.

    Four feature matrices are extracted from the file (chroma, mel
    spectrogram, MFCC and tonnetz). Each matrix is reduced along axis 1 to its
    mean, min and max, and the resulting blocks are concatenated in this
    order::

        chroma (mean, min, max) | melspectrogram (mean, min, max) |
        mfcc (mean, min, max)   | tonnetz (mean, min, max)

    Args:
        file_path: Path of the audio file, handed unchanged to each extractor.

    Returns:
        numpy.ndarray: 1-D array holding the concatenated statistics.
    """
    # The tuple order defines the layout of the output vector; keep it stable
    # so previously computed features remain comparable.
    extractors = (
        get_chroma_vector,
        get_melspectrogram,
        get_mfcc,
        get_tonnetz,
    )
    return np.concatenate(
        [_summary_stats(extract(file_path)) for extract in extractors])

In [12]:
# Scratch check: is the first entry of the working directory a folder?
# (Indexing [0] raises IndexError if the directory is empty.)
first_entry = os.listdir()[0]
os.path.isdir(first_entry)

False

In [13]:
# Collect (audio_path, class_name) pairs: every sub-directory of the working
# directory is treated as one class, and each audio file inside it as a sample.
AUDIO_EXTENSIONS = ('.wav',)  # extend if other formats should be included

data = []
# sorted(): os.listdir() returns entries in arbitrary order, which would make
# the row order of the resulting table differ between runs/machines.
for class_name in sorted(os.listdir()):
    # Skip plain files and hidden folders (e.g. .ipynb_checkpoints, .git),
    # which would otherwise be picked up as bogus classes.
    if class_name.startswith('.') or not os.path.isdir(class_name):
        continue
    for file_name in sorted(os.listdir(class_name)):
        # Only audio files are handed on to the feature extractors.
        if file_name.lower().endswith(AUDIO_EXTENSIONS):
            data.append((os.path.join(class_name, file_name), class_name))


In [16]:
# One row per audio file; `source_id` is the name of the folder it came from.
eval_df = pd.DataFrame(data, columns=['audio_path', 'source_id'])

# `progress_apply` is available because tqdm.pandas() was already called in the
# imports cell. get_feature is passed directly; wrapping it in a lambda adds
# nothing.
eval_df['audio_feature'] = eval_df['audio_path'].progress_apply(get_feature)

  0%|          | 0/20 [00:00<?, ?it/s]

In [17]:
# Inspect the result: columns are audio_path, source_id and audio_feature.
eval_df

Unnamed: 0,audio_path,source_id,audio_feature
0,maria\Record_0.wav,maria,"[0.22580918669700623, 0.23982486128807068, 0.1..."
1,maria\Record_1.wav,maria,"[0.2748687267303467, 0.2674121558666229, 0.265..."
2,maria\Record_2.wav,maria,"[0.21786867082118988, 0.25627416372299194, 0.3..."
3,maria\Record_3.wav,maria,"[0.3032059073448181, 0.2491905242204666, 0.236..."
4,maria\Record_4.wav,maria,"[0.17937365174293518, 0.1685197949409485, 0.26..."
5,maria\Record_5.wav,maria,"[0.3789590895175934, 0.22537343204021454, 0.14..."
6,maria\Record_6.wav,maria,"[0.20687556266784668, 0.17880384624004364, 0.1..."
7,maria\Record_7.wav,maria,"[0.16439421474933624, 0.16855499148368835, 0.1..."
8,maria\Record_8.wav,maria,"[0.140591099858284, 0.14607807993888855, 0.191..."
9,maria\Record_9.wav,maria,"[0.20519095659255981, 0.21446122229099274, 0.2..."


In [18]:
# Persist the feature table as a Parquet file in the working directory.
output_path = 'eval_df.pqt'
eval_df.to_parquet(output_path)