In [2]:
import librosa 
import numpy as np
import pandas as pd

def get_features(audio_file):
    """
    Extract summary features from an audio file using Librosa

    The file is loaded at its native sampling rate, the chroma_stft and
    chroma_cqt feature matrices are computed and stacked row-wise, and each
    row of the stacked matrix is reduced to its minimum, maximum and mean.

    Args:
        audio_file (str): audio file path to load

    Returns:
        numpy.ndarray: 1-D array laid out as
            [min_0, max_0, mean_0, min_1, max_1, mean_1, ...], with the
            chroma_stft rows first and the chroma_cqt rows after them
    """
    # sr=None asks librosa to keep the file's own sampling rate.
    y, sr = librosa.load(audio_file, sr=None)

    # The signal and sampling rate are passed by keyword: librosa >= 0.10
    # rejects them as positional arguments, and keywords also work on older
    # releases. (An MFCC matrix used to be computed here as well, but it never
    # contributed to the returned features, so that call is not made.)
    chroma_stft = librosa.feature.chroma_stft(y=y, sr=sr)
    chroma_cqt = librosa.feature.chroma_cqt(y=y, sr=sr)

    total = np.concatenate((chroma_stft, chroma_cqt), axis=0)

    # One (min, max, mean) triple per row, computed with vectorised
    # reductions instead of a Python callback per row.
    stats = np.stack(
        (total.min(axis=1), total.max(axis=1), total.mean(axis=1)),
        axis=1,
    )
    return stats.flatten()


In [3]:
import sys
# Add the parent directory to the module search path.
sys.path.append("..")

# CSV read by get_training_dataframe(); the slice_file_name, fold and classID
# columns are taken from it.
metadata_path = "../data/raw/metadata/UrbanSound8K.csv"
# Highest fold number kept: rows with fold <= folds are selected.
folds = 1
# Root directory of the audio clips; paths are built as
# <audio_files_path>/fold<fold>/<slice_file_name>.
audio_files_path = "../data/raw/audio"

def get_training_dataframe(extract_features=False):
    """
    Select the training rows from the metadata CSV and, optionally, turn
    them into the training dataset.

    Rows of the CSV at ``metadata_path`` whose ``fold`` is <= ``folds`` are
    kept.

    Args:
        extract_features (bool): when False (default) only the selected
            metadata rows are returned and no audio is read. When True,
            get_features() is run on every selected clip and the result is
            returned as a dataframe.

    Returns:
        numpy.ndarray: with ``extract_features=False``, one row per clip
            holding [slice_file_name, fold, classID]
        Pandas Dataframe: with ``extract_features=True``, the training
            dataset with columns [class, f_0, f_1, ...]
    """
    data = pd.read_csv(metadata_path)
    training_data = data[data["fold"] <= folds]
    values = training_data[["slice_file_name", "fold", "classID"]].values

    # Default path: hand back the raw rows, which is what the cells that
    # index into the result as (slice_file_name, fold, classID) rely on.
    if not extract_features:
        return values

    class_ids = []
    feature_rows = []
    for file_name, fold, class_id in values:
        audio_path = f"{audio_files_path}/fold{fold}/{file_name}"
        class_ids.append(int(class_id))
        feature_rows.append(get_features(audio_path))

    # With no selected rows there are no feature columns to name.
    n_features = len(feature_rows[0]) if feature_rows else 0
    columns = [f"f_{i}" for i in range(n_features)]

    dataset = pd.DataFrame(feature_rows, columns=columns)
    # The label is inserted as its own column so it stays an integer instead
    # of being cast to the features' float dtype.
    dataset.insert(0, "class", class_ids)
    return dataset
    

In [4]:
# Metadata rows selected for training; later cells index each row as
# (slice_file_name, fold, classID).
values = get_training_dataframe()

In [28]:
def m(x):
    """Build one training row: the class id followed by the audio features.

    Args:
        x: indexable metadata row whose items 0, 1 and 2 are the slice file
            name, the fold and the class id.

    Returns:
        The array returned by get_features() for the clip, with
        int(class id) inserted at position 0.
    """
    file_name = x[0]
    fold = x[1]
    audio_path = f"{audio_files_path}/fold{fold}/{file_name}"
    features = get_features(audio_path)
    label = int(x[2])
    return np.insert(features, 0, label)

# Feature rows for the first ten training clips. Every clip is processed
# once (rather than the first clip ten times), and slicing keeps this valid
# when fewer than ten rows were selected.
applied = [m(row) for row in values[:10]]


[array([3.0000000e+00, 5.4996501e-04, 1.0000000e+00, 5.2156579e-01,
        2.4378455e-04, 1.0000000e+00, 4.6643627e-01, 2.2325404e-04,
        1.0000000e+00, 4.4189936e-01, 2.4095856e-04, 1.0000000e+00,
        4.3655744e-01, 4.1877734e-04, 1.0000000e+00, 4.4139293e-01,
        1.6008995e-03, 1.0000000e+00, 4.5985076e-01, 6.7915488e-04,
        1.0000000e+00, 5.0266671e-01, 1.5713300e-03, 1.0000000e+00,
        4.9499431e-01, 1.5630984e-03, 1.0000000e+00, 5.2057689e-01,
        2.6591271e-03, 1.0000000e+00, 5.5782109e-01, 2.2947788e-03,
        1.0000000e+00, 5.6999159e-01, 8.1672042e-04, 1.0000000e+00,
        5.9048802e-01, 4.1286558e-02, 1.0000000e+00, 4.8811421e-01,
        1.8898871e-02, 1.0000000e+00, 4.7328374e-01, 2.3356535e-02,
        1.0000000e+00, 5.4211581e-01, 3.2247614e-02, 1.0000000e+00,
        5.2966177e-01, 3.4832668e-02, 1.0000000e+00, 4.7061878e-01,
        4.3823812e-02, 9.2045820e-01, 4.6379104e-01, 5.6668647e-02,
        1.0000000e+00, 6.4959824e-01, 4.8702613e

In [19]:
# Number of metadata rows selected for training.
len(values)

873